Esempio n. 1
0
def first_difference(left: Worksheet,
                     right: Worksheet) -> Optional[Tuple[Diff, int]]:
    """
    Walk two sheets in lockstep and report the first row that differs.

    Rows are pulled pairwise from both sheets. When one sheet runs out of
    rows before the other, that counts as a difference and ``None`` stands
    in for the missing row.

    :param left: The left-hand sheet
    :param right: The right-hand sheet
    :return: ``((left_row, right_row), row_number)`` for the first
        mismatch, with ``row_number`` counted from 1, or ``None`` when
        both sheets are exhausted without a mismatch
    """
    left_iter = left.iter_rows()
    right_iter = right.iter_rows()
    row_number = 1

    while True:
        lhs = next(left_iter, None)
        rhs = next(right_iter, None)

        # Both sheets ended together: nothing differed.
        if lhs is None and rhs is None:
            return None

        # A missing row on either side is itself a difference, so the
        # row comparison is only attempted when both rows exist.
        one_side_exhausted = lhs is None or rhs is None
        if one_side_exhausted or not rows_match(lhs, rhs):
            return (lhs, rhs), row_number

        row_number += 1
Esempio n. 2
0
def cell_fill_down(
    *,
    ws: Worksheet,
    min_row: int,
    max_row: int,
    min_col: int,
    max_col: int
) -> Worksheet:
    """
    Fill empty cells with the value from the cell directly above.

    Every column in the range is filled from its own column. A cell is
    treated as empty when its value is None, 'None' or ''. Nothing is
    changed when columns min_col..max_col hold no truthy value anywhere
    in the worksheet.

    Parameters
    ----------
    ws : Worksheet
        The worksheet in which to fill empty cells.
    min_row : int
        The first row in the range; filling starts one row below it.
    max_row : int
        The last row in the range to change.
    min_col : int
        The first column in the range to change.
    max_col : int
        The last column in the range to change.

    Returns
    -------
    ws : Worksheet
        The worksheet in which cells were modified.

    Example
    -------
    >>> for column in fill_down_columns:
    >>>     ws = ds.cell_fill_down(
    >>>         ws=ws,
    >>>         min_row=row_below_labels,
    >>>         max_row=ws.max_row,
    >>>         min_col=column_names_numbers[column],
    >>>         max_col=column_names_numbers[column]
    >>>     )
    """
    has_content = any(
        cell.value
        for row in ws.iter_rows(min_col=min_col, max_col=max_col)
        for cell in row
    )
    if not has_content:
        return ws
    for row in ws.iter_rows(
        min_row=min_row + 1,  # start one row below the 'start' row
        max_row=max_row,
        min_col=min_col,
        max_col=max_col
    ):
        for cell in row:
            if cell.value in [None, 'None', '']:
                # Read from the same column as the cell being filled, so a
                # multi-column range is not filled from min_col alone.
                above = ws.cell(row=cell.row - 1, column=cell.column)
                cell.value = above.value
    return ws
Esempio n. 3
0
    def parse_sheet(self, sheet: Worksheet) -> list:
        """
        Scan a sheet row by row and close off horizontal runs of cells.

        Within each row, the first non-empty cell that is not already
        inside a table opens a run. The run is handed to
        ``self.finish_table`` when an empty, non-merged cell is reached or
        when a non-empty cell sits in the sheet's last column.

        NOTE(review): the returned list is never appended to in this
        method, so it is always empty here; presumably ``finish_table``
        records results elsewhere -- confirm.

        :param sheet: worksheet to scan
        :return: the (empty) ``tables`` list
        """
        tables = []
        self.fill_coordinates(sheet)
        for cells in sheet.iter_rows():
            # Cell that opened the current run, if any; reset per row.
            run_start = None

            for current in cells:
                if not current.value:
                    # Merged placeholders do not terminate a run.
                    if not isinstance(current, MergedCell) and run_start:
                        self.finish_table(sheet, current, run_start)
                        run_start = None
                elif self.in_table(current):
                    continue
                elif not run_start:
                    run_start = current
                elif current.column == sheet.max_column:
                    # Last column reached with content: close the run here.
                    self.finish_table(sheet, current, run_start)
                    run_start = None

        return tables
Esempio n. 4
0
def sheet_to_actions(sheet: Worksheet):
    """
    Collect the actions declared in a worksheet, keyed by path.

    The first row is treated as a header and skipped. In every other row
    the first column is the path, the fifth the action and the sixth the
    optional target; the remaining columns are ignored. Rows with an
    action become ``Action`` objects. For rows without one, a warning is
    logged when none of the path's parents has an action either.

    :param sheet: worksheet to read
    :return: dict mapping each path that has an action to its ``Action``
    :raises ValueError: if a row defines a target but no action
    """
    rows = sheet.iter_rows()
    # Skip the header; the default keeps an empty sheet from leaking
    # StopIteration to the caller.
    next(rows, None)

    # collect actions and paths
    actions = {}
    paths = []
    for row in rows:
        path, _, _, _, action, target, *_ = (cell.value for cell in row)

        path = Path(path)
        target = Path(target) if target else None

        if target and not action:
            raise ValueError(f'Target defined without action: {target}')

        if action:
            action_cls = Action(action, path, target, len(path.parents))
            LOG.debug(f'Found action: {action_cls}')
            actions[path] = action_cls
        else:
            paths.append(path)

    # check if all paths are addressed
    for path in paths:
        has_parent_action = any(p in actions for p in path.parents)

        if not has_parent_action:
            LOG.warning(f'{path} has no action')

    return actions
Esempio n. 5
0
 def _find_drill_sheet_name(self, sheet: Worksheet) -> str:
     for row in sheet.iter_rows(max_col=15):
         for cell in row:
             try:
                 if cell.value != None: return cell.value
             except AttributeError:  #MergedCells have no value attribute
                 pass
    def style(self, ws: Worksheet):
        """
        Apply column widths, header styles and data-cell formatting.

        The steps run in a fixed order because later ones deliberately
        override earlier ones (column A's width, and the style of A1).

        :param ws: worksheet to format in place
        """
        # Fixed width for every located column so each sheet can print
        #  onto a single A4 in landscape mode.
        for column_number in self._column_locations.values():
            # column_dimensions is keyed by column letter, not by index
            column_letter = get_column_letter(column_number)
            ws.column_dimensions[column_letter].width = (
                self.DATE_TYPE_COLUMN_WIDTH
            )

        # The first column acts like a header: own width and own style
        ws.column_dimensions["A"].width = self.FIRST_COLUMN_WIDTH
        for name_cell in first(ws.columns):
            name_cell.style = "40 % - Accent1"

        # Header row; A1 belongs to both the first column and this row,
        #  and intentionally ends up with the header-row style.
        for header_cell in first(ws.rows):
            header_cell.style = "Accent1"
            header_cell.alignment = Alignment(
                wrap_text=True, horizontal="center"
            )

        # Header row height (text wrap is set on the header cells above)
        ws.row_dimensions[1].height = self.HEADER_ROW_HEIGHT

        # Everything outside the header row and first column is data
        for data_row in ws.iter_rows(min_row=2, min_col=2):
            for data_cell in data_row:
                data_cell.alignment = Alignment(horizontal="center")
                data_cell.border = self.THIN_BORDER
Esempio n. 7
0
def list_nan_worksheet_rows(
    *,
    ws: Worksheet,
    min_row: int
) -> List[int]:
    """
    List the row numbers of rows whose cells are all NaN.

    A value counts as NaN when it is not equal to itself, so rows made
    only of ``None`` or empty strings are not reported.

    Parameters
    ----------
    ws : Worksheet
        A worksheet from a workbook.
    min_row : int
        Start row for iteration.

    Returns
    -------
    blank_rows : List[int]
        List of row numbers.

    Example
    -------
    >>> import datasense as ds
    >>> ws = wb[sheetname]
    >>> blank_rows = ds.list_nan_worksheet_rows(
    >>>     ws=ws,
    >>>     min_row=2
    >>> )
    """
    return [
        cells[0].row
        for cells in ws.iter_rows(min_row=min_row)
        if all(value != value for value in [cell.value for cell in cells])
    ]
Esempio n. 8
0
def parse_slot_types_worksheet(worksheet: Worksheet) -> List[SlotType]:
    """
    Group the slot values in a worksheet by slot type name.

    The 'Slot Type Name' and 'Slot Value' columns are located with
    ``get_header_indices``; every row below the header row contributes
    one value to the list kept for its slot type name.

    :param worksheet: worksheet holding the slot type table
    :return: one ``SlotType(name, values)`` per distinct name, in order of
        first appearance
    """
    logger.info(f"{worksheet.__dict__}")

    header_row, header_cols = get_header_indices(
        worksheet, ['Slot Type Name', 'Slot Value'])

    logger.info(f"{header_row=}, {header_cols=}")

    min_row = header_row + 1
    min_col = min(header_cols.values())
    max_col = max(header_cols.values())

    # iter_rows(min_col=...) yields tuples whose first element is column
    # min_col, so positions are relative to min_col rather than column 1.
    # Assumes header_cols holds 1-based column numbers, as implied by
    # their use as min_col/max_col.
    name_idx = header_cols['Slot Type Name'] - min_col
    value_idx = header_cols['Slot Value'] - min_col

    slot_type_dict = {}
    for row in worksheet.iter_rows(min_row=min_row,
                                   min_col=min_col,
                                   max_col=max_col):
        name = row[name_idx].value
        value = row[value_idx].value
        logger.info(f"{name=}, {value=}")
        slot_type_dict.setdefault(name, []).append(value)

    return [SlotType(name, values) for name, values in slot_type_dict.items()]
Esempio n. 9
0
def load_initial_schedules(ws: Worksheet, monitor_dict: dict):
    """
    Read the pre-assigned entries from a sheet and initialise each
    monitor's schedule with them.

    Rows are read from DATA_START_ROW_IDX until the first row whose first
    column is empty. Rows for which ``is_weekday`` is false are skipped.
    For the remaining rows, every non-empty monitor cell is converted to
    a role and stored in that monitor's ``schedule`` under the row's day.

    :param ws: worksheet
    :param monitor_dict: dict of monitors (key := name, item := Monitor)
    :return: tuple of
        the dict of monitor column indices in the sheet
        (key := name, item := column index) and
        the dict of days (key := row number, item := value of the first
        column, presumably a datetime)
    """
    monitor_column_dict = create_monitor_col_dict(ws, monitor_dict)
    num_of_monitors = len(monitor_dict)
    weekday_dict = {}
    holiday_col = find_col_idx_by_val(ws, HEADER_ROW_IDX, 'Holiday')
    for row_idx, row in enumerate(
            ws.iter_rows(min_row=DATA_START_ROW_IDX,
                         max_col=num_of_monitors + 2), DATA_START_ROW_IDX):
        day = row[0].value
        if not day:
            # First row without a day marks the end of the data.
            break
        if not is_weekday(day, ws.cell(row=row_idx, column=holiday_col)):
            continue
        weekday_dict[row_idx] = day
        # Monitor cells follow the day column, in monitor_dict order.
        for idx, monitor in enumerate(monitor_dict.values(), 1):
            if val := row[idx].value:
                role = convert_val_to_role(val)
                monitor.schedule[day] = role
    # The docstring has always promised these two dicts; return them.
    return monitor_column_dict, weekday_dict
Esempio n. 10
0
def parse_slots_worksheet(worksheet: Worksheet) -> Dict[str, List[Slot]]:
    """
    Read the slot definitions in a worksheet, grouped by intent name.

    The five expected columns are located with ``get_header_indices``;
    every row below the header row becomes one ``Slot``.

    :param worksheet: worksheet holding the slot table
    :return: dict mapping intent name -> list of its ``Slot`` objects in
        row order
    """
    header_row, header_cols = get_header_indices(
        worksheet, ['Intent Name', 'Slot Name', 'Required', 'Type', 'Prompt'])

    min_row = header_row + 1
    min_col = min(header_cols.values())
    max_col = max(header_cols.values())

    # iter_rows(min_col=...) yields tuples whose first element is column
    # min_col, so positions are relative to min_col rather than column 1.
    # Assumes header_cols holds 1-based column numbers, as implied by
    # their use as min_col/max_col.
    intent_idx = header_cols['Intent Name'] - min_col
    name_idx = header_cols['Slot Name'] - min_col
    required_idx = header_cols['Required'] - min_col
    type_idx = header_cols['Type'] - min_col
    prompt_idx = header_cols['Prompt'] - min_col

    slots = {}
    for row in worksheet.iter_rows(min_row=min_row,
                                   min_col=min_col,
                                   max_col=max_col):
        intent_name = row[intent_idx].value
        slots.setdefault(intent_name, []).append(
            Slot(name=row[name_idx].value,
                 type=row[type_idx].value,
                 prompt=row[prompt_idx].value,
                 required=row[required_idx].value))

    return slots
Esempio n. 11
0
 def import_sheet(cls, sheet: Worksheet, workbasket: WorkBasket, *args,
                  **kwargs):
     """
     Lazily import every row of the worksheet except the first (header).

     Each remaining row is wrapped as ``cls(row, *args, **kwargs)`` and the
     result of its ``import_row(workbasket)`` call is yielded.
     """
     remaining = sheet.iter_rows()
     # Drop the header row; an empty sheet simply yields nothing.
     next(remaining, None)
     for cells in remaining:
         yield cls(cells, *args, **kwargs).import_row(workbasket)
def read_data_pair(sheet: Worksheet) -> list:
    """
    Read the data pairs needed to draw a funnel chart from a worksheet.

    The first row (minus its first cell) holds the stage names and the
    second row (minus its first cell) holds the count reached at each
    stage.

    :param sheet: the worksheet being processed
    :return: a 2-D list of ``[label, count]`` items, where the label is
        f"{stage}{relative rate}%" and the relative rate is the count as a
        percentage of the previous stage, rounded to one decimal place
        (the first stage is always 100)
    """
    # Read every row verbatim
    table = [list(cells) for cells in sheet.iter_rows(values_only=True)]
    # Raw data: stage names
    stages = table[0][1:]
    # Raw data: count reached at each stage
    # NOTE(review): eval() executes whatever expression the cell text
    # contains; a workbook from an untrusted source can run arbitrary
    # code here. Consider a strict numeric parser instead.
    counts = [eval(str(raw)) for raw in table[1][1:]]

    # Relative rate of each stage versus the one before it; the first
    # stage has no predecessor and is fixed at 100.
    rates = [100]
    for previous, current in zip(counts, counts[1:]):
        rates.append(round((current / previous) * 100, 1))

    # Assemble the labels, then pair each label with its count
    labels = [
        f"{stages[position]}{rate}%" for position, rate in enumerate(rates)
    ]
    return [[label, count] for label, count in zip(labels, counts)]
Esempio n. 13
0
    def get_pcm_topic(self, sheet: Worksheet) -> str:
        """
        Return the first non-None cell value in the top-left 5x15 area.

        Rows 1-5 are scanned top to bottom, columns 1-15 left to right.

        :param sheet: worksheet to search
        :return: value of the first cell that is not None
        :raises Exception: if every inspected cell is None
        """
        found_values = (
            cell.value
            for row in sheet.iter_rows(min_col=1, max_col=15, max_row=5)
            for cell in row
            if cell.value is not None
        )
        for topic in found_values:
            return topic

        raise Exception('No PCM topic found in cells searched')
Esempio n. 14
0
def get_rating_rows(sheet: Worksheet,
                    max_row: int,
                    window=None) -> List[tuple]:
    """
    Return every row accepted by ``verify_row_contains_rating`` among the
    first ``max_row`` rows of the sheet.

    Arguments:
        sheet (Worksheet): worksheet to analyse
        max_row (int): maximum number of rows to process
        window: optional object passed through to ``window_updater`` and
            to ``verify_row_contains_rating``

    Return:
        List[tuple]: the value tuples of all rows that contain a rating
    """
    print('Verificando as linhas da planilha...')
    window_updater.update(window)

    rated_rows = []

    for index, row in enumerate(sheet.iter_rows(values_only=True)):
        # Rows past the limit can never be selected, so stop reading
        # instead of walking the rest of the sheet.
        if index >= max_row:
            break

        if verify_row_contains_rating(row, window=window):
            rated_rows.append(row)

    return rated_rows
Esempio n. 15
0
 def _iterate_worksheet_rows(
         self, worksheet: Worksheet) -> Iterator[Tuple[int, List[Any]]]:
     row_index = self.first_data_row_index
     min_row = self.first_data_row_index + 1 if self.first_data_row_index is not None else None
     max_row = self.last_data_row_index + 1 if self.last_data_row_index is not None else None
     for row in worksheet.iter_rows(min_row=min_row, max_row=max_row):
         yield row_index, [cell.value for cell in row]
         row_index += 1
Esempio n. 16
0
 def parseLines(self, sheet: Worksheet):
     """
     Parse every data row of the sheet whose first cell is not None.

     The first row is treated as a header and skipped; an empty sheet
     yields an empty list.

     :param sheet: worksheet to read
     :return: list of ``self.parseLine(row)`` results, in row order
     """
     rows = sheet.iter_rows()
     # Skip the header; the default keeps an empty sheet from leaking
     # StopIteration to the caller.
     next(rows, None)
     return [self.parseLine(row) for row in rows if row[0].value is not None]
Esempio n. 17
0
def copy_sheet_header(main_ws: Worksheet, dst_ws: Worksheet):
    """
    Copy the header area of ``main_ws`` into ``dst_ws``.

    The merged-cell ranges are taken over from the source sheet, then
    every cell in rows 1..NUM_ROW_HEADER is copied with
    ``dst_ws.copy_cell`` at the same row and column.

    :param main_ws: worksheet to copy the header from
    :param dst_ws: worksheet that receives the header
    """
    # NOTE(review): this assigns the source sheet's ranges object itself,
    # so both sheets appear to share it afterwards -- confirm that later
    # merges on one sheet are not meant to stay separate from the other.
    dst_ws.merged_cells.ranges = main_ws.merged_cells.ranges
    header_cells = (
        cell
        for row in main_ws.iter_rows(1, NUM_ROW_HEADER)
        for cell in row
    )
    for source_cell in header_cells:
        dst_ws.copy_cell(source_cell.row, source_cell.column, source_cell)
Esempio n. 18
0
def get_headers_from_sheet(sheet: Worksheet) -> Tuple[str]:
    """
    Return the cell contents of the first row as a tuple.

    Empty cells appear as None, for example:
    ("Contract nr.", "Contract Status", None, "Eigenaar")
    """
    first_row_only = list(
        sheet.iter_rows(min_row=1, max_row=1, values_only=True)
    )
    return first_row_only[0]
Esempio n. 19
0
def adjust_column_width(sheet: Worksheet):
    """
    Size each column to its longest value and wrap text in columns 1-2.

    A column's width becomes the length of its longest ``str(value)``
    plus 3 (an empty cell counts as the text "None").
    """
    for cells in sheet.columns:
        widest = max(len(str(cell.value)) for cell in cells)
        letter = cells[0].column_letter
        sheet.column_dimensions[letter].width = widest + 3

    leading_cells = (
        cell for row in sheet.iter_rows(max_col=2) for cell in row
    )
    for cell in leading_cells:
        cell.alignment = Alignment(wrapText=True)
Esempio n. 20
0
    def _finalize_styling(self, sheet: Worksheet):
        """
        Apply page setup, column widths and row heights to ``sheet``.

        The page is set to A4 with fit-to-page, left/right margins come
        from ``self.excel_config``, the two left and two right columns get
        fixed widths, and every row from row 3 on gets the configured row
        height.
        """
        sheet.page_setup.paperSize = Worksheet.PAPERSIZE_A4
        sheet.sheet_properties.pageSetUpPr.fitToPage = True
        margin = self.excel_config.MARGIN_LENGTH
        sheet.page_margins = PageMargins(left=margin, right=self.excel_config.MARGIN_LENGTH)

        # Narrow first column and wide second column on each side
        for column_pair in (self.__LEFT_COLUMNS, self.__RIGHT_COLUMNS):
            sheet.column_dimensions[column_pair[0]].width = 4
            sheet.column_dimensions[column_pair[1]].width = 42

        for cells in sheet.iter_rows(min_row=3):
            for cell in cells:
                sheet.row_dimensions[cell.row].height = self.excel_config.ROW_HEIGHT
Esempio n. 21
0
def _getText(worksheet: Worksheet) -> List[str]:
    """
    Converts the worksheet into a list of strings.

    Parameters:
        worksheet: The worksheet to be converted.
    """
    result: List[str] = []
    for row in worksheet.iter_rows(values_only=True):
        line = ", ".join([str(value) for value in row])
        result.append(line)
    return result
Esempio n. 22
0
def read_until_empty_row(ws: Worksheet) -> List[List[Any]]:
    """
    Collect the cell values of each row up to the first empty row.

    (Helpful because Excel spreadsheets are sometimes seen as having
    1048576 rows when they don't really.)
    """
    collected: List[List[Any]] = []
    for cells in ws.iter_rows():
        if is_empty_row(cells):
            # Everything after the first empty row is ignored.
            return collected
        collected.append([cell.value for cell in cells])
    return collected
def read_excel(sheet: Worksheet) -> list:
    """
    Read the raw contents of a worksheet verbatim, with no processing.

    :param sheet: the worksheet being read

    :return: a list with one entry per row, each row itself a list of
        cell values
    """
    return list(map(list, sheet.iter_rows(values_only=True)))
    def get_xl_table(self, ws: Worksheet):
        """
        Read a table from the top of the worksheet into a ``Box``.

        Rows are consumed until one whose first cell is None or blank
        after stripping. The first row read becomes ``headers``; later
        rows are appended to ``rows``.

        :param ws: worksheet to read
        :return: Box with ``headers`` and ``rows`` entries holding the
            cell rows
        """
        xl_table = Box(default_box=True)
        xl_table.headers = []
        xl_table.rows = []

        for cells in ws.iter_rows():
            leading = cells[0].value
            if leading is None or str(leading).strip() == "":
                break
            if not xl_table.headers:
                # Nothing stored yet: this row is the header row.
                xl_table.headers = cells
            else:
                xl_table.rows.append(cells)

        return xl_table
def write_variants_sheet(sheet: Worksheet, df_variants: pd.DataFrame):
    """
    Write and format variants sheet in workbook

    The dataframe is appended with its header row, then a blank row is
    inserted at the top so the sheet has a two-row header: the first
    seven columns get a label merged over rows 1-2, and columns 8 onward
    sit under a single merged "samples" title in row 1 with their own
    rotated labels in row 2.

    :param sheet: worksheet to write into (modified in place)
    :param df_variants: dataframe to write; columns 8 onward are treated
        as sample columns
    """
    # write data
    for r in dataframe_to_rows(df_variants, header=True, index=False):
        sheet.append(r)
    # push everything down one row to make room for the top header row
    sheet.insert_rows(1)

    # apply conditional filling depending of variant presence
    # (rows from 3 down and columns from 8 right, i.e. the sample cells)
    for row in sheet.iter_rows(
            min_col=8,
            max_col=sheet.max_column,
            min_row=3,
            max_row=sheet.max_row,
    ):
        for cell in row:
            if cell.value == 1:
                cell.fill = PatternFill(start_color="3bbf97",
                                        fill_type="solid")
            # every cell in this range is blanked; only the fill remains
            cell.value = ""

    # improve style (common columns)
    # one width per column for the first seven columns
    column_widths = [8, 8, 8, 18, 10, 14, 14]
    for col, w in zip(sheet.iter_cols(max_row=2, max_col=7), column_widths):
        colname = col[0].column_letter
        # copy the label from row 2 up into row 1 before merging the two
        col[0].value = col[1].value
        col[0].font = Font(name="Calibri", bold=True)
        col[0].border = Border(
            bottom=Side(border_style="medium", color="000000"))
        col[0].alignment = Alignment(horizontal="center")
        sheet.column_dimensions[colname].width = w
        sheet.merge_cells(f"{colname}1:{colname}2")

    # improve style (samples columns)
    sheet.row_dimensions[2].height = 60
    sheet["H1"].value = "samples"
    sheet["H1"].font = Font(name="Calibri", bold=True)
    sheet["H1"].alignment = Alignment(horizontal="center")
    # one title cell spanning row 1 from column 8 to the last column
    sheet.merge_cells(start_row=1,
                      end_row=1,
                      start_column=8,
                      end_column=sheet.max_column)
    # row 2 labels of the sample columns: bold, underlined by a border,
    # rotated 90 degrees, in narrow columns
    for col in sheet.iter_cols(min_row=2, max_row=2, min_col=8):
        col[0].font = Font(name="Calibri", bold=True)
        col[0].border = Border(
            bottom=Side(border_style="medium", color="000000"))
        col[0].alignment = Alignment(horizontal="center", text_rotation=90)
        sheet.column_dimensions[col[0].column_letter].width = 3
Esempio n. 26
0
def list_rows_with_content(
    *,
    ws: Worksheet,
    min_row: int,
    column: int,
    text: str
) -> List[int]:
    """
    List the rows whose cell in a given column equals a given text.

    Parameters
    ----------
    ws : Worksheet
        A worksheet from a workbook.
    min_row : int
        Start row for iteration.
    column : int
        The column to search.
    text : str
        The text to search; the cell value must equal it exactly.

    Returns
    -------
    List[int]
        A list of row numbers.

    Example
    -------
    >>> rows_with_text = ds.list_rows_with_content(
    >>>     ws=ws,
    >>>     min_row=2,
    >>>     column=11,
    >>>     text='ETA'
    >>> )
    """
    single_column_rows = ws.iter_rows(
        min_row=min_row,
        min_col=column,
        max_col=column
    )
    return [
        cells[0].row
        for cells in single_column_rows
        for cell in cells
        if cell.value == text
    ]
Esempio n. 27
0
def parse_intent_clarification_worksheet(worksheet: Worksheet) -> List[str]:
    """
    Read the 'Intent Clarification' column of a worksheet.

    The column is located with ``get_header_indices``; the value of every
    row below the header row is collected, empty cells included.

    :param worksheet: worksheet holding the intent clarifications
    :return: list of the column's cell values in row order
    """
    header_row, header_cols = get_header_indices(worksheet,
                                                 ['Intent Clarification'])

    min_row = header_row + 1
    min_col = min(header_cols.values())
    max_col = max(header_cols.values())

    # iter_rows(min_col=...) yields tuples whose first element is column
    # min_col, so the position is relative to min_col rather than column
    # 1. Assumes header_cols holds 1-based column numbers, as implied by
    # their use as min_col/max_col.
    clarification_idx = header_cols['Intent Clarification'] - min_col

    intent_clarifications = [
        row[clarification_idx].value
        for row in worksheet.iter_rows(min_row=min_row,
                                       min_col=min_col,
                                       max_col=max_col)
    ]

    logger.info(f"{intent_clarifications=}")

    return intent_clarifications
Esempio n. 28
0
def number_non_empty_rows(
    *,
    ws: Worksheet,
    column_number: int,
    start_row: int,
) -> int:
    """
    Count the cells of one column that hold a truthy value.

    Note that the test is truthiness, so cells holding 0, False or an
    empty string are not counted.

    Parameters
    ----------
    ws : Worksheet
        The worksheet to analyze.
    column_number : int
        The desired column number.
    start_row : int
        The row at which to start evaluating cells.

    Returns
    -------
    row_count : int
        The number of non-empty rows.

    Example
    -------
    >>> start_row = 2
    >>> column_number = 1
    >>> row_count = ds.number_non_empty_rows(
    >>>     ws=ws,
    >>>     column_number=column_number,
    >>>     start_row=start_row,
    >>> )
    """
    column_rows = ws.iter_rows(
        min_row=start_row,
        min_col=column_number,
        max_col=column_number
    )
    return sum(1 for cells in column_rows for cell in cells if cell.value)
Esempio n. 29
0
def create_monitor_col_dict(ws: Worksheet, monitor_dict: dict) -> dict:
    """
    Build the dict of each monitor's column index in the sheet.

    The header row is read from column 2 for as many columns as there
    are monitors; every header cell must name a known monitor.

    :param ws: worksheet
    :param monitor_dict: dict of monitors (key := name, item := Monitor)
    :return: dict of monitor column indices in the sheet
        (key := name, item := column index)
    :raises ValueError: if a header cell's value is not a key of
        ``monitor_dict``
    """
    header_cells = (
        cell
        for row in ws.iter_rows(min_row=HEADER_ROW_IDX,
                                max_row=HEADER_ROW_IDX,
                                min_col=2,
                                max_col=len(monitor_dict) + 1)
        for cell in row
    )
    columns_by_name = {}
    for header_cell in header_cells:
        name = header_cell.value
        if name not in monitor_dict:
            raise ValueError(f'{name} is not in monitors.')
        columns_by_name[name] = header_cell.column
    return columns_by_name
Esempio n. 30
0
 def __init__(self, filter_cls, ws: Worksheet, name_col_idx: int,
              disable_col_idx: int):
     """
     Load the set of enabled filters from a worksheet.

     Rows are read from ``FilterManager._FILTER_DATA_ST_ROW_IDX`` until
     the first row whose name cell is None. A name that cannot be
     converted is printed and skipped; a converted filter is kept unless
     its disable cell holds 'Y'.

     :param filter_cls: filter type passed to ``convert_str_to_filter``
     :param ws: worksheet holding the filter table
     :param name_col_idx: column of the filter names
     :param disable_col_idx: column of the disable flags
     """
     self.filter_cls = filter_cls
     self.filters = set()
     # Rows start at the name column, so the disable flag sits this many
     # cells to the right of the name.
     disable_offset = disable_col_idx - name_col_idx
     filter_rows = ws.iter_rows(
         min_row=FilterManager._FILTER_DATA_ST_ROW_IDX,
         min_col=name_col_idx,
         max_col=disable_col_idx)
     for cells in filter_rows:
         raw_name = cells[0].value
         if raw_name is None:
             break
         try:
             parsed_filter = convert_str_to_filter(self.filter_cls,
                                                   raw_name)
         except ValueError as e:
             print(f'{e}')
             continue
         if cells[disable_offset].value != 'Y':
             self.filters.add(parsed_filter)