Beispiel #1
0
def get_rows_with_headers(
        sheet: Sheet) -> Tuple[List[Cell], Generator[List[Cell], None, None]]:
    """
    Since it ends up happening a lot, return
    """
    row_iterator = sheet.get_rows()
    headers = [cell.value for cell in next(row_iterator)]
    return headers, row_iterator
Beispiel #2
0
def commit_from_sheet(ws: Sheet, model: db.Model, **kwargs):
    """Initialize DB table data from XLRD Worksheet.

    Initialize table data from source data associated with corresponding
    data model.

    Args:
        ws (xlrd.sheet.Sheet): XLRD worksheet object.
        model (class): SqlAlchemy model class.
    """
    survey, indicator, characteristic = '', '', ''
    if model == Data:
        survey = kwargs['survey']
        indicator = kwargs['indicator']
        characteristic = kwargs['characteristic']
    header = None

    for i, row in enumerate(ws.get_rows()):
        row = [r.value for r in row]
        if i == 0:
            header = row
        else:
            row_dict = {k: v for k, v in zip(header, row)}
            if model == Data:
                survey_code = row_dict.get('survey_code')
                survey_id = survey.get(survey_code)
                row_dict['survey_id'] = survey_id
                indicator_code = row_dict.get('indicator_code')
                indicator_id = indicator.get(indicator_code)
                row_dict['indicator_id'] = indicator_id
                char1_code = row_dict.get('char1_code')
                char1_id = characteristic.get(char1_code)
                row_dict['char1_id'] = char1_id
                char2_code = row_dict.get('char2_code')
                char2_id = characteristic.get(char2_code)
                row_dict['char2_id'] = char2_id
            try:
                record = model(**row_dict)
            except (DatabaseError, ValueError, AttributeError, KeyError,
                    IntegrityError, Exception) as err:
                msg = 'Error when processing data import.\n' \
                      '- Worksheet name: {}\n' \
                      '- Row number: {}\n' \
                      '- Cell values: {}\n\n' \
                      '- Original Error:\n' + \
                      type(err).__name__ + ': ' + str(err)
                msg = msg.format(ws.name, i + 1, row)
                logging.error(msg)
                raise PmaApiDbInteractionError(msg)

            db.session.add(record)
Beispiel #3
0
def _assert_sheet_content(sheet_name: str, actual_worksheet: Sheet,
                          expected_worksheet: Sheet):
    assert (actual_worksheet.nrows == expected_worksheet.nrows
            ), f"Different number of rows in {sheet_name} sheet"
    assert (actual_worksheet.ncols == expected_worksheet.ncols
            ), f"Different number of columns in {sheet_name} sheet"

    for row_index, actual_row in enumerate(actual_worksheet.get_rows()):
        expected_row = expected_worksheet.row(row_index)
        for cell_index, actual_cell in enumerate(actual_row):
            expected_cell = expected_row[cell_index]
            assert (
                actual_cell.ctype == expected_cell.ctype
            ), f"Different cell type in row {row_index}, col {cell_index} in {sheet_name} sheet"
            assert (
                actual_cell.value == expected_cell.value
            ), f"Different cell content in row {row_index}, col {cell_index} in {sheet_name} sheet"
Beispiel #4
0
def get_boundaries(sheet: Sheet) -> Tuple[int, int]:
    start_row_num: int = -1
    end_row_num: int = -1
    row_idx: int = 0

    for row in sheet.get_rows():
        is_boundary: bool = True
        for cell in row:
            is_boundary = type(
                cell.value) == str and cell.value.startswith('*')
            if not is_boundary:
                break

        if is_boundary:
            if start_row_num < 0:
                start_row_num = row_idx
            elif end_row_num < 0:
                end_row_num = row_idx
        row_idx += 1
    return start_row_num + 1, end_row_num - 1
Beispiel #5
0
def get_headers(sheet: Sheet):
    row_idx: int = 0
    for row in sheet.get_rows():
        is_header_row: bool = True
        if type(row[0].value) == str and row[0].value.startswith('*'):
            for cell in row[1:]:
                if type(cell.value) == str and len(cell.value) == 0:
                    is_header_row = is_header_row and True
                else:
                    is_header_row = False
        else:
            is_header_row = False

        row_idx += 1
        if is_header_row:
            break
    header_names: Tuple[str, ...] = tuple(
        map(
            lambda cell: cell.value.replace('.', '').replace('/', '_').replace(
                ' ', '_').lower(), sheet.row(row_idx)))
    return header_names