def get_rows_with_headers( sheet: Sheet) -> Tuple[List[Cell], Generator[List[Cell], None, None]]: """ Since it ends up happening a lot, return """ row_iterator = sheet.get_rows() headers = [cell.value for cell in next(row_iterator)] return headers, row_iterator
def commit_from_sheet(ws: Sheet, model: db.Model, **kwargs): """Initialize DB table data from XLRD Worksheet. Initialize table data from source data associated with corresponding data model. Args: ws (xlrd.sheet.Sheet): XLRD worksheet object. model (class): SqlAlchemy model class. """ survey, indicator, characteristic = '', '', '' if model == Data: survey = kwargs['survey'] indicator = kwargs['indicator'] characteristic = kwargs['characteristic'] header = None for i, row in enumerate(ws.get_rows()): row = [r.value for r in row] if i == 0: header = row else: row_dict = {k: v for k, v in zip(header, row)} if model == Data: survey_code = row_dict.get('survey_code') survey_id = survey.get(survey_code) row_dict['survey_id'] = survey_id indicator_code = row_dict.get('indicator_code') indicator_id = indicator.get(indicator_code) row_dict['indicator_id'] = indicator_id char1_code = row_dict.get('char1_code') char1_id = characteristic.get(char1_code) row_dict['char1_id'] = char1_id char2_code = row_dict.get('char2_code') char2_id = characteristic.get(char2_code) row_dict['char2_id'] = char2_id try: record = model(**row_dict) except (DatabaseError, ValueError, AttributeError, KeyError, IntegrityError, Exception) as err: msg = 'Error when processing data import.\n' \ '- Worksheet name: {}\n' \ '- Row number: {}\n' \ '- Cell values: {}\n\n' \ '- Original Error:\n' + \ type(err).__name__ + ': ' + str(err) msg = msg.format(ws.name, i + 1, row) logging.error(msg) raise PmaApiDbInteractionError(msg) db.session.add(record)
def _assert_sheet_content(sheet_name: str, actual_worksheet: Sheet, expected_worksheet: Sheet): assert (actual_worksheet.nrows == expected_worksheet.nrows ), f"Different number of rows in {sheet_name} sheet" assert (actual_worksheet.ncols == expected_worksheet.ncols ), f"Different number of columns in {sheet_name} sheet" for row_index, actual_row in enumerate(actual_worksheet.get_rows()): expected_row = expected_worksheet.row(row_index) for cell_index, actual_cell in enumerate(actual_row): expected_cell = expected_row[cell_index] assert ( actual_cell.ctype == expected_cell.ctype ), f"Different cell type in row {row_index}, col {cell_index} in {sheet_name} sheet" assert ( actual_cell.value == expected_cell.value ), f"Different cell content in row {row_index}, col {cell_index} in {sheet_name} sheet"
def get_boundaries(sheet: Sheet) -> Tuple[int, int]: start_row_num: int = -1 end_row_num: int = -1 row_idx: int = 0 for row in sheet.get_rows(): is_boundary: bool = True for cell in row: is_boundary = type( cell.value) == str and cell.value.startswith('*') if not is_boundary: break if is_boundary: if start_row_num < 0: start_row_num = row_idx elif end_row_num < 0: end_row_num = row_idx row_idx += 1 return start_row_num + 1, end_row_num - 1
def get_headers(sheet: Sheet): row_idx: int = 0 for row in sheet.get_rows(): is_header_row: bool = True if type(row[0].value) == str and row[0].value.startswith('*'): for cell in row[1:]: if type(cell.value) == str and len(cell.value) == 0: is_header_row = is_header_row and True else: is_header_row = False else: is_header_row = False row_idx += 1 if is_header_row: break header_names: Tuple[str, ...] = tuple( map( lambda cell: cell.value.replace('.', '').replace('/', '_').replace( ' ', '_').lower(), sheet.row(row_idx))) return header_names