def _get_sheet_data(sh: xlrd.sheet.Sheet, datemode: int) -> pd.DataFrame:
    """Convert one Excel sheet into a Pandas DataFrame.

    Layout read from the sheet:
      * cell (0, 1) holds the maturity date, either as an Excel serial
        number (float) or as a ``dd/mm/YYYY`` string;
      * row index 1 holds the column headers (empty header cells are dropped);
      * rows from index 2 onward hold the data; a row is skipped when its
        second cell is empty.

    The sheet name is split on its last space into a bond alias and a series;
    the alias is resolved through ``BONDS["aliases"]``.

    Args:
        sh: the sheet to be processed
        datemode: workbook datemode, used for the maturity date and forwarded
            to _get_row_data()

    Returns:
        pd.DataFrame: one record per retained data row, plus the constant
        columns MaturityDate, BondCode, BondName and BondSeries.
    """
    raw_maturity = sh.cell_value(0, 1)
    if isinstance(raw_maturity, float):
        # Excel stores dates as serial numbers; expand to (y, m, d, H, M, S).
        date_parts = xlrd.xldate_as_tuple(raw_maturity, datemode)
        maturity_date = datetime.datetime(*date_parts)
    else:
        maturity_date = datetime.datetime.strptime(raw_maturity, "%d/%m/%Y")

    alias, bond_series = sh.name.rsplit(" ", maxsplit=1)
    alias_key = alias.replace("-", "").lower()
    bond_name = BONDS["aliases"][alias_key]  # Fix bonds names

    columns = tuple(cell.value for cell in sh.row(1) if cell.value != "")

    # Rows 0 and 1 are the maturity line and the header line.
    body_rows = itertools.islice(sh.get_rows(), 2, None)

    def _records():
        for sheet_row in body_rows:
            marker = sheet_row[1]
            if marker.ctype == 0 or marker.value == "":
                continue
            yield _get_row_data(sheet_row, datemode)

    frame = pd.DataFrame.from_records(_records(), columns=columns)
    return frame.assign(
        MaturityDate=maturity_date,
        BondCode=sh.name,
        BondName=bond_name,
        BondSeries=bond_series,
    )
def build_choices(self, sheet: xlrd.sheet.Sheet, datemode: int) -> None:
    """Parse the tab of ODK choices.

    Sets the instance attribute `header` and adds entries to `choices`
    when the choices tab contains data. Rows are grouped by their
    `list_name` column; a row is kept only when both its `list_name` and
    `name` values have a non-empty string form.

    An IndexError raised while reading the sheet is treated as "no header
    row, hence no choices" and is swallowed; rows collected before the
    error are still stored.

    Args:
        sheet: The xlrd sheet object for this sheet, or None if absent
        datemode: The xlrd datemode for the workbook
    """
    grouped = defaultdict(list)
    if sheet is None:
        return

    try:
        self.header = self.get_header(sheet, datemode)
        numbered_rows = enumerate(sheet.get_rows())
        next(numbered_rows, None)  # row 0 is the header
        for row_index, cells in numbered_rows:
            values = [self.cell_to_value(cell, datemode) for cell in cells]
            by_column = dict(zip(self.header, values))
            list_name = by_column['list_name']
            choice_name = by_column['name']
            if not (str(list_name) and str(choice_name)):
                continue
            grouped[list_name].append(
                XlsFormRow(row_index, choice_name, self.header, values,
                           by_column)
            )
    except IndexError:
        # No header row found. Then no choices.
        pass

    for list_name, members in grouped.items():
        self.choices[list_name] = ChoiceList(list_name, members, sheet.name)
def _init_data(self, sheet: xlrd.sheet.Sheet):
    """Load every data row of ``sheet`` into ``self.data_rows``.

    The first row of the sheet is skipped. For each remaining row a dict is
    built with one entry per name in ``self.fields`` (matched by column
    position) and appended to ``self.data_rows``:

    * date cells become a ``Cell`` wrapping a ``datetime.datetime`` with
      number format ``'yyyy/mm/dd'``;
    * strings that ``float()`` can parse and ``str.isnumeric()`` accepts
      become a ``Cell`` wrapping the float (a warning is printed);
    * any other value is wrapped in a ``Cell`` unchanged;
    * positions beyond the end of the row are stored as a bare ``''``.

    Args:
        sheet: the xlrd sheet to read; its workbook datemode is taken from
            ``sheet.book.datemode`` when a date cell is met.
    """
    rows = iter(sheet.get_rows())
    next(rows, None)  # skip the first row
    for row in rows:
        new_row = {}
        for i, field in enumerate(self.fields):
            # to prevent bug when there is an empty cell
            if i >= len(row):
                new_row[field] = ''
                continue

            source = row[i]
            if source.ctype == xlrd.XL_CELL_DATE:  # ctype 3
                date_parts = xlrd.xldate_as_tuple(source.value,
                                                  sheet.book.datemode)
                c = Cell(datetime.datetime(*date_parts))
                c.style.num_format = 'yyyy/mm/dd'
                new_row[field] = c
                continue

            value = source.value
            if isinstance(value, str) and value.isnumeric():
                try:
                    converted = float(value)
                except ValueError:
                    # isnumeric() also accepts characters such as '²' or
                    # '½' that float() cannot parse; keep those as text.
                    pass
                else:
                    print('Warning: Found a number stored in string format, converting...')
                    value = converted
            # Single assignment: previously the converted float was
            # immediately overwritten by the original string value.
            new_row[field] = Cell(value)
        self.data_rows.append(new_row)
def array2d(sheet: xlrd.sheet.Sheet):
    """Return the sheet's cell values as a list of rows.

    Each inner list holds the ``value`` attribute of every cell in one row,
    in the order produced by ``sheet.get_rows()``.
    """
    grid = []
    for row in sheet.get_rows():
        row_values = []
        for cell in row:
            row_values.append(cell.value)
        grid.append(row_values)
    return grid