def assert_sorted(self, entity_types: List[Type[CoreEntity]]):
    """Validate that |entity_types| is in descending class-hierarchy order.

    Each adjacent pair produced by `pairwise` is checked with
    `self.is_higher_ranked`; the first pair that fails the check aborts
    validation.

    Args:
        entity_types: The entity classes to validate, in the order given.

    Raises:
        ValueError: If `self.is_higher_ranked` returns a falsy value for a
            pair; the message names both offending classes.
    """
    for earlier, later in pairwise(entity_types):
        if self.is_higher_ranked(earlier, later):
            continue
        # First out-of-order pair found: report it and stop.
        raise ValueError(
            f'Unexpected ordering, found {earlier.__name__} before '
            f'{later.__name__}')
def _split_df(df: pd.DataFrame, indices: List[int]) -> List[pd.DataFrame]:
    """Split the fully parsed DataFrame into new DataFrames based on |indices|.

    For every (start, end) pair yielded by `aggregate_ingest_utils.pairwise`
    over |indices|, the slice `df[start:end]` becomes one piece. The slice
    from the last index to the end of |df| becomes the final piece. Each
    piece has its index reset (the old index is dropped).

    NOTE(review): this assumes `pairwise` yields consecutive overlapping
    pairs (s0, s1), (s1, s2), ... - confirm against the helper.

    Args:
        df: The DataFrame to split.
        indices: Positions at which each piece starts.

    Returns:
        The list of pieces, in order. Empty if |indices| is empty.
    """
    # Materialise once so the last boundary can be read directly.
    boundaries = list(indices)

    dfs = [
        df[start:end].reset_index(drop=True)
        for start, end in aggregate_ingest_utils.pairwise(boundaries)
    ]

    # Key the tail off the last boundary itself, not the truthiness of a
    # leaked loop variable: a single index (no pairs) or a final index of 0
    # must still produce the trailing piece.
    if boundaries:
        dfs.append(df[boundaries[-1]:].reset_index(drop=True))

    return dfs
def _split_page(df: pd.DataFrame) -> Generator[pd.DataFrame, None, None]:
    """Create a new DataFrame for each facility listed on a page.

    Rows that are entirely NaN are dropped. Rows whose last column is null
    are shifted one column to the right, then placed after the rows whose
    last column is populated. The recombined frame is cut at every row
    where the 'FACILITY' column is non-null.

    Args:
        df: The parsed page; must contain a 'FACILITY' column.

    Yields:
        Slices of the realigned frame, one per (start, end) pair produced by
        `aggregate_ingest_utils.pairwise` over the table start positions
        plus the frame length. Slices keep the realigned frame's index; it
        is not reset.
    """
    df = df.dropna(how='all')

    # bottom_df is parsed offset by one column and needs to be shifted
    last_column = df[df.columns[-1]]
    top_df = df[last_column.notnull()]
    bottom_df = df[last_column.isnull()].shift(1, axis='columns')

    # Recombine top_df and bottom_df since it's not the correct table division
    aligned_df = pd.concat([top_df, bottom_df], ignore_index=True)

    # New table starts when a new facility is listed
    table_starts = np.where(aligned_df['FACILITY'].notnull())[0]
    # Append the frame length as a closing boundary for the last table. Uses
    # the same `np` alias as the line above rather than mixing `np`/`numpy`.
    table_starts_and_end = np.append(table_starts, len(aligned_df))

    for start, end in aggregate_ingest_utils.pairwise(table_starts_and_end):
        yield aligned_df[start:end]