def main():
    """
    Run the entire pipeline.

    Loads the configuration with ``get_config`` and passes it straight to
    ``process_scrc``.

    :return: None
    """
    # Debugging aid, intentionally left disabled:
    # faulthandler.enable()  # can print a minimal thread dump in case of external termination
    process_scrc(get_config())
def query_supreme_court(self, engine, lang):
    """
    Fetch supreme court rulings together with their lower-court references.

    Queries rows where ``court = 'CH_BGer'`` through ``self.select`` and
    extracts the ``chamber``, ``date`` and ``file_number`` entries of the
    ``lower_court`` JSON column as ``origin_chamber``, ``origin_date`` and
    ``origin_file_number``. Rows missing ``origin_date`` or
    ``origin_chamber`` are dropped.

    NOTE(review): only the first chunk yielded by ``self.select`` is
    consumed; any further chunks are never read here. Confirm that
    ``self.get_chunksize()`` is large enough for this to be intentional.

    :param engine: database engine, passed through to ``self.select``
    :param lang: language selector, passed through to ``self.select``
    :return: tuple ``(origin_chambers, supreme_court_df)`` where
        ``origin_chambers`` is the list of distinct ``origin_chamber``
        values and ``supreme_court_df`` is the filtered result
        (presumably a pandas DataFrame -- verify against ``self.select``)
    :raises ValueError: if ``self.select`` yields no chunk at all
    """
    # SQL select expressions pulling single fields out of the lower_court
    # JSON column as text (the #>> path operator).
    origin_chamber = "lower_court::json#>>'{chamber}' AS origin_chamber"
    origin_date = "lower_court::json#>>'{date}' AS origin_date"
    origin_file_number = "lower_court::json#>>'{file_number}' AS origin_file_number"
    columns = ", ".join((origin_chamber, origin_date, origin_file_number))

    try:
        supreme_court_df = next(
            self.select(
                engine,
                lang,
                columns=columns,
                where="court = 'CH_BGer'",
                order_by="origin_date",
                chunksize=self.get_chunksize(),
            )
        )
    except StopIteration:
        # The StopIteration carries no information of its own; suppress it so
        # the traceback shows only the meaningful error.
        raise ValueError("No supreme court rulings found") from None

    # A ruling is only usable when both the date and the chamber of the
    # referenced lower-court decision are known.
    supreme_court_df = supreme_court_df.dropna(
        subset=['origin_date', 'origin_chamber'])
    origin_chambers = list(supreme_court_df['origin_chamber'].unique())
    self.logger.info(
        f"Found supreme court rulings with references to lower court rulings "
        f"from chambers {origin_chambers}")
    return origin_chambers, supreme_court_df


if __name__ == '__main__':
    # Script entry point: build the dataset creator from the loaded
    # configuration and generate the dataset.
    config = get_config()
    criticality_dataset_creator = CriticalityDatasetCreator(config)
    criticality_dataset_creator.create_dataset()