def __init__(self, opts=None):
    """Configure the profiler from ``opts``.

    Every setting is looked up through ``App.get_option``; ``opts`` is
    forwarded to it unchanged as the ``options`` argument (its expected
    type is not visible from this method).
    """
    def read_option(option_name, fallback=False):
        # Single place where the option lookup call is spelled out.
        return App.get_option(option_name, default=fallback, options=opts)

    self.verbose = read_option('verbose')
    # Detailed output is only honoured when verbose mode is on as well.
    self.show_details = read_option('show_details') and self.verbose

    # (attribute name, option name, default) -- read in this order.
    plain_settings = (
        ('save_output_files', 'save_details', False),
        ('show_all_columns', 'show_all_info', False),
        ('silent', 'silent', False),
        ('to_folder', 'to_folder', None),
        ('stop_on_error', 'stop_on_error', False),
        ('plot', 'plot', False),
        ('skip_views', 'skip_views', False),
        ('metadata_file', 'metadata_file', None),
        ('part', 'part', False),
        ('ignore_index', 'ignore_index', False),
        ('ignore_metadata', 'ignore_metadata', False),
    )
    for attribute, option_name, fallback in plain_settings:
        setattr(self, attribute, read_option(option_name, fallback))

    # A missing output folder is represented as an empty string.
    if self.to_folder is None:
        self.to_folder = ''

    self.last_zip_rows = None
    self.last_gps_rows = None

    # The first element returned by data_detectors() names how the
    # detectors were obtained; anything but 'Dynamic' triggers a warning.
    detector_source = TypeDetector.data_detectors()[0]
    if detector_source != 'Dynamic':
        App.warn(
            'Type Detectors Not Loaded Dynamically from File. ({0} Detectors) '
            .format(detector_source))
def profile(self, file_name, skip_rows=0, n_rows=None):
    """Profile one file and record the outcome in ``self.last_sumary``.

    The file is initialised through ``ProfilerUtils.init_profiler``, which
    yields its metadata and row count. Empty files and datasets rejected by
    ``check_if_skip_dataset`` only get an error summary; everything else is
    loaded with ``PandasUtils.load_database`` and handed to ``do_profile``.

    Args:
        file_name: File to profile; passed to ``init_profiler`` and
            ``load_database``.
        skip_rows: Forwarded to ``load_database`` as ``skiprows`` and to
            ``do_profile``.
        n_rows: Forwarded to ``load_database`` as ``nrows`` and to
            ``do_profile``.

    Returns:
        None. Results are exposed through ``self.last_sumary`` and
        ``self.db_name``.

    Raises:
        KeyboardInterrupt, SystemError: always re-raised after the summary
            has been recorded.
        BaseException: any other failure is re-raised only when
            ``self.stop_on_error`` is true; otherwise it is logged and
            stored in the summary.
    """
    def describe_failure():
        # Stack frames followed by the exception type and message, so the
        # stored/logged text says what failed and not only where.
        ex_type, ex, tb = sys.exc_info()
        return '\n'.join(traceback.format_tb(tb) +
                         traceback.format_exception_only(ex_type, ex))

    file_rows = None
    metadata = None
    self.db_name = None
    try:
        metadata, file_rows = ProfilerUtils.init_profiler(
            file_name, part=self.part, metadata_file=self.metadata_file,
            ignore_metadata=self.ignore_metadata)
        self.db_name = metadata['db_name']

        # Placeholder summary so that a failure escaping every handler
        # below still leaves a summary behind. TODO: Improve this.
        self.last_sumary = self.create_error_sumary(
            'Something went wrong!', file_rows, metadata=metadata)

        # Check for an empty file before reporting the row count, so the
        # "file_rows - 1" figure is never printed as -1.
        if file_rows == 0:
            self.last_sumary = self.create_error_sumary(
                'Error: Empty file.', None)
            return
        self.printv('File rows: {0:n}'.format(file_rows - 1))

        self.printv('Getting metadata.')
        if self.check_if_skip_dataset(metadata):
            App.warn(
                'Skipping database analisys: Not primary Socrata Database')
            self.last_sumary = self.create_error_sumary(
                Profiler.MSG_SKIP_VIEW, file_rows, metadata)
        else:
            self.printv("Loading file: " + file_name)
            database = PandasUtils.load_database(
                file_name, skiprows=skip_rows, nrows=n_rows)
            self.do_profile(database, file_name, skip_rows, n_rows, metadata)
    except (KeyboardInterrupt, SystemError):
        error_msg = describe_failure()
        self.last_sumary = self.create_error_sumary(
            'Interrupted by Keyboard', file_rows, metadata=metadata)
        App.error('Interrupted by Keyboard \n' + error_msg)
        raise
    except Exception:
        error_msg = describe_failure()
        App.error(error_msg)
        self.last_sumary = self.create_error_sumary(
            'Error: ' + error_msg, file_rows, metadata=metadata)
        if self.stop_on_error:
            raise
    except:
        # Deliberately bare: only reached for exceptions that are not
        # Exception subclasses (e.g. SystemExit, GeneratorExit).
        # NOTE(review): with stop_on_error off this swallows SystemExit --
        # confirm that is intended.
        self.last_sumary = self.create_error_sumary(
            'Unknown Error: {0}'.format(sys.exc_info()), file_rows,
            metadata=metadata)
        App.error(sys.exc_info())
        if self.stop_on_error:
            raise