Beispiel #1
0
    def __init__(self, opts=None):
        """Load the profiler settings from ``opts`` and reset per-run state.

        Each setting is read through ``App.get_option`` with ``options=opts``.
        Flags default to ``False``; ``to_folder`` and ``metadata_file``
        default to ``None``, and a ``None`` ``to_folder`` is stored as ``''``.

        A warning is emitted through ``App.warn`` when the first element
        returned by ``TypeDetector.data_detectors()`` is not ``'Dynamic'``.
        """

        def option(name, fallback=False):
            # One lookup path for every setting; keeps the list below compact.
            return App.get_option(name, default=fallback, options=opts)

        self.verbose = option('verbose')
        # ``show_details`` can only be truthy when ``verbose`` is truthy too.
        self.show_details = option('show_details') and self.verbose
        self.save_output_files = option('save_details')
        self.show_all_columns = option('show_all_info')
        self.silent = option('silent')

        # A missing output folder is normalised to an empty string.
        target_folder = option('to_folder', None)
        self.to_folder = '' if target_folder is None else target_folder

        self.stop_on_error = option('stop_on_error')
        self.plot = option('plot')
        self.skip_views = option('skip_views')
        self.metadata_file = option('metadata_file', None)
        self.part = option('part')
        self.ignore_index = option('ignore_index')
        self.ignore_metadata = option('ignore_metadata')

        self.last_zip_rows = None
        self.last_gps_rows = None

        detector_source = TypeDetector.data_detectors()[0]
        if detector_source != 'Dynamic':
            warning_template = (
                'Type Detectors Not Loaded Dynamically from File. ({0} Detectors) '
            )
            App.warn(warning_template.format(detector_source))
Beispiel #2
0
    def profile(self, file_name, skip_rows=0, n_rows=None):
        """Profile ``file_name`` and store the outcome in ``self.last_sumary``.

        The file is first inspected with ``ProfilerUtils.init_profiler`` to
        obtain its metadata and row count. Empty files and datasets for which
        ``check_if_skip_dataset`` is true only get an error summary; any other
        file is loaded with ``PandasUtils.load_database`` and handed to
        ``do_profile``.

        Args:
            file_name: File to profile; passed to the initialiser and loader.
            skip_rows: Forwarded to the loader as ``skiprows`` and to
                ``do_profile``.
            n_rows: Forwarded to the loader as ``nrows`` and to
                ``do_profile``.

        Raises:
            KeyboardInterrupt, SystemError: Always re-raised, after an
                'Interrupted by Keyboard' summary has been recorded.
            Exception: Any other failure is logged and recorded in
                ``self.last_sumary``; it is re-raised only when
                ``self.stop_on_error`` is set.
        """
        file_rows = None
        metadata = None
        self.db_name = None
        try:
            metadata, file_rows = ProfilerUtils.init_profiler(
                file_name,
                part=self.part,
                metadata_file=self.metadata_file,
                ignore_metadata=self.ignore_metadata)
            self.db_name = metadata['db_name']
            # Temporary safety net: pre-record a generic failure so that an
            # error that escapes the handlers below still leaves a summary.
            # TODO: Improve this.
            self.last_sumary = self.create_error_sumary(
                'Something went wrong!', file_rows, metadata=metadata)

            # Check for an empty file before reporting the row count, so an
            # empty file is not announced as "File rows: -1".
            if file_rows == 0:
                self.last_sumary = self.create_error_sumary(
                    'Error: Empty file.', None)
                return

            self.printv('File rows: {0:n}'.format(file_rows - 1))
            self.printv('Getting metadata.')

            if self.check_if_skip_dataset(metadata):
                App.warn(
                    'Skipping database analisys: Not primary Socrata Database')
                self.last_sumary = self.create_error_sumary(
                    Profiler.MSG_SKIP_VIEW, file_rows, metadata)
            else:
                self.printv("Loading file: " + file_name)
                database = PandasUtils.load_database(file_name,
                                                     skiprows=skip_rows,
                                                     nrows=n_rows)
                self.do_profile(database, file_name, skip_rows, n_rows,
                                metadata)

        except (KeyboardInterrupt, SystemError):
            # NOTE(review): SystemError is an interpreter-internal error;
            # SystemExit may have been intended here -- confirm before
            # changing, since it alters which exceptions are always re-raised.
            stack_text = '\n'.join(traceback.format_tb(sys.exc_info()[2]))
            self.last_sumary = self.create_error_sumary(
                'Interrupted by Keyboard', file_rows, metadata=metadata)
            App.error('Interrupted by Keyboard \n' + stack_text)
            raise

        except Exception:
            ex_type, ex_value, tb = sys.exc_info()
            # format_tb() yields only the stack frames; append the
            # "ExceptionType: message" line so the log and the summary say
            # what actually failed, not just where.
            error_msg = '\n'.join(
                traceback.format_tb(tb) +
                traceback.format_exception_only(ex_type, ex_value))
            App.error(error_msg)
            self.last_sumary = self.create_error_sumary('Error: ' + error_msg,
                                                        file_rows,
                                                        metadata=metadata)
            if self.stop_on_error:
                raise

        except:  # noqa: E722 - deliberate catch-all, see below
            # Reached only for exceptions that do not derive from Exception
            # (e.g. SystemExit, GeneratorExit).
            # NOTE(review): these are swallowed unless stop_on_error is set;
            # confirm that is intended.
            exc_details = sys.exc_info()
            self.last_sumary = self.create_error_sumary(
                'Unknown Error: {0}'.format(exc_details),
                file_rows,
                metadata=metadata)
            App.error(exc_details)
            if self.stop_on_error:
                raise