def validate_model(self):
    """Deserialize ``self.model`` through ``Model`` unless already validated.

    On success the deserialized result replaces ``self.model`` and
    ``self.model_valid`` is set to ``True``. When ``self.model_valid`` is
    already truthy nothing happens.

    Raises:
        ModelValidationError: wraps the ``Invalid`` raised by deserialization.
    """
    if not self.model_valid:
        log.info("Validating model")
        try:
            deserialized = Model().deserialize(self.model)
        except Invalid as exc:
            raise ModelValidationError(exc)
        # Only commit the new state once deserialization has succeeded.
        self.model = deserialized
        self.model_valid = True
class BaseImporter(object):
    """Base class for importers that feed line-oriented data to a ``Loader``.

    Subclasses must provide the ``lines`` property (an iterable of raw
    records) and ``import_line`` (which handles one validated record).
    """

    def __init__(self, data, model, source_file="<stream>"):
        """Store the inputs and derive ``self.fields`` from the model mapping.

        :param data: the data source; stored unchanged on ``self.data``.
        :param model: model description; ``model['mapping']`` is read
            immediately to build the field list.
        :param source_file: name attached to every ``DataError`` created
            by ``add_error``.
        """
        self.data = data
        self.model = model
        self.model_valid = None
        self.source_file = source_file
        self.errors = []
        # Default error hook: log the error as a warning. Callers may
        # replace ``on_error`` with their own callable.
        self.on_error = lambda e: log.warning(e)
        self._generate_fields()

    def run(self, dry_run=False, max_errors=None, max_lines=None,
            raise_errors=False, build_indices=True):
        """Validate the model, process every line and finalize the import.

        :param dry_run: when true, lines are validated but not imported and
            dimensions, views and indices are not touched.
        :param max_errors: when truthy, ``add_error`` raises
            ``TooManyErrorsError`` once this many errors have been recorded.
        :param max_lines: when truthy, stop after this many lines.
        :param raise_errors: when true, ``add_error`` raises the error
            instead of recording it.
        :param build_indices: when false, skip building search indices.
        """
        self.dry_run = dry_run
        self.max_errors = max_errors
        self.do_build_indices = build_indices
        self.raise_errors = raise_errors

        self.validate_model()
        self.describe_dimensions()

        self.validator = make_validator(self.fields)

        # Stays 0 if ``self.lines`` yields nothing; checked below.
        self.line_number = 0
        for line_number, line in enumerate(self.lines, start=1):
            if max_lines and line_number > max_lines:
                break
            self.line_number = line_number
            self.process_line(line)

        if self.line_number == 0:
            self.add_error("Didn't read any lines of data")

        self.generate_views()
        self.build_indices()

        if self.errors:
            # The count must be passed as an argument, otherwise the
            # literal "%d" ends up in the log output.
            log.error("Finished import with %d errors:", len(self.errors))
            for err in self.errors:
                log.error(" - %s", err)
        else:
            log.info("Finished import with no errors!")

    @property
    def lines(self):
        """Iterable of raw input records; must be provided by subclasses."""
        raise NotImplementedError("lines not implemented in BaseImporter")

    @property
    def mapping(self):
        """The ``'mapping'`` section of the model."""
        return self.model['mapping']

    @property
    def views(self):
        """The ``'views'`` section of the model, or an empty list."""
        return self.model.get('views', [])

    def validate_model(self):
        """Deserialize ``self.model`` through ``Model`` unless already done.

        :raises ModelValidationError: wrapping the ``Invalid`` raised by
            deserialization.
        """
        if self.model_valid:
            return

        log.info("Validating model")
        try:
            self.model = Model().deserialize(self.model)
            self.model_valid = True
        except Invalid as e:
            raise ModelValidationError(e)

    def describe_dimensions(self):
        """Create one loader dimension per mapping entry.

        Returns ``False`` without doing anything in a dry run.
        """
        if self.dry_run:
            return False

        log.info("Describing dimensions")
        # ``items()`` (rather than ``iteritems()``) matches
        # ``_generate_fields`` and works on both Python 2 and 3 dicts.
        for dimension, mapping in self.mapping.items():
            self.loader.create_dimension(
                dimension,
                mapping.get("label"),
                type=mapping.get('type'),
                datatype=mapping.get('datatype'),
                fields=mapping.get('fields', []),
                facet=mapping.get('facet'),
                description=mapping.get("description")
            )

    def generate_views(self):
        """Flush aggregates, create every configured view, then compute
        aggregates.

        Returns ``False`` without doing anything in a dry run.
        """
        if self.dry_run:
            return False

        log.info("Generating aggregates and views")
        self.loader.flush_aggregates()
        for view in self.views:
            entity = ENTITY_TYPES.get(view.get('entity'))
            self.loader.create_view(
                entity,
                view.get('filters', {}),
                name=view.get('name'),
                label=view.get('label'),
                dimension=view.get('dimension'),
                breakdown=view.get('breakdown'),
                view_filters=view.get('view_filters', {})
            )
        self.loader.compute_aggregates()

    def build_indices(self):
        """Drop and rebuild the search index for the model's dataset.

        Returns ``False`` without doing anything in a dry run or when index
        building was disabled in ``run``.
        """
        if self.dry_run or not self.do_build_indices:
            return False

        log.info("Building search indices")
        solr.drop_index(self.model['dataset']['name'])
        solr.build_index(self.model['dataset']['name'])

    @property
    def loader(self):
        """The ``Loader`` for this dataset, created on first access."""
        if not hasattr(self, '_loader'):
            # Work on a copy: 'description' and 'currency' are popped so
            # that they are not repeated inside ``metadata``.
            dataset = self.model.get('dataset').copy()
            self._loader = Loader(
                dataset_name=dataset.get('name'),
                unique_keys=dataset.get('unique_keys', ['_csv_import_fp']),
                label=dataset.get('label'),
                description=dataset.pop('description'),
                currency=dataset.pop('currency'),
                time_axis=times.GRANULARITY.get(dataset.get(
                    'temporal_granularity', 'year'
                )),
                metadata=dataset
            )
        return self._loader

    def process_line(self, line):
        """Validate one raw line and, unless in a dry run, import it.

        ``Invalid`` and ``ImporterError`` are routed to ``add_error`` rather
        than propagated.
        """
        if self.line_number % 1000 == 0:
            log.info('Imported %s lines', self.line_number)

        try:
            _line = self.validator.deserialize(line)
            if not self.dry_run:
                self.import_line(_line)
        except (Invalid, ImporterError) as e:
            self.add_error(e)

    def import_line(self, line):
        """Import one validated line; must be provided by subclasses."""
        raise NotImplementedError(
            "import_line not implemented in BaseImporter")

    def add_error(self, exception):
        """Wrap ``exception`` in a ``DataError`` and record or raise it.

        :raises DataError: immediately, when ``raise_errors`` is set.
        :raises TooManyErrorsError: when ``max_errors`` is truthy and that
            many errors have been recorded.
        """
        err = DataError(exception=exception,
                        line_number=self.line_number,
                        source_file=self.source_file)

        if self.raise_errors:
            raise err

        self.on_error(err)
        self.errors.append(err)

        if self.max_errors and len(self.errors) >= self.max_errors:
            all_errors = "".join("\n " + str(x) for x in self.errors)
            raise TooManyErrorsError(
                "The following errors occurred:" + all_errors)

    def _generate_fields(self):
        """Build ``self.fields``, one description per mapped source column.

        A dimension of type ``'value'`` contributes its ``'column'`` and,
        when set, its ``'end_column'`` (flagged with ``is_end``). Any other
        dimension contributes the ``'column'`` of each entry in its
        ``'fields'`` list.
        """
        def _field(dimension, mapping, column_name, is_end=False):
            return {
                'dimension': dimension,
                'field': mapping.get(column_name),
                'datatype': mapping.get('datatype'),
                'is_end': is_end
            }

        fields = []
        for dimension, mapping in self.mapping.items():
            if mapping.get('type') == 'value':
                fields.append(_field(dimension, mapping, 'column'))
                if mapping.get('end_column'):
                    fields.append(
                        _field(dimension, mapping, 'end_column', True))
            else:
                for field in mapping.get('fields', []):
                    fields.append(_field(dimension, field, 'column'))

        self.fields = fields