def test_encoder():
    e = EtlEncoder()
    assert e.encode(EtlResult(1, 1, 1)) == (
        '{"__type__": "__EtlResult__", '
        '"data": {"created": 1, "updated": 1, "unchanged": 1, '
        '"deleted": 0, "status": "SUCCESS", "error": null, '
        '"processed": 0, "total_records": 0}}'
    )
def test_dumps():
    assert etl_dumps(EtlResult(1, 1, 1)) == (
        '{"__type__": "__EtlResult__", '
        '"data": {"created": 1, "updated": 1, "unchanged": 1, '
        '"deleted": 0, "status": "SUCCESS", "error": null, '
        '"processed": 0, "total_records": 0}}'
    )
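# The two tests above pin down the tagged-JSON wire format. For context,
# here is a minimal sketch of what they exercise. This is an illustration,
# not the project's actual implementation: it assumes EtlEncoder subclasses
# json.JSONEncoder and that EtlResult exposes its counters through an
# as_dict() helper (hypothetical name).
import json


class EtlEncoder(json.JSONEncoder):
    def default(self, obj):
        # Tag EtlResult instances with "__type__" so etl_decoder
        # can reconstruct them on the way back in.
        if isinstance(obj, EtlResult):
            return {'__type__': '__EtlResult__', 'data': obj.as_dict()}
        return super().default(obj)


def etl_dumps(obj):
    # Convenience wrapper exercised by test_dumps above.
    return json.dumps(obj, cls=EtlEncoder)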
def load(self, *, verbosity=0, stdout=None, ignore_dependencies=False,
         max_records=None, only_delta=True, run_type=RUN_UNKNOWN, **kwargs):
    self.on_start(run_type)
    self.results = EtlResult()
    logger.debug(f"Running loader {self}")
    lock = self.lock()
    truncate = self.config.truncate
    try:
        if lock:  # pragma: no branch
            if not ignore_dependencies:
                for requirement in self.config.depends:
                    if requirement.loader.is_running():
                        raise RequiredIsRunning(requirement)
                    if requirement.loader.need_refresh(self):
                        raise RequiredIsMissing(requirement)
                    else:
                        logger.info(f"Loader {requirement} is up to date")
            self.mapping = {}
            mart_fields = self.model._meta.concrete_fields
            for field in mart_fields:
                if field.name not in ['source_id', 'id', 'last_modify_date']:
                    self.mapping[field.name] = field.name
            if self.config.mapping:  # pragma: no branch
                self.mapping.update(self.config.mapping)
            self.update_context(today=timezone.now(),
                                max_records=max_records,
                                verbosity=verbosity,
                                records=0,
                                only_delta=only_delta,
                                is_empty=not self.model.objects.exists(),
                                stdout=stdout)
            sid = transaction.savepoint()
            try:
                self.results.context = self.context
                self.fields_to_compare = [f for f in self.mapping.keys()
                                          if f not in ["seen"]]
                if truncate:
                    self.model.objects.truncate()
                qs = self.filter_queryset(self.get_queryset())
                for record in qs.all():
                    filters = self.config.key(self, record)
                    values = self.get_values(record)
                    op = self.process_record(filters, values)
                    self.increment_counter(op)
                if stdout and verbosity > 0:
                    stdout.write("\n")
                # deleted = self.model.objects.exclude(seen=today).delete()[0]
                # self.results.deleted = deleted
            except MaxRecordsException:
                pass
            except BaseException:
                transaction.savepoint_rollback(sid)
                raise
        else:
            logger.info(f"Unable to get lock for {self}")
    except (RequiredIsMissing, RequiredIsRunning) as e:
        self.on_end(error=e, retry=True)
        raise
    except BaseException as e:
        self.on_end(e)
        process_exception(e)
        raise
    else:
        self.on_end(None)
    finally:
        if lock:  # pragma: no branch
            try:
                lock.release()
            except LockError as e:  # pragma: no cover
                logger.warning(e)
    return self.results
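# process_record() and increment_counter() are not part of this excerpt.
# A plausible sketch of the upsert step, assuming a Django
# get_or_create/save cycle; the operation labels ('created', 'updated',
# 'unchanged') are inferred from the EtlResult counters seen in the tests.
def process_record(self, filters, values):
    obj, created = self.model.objects.get_or_create(defaults=values, **filters)
    if created:
        return 'created'
    # Compare only the mapped fields (self.fields_to_compare) so that
    # bookkeeping columns such as "seen" never force a spurious update.
    if any(getattr(obj, f) != values[f]
           for f in self.fields_to_compare if f in values):
        for f, v in values.items():
            setattr(obj, f, v)
        obj.save()
        return 'updated'
    return 'unchanged'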
def test_load_error(loader1):
    # create=True lets mock.patch set the "results" attribute even though
    # it does not exist on the loader until load() has run.
    with mock.patch('%s.results' % fqn(loader1),
                    EtlResult(error="error"), create=True):
        loader1.on_end()
def etl_decoder(obj):
    if '__type__' in obj:
        if obj['__type__'] == '__EtlResult__':  # pragma: no cover
            from etools_datamart.apps.etl.loader import EtlResult
            return EtlResult(**obj['data'])
    return obj
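# etl_decoder is shaped as a json.loads object_hook: it undoes the
# "__type__" tagging exercised in the tests above. A round trip looks like:
import json

payload = etl_dumps(EtlResult(1, 1, 1))                  # tagged JSON string
restored = json.loads(payload, object_hook=etl_decoder)  # EtlResult again
assert isinstance(restored, EtlResult)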
def load(self, *, verbosity=0, stdout=None, ignore_dependencies=False,
         max_records=None, only_delta=True, run_type=RUN_UNKNOWN, **kwargs):
    self.on_start(run_type)
    self.results = EtlResult()
    logger.debug(f"Running loader {self}")
    lock = self.lock()
    truncate = self.config.truncate
    try:
        if lock:  # pragma: no branch
            if not ignore_dependencies:
                for requirement in self.config.depends:
                    if requirement.loader.is_running():
                        raise RequiredIsRunning(requirement)
                    if requirement.loader.need_refresh(self):
                        # if not force_requirements:
                        raise RequiredIsMissing(requirement)
                        # else:
                        #     logger.info(f"Load required dataset {requirement}")
                        #     requirement.loader.task.apply_async(
                        #         kwargs={"force_requirements": force_requirements,
                        #                 "run_type": RUN_AS_REQUIREMENT}
                        #     )
                        #     raise RequiredIsQueued(requirement)
                        # logger.info(f"Load required dataset {requirement}")
                        # requirement.loader.load(stdout=stdout,
                        #                         force_requirements=force_requirements,
                        #                         run_type=RUN_AS_REQUIREMENT)
                    else:
                        logger.info(f"Loader {requirement} is up to date")
            self.mapping = {}
            mart_fields = self.model._meta.concrete_fields
            for field in mart_fields:
                if field.name not in ['country_name', 'schema_name', 'area_code',
                                      'source_id', 'id', 'last_modify_date']:
                    self.mapping[field.name] = field.name
            if self.config.mapping:  # pragma: no branch
                self.mapping.update(self.config.mapping)
            self.update_context(today=timezone.now(),
                                max_records=max_records,
                                verbosity=verbosity,
                                records=0,
                                only_delta=only_delta,
                                is_empty=not self.model.objects.exists(),
                                stdout=stdout)
            sid = transaction.savepoint()
            try:
                self.results.context = self.context
                self.fields_to_compare = [f for f in self.mapping.keys()
                                          if f not in ["seen"]]
                if truncate:
                    self.model.objects.truncate()
                self.process_country()
                if self.config.sync_deleted_records(self):
                    self.remove_deleted()
                if stdout and verbosity > 0:
                    stdout.write("\n")
                # deleted = self.model.objects.exclude(seen=today).delete()[0]
                # self.results.deleted = deleted
            except MaxRecordsException:
                pass
            except Exception:
                transaction.savepoint_rollback(sid)
                raise
        else:
            logger.info(f"Unable to get lock for {self}")
    except (RequiredIsMissing, RequiredIsRunning) as e:
        self.on_end(error=e, retry=True)
        raise
    except BaseException as e:
        self.on_end(e)
        process_exception(e)
        raise
    else:
        self.on_end(None)
    finally:
        if lock:  # pragma: no branch
            try:
                lock.release()
            except LockError as e:  # pragma: no cover
                logger.warning(e)
    return self.results
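# remove_deleted() is not shown, but the special-cased "seen" field and the
# commented-out lines above point at a mark-and-sweep: every record touched
# in this run is stamped with the context's "today" timestamp, and whatever
# was not stamped no longer exists at the source. A hedged sketch:
def remove_deleted(self):
    deleted = self.model.objects.exclude(seen=self.context['today']).delete()[0]
    self.results.deleted = deleted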
def load(self, *, verbosity=0, stdout=None, ignore_dependencies=False,
         max_records=None, only_delta=True, run_type=RUN_UNKNOWN,
         api_token=None, **kwargs):
    from .models import Organization, Source
    sources = Source.objects.filter(is_active=True)
    self.results = EtlResult()
    with push_scope() as scope:
        scope.set_tag("loader", "rapidpro.%s" % self.__class__.__name__)
        try:
            if api_token:
                Source.objects.get_or_create(api_token=api_token,
                                             defaults={'name': api_token})
                sources = sources.filter(api_token=api_token)
            self.on_start(run_type)
            for source in sources:
                if verbosity > 0:
                    stdout.write("Source %s" % source)
                client = TembaClient(config.RAPIDPRO_ADDRESS, source.api_token)
                oo = client.get_org()
                if verbosity > 0:
                    stdout.write(" fetching organization info")
                org, __ = Organization.objects.get_or_create(
                    source=source,
                    defaults={'name': oo.name,
                              'country': oo.country,
                              'primary_language': oo.primary_language,
                              'timezone': oo.timezone,
                              'date_style': oo.date_style,
                              'languages': oo.languages,
                              'anon': oo.anon})
                if verbosity > 0:
                    stdout.write(" found organization %s" % oo.name)
                func = "get_%s" % self.config.source
                getter = getattr(client, func)
                args_spec = inspect.getfullargspec(getter)
                if 'after' in args_spec.args and self.etl_task.last_success:
                    filters = dict(after=self.etl_task.last_success)
                else:
                    filters = {}
                data = getter(**filters)
                self.update_context(today=timezone.now(),
                                    max_records=max_records,
                                    verbosity=verbosity,
                                    records=0,
                                    only_delta=only_delta,
                                    is_empty=not self.model.objects.exists(),
                                    stdout=stdout,
                                    organization=source.organization)
                if verbosity > 0:
                    stdout.write(" fetching data")
                for page in data.iterfetches():
                    for entry in page:
                        self.source_record = entry
                        filters = self.config.key(self, entry)
                        values = self.get_values(entry)
                        # values['organization'] = source.organization
                        # filters = {'uuid': values['uuid']}
                        op = self.process_record(filters, values)
                        self.increment_counter(op)
        except MaxRecordsException:
            pass
        except Exception as e:
            self.on_end(error=e)
            raise
        finally:
            self.on_end()
    return self.results
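# MaxRecordsException is caught above but never raised in this excerpt;
# increment_counter() is its natural origin. A hedged sketch, assuming
# EtlResult keeps one counter attribute per operation and update_context()
# seeded 'records' and 'max_records' into self.context:
def increment_counter(self, op):
    # op is one of 'created', 'updated', 'unchanged' (see process_record).
    setattr(self.results, op, getattr(self.results, op) + 1)
    self.context['records'] += 1
    max_records = self.context['max_records']
    if max_records and self.context['records'] >= max_records:
        raise MaxRecordsException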