예제 #1
0
def test_encoder():
    """EtlEncoder.encode must render an EtlResult as the tagged JSON envelope."""
    # Envelope: a "__type__" marker plus the result's fields under "data".
    expected = (
        '{"__type__": "__EtlResult__", '
        '"data": {"created": 1, "updated": 1, "unchanged": 1, "deleted": 0, "status": "SUCCESS", "error": null, "processed": 0, "total_records": 0}}'
    )
    encoder = EtlEncoder()
    encoded = encoder.encode(EtlResult(1, 1, 1))
    assert encoded == expected
예제 #2
0
def test_dumps():
    """etl_dumps must serialise an EtlResult into the tagged JSON envelope."""
    serialised = etl_dumps(EtlResult(1, 1, 1))
    expected = '{"__type__": "__EtlResult__", "data": {"created": 1, "updated": 1, "unchanged": 1, "deleted": 0, "status": "SUCCESS", "error": null, "processed": 0, "total_records": 0}}'
    assert serialised == expected
예제 #3
0
    def load(self, *, verbosity=0, stdout=None,
             ignore_dependencies=False, max_records=None,
             only_delta=True, run_type=RUN_UNKNOWN, **kwargs):
        """Run this loader once and return the collected ``EtlResult``.

        The run starts with ``on_start(run_type)`` and always ends with one
        ``on_end`` call. Work is only done when ``self.lock()`` returns a
        truthy lock; otherwise a message is logged and nothing is loaded.

        Args:
            verbosity: stored in the context; a trailing newline is written
                to ``stdout`` when it is greater than 0.
            stdout: optional stream, stored in the context.
            ignore_dependencies: when true, the checks on
                ``self.config.depends`` are skipped.
            max_records: stored in the context.
            only_delta: stored in the context.
            run_type: forwarded to ``on_start``.
            **kwargs: accepted and not used by this method.

        Returns:
            ``self.results``, the ``EtlResult`` created at the start of the run.

        Raises:
            RequiredIsRunning: a dependency's loader is currently running.
            RequiredIsMissing: a dependency's loader reports it needs a refresh.
            BaseException: anything else raised while loading is re-raised
                after ``on_end`` and ``process_exception`` have been called.
        """
        self.on_start(run_type)
        self.results = EtlResult()
        logger.debug(f"Running loader {self}")
        held_lock = self.lock()
        wipe_first = self.config.truncate
        try:
            if held_lock:  # pragma: no branch
                if not ignore_dependencies:
                    for requirement in self.config.depends:
                        if requirement.loader.is_running():
                            raise RequiredIsRunning(requirement)
                        if requirement.loader.need_refresh(self):
                            raise RequiredIsMissing(requirement)
                        logger.info(f"Loader {requirement} is uptodate")

                # Default mapping: every concrete field maps to itself, except
                # the fields listed below; config overrides are applied on top.
                self.mapping = {}
                excluded = ('source_id', 'id', 'last_modify_date')
                for field in self.model._meta.concrete_fields:
                    if field.name in excluded:
                        continue
                    self.mapping[field.name] = field.name
                overrides = self.config.mapping
                if overrides:  # pragma: no branch
                    self.mapping.update(overrides)

                started_at = timezone.now()
                target_is_empty = not self.model.objects.exists()
                self.update_context(
                    today=started_at,
                    max_records=max_records,
                    verbosity=verbosity,
                    records=0,
                    only_delta=only_delta,
                    is_empty=target_is_empty,
                    stdout=stdout,
                )

                savepoint_id = transaction.savepoint()
                try:
                    self.results.context = self.context
                    self.fields_to_compare = [
                        name for name in self.mapping if name != "seen"
                    ]
                    if wipe_first:
                        self.model.objects.truncate()
                    source_rows = self.filter_queryset(self.get_queryset())
                    for row in source_rows.all():
                        lookup = self.config.key(self, row)
                        payload = self.get_values(row)
                        outcome = self.process_record(lookup, payload)
                        self.increment_counter(outcome)

                    if stdout and verbosity > 0:
                        stdout.write("\n")
                    # deleted = self.model.objects.exclude(seen=today).delete()[0]
                    # self.results.deleted = deleted
                except MaxRecordsException:
                    # Reaching the record cap ends the run without an error.
                    pass
                except BaseException:
                    # Undo everything written since the savepoint, then propagate.
                    transaction.savepoint_rollback(savepoint_id)
                    raise
            else:
                logger.info(f"Unable to get lock for {self}")

        except (RequiredIsMissing, RequiredIsRunning) as e:
            # Dependency problems are reported with retry=True.
            self.on_end(error=e, retry=True)
            raise
        except BaseException as e:
            self.on_end(e)
            process_exception(e)
            raise
        else:
            self.on_end(None)
        finally:
            if held_lock:  # pragma: no branch
                try:
                    held_lock.release()
                except LockError as e:  # pragma: no cover
                    logger.warning(e)

        return self.results
예제 #4
0
def test_load_error(loader1):
    """on_end must cope with a loader whose results already carry an error."""
    target = '%s.results' % fqn(loader1)
    failed_result = EtlResult(error="error")
    # create=True lets the patch add `results` even if the attribute is absent.
    with mock.patch(target, failed_result, create=True):
        loader1.on_end()
예제 #5
0
def etl_decoder(obj):
    """Rebuild an ``EtlResult`` from its tagged form, else return ``obj`` as is.

    ``obj`` is returned untouched unless it contains a ``'__type__'`` entry
    equal to ``'__EtlResult__'``; in that case ``obj['data']`` is expanded as
    keyword arguments to ``EtlResult``.
    """
    is_tagged_result = '__type__' in obj and obj['__type__'] == '__EtlResult__'
    if not is_tagged_result:
        return obj
    # Imported at call time rather than at module import.
    from etools_datamart.apps.etl.loader import EtlResult  # pragma: no cover
    return EtlResult(**obj['data'])  # pragma: no cover
예제 #6
0
    def load(self,
             *,
             verbosity=0,
             stdout=None,
             ignore_dependencies=False,
             max_records=None,
             only_delta=True,
             run_type=RUN_UNKNOWN,
             **kwargs):
        """Run this loader once and return the collected ``EtlResult``.

        The run starts with ``on_start(run_type)`` and always ends with one
        ``on_end`` call. Loading only happens when ``self.lock()`` returns a
        truthy lock; otherwise a message is logged and nothing is loaded.
        The records themselves are handled by ``self.process_country()``,
        followed by ``self.remove_deleted()`` when
        ``self.config.sync_deleted_records(self)`` is truthy.

        Args:
            verbosity: stored in the context; a trailing newline is written
                to ``stdout`` when it is greater than 0.
            stdout: optional stream, stored in the context.
            ignore_dependencies: when true, the checks on
                ``self.config.depends`` are skipped.
            max_records: stored in the context.
            only_delta: stored in the context.
            run_type: forwarded to ``on_start``.
            **kwargs: accepted and not used by this method.

        Returns:
            ``self.results``, the ``EtlResult`` created at the start of the run.

        Raises:
            RequiredIsRunning: a dependency's loader is currently running.
            RequiredIsMissing: a dependency's loader reports it needs a refresh.
            BaseException: anything else raised while loading is re-raised
                after ``on_end`` and ``process_exception`` have been called.
        """
        self.on_start(run_type)
        self.results = EtlResult()
        logger.debug(f"Running loader {self}")
        run_lock = self.lock()
        truncate_requested = self.config.truncate
        try:
            if run_lock:  # pragma: no branch
                if not ignore_dependencies:
                    for requirement in self.config.depends:
                        if requirement.loader.is_running():
                            raise RequiredIsRunning(requirement)
                        # A stale requirement aborts the run; it is not
                        # loaded or queued from here.
                        if requirement.loader.need_refresh(self):
                            raise RequiredIsMissing(requirement)
                        logger.info(f"Loader {requirement} is uptodate")

                # Identity mapping for every concrete field except the ones
                # listed below; config overrides are applied afterwards.
                not_mapped = (
                    'country_name', 'schema_name', 'area_code',
                    'source_id', 'id', 'last_modify_date',
                )
                self.mapping = {}
                for field in self.model._meta.concrete_fields:
                    if field.name in not_mapped:
                        continue
                    self.mapping[field.name] = field.name
                configured_mapping = self.config.mapping
                if configured_mapping:  # pragma: no branch
                    self.mapping.update(configured_mapping)

                run_started = timezone.now()
                nothing_loaded_yet = not self.model.objects.exists()
                self.update_context(
                    today=run_started,
                    max_records=max_records,
                    verbosity=verbosity,
                    records=0,
                    only_delta=only_delta,
                    is_empty=nothing_loaded_yet,
                    stdout=stdout,
                )

                restore_point = transaction.savepoint()
                try:
                    self.results.context = self.context
                    self.fields_to_compare = [
                        key for key in self.mapping if key != "seen"
                    ]
                    if truncate_requested:
                        self.model.objects.truncate()
                    self.process_country()
                    if self.config.sync_deleted_records(self):
                        self.remove_deleted()
                    if stdout and verbosity > 0:
                        stdout.write("\n")
                    # deleted = self.model.objects.exclude(seen=today).delete()[0]
                    # self.results.deleted = deleted
                except MaxRecordsException:
                    # Reaching the record cap ends the run without an error.
                    pass
                except Exception:
                    # Undo everything written since the savepoint, then propagate.
                    transaction.savepoint_rollback(restore_point)
                    raise
            else:
                logger.info(f"Unable to get lock for {self}")

        except (RequiredIsMissing, RequiredIsRunning) as e:
            # Dependency problems are reported with retry=True.
            self.on_end(error=e, retry=True)
            raise
        except BaseException as e:
            self.on_end(e)
            process_exception(e)
            raise
        else:
            self.on_end(None)
        finally:
            if run_lock:  # pragma: no branch
                try:
                    run_lock.release()
                except LockError as e:  # pragma: no cover
                    logger.warning(e)

        return self.results
예제 #7
0
    def load(self,
             *,
             verbosity=0,
             stdout=None,
             ignore_dependencies=False,
             max_records=None,
             only_delta=True,
             run_type=RUN_UNKNOWN,
             api_token=None,
             **kwargs):
        """Load records from every active RapidPro ``Source`` and return the results.

        For each source a ``TembaClient`` is created, the organization is
        fetched and stored if missing, and the client method
        ``get_<self.config.source>`` is called. Every entry of every fetched
        page goes through ``process_record`` and ``increment_counter``.

        Args:
            verbosity: progress messages are written to ``stdout`` when this
                is greater than 0 and ``stdout`` is provided.
            stdout: optional stream for progress messages; also stored in
                the context.
            ignore_dependencies: accepted for signature compatibility; not
                used by this method.
            max_records: stored in the context.
            only_delta: stored in the context.
            run_type: forwarded to ``on_start``.
            api_token: when given, a ``Source`` with this token is created if
                missing and only sources with this token are loaded.
            **kwargs: accepted and not used by this method.

        Returns:
            ``self.results``, the ``EtlResult`` created at the start of the run.

        Raises:
            Exception: any error other than ``MaxRecordsException`` is
                re-raised after being reported through ``on_end(error=...)``.
        """
        from .models import Organization, Source
        sources = Source.objects.filter(is_active=True)
        self.results = EtlResult()
        # `stdout` defaults to None, so progress output must be guarded on it
        # as well as on verbosity.
        verbose = stdout is not None and verbosity > 0
        with push_scope() as scope:
            scope.set_tag("loader", "rapidpro.%s" % self.__class__.__name__)
            failure = None
            try:
                if api_token:
                    Source.objects.get_or_create(api_token=api_token,
                                                 defaults={'name': api_token})
                    sources = sources.filter(api_token=api_token)

                self.on_start(run_type)
                for source in sources:
                    if verbose:
                        stdout.write("Source %s" % source)
                    client = TembaClient(config.RAPIDPRO_ADDRESS,
                                         source.api_token)
                    oo = client.get_org()
                    if verbose:
                        stdout.write("  fetching organization info")

                    # Only the side effect matters: make sure the source has
                    # an Organization row before `source.organization` is read.
                    Organization.objects.get_or_create(
                        source=source,
                        defaults={
                            'name': oo.name,
                            'country': oo.country,
                            'primary_language': oo.primary_language,
                            'timezone': oo.timezone,
                            'date_style': oo.date_style,
                            'languages': oo.languages,
                            'anon': oo.anon
                        })
                    if verbose:
                        stdout.write("  found organization %s" % oo.name)

                    func = "get_%s" % self.config.source
                    getter = getattr(client, func)

                    # Ask only for records after the last successful run, and
                    # only when the client method accepts an `after` argument.
                    args_spec = inspect.getfullargspec(getter)
                    if 'after' in args_spec.args and self.etl_task.last_success:
                        fetch_filters = dict(after=self.etl_task.last_success)
                    else:
                        fetch_filters = {}

                    data = getter(**fetch_filters)
                    self.update_context(
                        today=timezone.now(),
                        max_records=max_records,
                        verbosity=verbosity,
                        records=0,
                        only_delta=only_delta,
                        is_empty=not self.model.objects.exists(),
                        stdout=stdout,
                        organization=source.organization)
                    if verbose:
                        stdout.write("  fetching data")
                    for page in data.iterfetches():
                        for entry in page:
                            self.source_record = entry
                            record_filters = self.config.key(self, entry)
                            values = self.get_values(entry)
                            op = self.process_record(record_filters, values)
                            self.increment_counter(op)

            except MaxRecordsException:
                # Reaching the record cap ends the run without an error.
                pass
            except Exception as e:
                # Remember the failure so the run is closed with it below.
                failure = e
                raise
            finally:
                # Close the run exactly once. Calling on_end(error=e) in the
                # handler and on_end() again here would report the failed run
                # a second time, without its error.
                if failure is None:
                    self.on_end()
                else:
                    self.on_end(error=failure)
        return self.results