Example #1
0
    def __call__(self, *args, **kwargs) -> Iterator[ExportContext]:
        """Generate fake rows and yield them as value-oriented export chunks.

        All ``self.rows`` rows are built up front. Each row holds one value per
        name in ``self.columns``, produced by calling the attribute of ``fake``
        with that name. The rows are then passed to ``chunker`` with a chunk
        size of one fifth of ``self.rows`` (or 1 when that truncates to 0).

        Yields:
            ExportContext: one context per chunk, with ``DataOrient.values``.
        """
        self.logger.info("Exportation data")

        rows = []
        for _ in range(self.rows):
            rows.append([getattr(fake, name)() for name in self.columns])

        # A fifth of the rows per chunk; fall back to 1 if that truncates to 0.
        chunk_size = int(self.rows / 5) or 1

        for part in chunker(rows, chunk_size):
            context = ExportContext(
                columns=self.columns,
                data=part,
                data_orient=DataOrient.values,
            )
            yield context
Example #2
0
    def __call__(self, *args, **kwargs) -> Iterator[ExportContext]:
        """Export rows from PostgreSQL and yield them in chunks.

        Steps:
            1. Render the export policy via ``self.model_templating`` and store
               it on ``self.export``.
            2. Connect and run every statement in ``sql_before``.
            3. Run ``self.export.sql`` if set; otherwise run a ``SELECT`` built
               from ``columns``, ``table``, ``where`` and ``order_by``.
            4. Yield the fetched rows in chunks of ``chunk_size``.
            5. Run every statement in ``sql_after`` (only reached once all
               chunks have been consumed without error).

        The connection is closed when the generator finishes, fails, or is
        closed early by the consumer.

        Yields:
            ExportContext: one context per chunk, with ``DataOrient.values``.
        """
        self.logger.info("Exportation data")

        self.export: "PostgresExportPolicy" = self.model_templating(
            *args, model=self.notebook.export)

        db = peewee.PostgresqlDatabase(
            database=self.export.database,
            user=self.export.user,
            password=self.export.password,
            host=self.export.host,
            port=self.export.port,
        )
        db.connect()
        try:
            for sql in self.export.sql_before:
                db.execute_sql(sql)

            if self.export.sql is None:
                # ``where`` and ``order_by`` are interpolated verbatim, so they
                # must already carry their own keywords and leading whitespace.
                query = "SELECT {columns}\nFROM {table}{where}{order_by}".format(
                    columns=",".join(self.export.columns),
                    table=self.export.table,
                    where=self.export.where,
                    order_by=self.export.order_by,
                )
            else:
                query = self.export.sql

            rows = db.execute_sql(query).fetchall()
            for chunk in chunker(rows, size=self.export.chunk_size):
                yield ExportContext(
                    columns=self.export.columns,
                    data=chunk,
                    data_orient=DataOrient.values,
                )

            for sql in self.export.sql_after:
                db.execute_sql(sql)
        finally:
            # Release the connection even when the consumer stops iterating
            # early (GeneratorExit) or a statement fails.
            db.close()
Example #3
0
    def __call__(self, *args, **kwargs) -> Iterator[ExportContext]:
        """Read a CSV file and yield its rows in chunks.

        The file path comes from ``self.collect_params``. When
        ``self.export.with_columns`` is ``None`` rows are read with
        ``csv.reader`` and emitted as ``DataOrient.values``; otherwise they are
        read with ``csv.DictReader`` (using ``self.export.columns`` as field
        names) and emitted as ``DataOrient.dict``.

        ``skip_begin_lines`` leading rows are discarded, plus one more row when
        ``with_columns`` is truthy. A file shorter than the rows to skip simply
        produces no data.

        Yields:
            ExportContext: one context per chunk of ``chunk_size`` rows.
        """
        self.logger.info("Exportation data")

        params = self.collect_params(*args, **self.export.dict())

        with open(
                params["file_path"],
                mode="r",
                newline=self.export.newline,
                encoding=self.export.encoding,
        ) as file:
            if self.export.with_columns is None:
                data_orient = DataOrient.values
                row_iterator = csv.reader(file, delimiter=self.export.sep)
            else:
                data_orient = DataOrient.dict
                row_iterator = csv.DictReader(file,
                                              fieldnames=self.export.columns,
                                              delimiter=self.export.sep)

            # Skip begin lines. Rows are lists or dicts, never None, so None is
            # a safe "exhausted" marker; stop as soon as the file runs out.
            for _ in range(self.export.skip_begin_lines):
                if next(row_iterator, None) is None:
                    break

            # Skip columns. A default is required here: a bare next() on an
            # exhausted reader would raise StopIteration inside this generator,
            # which Python converts into RuntimeError (PEP 479).
            if self.export.with_columns:
                next(row_iterator, None)

            for chunk in chunker(
                    row_iterator,
                    size=self.export.chunk_size,
            ):
                yield ExportContext(
                    export_kwargs={},
                    columns=self.export.columns,
                    data=chunk,
                    data_orient=data_orient,
                )
Example #4
0
    def __call__(self, *args, **kwargs) -> Iterator[ExportContext]:
        """Export rows from a SQLite database and yield them in chunks.

        Steps:
            1. Render the export policy via ``self.model_templating`` and store
               it on ``self.export``.
            2. Open ``self.export.db_path`` and run every statement in
               ``sql_before``.
            3. Run ``self.export.sql`` if set; otherwise run a ``SELECT`` built
               from ``columns``, ``table``, ``where`` and ``order_by``.
            4. Yield the fetched rows in chunks of ``chunk_size``.
            5. Run every statement in ``sql_after`` (only reached once all
               chunks have been consumed without error).

        The connection is closed when the generator finishes, fails, or is
        closed early by the consumer.

        Yields:
            ExportContext: one context per chunk, with ``DataOrient.values``.
        """
        self.logger.info("Exportation data")

        self.export: "SQLiteExportPolicy" = self.model_templating(
            *args, model=self.config.export)

        db = peewee.SqliteDatabase(self.export.db_path)
        db.connect()
        try:
            for sql in self.export.sql_before:
                db.execute_sql(sql)

            if self.export.sql is None:
                # ``where`` and ``order_by`` are interpolated verbatim, so they
                # must already carry their own keywords and leading whitespace.
                query = "SELECT {columns}\nFROM {table}{where}{order_by}".format(
                    columns=",".join(self.export.columns),
                    table=self.export.table,
                    where=self.export.where,
                    order_by=self.export.order_by,
                )
            else:
                query = self.export.sql
            cursor = db.execute_sql(query)

            for chunk in chunker(
                    cursor.fetchall(),
                    size=self.export.chunk_size,
            ):
                yield ExportContext(
                    export_kwargs={},
                    columns=self.export.columns,
                    data=chunk,
                    data_orient=DataOrient.values,
                )

            for sql in self.export.sql_after:
                db.execute_sql(sql)
        finally:
            # Release the connection even when the consumer stops iterating
            # early (GeneratorExit) or a statement fails.
            db.close()
Example #5
0
    def __call__(self, start_period: dt.datetime, end_period: dt.datetime,
                 **kwargs) -> Iterator[Union[ExportContext, SleepIteration]]:
        """Request the Criteo adset statistics report and yield its rows.

        A JSON report is requested for the dates of ``start_period`` and
        ``end_period`` with the dimensions, metrics and currency configured in
        ``self.export.params``. On HTTP 200 the ``"Rows"`` of the response are
        yielded, split into chunks when ``self.export.chunk_size`` is truthy
        and as a single context otherwise.

        Args:
            start_period: Start of the reporting period (only the date is sent).
            end_period: End of the reporting period (only the date is sent).
            **kwargs: Accepted for interface compatibility; unused here.

        Yields:
            ExportContext: Report rows with ``DataOrient.dict`` and the
                response headers in ``response_kwargs``.
            SleepIteration: A pause request before a retry, while the retry
                budget lasts.

        Raises:
            AuthError: On HTTP 401 or a ``credentials_no_longer_supported``
                error code in the response body.
            ForbiddenError: On HTTP 403.
            ApiException: Any other API error once retries are exhausted.
            Exception: On a non-200 response once retries are exhausted.
        """
        import criteo_marketing_transition as cm
        from criteo_marketing_transition.rest import ApiException
        from flowmaster.operators.etl import DataOrient

        self.logger.info("Exportation data")

        columns = self.export.params.dimensions + self.export.params.metrics
        stats_query_message = cm.StatisticsReportQueryMessage(
            dimensions=self.export.params.dimensions,
            metrics=self.export.params.metrics,
            start_date=start_period.date().isoformat(),
            end_date=end_period.date().isoformat(),
            currency=self.export.params.currency,
            format="json",
        )

        iter_num = 0
        # NOTE(review): the retry budget is 0, so every retry branch below is
        # currently disabled and failures propagate on the first attempt.
        # Confirm whether this is intentional before raising it.
        api_error_retries = 0
        while True:
            self.logger.info("Iter export data")
            iter_num += 1
            try:
                (
                    response_content,
                    http_code,
                    response_headers,
                ) = self.analytics_api.get_adset_report_with_http_info(
                    statistics_report_query_message=stats_query_message,
                    async_req=True,
                    _preload_content=False,
                ).get()

            except ApiException as exc:
                # The body is not guaranteed to be a mapping: it may be absent
                # or raw JSON text. Normalise it so the error-code lookup
                # cannot raise AttributeError and mask the API error.
                body = exc.body
                if isinstance(body, (bytes, bytearray, str)):
                    try:
                        body = orjson.loads(body)
                    except ValueError:
                        body = None
                error_code = (body.get("error")
                              if isinstance(body, dict) else None)

                if (exc.status == 401
                        or error_code == "credentials_no_longer_supported"):
                    raise AuthError(exc) from exc

                if exc.status == 403:
                    raise ForbiddenError(exc) from exc

                if exc.status == 429 and api_error_retries:
                    api_error_retries -= 1
                    # https://developers.criteo.com/marketing-solutions/docs/requesting-a-report
                    yield SleepIteration(sleep=60)
                    continue

                if exc.status in (500, 503) and api_error_retries:
                    # https://developers.criteo.com/marketing-solutions/docs/how-to-handle-api-errors
                    api_error_retries -= 1
                    # The pause grows with the attempt number.
                    yield SleepIteration(sleep=iter_num * 20)
                    continue

                raise
            else:
                if http_code != 200:
                    if api_error_retries:
                        api_error_retries -= 1
                        yield SleepIteration(sleep=10)
                        # Retry the request instead of falling through to the
                        # raise below right after sleeping.
                        continue

                    raise Exception(
                        f"CriteoError: code={http_code}, headers={response_headers}\n"
                        f"response_content={response_content}")

                if response_content:
                    # "utf-8-sig" drops a leading byte-order mark if present.
                    content = response_content.read().decode("utf-8-sig")
                    rows = orjson.loads(content)["Rows"]

                    if self.export.chunk_size:
                        chunks = chunker(rows, size=self.export.chunk_size)
                    else:
                        chunks = [rows]

                    for chunk in chunks:
                        yield ExportContext(
                            columns=columns,
                            data=chunk,
                            data_orient=DataOrient.dict,
                            response_kwargs={"headers": response_headers},
                        )
                else:
                    self.logger.warning("Didn't receive data")

                break