Example #1
    def read(self):

        if len(self.dimensions) == 1:
            yield JSONStream("results_" + self.report_suite_id,
                             self.read_one_dimension())
        elif len(self.dimensions) > 1:
            yield JSONStream("results_" + self.report_suite_id,
                             self.read_through_graph())
Example #2
    def read(self):
        if self.date_are_valid:
            if self.request_type == "performance":
                dict_stat, dict_camp, dict_banner = self.__retrieve_all_data()

                complete_daily_content = self.map_campaign_name_to_daily_stat(dict_stat, dict_camp, dict_banner)
                yield JSONStream("mytarget_performance_", self.split_content_by_date(complete_daily_content))
            if self.request_type == "budget":
                res_dates = self.__get_all_results("get_campaign_dates")
                res_budgets = self.__get_all_results("get_campaign_budgets")

                budget_with_dates = self.map_budget_to_date_range(res_dates, res_budgets)
                yield JSONStream("mytarget_budget_", self.__yield_from_list(budget_with_dates))
Example #3
    def read(self):
        def result_generator():

            watermark_value = None

            if self._watermark_column:
                watermark_value = self._redis_state_service.get(
                    self._name) or self._watermark_init

            if self._object_type:
                self._query = self.build_object_type_query(
                    self._object_type, self._watermark_column)

            if self._watermark_column:
                self._query = self._query.format(
                    **{self._watermark_column: watermark_value})

            records = self._client.query(self._query)

            for rec in records:
                row = self._clean_record(rec)
                yield row

                if self._watermark_column:
                    self._redis_state_service.set(self._name,
                                                  row[self._watermark_column])

        yield JSONStream(self._name, result_generator())
Example #4
    def read(self):
        if self.report_type == "ANALYTICS":
            entity_ids = self.get_active_entity_ids()

            total_jobs = (len(entity_ids) // MAX_ENTITY_IDS_PER_JOB) + 1
            logger.info(f"Processing a total of {total_jobs} jobs")

            data = []
            for chunk_entity_ids in split_list(
                    entity_ids, MAX_ENTITY_IDS_PER_JOB * MAX_CONCURRENT_JOBS):
                job_ids = self.get_job_ids(chunk_entity_ids)
                data += self.get_analytics_report(job_ids)

        elif self.report_type == "REACH":
            data = self.get_reach_report()

        elif self.report_type == "ENTITY":
            if self.entity == "CARD":
                data = self.get_cards_report()
            else:
                data = self.get_campaign_management_report()

        def result_generator():
            for record in data:
                yield self.add_request_or_period_dates(record)

        yield JSONStream("results_" + self.account.id, result_generator())
Example #5
    def read(self):
        if self.manager_id:
            self.client_customer_ids = self.get_customer_ids(self.manager_id)

        yield JSONStream(
            "results_" + self.report_name + "_" +
            "_".join(self.client_customer_ids), self.format_and_yield())
Example #6
    def read(self):
        if not self.advertiser_ids:
            self.advertiser_ids = self.sa360_client.get_all_advertisers_of_agency(
                self.agency_id)

        yield JSONStream("results" + "_".join(self.advertiser_ids),
                         self.result_generator())
Example #7
    def read(self):

        for prefix in self._prefix_list:

            objects_sorted_by_time = sorted(
                self.list_objects(bucket=self._bucket, prefix=prefix),
                key=lambda o: self.get_timestamp(o),
            )

            for _object in objects_sorted_by_time:

                _object = self.to_object(_object)

                logger.info(
                    f"Found {self._platform} file {self.get_key(_object)}")

                if not self.is_compatible_object(_object):
                    logger.info(
                        f"Wrong extension: Skipping file {self.get_key(_object)}"
                    )
                    continue

                name = self.get_key(_object).split("/",
                                                   self._dest_key_split)[-1]

                yield JSONStream(name, self._result_generator(_object))
Example #8
    def read(self):
        sheet = self._gc.open_by_key(self._sheet_key).get_worksheet(
            self._page_number)
        list_of_hashes = sheet.get_all_records()

        def result_generator():
            for record in list_of_hashes:
                yield record

        yield JSONStream("gsheet", result_generator())
Example #9
    def read(self):
        request_type = self.kwargs.get("request_type")
        if request_type == "sdf_request":
            yield FormatDateStream(
                "sdf",
                self.__get_sdf_objects(),
                keys=["Date"],
                date_format=self.kwargs.get("date_format"),
            )
        elif request_type == "creative_request":
            yield JSONStream("advertiser_creatives", self.__get_creatives())
Example #10
    def read(self):

        client = gspread.authorize(self._credentials)
        spreadsheet = client.open_by_url(self._url)

        for _sheet_name in self._sheet_name:

            worksheet = spreadsheet.worksheet(_sheet_name)

            def result_generator():
                for record in worksheet.get_all_records():
                    yield record

            yield JSONStream(worksheet.title, result_generator())
Example #11
    def read(self):
        """
        :return: stream that returns Radarly posts one by one
        """
        date_ranges_and_posts_volumes: Dict = self.split_date_range()
        logger.info(
            f"API Compliant Date Ranges and Posts Volumes: {date_ranges_and_posts_volumes}"
        )
        api_compliant_date_ranges = list(date_ranges_and_posts_volumes.keys())

        t0 = time.time()
        ingestion_tracker = []

        for i, date_range in enumerate(api_compliant_date_ranges):

            if self.throttle:
                current_time = time.time() - t0
                ingestion_tracker.append(current_time)
                posts_ingested_over_window = (sum(
                    np.array(ingestion_tracker) > current_time -
                    self.api_window) * self.api_date_period_limit)
                if posts_ingested_over_window > self.throttling_threshold_coefficient * self.api_quarterly_posts_limit:
                    sleep_duration = self.api_window * (
                        self.api_date_period_limit /
                        self.api_quarterly_posts_limit)
                    logger.info(
                        f"Throttling activated: waiting for {sleep_duration} seconds..."
                    )
                    time.sleep(sleep_duration)

            all_publications = self.get_publications_iterator(date_range)
            name = (
                f"radarly_{date_range[0].strftime('%Y-%m-%d-%H-%M-%S')}"
                f"_{date_range[1].strftime('%Y-%m-%d-%H-%M-%S')}"
            )

            def result_generator():
                while True:
                    try:
                        pub = next(all_publications)
                        yield dict(pub)
                    except StopIteration:
                        break
                    except Exception:
                        ex_type, ex, tb = sys.exc_info()
                        logger.warning(
                            f"Failed to ingest post with error: {ex}. "
                            f"Traceback: {''.join(traceback.format_tb(tb))}"
                        )

            yield JSONStream(name, result_generator())
Example #12
    def read(self):
        self._get_report_template_id()
        self._create_report_schedule()
        self._wait_for_download_url()
        data = self._download_report()

        def result_generator():
            for record in data:
                yield {
                    k: format_date(v) if k == "Date" else v
                    for k, v in record.items()
                }

        yield JSONStream("results_" + "_".join(self.advertiser_ids),
                         result_generator())

        self._delete_report_schedule()
Example #13
    def _run_query(self):
        logger.info(f"Running MySQL query {self._query}")

        rows = self._engine.execute(self._query)

        logger.info(f"MySQL result set contains {rows.rowcount} rows")

        def result_generator():
            row = rows.fetchone()
            while row:
                yield dict(row.items())

                if self._watermark_column:
                    self._redis_state_service.set(self._name,
                                                  row[self._watermark_column])

                row = rows.fetchone()
            rows.close()

        return JSONStream(self._name, result_generator())
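Unlike the other examples, Example #13 builds the stream in a private _run_query helper and returns it rather than yielding it from read(). A minimal sketch of a read() wrapper that would fit the yield pattern used elsewhere (hypothetical, not necessarily the connector's actual code):

    def read(self):
        # Hypothetical wrapper: delegate to _run_query() and yield the
        # resulting JSONStream, matching the read()/yield pattern above.
        yield self._run_query()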
Example #14
    def read(self):
        yield JSONStream("results" + "_".join(self.profile_ids), self.result_generator())
Example #15
    def read(self):
        yield JSONStream("results_", self._get_aggregated_report_generator())
Example #16
    def read(self):
        yield JSONStream(f"results_{self.report_type}",
                         self.result_generator())
Example #17
    def read(self):
        yield JSONStream("results_CAMPAIGN_OBJECT_REPORT_",
                         self.result_generator())
Example #18
    def read(self):

        yield JSONStream(
            "results_" + self.object_type + "_" + "_".join(self.object_ids),
            self.get_data())
Example #19
def mock_stream(list_dict, name):
    return JSONStream(name, dict_generator(list_dict))
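The mock_stream test helper above relies on a dict_generator utility that is not shown here. A minimal sketch of such a helper, assuming it simply yields each dict from the list so that JSONStream consumes the records lazily (hypothetical, not the project's actual implementation):

def dict_generator(list_dict):
    # Assumed behaviour: yield each record one by one for lazy consumption.
    for record in list_dict:
        yield record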
Example #20
    def read(self):
        yield JSONStream("result_view_" + "_".join(self.view_ids),
                         self.result_generator())
Example #21
    def read(self):
        yield JSONStream("search_console_results", self.result_generator())
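Taken together, the examples share one pattern: read() yields one or more JSONStream objects, each wrapping a stream name and a generator of dict records. A minimal, hypothetical reader illustrating that pattern (assuming only that JSONStream takes a name and an iterable of dicts; class and attribute names are illustrative, not taken from any of the connectors above):

class MinimalReader:
    """Hypothetical reader sketch; names and constructor are illustrative only."""

    def __init__(self, name, records):
        self._name = name        # used to label the resulting stream
        self._records = records  # any iterable of dict records

    def read(self):
        def result_generator():
            for record in self._records:
                yield record  # records are streamed lazily, one at a time

        yield JSONStream("results_" + self._name, result_generator())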