def read(self):
    """Yield one JSONStream over the report results, choosing the read
    strategy from the number of requested dimensions.

    With zero dimensions nothing is yielded (matches prior behavior).
    """
    dimension_count = len(self.dimensions)
    if dimension_count == 0:
        return
    if dimension_count == 1:
        records = self.read_one_dimension()
    else:
        records = self.read_through_graph()
    yield JSONStream("results_" + self.report_suite_id, records)
def read(self):
    """Yield a JSONStream for the configured request type.

    Nothing is yielded when the supplied dates are not valid. Both
    request-type checks are evaluated independently, as in the original.
    """
    if not self.date_are_valid:
        return
    if self.request_type == "performance":
        dict_stat, dict_camp, dict_banner = self.__retrieve_all_data()
        daily_content = self.map_campaign_name_to_daily_stat(
            dict_stat, dict_camp, dict_banner)
        yield JSONStream(
            "mytarget_performance_",
            self.split_content_by_date(daily_content))
    if self.request_type == "budget":
        res_dates = self.__get_all_results("get_campaign_dates")
        res_budgets = self.__get_all_results("get_campaign_budgets")
        budget_with_dates = self.map_budget_to_date_range(
            res_dates, res_budgets)
        yield JSONStream(
            "mytarget_budget_", self.__yield_from_list(budget_with_dates))
def read(self):
    """Build and execute the configured query, yielding one JSONStream.

    When a watermark column is configured, the last persisted watermark
    (or the configured initial value) is substituted into the query, and
    the watermark state is written back to Redis after each emitted row.
    """
    def result_generator():
        watermark_value = None
        if self._watermark_column:
            stored = self._redis_state_service.get(self._name)
            watermark_value = stored or self._watermark_init
        if self._object_type:
            self._query = self.build_object_type_query(
                self._object_type, self._watermark_column)
        if self._watermark_column:
            self._query = self._query.format(
                **{self._watermark_column: watermark_value})
        for record in self._client.query(self._query):
            cleaned = self._clean_record(record)
            yield cleaned
            if self._watermark_column:
                self._redis_state_service.set(
                    self._name, cleaned[self._watermark_column])

    yield JSONStream(self._name, result_generator())
def read(self):
    """Yield a JSONStream of report records for the configured report type.

    ANALYTICS reports are fetched in chunks of entity ids (bounded by the
    per-job size and concurrency limits); REACH and ENTITY reports are
    fetched in a single call. Request/period dates are appended to every
    record as it is streamed.
    """
    if self.report_type == "ANALYTICS":
        entity_ids = self.get_active_entity_ids()
        # Ceiling division: the previous `len(...) // MAX + 1` over-counted
        # by one job whenever the id count was an exact multiple of
        # MAX_ENTITY_IDS_PER_JOB (and logged 1 job for zero ids).
        total_jobs = -(-len(entity_ids) // MAX_ENTITY_IDS_PER_JOB)
        logger.info(f"Processing a total of {total_jobs} jobs")
        data = []
        for chunk_entity_ids in split_list(
                entity_ids, MAX_ENTITY_IDS_PER_JOB * MAX_CONCURRENT_JOBS):
            job_ids = self.get_job_ids(chunk_entity_ids)
            data += self.get_analytics_report(job_ids)
    elif self.report_type == "REACH":
        data = self.get_reach_report()
    elif self.report_type == "ENTITY":
        if self.entity == "CARD":
            data = self.get_cards_report()
        else:
            data = self.get_campaign_management_report()
    # NOTE(review): for any other report_type, `data` stays unbound and the
    # stream raises NameError when consumed — confirm report_type is
    # validated upstream before hardening this.

    def result_generator():
        for record in data:
            yield self.add_request_or_period_dates(record)

    yield JSONStream("results_" + self.account.id, result_generator())
def read(self):
    """Yield a single JSONStream over the formatted report rows.

    When a manager account id is configured, the client customer ids are
    first resolved from that manager account.
    """
    if self.manager_id:
        self.client_customer_ids = self.get_customer_ids(self.manager_id)
    # Equivalent to "results_" + report_name + "_" + joined ids,
    # including the trailing "_" when the id list is empty.
    stream_name = "_".join(
        ["results", self.report_name, "_".join(self.client_customer_ids)])
    yield JSONStream(stream_name, self.format_and_yield())
def read(self):
    """Yield a single JSONStream over the results of every advertiser.

    When no advertiser ids were supplied, all advertisers of the agency
    are fetched first.
    """
    if not self.advertiser_ids:
        self.advertiser_ids = self.sa360_client.get_all_advertisers_of_agency(
            self.agency_id)
    # NOTE(review): unlike sibling readers there is no "_" between
    # "results" and the first id ("results<id>_...") — confirm downstream
    # naming before normalizing this string.
    stream_name = "results" + "_".join(self.advertiser_ids)
    yield JSONStream(stream_name, self.result_generator())
def read(self):
    """Walk every configured prefix in the bucket and yield one JSONStream
    per compatible object, oldest first (sorted by object timestamp)."""
    for prefix in self._prefix_list:
        candidates = self.list_objects(bucket=self._bucket, prefix=prefix)
        for raw_object in sorted(candidates,
                                 key=lambda o: self.get_timestamp(o)):
            obj = self.to_object(raw_object)
            key = self.get_key(obj)
            logger.info(f"Found {self._platform} file {key}")
            if not self.is_compatible_object(obj):
                logger.info(f"Wrong extension: Skipping file {key}")
                continue
            # Keep only the tail of the key after the configured number
            # of "/" splits as the stream name.
            name = key.split("/", self._dest_key_split)[-1]
            yield JSONStream(name, self._result_generator(obj))
def read(self):
    """Yield a single JSONStream named "gsheet" containing every row of
    the configured worksheet page as a dict."""
    worksheet = self._gc.open_by_key(self._sheet_key).get_worksheet(
        self._page_number)
    rows = worksheet.get_all_records()
    # An iterator over the fetched rows is equivalent to the original
    # pass-through generator.
    yield JSONStream("gsheet", iter(rows))
def read(self):
    """Dispatch on the requested request type and yield the matching
    stream ("Date" fields of SDF objects are reformatted on the fly)."""
    request_type = self.kwargs.get("request_type")
    if request_type == "sdf_request":
        sdf_stream = FormatDateStream(
            "sdf",
            self.__get_sdf_objects(),
            keys=["Date"],
            date_format=self.kwargs.get("date_format"),
        )
        yield sdf_stream
    elif request_type == "creative_request":
        yield JSONStream("advertiser_creatives", self.__get_creatives())
def read(self):
    """Yield one JSONStream per configured sheet name, streaming each
    worksheet's rows as dicts."""
    client = gspread.authorize(self._credentials)
    spreadsheet = client.open_by_url(self._url)
    for sheet_name in self._sheet_name:
        worksheet = spreadsheet.worksheet(sheet_name)

        # Bind the worksheet as a default argument: the original closure
        # captured the loop variable late, so if the yielded streams were
        # consumed after the loop advanced, every generator read from the
        # last worksheet (classic late-binding closure bug).
        def result_generator(ws=worksheet):
            for record in ws.get_all_records():
                yield record

        yield JSONStream(worksheet.title, result_generator())
def read(self):
    """
    :return: stream that returns Radarly posts one by one

    One JSONStream is yielded per API-compliant date range. When
    throttling is enabled, ingestion is slowed down once the estimated
    post volume over the rolling API window approaches the quarterly
    quota.
    """
    date_ranges_and_posts_volumes: Dict = self.split_date_range()
    logger.info(
        f"API Compliant Date Ranges and Posts Volumes: {date_ranges_and_posts_volumes}"
    )
    api_compliant_date_ranges = list(date_ranges_and_posts_volumes.keys())
    t0 = time.time()
    ingestion_tracker = []
    for date_range in api_compliant_date_ranges:
        if self.throttle:
            # Estimate posts ingested within the rolling window: number of
            # range starts inside the window times the per-period limit.
            current_time = time.time() - t0
            ingestion_tracker.append(current_time)
            posts_ingested_over_window = (sum(
                np.array(ingestion_tracker) > current_time - self.api_window)
                * self.api_date_period_limit)
            if posts_ingested_over_window > self.throttling_threshold_coefficient * self.api_quarterly_posts_limit:
                sleep_duration = self.api_window * (
                    self.api_date_period_limit / self.api_quarterly_posts_limit)
                logger.info(
                    f"Throttling activated: waiting for {sleep_duration} seconds..."
                )
                time.sleep(sleep_duration)
        all_publications = self.get_publications_iterator(date_range)
        # Single-line f-string produces the same name as the original
        # triple-quoted version (the embedded newline sat inside a
        # replacement field and never reached the output).
        start_str = date_range[0].strftime("%Y-%m-%d-%H-%M-%S")
        end_str = date_range[1].strftime("%Y-%m-%d-%H-%M-%S")
        name = f"radarly_{start_str}_{end_str}"

        # Bind the iterator as a default argument: the original closure
        # captured the loop variable late, so streams consumed after the
        # loop advanced would all drain the last iterator.
        def result_generator(publications=all_publications):
            while True:
                try:
                    pub = next(publications)
                    yield dict(pub)
                except StopIteration:
                    break
                except Exception:
                    ex_type, ex, tb = sys.exc_info()
                    logger.warning(
                        f"Failed to ingest post with error: {ex}. Traceback: {traceback.print_tb(tb)}"
                    )

        yield JSONStream(name, result_generator())
def read(self):
    """Schedule a report, stream its rows, then delete the schedule.

    The schedule deletion now sits in a ``finally`` block so the remote
    report schedule is removed even when the consumer abandons the stream
    or reading raises — the original deleted it only on the happy path,
    leaking the schedule on error.
    """
    self._get_report_template_id()
    self._create_report_schedule()
    try:
        self._wait_for_download_url()
        data = self._download_report()

        def result_generator():
            for record in data:
                # Normalize only the "Date" column; all other values are
                # passed through untouched.
                yield {
                    k: format_date(v) if k == "Date" else v
                    for k, v in record.items()
                }

        yield JSONStream("results_" + "_".join(self.advertiser_ids),
                         result_generator())
    finally:
        self._delete_report_schedule()
def _run_query(self):
    """Execute the configured MySQL query and return a JSONStream over
    its rows, persisting the watermark to Redis as rows are emitted.

    :return: JSONStream named after the connector, yielding one dict per
        result row.
    """
    logger.info(f"Running MySQL query {self._query}")
    rows = self._engine.execute(self._query)
    logger.info(f"MySQL result set contains {rows.rowcount} rows")

    def result_generator():
        # try/finally guarantees the result set is closed even when the
        # consumer abandons the generator or an error interrupts it; the
        # original only closed it after full exhaustion, leaking the
        # cursor otherwise.
        try:
            row = rows.fetchone()
            while row:
                yield dict(row.items())
                if self._watermark_column:
                    self._redis_state_service.set(
                        self._name, row[self._watermark_column])
                row = rows.fetchone()
        finally:
            rows.close()

    return JSONStream(self._name, result_generator())
def read(self):
    """Yield a single JSONStream over the results for every profile id."""
    # NOTE(review): no "_" between "results" and the first profile id —
    # kept byte-identical; confirm before normalizing.
    stream_name = "results" + "_".join(self.profile_ids)
    yield JSONStream(stream_name, self.result_generator())
def read(self):
    """Yield a single JSONStream over the aggregated report rows."""
    records = self._get_aggregated_report_generator()
    yield JSONStream("results_", records)
def read(self):
    """Yield a single JSONStream named after the configured report type."""
    # str.format mirrors the original f-string exactly (including the
    # implicit str() of a non-string report_type).
    stream_name = "results_{}".format(self.report_type)
    yield JSONStream(stream_name, self.result_generator())
def read(self):
    """Yield a single JSONStream over the campaign object report."""
    stream_name = "results_CAMPAIGN_OBJECT_REPORT_"
    yield JSONStream(stream_name, self.result_generator())
def read(self):
    """Yield a single JSONStream named from the object type and ids."""
    # Equivalent to "results_" + type + "_" + joined ids, including the
    # trailing "_" when the id list is empty.
    parts = ["results", self.object_type, "_".join(self.object_ids)]
    yield JSONStream("_".join(parts), self.get_data())
def mock_stream(list_dict, name):
    """Wrap a list of dicts in a named JSONStream (test helper)."""
    stream = JSONStream(name, dict_generator(list_dict))
    return stream
def read(self):
    """Yield a single JSONStream over the results of all view ids."""
    stream_name = "result_view_" + "_".join(self.view_ids)
    yield JSONStream(stream_name, self.result_generator())
def read(self):
    """Yield a single JSONStream over the Search Console results."""
    records = self.result_generator()
    yield JSONStream("search_console_results", records)