def ewah_execute(self, context):
    """Fetch Facebook Ads insights for each configured account and upload them.

    Builds a date range from ``self.data_from`` / ``self.data_until``, starts
    one asynchronous insights job per account id, polls the job until it
    reaches a terminal status, then cleans and uploads the result rows via
    ``self.upload_data``.

    :param context: task execution context (unused here, kept for the
        operator interface).
    :raises AssertionError: if the async job ends in a status other than
        "Job Completed" (e.g. "Job Failed" or "Job Skipped").
    """
    time_range = {
        "since": self.data_from.strftime("%Y-%m-%d"),
        "until": self.data_until.strftime("%Y-%m-%d"),
    }
    FacebookAdsApi.init(**self.credentials)
    params = {
        "time_range": time_range,
        "time_increment": self.time_increment,
        "level": self.level,
        "limit": self.pagination_limit,
    }
    if self.breakdowns:
        params["breakdowns"] = ",".join(self.breakdowns)

    for account_id in self.account_ids:
        if self.execution_waittime_seconds:
            self.log.info(
                "Delaying execution by {0} seconds...".format(
                    str(self.execution_waittime_seconds),
                )
            )
            # A single sleep replaces the original 1s busy-poll of the clock;
            # the total delay is the same.
            time.sleep(self.execution_waittime_seconds)

        account_object = AdAccount("act_{0}".format(str(account_id)))
        self.log.info(
            "Requesting data for account_id={0} between {1} and {2}.".format(
                str(account_id),
                time_range["since"],
                time_range["until"],
            )
        )
        async_job = account_object.get_insights_async(
            fields=self.insight_fields,
            params=params,
        )
        job_remote_read = async_job.api_get()
        # Terminal statuses reported by the Marketing API for async report runs.
        done_status = [
            "Job Completed",
            "Job Failed",
            "Job Skipped",
        ]
        while job_remote_read.get("async_status") not in done_status:
            self.log.info(
                "Async job completion: {0}% (status: {1})".format(
                    str(job_remote_read.get("async_percent_completion")),
                    str(job_remote_read.get("async_status")),
                )
            )
            time.sleep(self.async_job_read_frequency_seconds)
            job_remote_read = async_job.api_get()
        # Brief grace period before fetching results, matching the API's
        # eventual-consistency behavior after job completion.
        time.sleep(1)

        # Explicit raise instead of a bare assert so the check survives
        # python -O; AssertionError is kept for backward compatibility.
        status = job_remote_read.get("async_status")
        if status != "Job Completed":
            raise AssertionError(
                "Async insights job finished with status: {0}".format(status)
            )

        data = self._clean_response_data(
            async_job.get_result(params={"limit": self.pagination_limit})
        )
        self.upload_data(data)
def get_request(account_id, table, params, fields):
    """Request data for one table from the Facebook Marketing API.

    account_id: unique id for ad account in format act_<ID>
    table: name of the logical table to fetch
    params: dictionary of parameters for request
    fields: list of fields for request
    --> returns requested data from Facebook Marketing API
        (None for an unrecognized table name, as before)

    Raises RuntimeError if an async insights job ends in "Job Failed" or
    "Job Skipped" (the original code looped forever in that case).
    """
    # All three insights tables previously had byte-identical branches;
    # they are consolidated here.
    _INSIGHTS_TABLES = (
        'ads_insights',
        'ads_insights_age_and_gender',
        'ads_insights_region',
    )
    _DONE_STATUSES = ("Job Completed", "Job Failed", "Job Skipped")

    my_account = AdAccount(account_id)

    if table == 'accounts':
        cursor = my_account.api_get(params=params, fields=fields)
        return dict(cursor)

    if table == 'campaigns':
        cursor = my_account.get_campaigns(params=params, fields=fields)
        return list(cursor)

    if table == 'adsets':
        return my_account.get_ad_sets(params=params, fields=fields)

    if table in _INSIGHTS_TABLES:
        cursor = my_account.get_insights_async(params=params, fields=fields)
        cursor.api_get()
        # Poll until the job reaches a terminal status instead of waiting
        # only for "Job Completed", which would spin forever on failure.
        while cursor[AdReportRun.Field.async_status] not in _DONE_STATUSES:
            time.sleep(1)
            cursor.api_get()
        status = cursor[AdReportRun.Field.async_status]
        if status != "Job Completed":
            raise RuntimeError(
                "Async insights job for table '{0}' ended with status: "
                "{1}".format(table, status)
            )
        # Grace period before fetching results, as in the original code.
        time.sleep(1)
        return cursor.get_result(params={"limit": 1000})
def ewah_execute(self, context):
    """Fetch Facebook Ads insights for each configured account and upload them.

    Resolves the effective date range (falling back to the Airflow execution
    window, and to ``reload_data_from`` when the target table does not exist
    yet), starts one asynchronous insights job per account id, polls until a
    terminal status, then cleans and uploads the result rows.

    :param context: Airflow task context; supplies ``execution_date`` and
        ``next_execution_date`` defaults for the date range.
    :raises AssertionError: if the async job ends in a status other than
        'Job Completed' (e.g. 'Job Failed' or 'Job Skipped').
    """
    if not self.test_if_target_table_exists():
        if self.reload_data_from:
            # Full reload: start from the configured historical start date.
            self.data_from = self.reload_data_from

    self.data_from = airflow_datetime_adjustments(self.data_from)
    self.data_until = airflow_datetime_adjustments(self.data_until)
    self.data_from = self.data_from or context['execution_date']
    self.data_until = self.data_until or context['next_execution_date']

    time_range = {
        'since': self.data_from.strftime('%Y-%m-%d'),
        'until': self.data_until.strftime('%Y-%m-%d'),
    }
    FacebookAdsApi.init(**self.credentials)
    params = {
        'time_range': time_range,
        'time_increment': self.time_increment,
        'level': self.level,
        'limit': self.pagination_limit,
    }
    if self.breakdowns:
        params['breakdowns'] = ','.join(self.breakdowns)

    for account_id in self.account_ids:
        if self.execution_waittime_seconds:
            self.log.info('Delaying execution by {0} seconds...'.format(
                str(self.execution_waittime_seconds),
            ))
            # A single sleep replaces the original 1s busy-poll of
            # datetime.now(); the total delay is the same.
            time.sleep(self.execution_waittime_seconds)

        account_object = AdAccount('act_{0}'.format(str(account_id)))
        self.log.info((
            'Requesting data for account_id={0} between {1} and {2}.'
        ).format(
            str(account_id), time_range['since'], time_range['until'],
        ))
        async_job = account_object.get_insights_async(
            fields=self.insight_fields,
            params=params,
        )
        job_remote_read = async_job.api_get()
        # Terminal statuses reported by the Marketing API for async runs.
        done_status = [
            'Job Completed',
            'Job Failed',
            'Job Skipped',
        ]
        while job_remote_read.get('async_status') not in done_status:
            self.log.info('Async job completion: {0}% (status: {1})'.format(
                str(job_remote_read.get('async_percent_completion')),
                str(job_remote_read.get('async_status')),
            ))
            time.sleep(self.async_job_read_frequency_seconds)
            job_remote_read = async_job.api_get()
        # Brief grace period before fetching results after completion.
        time.sleep(1)

        # Explicit raise instead of a bare assert so the check survives
        # python -O; AssertionError is kept for backward compatibility.
        status = job_remote_read.get('async_status')
        if status != 'Job Completed':
            raise AssertionError(
                'Async insights job finished with status: {0}'.format(status)
            )

        data = self._clean_response_data(async_job.get_result(
            params={'limit': self.pagination_limit},
        ))
        self.upload_data(data)
def get_data_in_batches(
    self,
    level,
    fields,
    data_from,
    data_until,
    breakdowns=None,
    account_id=None,
    batch_size=10000,
):
    """Yield Facebook Ads insights rows in batches of ~batch_size dicts.

    Splits [data_from, data_until] into windows of at most 90 days (an API
    limit on the requested time range), runs one async insights job per
    window, polls it to completion and accumulates the rows, converting the
    'date_start'/'date_stop' fields to ``datetime.date`` objects.

    :param level: insights aggregation level (passed through to the API).
    :param fields: list of insight fields to request.
    :param data_from: start date (date or datetime; datetimes are truncated).
    :param data_until: end date, inclusive (date or datetime).
    :param breakdowns: optional breakdowns, as a list or comma-joined string.
    :param account_id: ad account id; defaults to the connection's account.
    :param batch_size: yield the accumulated rows once at least this many
        have been collected.
    :raises AssertionError: if an async job ends in a status other than
        "Job Completed".
    """
    # Normalize datetimes to dates; the API takes day-granularity ranges.
    if hasattr(data_from, "date"):
        data_from = data_from.date()
    if hasattr(data_until, "date"):
        data_until = data_until.date()

    self.fb_init(account_id=account_id)
    account_id = str(account_id or self.conn.account_id)
    if not account_id.startswith("act_"):
        account_id = "act_{0}".format(account_id)
    account_object = AdAccount(account_id)

    params = {
        "time_increment": 1,
        "level": level,
        "limit": 1000,
    }
    if breakdowns:
        if isinstance(breakdowns, list):
            breakdowns = ",".join(breakdowns)
        params["breakdowns"] = breakdowns

    data = []
    # Maximum request window of 90 days at once!
    while data_from <= data_until:
        request_until = min(data_until, data_from + timedelta(days=90))
        params["time_range"] = {
            "since": data_from.strftime("%Y-%m-%d"),
            "until": request_until.strftime("%Y-%m-%d"),
        }
        data_from = request_until + timedelta(days=1)
        self.log.info(
            "Requesting data for account_id={0} between {1} and {2}.".format(
                str(account_id),
                params["time_range"]["since"],
                params["time_range"]["until"],
            )
        )
        async_job = account_object.get_insights_async(
            fields=fields,
            params=params,
        )
        job_read = async_job.api_get()
        while job_read.get("async_status") not in [
            "Job Completed",
            "Job Failed",
            "Job Skipped",
        ]:
            self.log.info(
                "Async job completion: {0}% (status: {1})".format(
                    str(job_read.get("async_percent_completion")),
                    str(job_read.get("async_status")),
                )
            )
            time.sleep(5)
            job_read = async_job.api_get()
        # Grace period before fetching results after completion.
        time.sleep(1)

        # Explicit raise instead of `assert ..., job_read.get_result()`:
        # calling get_result() on a failed job raises a secondary API error
        # that masked the real status; also survives python -O.
        status = job_read.get("async_status")
        if status != "Job Completed":
            raise AssertionError(
                "Async insights job finished with status: {0}".format(status)
            )

        # The API returns dates as ISO "YYYY-MM-DD" strings.
        data += [
            {
                k: date.fromisoformat(v)
                if k in ("date_start", "date_stop")
                else v
                for k, v in datum.items()
            }
            for datum in list(job_read.get_result(params={"limit": 1000}))
        ]
        if len(data) >= batch_size:
            yield data
            data = []
    if data:
        yield data