def get_ad_data(api_client: BingReportClient, tmp_dir: Path) -> dict:
    """Downloads the ad data from the Bing Ads API

    Args:
        api_client: BingAdsApiClient
        tmp_dir: path to write the temp file in

    Returns:
        A dictionary of the form {ad_id: {key: value}}
    """
    ad_data = {}
    fields = ["TimePeriod", "DeviceType", "AccountId", "AccountName", "AccountNumber", "AccountStatus",
              "CampaignId", "CampaignName", "CampaignStatus", "AdGroupId", "AdGroupName", "AdGroupStatus",
              "AdId", "AdTitle", "AdDescription", "AdType", "AdLabels",
              "Impressions"]  # the API rejects the request unless at least one performance metric is included

    report_request_ad = build_ad_performance_request(api_client, current_date=None, fields=fields, all_time=True)
    report_file_location = submit_and_download(report_request_ad, api_client, str(tmp_dir),
                                               'ad_account_structure_{}.csv'.format(config.output_file_version()),
                                               overwrite_if_exists=True, decompress=True)

    with open(report_file_location, 'r') as f:
        for i in range(11):  # skip the report header lines
            next(f)
        reader = csv.reader(f)
        report_data = list(reader)

    relevant_columns = ['AdId', 'AdTitle', 'AdGroupId', 'AdGroupName',
                        'CampaignId', 'CampaignName', 'AccountId', 'AccountName']
    positions = [fields.index(name) for name in relevant_columns]
    relevant_columns.extend(['attributes'])

    for row in report_data[:-2]:  # drop the report footer lines
        attributes = parse_labels(row[fields.index("AdLabels")])
        new_row = [row[i] for i in positions]
        new_row.extend([attributes])
        ad_data[row[fields.index("AdId")]] = {key: value for key, value in zip(relevant_columns, new_row)}

    return ad_data
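
# Note: the 'attributes' values above come from parse_labels, which is defined elsewhere in this
# module. For illustration only, a minimal sketch of such a helper, assuming labels arrive as a
# single string of '{key=value}' pairs; the real Bing label format may differ.
def _parse_labels_sketch(labels: str) -> dict:
    """Hypothetical stand-in for parse_labels: turns '{channel=brand}{language=en}'
    into {'channel': 'brand', 'language': 'en'}"""
    import re
    attributes = {}
    for key, value in re.findall(r'{([^=}]+)=([^}]*)}', labels or ''):
        attributes[key.strip()] = value.strip()
    return attributes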
def download_account_structure_data(api_client: BingReportClient):
    """Downloads the marketing structure for all accounts

    Args:
        api_client: BingAdsApiClient
    """
    filename = Path('bing-account-structure_{}.csv.gz'.format(config.output_file_version()))
    filepath = ensure_data_directory(filename)
    print('Start downloading account structure in {}'.format(str(filename)))

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_filepath = Path(tmp_dir, filename)
        with gzip.open(str(tmp_filepath), 'wt') as tmp_campaign_structure_file:
            header = ['AdId', 'AdTitle', 'AdGroupId', 'AdGroupName', 'CampaignId', 'CampaignName',
                      'AccountId', 'AccountName', 'Attributes']
            writer = csv.writer(tmp_campaign_structure_file, delimiter="\t")
            ad_data = get_ad_data(api_client, tmp_dir)
            campaign_attributes = get_campaign_attributes(api_client, tmp_dir)
            writer.writerow(header)
            for ad_id, ad_data_dict in ad_data.items():
                campaign_id = ad_data_dict['CampaignId']
                ad_group_id = ad_data_dict['AdGroupId']
                attributes = {**campaign_attributes.get(campaign_id, {}),
                              **ad_data_dict['attributes']}
                ad = [str(ad_id),
                      ad_data_dict['AdTitle'],
                      str(ad_group_id),
                      ad_data_dict['AdGroupName'],
                      str(campaign_id),
                      ad_data_dict['CampaignName'],
                      ad_data_dict['AccountId'],
                      ad_data_dict['AccountName'],
                      json.dumps(attributes)]
                writer.writerow(ad)
        shutil.move(str(tmp_filepath), str(filepath))
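
# The file written above is a gzipped, tab-separated CSV whose last column holds the merged
# attributes as JSON. A usage sketch for reading it back (illustrative only; the actual file
# name depends on config.output_file_version()):
def _read_account_structure_sketch(file_path: str):
    """Prints ad id, campaign name and attributes from a bing-account-structure_*.csv.gz file"""
    with gzip.open(file_path, 'rt') as f:
        reader = csv.DictReader(f, delimiter='\t')
        for row in reader:
            attributes = json.loads(row['Attributes'])  # campaign labels overridden by ad labels
            print(row['AdId'], row['CampaignName'], attributes)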
def get_campaign_attributes(api_client: BingReportClient, tmp_dir: Path) -> dict:
    """Downloads the campaign attributes from the Bing Ads API

    Args:
        api_client: BingAdsApiClient
        tmp_dir: path to write the temp file in

    Returns:
        A dictionary of the form {campaign_id: {key: value}}
    """
    campaign_labels = {}
    fields = ["TimePeriod", "AccountId", "AccountName", "CampaignId", "CampaignName", "CampaignLabels",
              "Spend"]  # the API rejects the request unless at least one performance metric is included

    report_request_campaign = build_campaign_performance_request(api_client, current_date=None, fields=fields,
                                                                 all_time=True)
    report_file_location = submit_and_download(report_request_campaign, api_client, str(tmp_dir),
                                               'campaign_labels_{}.csv'.format(config.output_file_version()),
                                               overwrite_if_exists=True, decompress=True)

    with open(report_file_location, 'r') as f:
        for i in range(11):  # skip the report header lines
            next(f)
        reader = csv.reader(f)
        report_data = list(reader)

    for row in report_data[:-2]:  # drop the report footer lines
        attributes = parse_labels(row[fields.index("CampaignLabels")])
        campaign_labels[row[fields.index("CampaignId")]] = attributes

    return campaign_labels
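
# In download_account_structure_data the campaign attributes returned here are merged with the
# ad-level attributes via {**campaign_attributes, **ad_attributes}, so ad labels win on key
# collisions. A minimal illustration of that precedence (example values, not real labels):
def _attribute_merge_example():
    campaign_attributes = {'channel': 'search', 'market': 'de'}  # campaign-level labels
    ad_attributes = {'market': 'at', 'variant': 'b'}             # ad-level labels
    merged = {**campaign_attributes, **ad_attributes}
    assert merged == {'channel': 'search', 'market': 'at', 'variant': 'b'}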
def download_performance_data(api_client: BingReportClient):
    """Downloads BingAds ad, keyword and campaign performance reports by creating report objects
    for every day from config.first_date() up to yesterday

    Args:
        api_client: BingAdsApiClient
    """
    first_date = datetime.datetime.strptime(config.first_date(), '%Y-%m-%d')
    last_date = datetime.datetime.now() - datetime.timedelta(days=1)
    current_date = last_date
    remaining_attempts = config.total_attempts_for_single_day
    while current_date >= first_date:
        print(current_date)
        relative_filepath = Path('{date:%Y/%m/%d}/bing/'.format(date=current_date))
        filepath = ensure_data_directory(relative_filepath)

        overwrite_if_exists = (last_date - current_date).days < 31
        if overwrite_if_exists:
            print('The data for {date:%Y-%m-%d} will be downloaded. Already present files will be overwritten'.format(
                date=current_date))

        report_request_ad = build_ad_performance_request(api_client, current_date)
        report_request_keyword = build_keyword_performance_request(api_client, current_date)
        report_request_campaign = build_campaign_performance_request(api_client, current_date)

        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_filepath = Path(tmp_dir, relative_filepath)
            tmp_filepath.parent.mkdir(exist_ok=True, parents=True)
            try:
                start_time = time.time()
                print('About to download ad data for {date:%Y-%m-%d}'.format(date=current_date))
                submit_and_download(report_request_ad, api_client, str(filepath),
                                    'ad_performance_{}.csv.gz'.format(config.output_file_version()),
                                    overwrite_if_exists)
                print('Successfully downloaded ad data for {date:%Y-%m-%d} in {elapsed:.1f} seconds'
                      .format(date=current_date, elapsed=time.time() - start_time))

                start_time = time.time()
                print('About to download keyword data for {date:%Y-%m-%d}'.format(date=current_date))
                submit_and_download(report_request_keyword, api_client, str(filepath),
                                    'keyword_performance_{}.csv.gz'.format(config.output_file_version()),
                                    overwrite_if_exists)
                print('Successfully downloaded keyword data for {date:%Y-%m-%d} in {elapsed:.1f} seconds'
                      .format(date=current_date, elapsed=time.time() - start_time))

                start_time = time.time()
                print('About to download campaign data for {date:%Y-%m-%d}'.format(date=current_date))
                submit_and_download(report_request_campaign, api_client, str(filepath),
                                    'campaign_performance_{}.csv.gz'.format(config.output_file_version()),
                                    overwrite_if_exists)
                print('Successfully downloaded campaign data for {date:%Y-%m-%d} in {elapsed:.1f} seconds'
                      .format(date=current_date, elapsed=time.time() - start_time))

                # the date is decreased only if the downloads above do not fail
                current_date -= datetime.timedelta(days=1)
                remaining_attempts = config.total_attempts_for_single_day
            except urllib.error.URLError as url_error:
                if remaining_attempts == 0:
                    print('Too many failed attempts while downloading this day, quitting', file=sys.stderr)
                    raise
                print('ERROR WHILE DOWNLOADING REPORT, RETRYING in {} seconds, attempt #{}...'
                      .format(config.retry_timeout_interval, remaining_attempts), file=sys.stderr)
                print(url_error, file=sys.stderr)
                time.sleep(config.retry_timeout_interval)
                remaining_attempts -= 1
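
# The loop above walks backwards from yesterday to config.first_date() and only overwrites
# existing files for the most recent 31 days, presumably because recent metrics can still change.
# A standalone sketch of that date walk, with the config call replaced by a placeholder date:
def _date_walk_sketch():
    first_date = datetime.datetime.strptime('2023-01-01', '%Y-%m-%d')  # placeholder for config.first_date()
    last_date = datetime.datetime.now() - datetime.timedelta(days=1)
    current_date = last_date
    while current_date >= first_date:
        overwrite_if_exists = (last_date - current_date).days < 31  # only re-download the last 31 days
        print('{date:%Y/%m/%d}/bing/ overwrite={overwrite}'.format(date=current_date,
                                                                   overwrite=overwrite_if_exists))
        current_date -= datetime.timedelta(days=1)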