def test__should__be_able_to_get_all_ads__when__run_multiple_times(self):
    """Fetch two consecutive pages of ads, then verify that re-setting the
    query restarts paging from the first page.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    connection_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    ad_util = AdWordsAdGroupAdUtil(
        GoogleAdWordsConnectionManager(connection_settings))
    ad_util.set_query_to_fetch_all(page_size=10)

    first_page = ad_util.download_next_page_as_dict()
    print(first_page[0])
    self.assertEqual(10, len(first_page))

    second_page = ad_util.download_next_page_as_dict()
    print(second_page[0])
    self.assertEqual(10, len(second_page))
    # Consecutive pages must not start with the same record
    self.assertNotEqual(first_page[0], second_page[0])

    # Does a reset of page position
    ad_util.set_query_to_fetch_all(page_size=10)
    page_after_reset = ad_util.download_next_page_as_dict()
    print(page_after_reset[0])
    self.assertEqual(10, len(page_after_reset))
    self.assertEqual(first_page[0], page_after_reset[0])
def test_the_transformation_works(self):
    """Verify that a data frame is converted to destination dicts that carry
    only the mapped columns, renamed per the column mapping.
    """
    aws_conn = AwsConnectionSettings(
        region="us-east-1", secrets_manager=None, profile="default")
    execfile("../../secrets.py")
    # The state manager connection is never exercised here, so a Mock suffices
    state_manager_conn = Mock()
    settings = AthenaToAdWordsOfflineConversionSettings(
        source_database=os.getenv("dummy_athena_database"),
        source_table=os.getenv("dummy_athena_table"),
        source_connection_settings=aws_conn,
        etl_identifier="xxxx",
        destination_batch_size=100,
        etl_state_manager_connection=state_manager_conn,
        etl_state_manager_keyspace="test",
        transformation_column_mapping={
            'abc': 'googleClickId',
            'def1': 'conversionName',
            'def2': 'conversionTime',
            'def4': 'conversionValue'
        },
        destination_connection_settings=GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id=os.getenv("adwords_client_customer_id"),
            secrets_manager=GoogleAdWordsSecretsManager()),
    )
    etl = AthenaToAdWordsOfflineConversion(settings)

    # Columns def3 and def5 are not in the mapping
    source_rows = [
        {
            "abc": "123",
            "def1": "123",
            "def2": "123",
            "def3": "123",
            "def4": "123",
            "def5": "123",
        },
        {
            "abc": "222",
            "def1": "333",
            "def2": "444",
            "def3": "333",
            "def4": "333",
            "def5": "333",
        },
    ]
    result = etl._data_frame_to_destination_dict(DataFrame(source_rows))

    expected = [
        {
            'conversionName': '123',
            'conversionTime': '123',
            'conversionValue': '123',
            'googleClickId': '123'
        },
        {
            'conversionName': '333',
            'conversionTime': '444',
            'conversionValue': '333',
            'googleClickId': '222'
        },
    ]
    self.assertListEqual(result, expected)
def test__should__get_correct_estimations__with__etl_get_parallel_payloads(
        self):
    """Check the parallel payload split for three workers, then create the
    Athena table and print its Glue metadata.
    """
    # Load secrets via env vars. This runs before any environment lookup so
    # that S3_TEST_BUCKET resolves even if secrets.py is what defines it.
    execfile("../../secrets.py")
    aws_setting = AwsConnectionSettings(
        region="us-east-1",
        secrets_manager=AwsSecretsManager(),
        profile=None)
    target_bucket = os.getenv('S3_TEST_BUCKET')
    target_key_prefix = "something/test"
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    target_table = "test_adwords_to_athena_table_creation"
    etl_settings = AdWordsToAthenaSettings(
        source_query_fragment=ServiceQueryBuilder().Select('Id').OrderBy(
            'Id'),
        source_service="AdGroupAdService",
        source_service_version="v201809",
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_connection_settings=aws_setting,
        target_database="dev",
        target_table=target_table,
        target_table_ddl_progress=True,
        is_partitioned_table=True,
        partition_values=[("abc", "def"), ("pqr", 123)])
    etl = AdWordsToAthena(etl_settings)

    actual_payloads = etl.get_parallel_payloads(page_size=1000,
                                                number_of_workers=3)
    # NOTE(review): the page counts appear tied to the live account's size
    # at the time of writing - confirm before relying on this assertion.
    expected_payloads = [{
        'number_of_pages': 393,
        'page_size': 1000,
        'start_index': 0,
        'worker': 0
    }, {
        'number_of_pages': 393,
        'page_size': 1000,
        'start_index': 393000,
        'worker': 1
    }, {
        'number_of_pages': 393,
        'page_size': 1000,
        'start_index': 786000,
        'worker': 2
    }]
    self.assertListEqual(expected_payloads, actual_payloads)

    etl.create_athena_table()
    conn = AwsConnectionManager(aws_setting)
    au = AthenaUtil("dev", conn)
    # The metadata is only printed for manual inspection, not asserted
    actual = au.get_glue_table_metadata(target_table)
    print(actual)
def test_full_integration_with_local_cassandra(self):
    """Run the Athena-to-AdWords offline conversion upload against a
    docker-compose Cassandra and expect no issues to be reported.
    """
    aws_conn = AwsConnectionSettings(
        region="us-east-1", secrets_manager=None, profile="default")
    execfile("../../secrets.py")
    compose = DockerCompose(filepath=os.path.dirname(base.__file__))
    # Everything that uses Cassandra stays inside the compose context
    with compose:
        host = compose.get_service_host("cassandra", 9042)
        port = int(compose.get_service_port("cassandra", 9042))
        cassandra_conn_setting = CassandraConnectionSettings(
            cluster_ips=[host],
            port=port,
            load_balancing_policy=DCAwareRoundRobinPolicy(),
            secrets_manager=CassandraSecretsManager(
                source=DictKeyValueSource({
                    "CASSANDRA_USERNAME": "",
                    "CASSANDRA_PASSWORD": "",
                })),
        )
        verify_container_is_up(cassandra_conn_setting)
        settings = AthenaToAdWordsOfflineConversionSettings(
            source_database=os.getenv("dummy_athena_database"),
            source_table=os.getenv("dummy_athena_table"),
            source_connection_settings=aws_conn,
            etl_identifier="test",
            destination_batch_size=100,
            etl_state_manager_connection=cassandra_conn_setting,
            etl_state_manager_keyspace="test",
            transformation_column_mapping={
                'google_click_id': 'googleClickId',
                'conversion_name': 'conversionName',
                'conversion_time': 'conversionTime',
                'conversion_value': 'conversionValue',
                'conversion_currency_code': 'conversionCurrencyCode'
            },
            destination_connection_settings=GoogleAdWordsConnectionSettings(
                client_id=os.getenv("adwords_client_id"),
                user_agent="Tester",
                client_customer_id=os.getenv("adwords_client_customer_id"),
                secrets_manager=GoogleAdWordsSecretsManager()),
        )
        etl = AthenaToAdWordsOfflineConversion(settings)
        # The listing call is kept but its result is not asserted on
        etl.list_source_files()
        act = etl.upload_all()
        self.assertListEqual(act, [])
def test__should__be_able_to_estimate_splits__when__run_with_subclass(
        self):
    """Build an AdWords connection manager and an ad-group-ad util on it.

    NOTE(review): there is no assertion here; the test only fails if
    construction raises.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    connection_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    conn = GoogleAdWordsConnectionManager(connection_settings)
    print(conn)
    ad_util = AdWordsAdGroupAdUtil(conn)
def test__should__create_s3_file_for_the_given_indices(self):
    """Transfer one page of ad groups starting at index 35000 and verify
    that exactly one parquet file is written under the target prefix.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    aws_setting = AwsConnectionSettings(
        region="us-east-1",
        secrets_manager=AwsSecretsManager(),
        profile=None)
    target_bucket = os.getenv('S3_TEST_BUCKET')
    target_key_prefix = "something/test"
    conn = AwsConnectionManager(aws_setting)
    s3u = S3Util(conn=conn, bucket=target_bucket)
    # Start from a clean prefix so keys from earlier runs cannot leak in
    s3u.delete_recursive(target_key_prefix)
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    adword_to_s3_util = AdWordsToS3(settings=AdWordsToS3Settings(
        source_query_fragment=ServiceQueryBuilder().Select(
            # Attributes
            'BaseAdGroupId',
            'Id',
            'CampaignId',
            'CampaignName',
            'Name',
            'Status',
            'ContentBidCriterionTypeGroup',
            'BaseCampaignId',
            'TrackingUrlTemplate',
            'FinalUrlSuffix',
            'UrlCustomParameters',
            'AdGroupType').OrderBy('Id'),
        source_service="AdGroupService",
        source_service_version="v201809",
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_file_prefix=None,
        target_connection_settings=aws_setting))
    adword_to_s3_util.build_query(start_index=35000,
                                  page_size=1000,
                                  num_iterations=1)
    adword_to_s3_util.transfer_all()
    actual = s3u.get_keys(target_key_prefix)
    # The expected key is derived from the prefix actually written to and
    # listed; the previous hard-coded key sat under a different prefix and
    # could never match.
    expected = [target_key_prefix + '/index_35000__35999.parquet']
    self.assertListEqual(expected, actual)
def test_adwords_data_upload_for_offline_conversion(self):
    """Upload three offline conversions and expect two to be accepted and
    one to be rejected.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    connection_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    adwords_util = AdWordsOfflineConversionUtil(
        GoogleAdWordsConnectionManager(connection_settings))

    # Click id shared by the second and third conversions
    shared_click_id = ('Cj0KCQiAqY3zBRDQARIsAJeCVxOIyZ8avQ0he3WIpHPwV6hRn'
                       '-8Y2gDrUBJcc95tDdLcE35TK1mhhmIaAgZGEALw_wcB')
    conversions = [
        {
            'googleClickId': 'xxx',
            'conversionName': 'claim_attempts_testing',
            'conversionTime': '20200309 074357 UTC',
            'conversionValue': 17.0,
            'conversionCurrencyCode': 'AUD',
        },
        {
            'googleClickId': shared_click_id,
            'conversionName': 'claim_attempts_testing',
            'conversionTime': '20200309 074353 UTC',
            'conversionValue': 17.0,
            'conversionCurrencyCode': 'AUD',
        },
        {
            'googleClickId': shared_click_id,
            'conversionName': 'claim_attempts_testing',
            'conversionTime': '20200309 023001 UTC',
            'conversionValue': 17.0,
            'conversionCurrencyCode': 'AUD',
        },
    ]
    uploaded, failed = adwords_util.upload_conversions(conversions)
    print(uploaded, failed)
    self.assertEqual(len(uploaded), 2)
    self.assertEqual(len(failed), 1)
def test__should__get_correct_estimations__with__etl_get_parallel_payloads(
        self):
    """Check how the AdWords-to-S3 ETL splits the source into payloads for
    three parallel workers at a page size of 1000.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    aws_setting = AwsConnectionSettings(
        region="us-east-1",
        secrets_manager=AwsSecretsManager(),
        profile=None)
    target_bucket = os.getenv('S3_TEST_BUCKET')
    target_key_prefix = "something/test"
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    id_query = ServiceQueryBuilder().Select('Id').OrderBy('Id')
    etl = AdWordsToS3(AdWordsToS3Settings(
        source_query_fragment=id_query,
        source_service="AdGroupAdService",
        source_service_version="v201809",
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_file_prefix=None,
        target_connection_settings=aws_setting))

    actual_payloads = etl.get_parallel_payloads(page_size=1000,
                                                number_of_workers=3)
    expected_payloads = [
        {
            'worker': 0,
            'start_index': 0,
            'number_of_pages': 435,
            'page_size': 1000
        },
        {
            'worker': 1,
            'start_index': 435000,
            'number_of_pages': 435,
            'page_size': 1000
        },
        {
            'worker': 2,
            'start_index': 870000,
            'number_of_pages': 435,
            'page_size': 1000
        },
    ]
    self.assertListEqual(expected_payloads, actual_payloads)
def test__should__create_table__with__a_general_report(self):
    """Transfer the campaign negative keywords report to S3, create the
    Athena table and its partitions, then query the table.
    """
    # Load secrets via env vars. This runs before any environment lookup so
    # that the S3_TEST_BUCKET read here agrees with the os.environ read at
    # the end of the test, even if secrets.py is what defines it.
    execfile("../../secrets.py")
    aws_setting = AwsConnectionSettings(
        region="us-east-1",
        secrets_manager=AwsSecretsManager(),
        profile=None)
    target_bucket = os.getenv('S3_TEST_BUCKET')
    target_key_prefix = "something/test"
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    target_table = "test_adwords_negative_report"
    etl_settings = AdWordsReportsToAthenaSettings(
        source_query=(ReportQueryBuilder().Select(
            'AccountDescriptiveName', 'CampaignId', 'CampaignName',
            'CampaignStatus', 'Id', 'KeywordMatchType', 'Criteria').From(
                'CAMPAIGN_NEGATIVE_KEYWORDS_PERFORMANCE_REPORT').Build()),
        source_include_zero_impressions=True,
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_connection_settings=aws_setting,
        target_database="dev",
        target_table=target_table,
        target_table_ddl_progress=True,
        is_partitioned_table=True,
        partition_values=[("abc", "def"), ("pqr", 123)],
        target_file_prefix="data",
        transformation_field_type_mask=None)
    etl = AdWordsReportsToAthena(etl_settings)
    etl.transfer()
    etl.create_athena_table()
    etl.add_partitions()

    au = AthenaUtil(database="dev",
                    conn=AwsConnectionManager(aws_setting),
                    output_bucket=os.environ["S3_TEST_BUCKET"])
    actual = au.run_query(query_string=""" select * from dev.test_adwords_negative_report limit 10 """,
                          return_result=True)
    print(actual)
    # NOTE(review): 11 rows for "limit 10" - presumably the result set
    # includes a header row; confirm against AthenaUtil.run_query.
    expected = 11
    self.assertEqual(expected, len(actual["ResultSet"]["Rows"]))
def test__should__be_able_to_get_all_campaigns__with_one_account(self):
    """Download every campaign of the configured account and check the
    total count.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    connection_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    campaign_util = AdWordsCampaignUtil(
        GoogleAdWordsConnectionManager(connection_settings))
    campaign_util.set_query_to_fetch_all()
    campaigns = campaign_util.download_all_as_dict()
    print(campaigns)
    self.assertEqual(2581, len(campaigns))
def test_local_credentials_are_able_to_connect_to_adwords(self):
    """Connect without a client customer id and check how many customers
    the credentials can see.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    connection_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=None,
        secrets_manager=GoogleAdWordsSecretsManager())
    customer_util = AdWordsCustomerUtil(
        GoogleAdWordsConnectionManager(connection_settings))
    customers = customer_util.get_customers()
    print(customers)
    self.assertEqual(len(customers), 3)
def test__should__be_able_to_get_report_fields__when__choosing_one_report_type(
        self):
    """Read the field definitions of the campaign negative keywords report
    and check how many fields come back.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    connection_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    definition_reader = AdWordsReportDefinitionReader(
        conn=GoogleAdWordsConnectionManager(connection_settings))
    report_fields = definition_reader.get_report_fields(
        "CAMPAIGN_NEGATIVE_KEYWORDS_PERFORMANCE_REPORT")
    self.assertEqual(13, len(report_fields))
def test__should__be_able_to_get_report_stream__when__choosing_one_query(
        self):
    """Run an AWQL query over yesterday's campaign performance report and
    check the shape of the resulting data frame.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    connection_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    report_reader = AdWordsReportReader(
        conn=GoogleAdWordsConnectionManager(connection_settings))

    query_builder = adwords.ReportQueryBuilder()
    query_builder = query_builder.Select(
        'AdNetworkType1', 'Impressions', 'Clicks')
    query_builder = query_builder.From('CAMPAIGN_PERFORMANCE_REPORT')
    report_query = query_builder.During('YESTERDAY').Build()

    frame = report_reader.awql_to_dataframe(query=report_query)
    print(frame)
    self.assertEqual((17046, 3), frame.shape)
def test__negative_keyword_reports(self):
    """Run an AWQL query over the campaign negative keywords report and
    check the shape of the resulting data frame.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    connection_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    report_reader = AdWordsReportReader(
        GoogleAdWordsConnectionManager(connection_settings))

    query_builder = adwords.ReportQueryBuilder()
    query_builder = query_builder.Select(
        'AccountDescriptiveName', 'CampaignId', 'CampaignName',
        'CampaignStatus', 'Id', 'KeywordMatchType', 'Criteria')
    report_query = query_builder.From(
        'CAMPAIGN_NEGATIVE_KEYWORDS_PERFORMANCE_REPORT').Build()

    frame = report_reader.awql_to_dataframe(query=report_query)
    print(frame)
    self.assertEqual((125493, 7), frame.shape)
def test__should__read_all_accounts__with__parent_id(self):
    """List every account under the root customer id, both as a plain
    collection and as a data frame, and check the sizes of both.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    connection_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv(
            "adwords_client_root_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    account_util = AdWordsManagedCustomerUtil(
        GoogleAdWordsConnectionManager(connection_settings))

    accounts = account_util.get_all_accounts()
    print(accounts)
    self.assertEqual(58, len(accounts))

    accounts_frame = account_util.get_all_accounts_as_dataframe()
    print(accounts_frame)
    self.assertEqual((58, 8), accounts_frame.shape)
def test__should__transfer_correct_amount_of_files__with__one_parallel_fragment(
        self):
    """Transfer two pages of five records each and verify that one parquet
    file per page is written under the target prefix.
    """
    # Load secrets via env vars
    execfile("../../secrets.py")
    aws_setting = AwsConnectionSettings(
        region="us-east-1",
        secrets_manager=AwsSecretsManager(),
        profile=None)
    target_bucket = os.getenv('S3_TEST_BUCKET')
    target_key_prefix = "tmp/test/hip_data_tools/adwords_to_s3/test"
    s3_util = S3Util(conn=AwsConnectionManager(aws_setting),
                     bucket=target_bucket)
    # Clear whatever is already under the prefix before transferring
    s3_util.delete_recursive(target_key_prefix)
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    id_query = ServiceQueryBuilder().Select('Id').OrderBy('Id')
    etl = AdWordsToS3(AdWordsToS3Settings(
        source_query_fragment=id_query,
        source_service="AdGroupAdService",
        source_service_version="v201809",
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_file_prefix=None,
        target_connection_settings=aws_setting))
    etl.build_query(start_index=0, page_size=5, num_iterations=2)
    etl.transfer_all()

    written_keys = s3_util.get_keys(target_key_prefix)
    print(written_keys)
    expected_keys = [
        'tmp/test/hip_data_tools/adwords_to_s3/test/index_0__4.parquet',
        'tmp/test/hip_data_tools/adwords_to_s3/test/index_5__9.parquet',
    ]
    self.assertListEqual(expected_keys, written_keys)
def test_adwords_upload_with_duplicates_in_same_batch(self):
    """Process one batch that repeats a click id and expect only the
    duplicate with the same conversion time to be reported as an issue.
    """
    aws_conn = AwsConnectionSettings(
        region="us-east-1", secrets_manager=None, profile="default")
    execfile("../../secrets.py")
    compose = DockerCompose(filepath=os.path.dirname(base.__file__))
    # Everything that uses Cassandra stays inside the compose context
    with compose:
        cassandra_host = compose.get_service_host("cassandra", 9042)
        cassandra_port = int(compose.get_service_port("cassandra", 9042))
        cassandra_conn_setting = CassandraConnectionSettings(
            cluster_ips=[cassandra_host],
            port=cassandra_port,
            load_balancing_policy=DCAwareRoundRobinPolicy(),
            secrets_manager=CassandraSecretsManager(
                source=DictKeyValueSource({
                    "CASSANDRA_USERNAME": "",
                    "CASSANDRA_PASSWORD": "",
                })),
        )
        verify_container_is_up(cassandra_conn_setting)
        settings = AthenaToAdWordsOfflineConversionSettings(
            source_database=os.getenv("dummy_athena_database"),
            source_table=os.getenv("dummy_athena_table"),
            source_connection_settings=aws_conn,
            etl_identifier="xxxx",
            destination_batch_size=100,
            etl_state_manager_connection=cassandra_conn_setting,
            etl_state_manager_keyspace="test",
            transformation_column_mapping={
                'googleClickId': 'googleClickId',
                'conversionName': 'conversionName',
                'conversionTime': 'conversionTime',
                'conversionValue': 'conversionValue',
                'conversionCurrencyCode': 'conversionCurrencyCode'
            },
            destination_connection_settings=GoogleAdWordsConnectionSettings(
                client_id=os.getenv("adwords_client_id"),
                user_agent="Tester",
                client_customer_id=os.getenv("adwords_client_customer_id"),
                secrets_manager=GoogleAdWordsSecretsManager()),
        )
        etl = AthenaToAdWordsOfflineConversion(settings)

        # Click id that appears three times in the batch
        repeated_click_id = (
            "Cj0KCQiAqY3zBRDQARIsAJeCVxOIyZ8avQ0he3WIpHPwV6hRn"
            "-8Y2gDrUBJcc95tDdLcE35TK1mhhmIaAgZGEALw_wcB")
        batch_rows = [
            {
                'googleClickId': 'xxx',
                'conversionName': 'claim_attempts_testing',
                'conversionTime': '20200309 074357 UTC',
                'conversionValue': 17.0,
                'conversionCurrencyCode': 'AUD',
            },
            {
                'googleClickId': repeated_click_id,
                'conversionName': 'claim_attempts_testing',
                'conversionTime': '20200309 074353 UTC',
                'conversionValue': 17.0,
                'conversionCurrencyCode': 'AUD',
            },
            {
                'googleClickId': repeated_click_id,
                'conversionName': 'claim_attempts_testing',
                # Duplicate with same time
                'conversionTime': '20200309 074353 UTC',
                'conversionValue': 14.0,
                'conversionCurrencyCode': 'AUD',
            },
            {
                'googleClickId': repeated_click_id,
                'conversionName': 'claim_attempts_testing',
                # Duplicate with diff time
                'conversionTime': '20200309 084353 UTC',
                'conversionValue': 14.0,
                'conversionCurrencyCode': 'AUD',
            },
            {
                'googleClickId':
                    "EAIaIQobChMI6oiGy_vz5wIVkjUrCh3IcgAuEAAYASAAEgLRk_D_BwE",
                'conversionName': "claim_attempts_testing",
                'conversionTime': '20200309 023001 UTC',
                'conversionValue': 17.0,
                'conversionCurrencyCode': 'AUD',
            },
        ]
        actual = etl._process_data_frame(DataFrame(batch_rows))

        # The duplicate with same time has been Picked out as an issue
        expected = [
            {
                'error':
                    "Current State 'EtlStates.Processing' cannot transition to "
                    "'EtlStates.Processing'",
                'data': {
                    'googleClickId': repeated_click_id,
                    'conversionName': 'claim_attempts_testing',
                    'conversionTime': '20200309 074353 UTC',
                    'conversionValue': 14.0,
                    'conversionCurrencyCode': 'AUD'
                }
            },
        ]
        self.assertListEqual(actual, expected)
def test_multiple_runs_of_same_data_and_verify_deduplication(self):
    """Process the same data frame twice and expect the second run to
    report every record as not being in the Ready state.
    """
    aws_conn = AwsConnectionSettings(
        region="us-east-1", secrets_manager=None, profile="default")
    execfile("../../secrets.py")
    compose = DockerCompose(filepath=os.path.dirname(base.__file__))
    # Everything that uses Cassandra stays inside the compose context
    with compose:
        host = compose.get_service_host("cassandra", 9042)
        port = int(compose.get_service_port("cassandra", 9042))
        cassandra_conn_setting = CassandraConnectionSettings(
            cluster_ips=[host],
            port=port,
            load_balancing_policy=DCAwareRoundRobinPolicy(),
            secrets_manager=CassandraSecretsManager(
                source=DictKeyValueSource({
                    "CASSANDRA_USERNAME": "",
                    "CASSANDRA_PASSWORD": "",
                })),
        )
        verify_container_is_up(cassandra_conn_setting)
        settings = AthenaToAdWordsOfflineConversionSettings(
            source_database=os.getenv("dummy_athena_database"),
            source_table=os.getenv("dummy_athena_table"),
            source_connection_settings=aws_conn,
            etl_identifier="test",
            destination_batch_size=100,
            etl_state_manager_connection=cassandra_conn_setting,
            etl_state_manager_keyspace="test",
            transformation_column_mapping={
                'google_click_id': 'googleClickId',
                'conversion_name': 'conversionName',
                'conversion_time': 'conversionTime',
                'conversion_value': 'conversionValue',
                'conversion_currency_code': 'conversionCurrencyCode'
            },
            destination_connection_settings=GoogleAdWordsConnectionSettings(
                client_id=os.getenv("adwords_client_id"),
                user_agent="Tester",
                client_customer_id=os.getenv("adwords_client_customer_id"),
                secrets_manager=GoogleAdWordsSecretsManager()),
        )
        etl = AthenaToAdWordsOfflineConversion(settings)
        source_data = [
            {
                'google_click_id': 'theFirst',
                'conversion_name': 'claim_attempts_testing',
                'conversion_time': '20200309 074357 UTC',
                'conversion_value': 17.0,
                'conversion_currency_code': 'AUD',
            },
            {
                'google_click_id': 'failedSecond',
                'conversion_name': 'claim_attempts_testing',
                'conversion_time': '20200309 074357 UTC',
                'conversion_value': 17.0,
                'conversion_currency_code': 'AUD',
            },
        ]
        test_df = DataFrame(source_data)

        # Mock upload_conversions in AdWordsUtil so no actual data is
        # transmitted. The mock returns one uploaded record and one failure.
        etl._upload_conversions = MagicMock(return_value=([
            {
                'googleClickId': 'theFirst',
                'conversionName': 'claim_attempts_testing',
                'conversionTime': '20200309 074357 UTC',
                'conversionValue': 17.0,
                'conversionCurrencyCode': 'AUD',
            },
        ], [
            {
                'fieldPath': 'operations[0].operand',
                'fieldPathElements': [{
                    'field': 'operations',
                    'index': 0
                }, {
                    'field': 'operand',
                    'index': None
                }],
                'trigger': None,
                'errorString': 'OfflineConversionError.UNPARSEABLE_GCLID',
                'ApiError.Type': 'OfflineConversionError',
                'reason': 'UNPARSEABLE_GCLID',
                'data': {
                    'googleClickId': 'failedSecond',
                    'conversionName': 'claim_attempts_testing',
                    'conversionTime': '20200309 074357 UTC',
                    'conversionValue': 17.0,
                    'conversionCurrencyCode': 'AUD',
                },
            },
        ]))

        first_actual = etl._process_data_frame(test_df)
        self.assertListEqual(first_actual, [])

        # Repeat process to cause Duplicates
        actual = etl._process_data_frame(test_df)
        expected = [{
            'data': {
                'conversionCurrencyCode': 'AUD',
                'conversionName': 'claim_attempts_testing',
                'conversionTime': '20200309 074357 UTC',
                'conversionValue': 17.0,
                'googleClickId': 'theFirst'
            },
            'error': 'Current state is not Ready'
        }, {
            'data': {
                'conversionCurrencyCode': 'AUD',
                'conversionName': 'claim_attempts_testing',
                'conversionTime': '20200309 074357 UTC',
                'conversionValue': 17.0,
                'googleClickId': 'failedSecond'
            },
            'error': 'Current state is not Ready'
        }]
        self.assertListEqual(actual, expected)
def test__should__create_table__with__geo_performance_report(self):
    """Transfer the geo performance report for a fixed date range to S3,
    create the Athena table and its partitions, then query the table.
    """
    aws_setting = AwsConnectionSettings(
        region="ap-southeast-2",
        secrets_manager=AwsSecretsManager(
            access_key_id_var="SOME_CUSTOM_AWS_ACCESS_KEY_ID",
            secret_access_key_var="SOME_CUSTOM_AWS_SECRET_ACCESS_KEY",
            use_session_token=True,
            aws_session_token_var="SOME_CUSTOM_AWS_SESSION_TOKEN"),
        profile=None)
    target_bucket = "test-bucket"
    target_key_prefix = "something/test"
    # Load secrets via env vars
    execfile("../../secrets.py")
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id="1111111111",
        secrets_manager=GoogleAdWordsSecretsManager())
    target_table = "test_adwords_geo_performance_report"
    etl_settings = AdWordsReportsToAthenaSettings(
        source_query=(
            ReportQueryBuilder().Select(
                # Attributes
                'AccountDescriptiveName',
                'CampaignId',
                'CityCriteriaId',
                'CountryCriteriaId',
                'CustomerDescriptiveName',
                'ExternalCustomerId',
                'IsTargetingLocation',
                'MetroCriteriaId',
                'MostSpecificCriteriaId',
                'RegionCriteriaId',
                # Segments
                'Date',
                # Metrics
                'Impressions',
                'Clicks',
                'ConversionRate',
                'Conversions',
                'ConversionValue',
                'Cost',
                'CostPerConversion').From('GEO_PERFORMANCE_REPORT').During(
                    start_date="20200601", end_date="20200701").Build()),
        source_include_zero_impressions=False,
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_connection_settings=aws_setting,
        target_database="dev",
        target_table=target_table,
        target_table_ddl_progress=True,
        is_partitioned_table=True,
        partition_values=[("abc", "def"), ("pqr", 123)],
        target_file_prefix="data",
        # np.int was only an alias of the builtin int; it was deprecated in
        # NumPy 1.20 and removed in 1.24, so use int directly.
        transformation_field_type_mask={
            "country__territory": int,
            "region": int,
            "most_specific_location": int
        })
    etl = AdWordsReportsToAthena(etl_settings)
    etl.transfer()
    etl.create_athena_table()
    etl.add_partitions()

    au = AthenaUtil(database="dev",
                    conn=AwsConnectionManager(aws_setting),
                    output_bucket=os.environ["S3_TEST_BUCKET"])
    actual = au.run_query(query_string=""" select * from dev.test_adwords_geo_performance_report limit 10 """,
                          return_result=True)
    print(actual)
    # NOTE(review): 11 rows for "limit 10" - presumably the result set
    # includes a header row; confirm against AthenaUtil.run_query.
    expected = 11
    self.assertEqual(expected, len(actual["ResultSet"]["Rows"]))