예제 #1
0
    def test__should__be_able_to_get_all_ads__when__run_multiple_times(self):
        """Page through ads twice and check that re-issuing the query rewinds.

        Two consecutive pages of ``page_size`` rows are downloaded and must
        start with different rows. ``set_query_to_fetch_all`` is then called
        again, after which the next page must start with the same row as the
        very first page.

        NOTE(review): the connection is configured from environment
        variables, so this presumably needs real AdWords credentials --
        confirm before running in CI.
        """
        # Load secrets via env vars
        execfile("../../secrets.py")
        conn = GoogleAdWordsConnectionManager(
            GoogleAdWordsConnectionSettings(
                client_id=os.getenv("adwords_client_id"),
                user_agent="Tester",
                client_customer_id=os.getenv("adwords_client_customer_id"),
                secrets_manager=GoogleAdWordsSecretsManager()))
        page_size = 10

        ad_util = AdWordsAdGroupAdUtil(conn)
        ad_util.set_query_to_fetch_all(page_size=page_size)

        # Each page length is asserted *before* the page is indexed, so an
        # empty page fails with a readable assertion instead of an error
        # raised by the ``[0]`` lookup.
        first_page = ad_util.download_next_page_as_dict()
        self.assertEqual(page_size, len(first_page))
        print(first_page[0])

        second_page = ad_util.download_next_page_as_dict()
        self.assertEqual(page_size, len(second_page))
        print(second_page[0])
        self.assertNotEqual(first_page[0], second_page[0])

        # Does a reset of page position
        ad_util.set_query_to_fetch_all(page_size=page_size)
        rewound_page = ad_util.download_next_page_as_dict()
        self.assertEqual(page_size, len(rewound_page))
        print(rewound_page[0])
        self.assertEqual(first_page[0], rewound_page[0])
예제 #2
0
    def test_the_transformation_works(self):
        """Check how ``_data_frame_to_destination_dict`` maps source columns.

        A two-row frame with columns ``abc`` and ``def1``..``def5`` is fed to
        an ETL whose column mapping only covers ``abc``, ``def1``, ``def2``
        and ``def4``. The expected output contains, per row, just the four
        mapped destination keys carrying the corresponding source values.
        """
        aws_settings = AwsConnectionSettings(region="us-east-1",
                                             secrets_manager=None,
                                             profile="default")
        execfile("../../secrets.py")

        # The state manager connection is a plain Mock in this test.
        state_manager_conn = Mock()

        # source column -> destination field
        column_mapping = {
            'abc': 'googleClickId',
            'def1': 'conversionName',
            'def2': 'conversionTime',
            'def4': 'conversionValue'
        }
        source_database = os.getenv("dummy_athena_database")
        source_table = os.getenv("dummy_athena_table")
        adwords_settings = GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id=os.getenv("adwords_client_customer_id"),
            secrets_manager=GoogleAdWordsSecretsManager())

        etl_settings = AthenaToAdWordsOfflineConversionSettings(
            source_database=source_database,
            source_table=source_table,
            source_connection_settings=aws_settings,
            etl_identifier="xxxx",
            destination_batch_size=100,
            etl_state_manager_connection=state_manager_conn,
            etl_state_manager_keyspace="test",
            transformation_column_mapping=column_mapping,
            destination_connection_settings=adwords_settings,
        )
        etl = AthenaToAdWordsOfflineConversion(etl_settings)

        first_row = {
            "abc": "123", "def1": "123", "def2": "123",
            "def3": "123", "def4": "123", "def5": "123",
        }
        second_row = {
            "abc": "222", "def1": "333", "def2": "444",
            "def3": "333", "def4": "333", "def5": "333",
        }
        source_frame = DataFrame([first_row, second_row])

        actual = etl._data_frame_to_destination_dict(source_frame)

        expected = [
            {
                'conversionName': '123',
                'conversionTime': '123',
                'conversionValue': '123',
                'googleClickId': '123'
            },
            {
                'conversionName': '333',
                'conversionTime': '444',
                'conversionValue': '333',
                'googleClickId': '222'
            },
        ]
        self.assertListEqual(actual, expected)
예제 #3
0
    def test_full_integration_with_local_cassandra(self):
        """Run ``upload_all`` with a docker-compose Cassandra as state store.

        A compose stack located next to the ``base`` module is started, the
        ``cassandra`` service on port 9042 is used as the ETL state manager
        connection, and ``upload_all`` is expected to return an empty list.

        NOTE(review): the source database/table and AdWords identifiers come
        from environment variables, so this presumably needs real AWS and
        AdWords access in addition to Docker -- confirm.
        """
        aws_conn = AwsConnectionSettings(region="us-east-1",
                                         secrets_manager=None,
                                         profile="default")
        execfile("../../secrets.py")

        compose = DockerCompose(filepath=os.path.dirname(base.__file__))
        with compose:
            host = compose.get_service_host("cassandra", 9042)
            port = int(compose.get_service_port("cassandra", 9042))

            cassandra_conn_setting = CassandraConnectionSettings(
                cluster_ips=[host],
                port=port,
                load_balancing_policy=DCAwareRoundRobinPolicy(),
                secrets_manager=CassandraSecretsManager(
                    source=DictKeyValueSource({
                        "CASSANDRA_USERNAME": "",
                        "CASSANDRA_PASSWORD": "",
                    })),
            )

            # The return value is not needed by this test; it was previously
            # bound to an unused local that only a commented-out manual
            # DROP TABLE of test.etl_sink_record_state referred to.
            verify_container_is_up(cassandra_conn_setting)

            settings = AthenaToAdWordsOfflineConversionSettings(
                source_database=os.getenv("dummy_athena_database"),
                source_table=os.getenv("dummy_athena_table"),
                source_connection_settings=aws_conn,
                etl_identifier="test",
                destination_batch_size=100,
                etl_state_manager_connection=cassandra_conn_setting,
                etl_state_manager_keyspace="test",
                transformation_column_mapping={
                    'google_click_id': 'googleClickId',
                    'conversion_name': 'conversionName',
                    'conversion_time': 'conversionTime',
                    'conversion_value': 'conversionValue',
                    'conversion_currency_code': 'conversionCurrencyCode'
                },
                destination_connection_settings=GoogleAdWordsConnectionSettings(
                    client_id=os.getenv("adwords_client_id"),
                    user_agent="Tester",
                    client_customer_id=os.getenv("adwords_client_customer_id"),
                    secrets_manager=GoogleAdWordsSecretsManager()),
            )
            etl = AthenaToAdWordsOfflineConversion(settings)

            # The listing call is kept so the same calls are made as before,
            # but its result is not asserted on (the original check against
            # an empty list was disabled).
            etl.list_source_files()

            act = etl.upload_all()
            self.assertListEqual(act, [])
예제 #4
0
 def test__should__be_able_to_estimate_splits__when__run_with_subclass(
         self):
     """Build an ``AdWordsAdGroupAdUtil`` on top of an AdWords connection.

     NOTE(review): despite the name, nothing is estimated or asserted here;
     the test only passes if construction does not raise. Looks unfinished
     -- confirm what should be verified.
     """
     # Load secrets via env vars
     execfile("../../secrets.py")
     connection_settings = GoogleAdWordsConnectionSettings(
         client_id=os.getenv("adwords_client_id"),
         user_agent="Tester",
         client_customer_id=os.getenv("adwords_client_customer_id"),
         secrets_manager=GoogleAdWordsSecretsManager())
     connection_manager = GoogleAdWordsConnectionManager(connection_settings)
     print(connection_manager)
     ad_group_ad_util = AdWordsAdGroupAdUtil(connection_manager)
예제 #5
0
    def test_adwords_data_upload_for_offline_conversion(self):
        """Upload three offline conversions and check the success/failure split.

        ``upload_conversions`` is given one conversion with the click id
        ``'xxx'`` and two conversions sharing a longer click id at different
        times; it is expected to report two uploaded and one failed entry.

        NOTE(review): nothing is mocked, so this presumably sends data to a
        real AdWords account -- confirm.
        """
        # Load secrets via env vars
        execfile("../../secrets.py")
        connection_settings = GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id=os.getenv("adwords_client_customer_id"),
            secrets_manager=GoogleAdWordsSecretsManager())
        conversion_util = AdWordsOfflineConversionUtil(
            GoogleAdWordsConnectionManager(connection_settings))

        # Click id shared by the second and third conversions.
        shared_click_id = ('Cj0KCQiAqY3zBRDQARIsAJeCVxOIyZ8avQ0he3WIpHPwV6hRn'
                           '-8Y2gDrUBJcc95tDdLcE35TK1mhhmIaAgZGEALw_wcB')
        conversions = [
            {
                'googleClickId': 'xxx',
                'conversionName': 'claim_attempts_testing',
                'conversionTime': '20200309 074357 UTC',
                'conversionValue': 17.0,
                'conversionCurrencyCode': 'AUD',
            },
            {
                'googleClickId': shared_click_id,
                'conversionName': 'claim_attempts_testing',
                'conversionTime': '20200309 074353 UTC',
                'conversionValue': 17.0,
                'conversionCurrencyCode': 'AUD',
            },
            {
                'googleClickId': shared_click_id,
                'conversionName': 'claim_attempts_testing',
                'conversionTime': '20200309 023001 UTC',
                'conversionValue': 17.0,
                'conversionCurrencyCode': 'AUD',
            },
        ]

        uploaded, failed = conversion_util.upload_conversions(conversions)

        print(uploaded, failed)
        self.assertEqual(len(uploaded), 2)
        self.assertEqual(len(failed), 1)
예제 #6
0
    def test_local_credentials_are_able_to_connect_to_adwords(self):
        """List customers without a client customer id and check the count.

        NOTE(review): the expected count (3) depends on the account behind
        the configured credentials -- confirm it is still current.
        """
        # Load secrets via env vars
        execfile("../../secrets.py")
        connection_settings = GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id=None,
            secrets_manager=GoogleAdWordsSecretsManager())
        customer_util = AdWordsCustomerUtil(
            GoogleAdWordsConnectionManager(connection_settings))

        customers = customer_util.get_customers()
        print(customers)
        expected_count = 3
        self.assertEqual(len(customers), expected_count)
예제 #7
0
 def test__should__be_able_to_get_all_campaigns__with_one_account(self):
     """Download every campaign for the configured account and count them.

     NOTE(review): the expected total (2581) depends on the account behind
     the configured credentials -- confirm it is still current.
     """
     # Load secrets via env vars
     execfile("../../secrets.py")
     connection_settings = GoogleAdWordsConnectionSettings(
         client_id=os.getenv("adwords_client_id"),
         user_agent="Tester",
         client_customer_id=os.getenv("adwords_client_customer_id"),
         secrets_manager=GoogleAdWordsSecretsManager())
     campaign_util = AdWordsCampaignUtil(
         GoogleAdWordsConnectionManager(connection_settings))

     campaign_util.set_query_to_fetch_all()
     campaigns = campaign_util.download_all_as_dict()
     print(campaigns)
     expected_count = 2581
     self.assertEqual(expected_count, len(campaigns))
예제 #8
0
 def test__should__be_able_to_get_report_fields__when__choosing_one_report_type(
         self):
     """Fetch the fields of one report type and check how many come back.

     The report type queried is
     ``CAMPAIGN_NEGATIVE_KEYWORDS_PERFORMANCE_REPORT`` and 13 fields are
     expected.
     """
     # Load secrets via env vars
     execfile("../../secrets.py")
     connection_settings = GoogleAdWordsConnectionSettings(
         client_id=os.getenv("adwords_client_id"),
         user_agent="Tester",
         client_customer_id=os.getenv("adwords_client_customer_id"),
         secrets_manager=GoogleAdWordsSecretsManager())
     connection_manager = GoogleAdWordsConnectionManager(connection_settings)
     definition_reader = AdWordsReportDefinitionReader(conn=connection_manager)

     report_fields = definition_reader.get_report_fields(
         "CAMPAIGN_NEGATIVE_KEYWORDS_PERFORMANCE_REPORT")
     expected_count = 13
     self.assertEqual(expected_count, len(report_fields))
예제 #9
0
 def test__negative_keyword_reports(self):
     """Run a negative-keywords report query and check the frame's shape.

     Seven columns are selected from
     ``CAMPAIGN_NEGATIVE_KEYWORDS_PERFORMANCE_REPORT`` and the result of
     ``awql_to_dataframe`` is expected to have shape ``(125493, 7)``.

     NOTE(review): the row count depends on the account behind the
     configured credentials -- confirm it is still current.
     """
     # Load secrets via env vars
     execfile("../../secrets.py")
     connection_settings = GoogleAdWordsConnectionSettings(
         client_id=os.getenv("adwords_client_id"),
         user_agent="Tester",
         client_customer_id=os.getenv("adwords_client_customer_id"),
         secrets_manager=GoogleAdWordsSecretsManager())
     connection_manager = GoogleAdWordsConnectionManager(connection_settings)
     report_reader = AdWordsReportReader(connection_manager)

     selection = adwords.ReportQueryBuilder().Select(
         'AccountDescriptiveName', 'CampaignId', 'CampaignName',
         'CampaignStatus', 'Id', 'KeywordMatchType', 'Criteria')
     report_query = selection.From(
         'CAMPAIGN_NEGATIVE_KEYWORDS_PERFORMANCE_REPORT').Build()

     report_frame = report_reader.awql_to_dataframe(query=report_query)
     print(report_frame)
     expected_shape = (125493, 7)
     self.assertEqual(expected_shape, report_frame.shape)
예제 #10
0
 def test__should__be_able_to_get_report_stream__when__choosing_one_query(
         self):
     """Run a campaign performance query and check the frame's shape.

     Three columns are selected from ``CAMPAIGN_PERFORMANCE_REPORT`` for
     ``YESTERDAY`` and the result of ``awql_to_dataframe`` is expected to
     have shape ``(17046, 3)``.

     NOTE(review): the query window is relative to the run date, so the
     fixed row count presumably only held on the day it was recorded --
     confirm.
     """
     # Load secrets via env vars
     execfile("../../secrets.py")
     connection_settings = GoogleAdWordsConnectionSettings(
         client_id=os.getenv("adwords_client_id"),
         user_agent="Tester",
         client_customer_id=os.getenv("adwords_client_customer_id"),
         secrets_manager=GoogleAdWordsSecretsManager())
     connection_manager = GoogleAdWordsConnectionManager(connection_settings)
     report_reader = AdWordsReportReader(conn=connection_manager)

     selection = adwords.ReportQueryBuilder().Select(
         'AdNetworkType1', 'Impressions', 'Clicks')
     report_query = selection.From(
         'CAMPAIGN_PERFORMANCE_REPORT').During('YESTERDAY').Build()

     report_frame = report_reader.awql_to_dataframe(query=report_query)
     print(report_frame)
     expected_shape = (17046, 3)
     self.assertEqual(expected_shape, report_frame.shape)
예제 #11
0
    def test__should__read_all_accounts__with__parent_id(self):
        """List all accounts under the root customer id, as list and frame.

        The connection uses ``adwords_client_root_customer_id`` as the client
        customer id. ``get_all_accounts`` is expected to return 58 entries
        and ``get_all_accounts_as_dataframe`` a frame of shape ``(58, 8)``.

        NOTE(review): both figures depend on the account behind the
        configured credentials -- confirm they are still current.
        """
        # Load secrets via env vars
        execfile("../../secrets.py")
        connection_settings = GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id=os.getenv("adwords_client_root_customer_id"),
            secrets_manager=GoogleAdWordsSecretsManager())
        connection_manager = GoogleAdWordsConnectionManager(
            connection_settings)
        customer_util = AdWordsManagedCustomerUtil(connection_manager)

        accounts = customer_util.get_all_accounts()
        print(accounts)
        expected_count = 58
        self.assertEqual(expected_count, len(accounts))

        accounts_frame = customer_util.get_all_accounts_as_dataframe()
        print(accounts_frame)
        self.assertEqual((58, 8), accounts_frame.shape)
예제 #12
0
    def test_adwords_upload_with_duplicates_in_same_batch(self):
        """Process one batch containing a repeated conversion and check issues.

        Five conversions are sent through ``_process_data_frame`` with a
        docker-compose Cassandra as state store. Two of them share click id
        and conversion time; the expected result is a single issue entry for
        the second of those, carrying a state-transition error message.

        NOTE(review): ``_upload_conversions`` is not mocked here, so this
        presumably reaches a real AdWords account -- confirm.
        """
        aws_settings = AwsConnectionSettings(region="us-east-1",
                                             secrets_manager=None,
                                             profile="default")
        execfile("../../secrets.py")

        compose = DockerCompose(filepath=os.path.dirname(base.__file__))
        with compose:
            cassandra_host = compose.get_service_host("cassandra", 9042)
            cassandra_port = int(compose.get_service_port("cassandra", 9042))

            cassandra_secrets = CassandraSecretsManager(
                source=DictKeyValueSource({
                    "CASSANDRA_USERNAME": "",
                    "CASSANDRA_PASSWORD": "",
                }))
            state_store_settings = CassandraConnectionSettings(
                cluster_ips=[cassandra_host],
                port=cassandra_port,
                load_balancing_policy=DCAwareRoundRobinPolicy(),
                secrets_manager=cassandra_secrets,
            )

            verify_container_is_up(state_store_settings)

            # Source and destination use the same field names in this test.
            identity_mapping = {
                'googleClickId': 'googleClickId',
                'conversionName': 'conversionName',
                'conversionTime': 'conversionTime',
                'conversionValue': 'conversionValue',
                'conversionCurrencyCode': 'conversionCurrencyCode'
            }
            source_database = os.getenv("dummy_athena_database")
            source_table = os.getenv("dummy_athena_table")
            adwords_settings = GoogleAdWordsConnectionSettings(
                client_id=os.getenv("adwords_client_id"),
                user_agent="Tester",
                client_customer_id=os.getenv("adwords_client_customer_id"),
                secrets_manager=GoogleAdWordsSecretsManager())

            etl_settings = AthenaToAdWordsOfflineConversionSettings(
                source_database=source_database,
                source_table=source_table,
                source_connection_settings=aws_settings,
                etl_identifier="xxxx",
                destination_batch_size=100,
                etl_state_manager_connection=state_store_settings,
                etl_state_manager_keyspace="test",
                transformation_column_mapping=identity_mapping,
                destination_connection_settings=adwords_settings,
            )
            etl = AthenaToAdWordsOfflineConversion(etl_settings)

            # Click id shared by the second, third and fourth rows.
            repeated_click_id = (
                "Cj0KCQiAqY3zBRDQARIsAJeCVxOIyZ8avQ0he3WIpHPwV6hRn"
                "-8Y2gDrUBJcc95tDdLcE35TK1mhhmIaAgZGEALw_wcB")
            batch_rows = [
                {
                    'googleClickId': 'xxx',
                    'conversionName': 'claim_attempts_testing',
                    'conversionTime': '20200309 074357 UTC',
                    'conversionValue': 17.0,
                    'conversionCurrencyCode': 'AUD',
                },
                {
                    'googleClickId': repeated_click_id,
                    'conversionName': 'claim_attempts_testing',
                    'conversionTime': '20200309 074353 UTC',
                    'conversionValue': 17.0,
                    'conversionCurrencyCode': 'AUD',
                },
                {
                    'googleClickId': repeated_click_id,
                    'conversionName': 'claim_attempts_testing',
                    # Duplicate with same time
                    'conversionTime': '20200309 074353 UTC',
                    'conversionValue': 14.0,
                    'conversionCurrencyCode': 'AUD',
                },
                {
                    'googleClickId': repeated_click_id,
                    'conversionName': 'claim_attempts_testing',
                    # Duplicate with diff time
                    'conversionTime': '20200309 084353 UTC',
                    'conversionValue': 14.0,
                    'conversionCurrencyCode': 'AUD',
                },
                {
                    'googleClickId':
                    "EAIaIQobChMI6oiGy_vz5wIVkjUrCh3IcgAuEAAYASAAEgLRk_D_BwE",
                    'conversionName': "claim_attempts_testing",
                    'conversionTime': '20200309 023001 UTC',
                    'conversionValue': 17.0,
                    'conversionCurrencyCode': 'AUD',
                },
            ]
            batch_frame = DataFrame(batch_rows)

            issues = etl._process_data_frame(batch_frame)

            # The duplicate with same time has been picked out as an issue
            transition_error = (
                "Current State 'EtlStates.Processing' cannot transition to "
                "'EtlStates.Processing'")
            expected_issues = [
                {
                    'error': transition_error,
                    'data': {
                        'googleClickId': repeated_click_id,
                        'conversionName': 'claim_attempts_testing',
                        'conversionTime': '20200309 074353 UTC',
                        'conversionValue': 14.0,
                        'conversionCurrencyCode': 'AUD'
                    }
                },
            ]

            self.assertListEqual(issues, expected_issues)
예제 #13
0
    def test_multiple_runs_of_same_data_and_verify_deduplication(self):
        """Process the same two-row frame twice and expect the rerun rejected.

        ``_upload_conversions`` is replaced by a ``MagicMock`` that reports
        the first row as uploaded and the second as failed. The first call to
        ``_process_data_frame`` is expected to return no issues; the second
        call with identical data is expected to return one issue per row with
        the error ``'Current state is not Ready'``.

        A docker-compose Cassandra (service ``cassandra``, port 9042) backs
        the ETL state manager.
        """
        aws_conn = AwsConnectionSettings(region="us-east-1",
                                         secrets_manager=None,
                                         profile="default")
        execfile("../../secrets.py")

        compose = DockerCompose(filepath=os.path.dirname(base.__file__))
        with compose:
            host = compose.get_service_host("cassandra", 9042)
            port = int(compose.get_service_port("cassandra", 9042))

            cassandra_conn_setting = CassandraConnectionSettings(
                cluster_ips=[host],
                port=port,
                load_balancing_policy=DCAwareRoundRobinPolicy(),
                secrets_manager=CassandraSecretsManager(
                    source=DictKeyValueSource({
                        "CASSANDRA_USERNAME": "",
                        "CASSANDRA_PASSWORD": "",
                    })),
            )

            # The return value is not needed by this test; it was previously
            # bound to an unused local that only a commented-out manual
            # DROP TABLE of test.etl_sink_record_state referred to.
            verify_container_is_up(cassandra_conn_setting)

            settings = AthenaToAdWordsOfflineConversionSettings(
                source_database=os.getenv("dummy_athena_database"),
                source_table=os.getenv("dummy_athena_table"),
                source_connection_settings=aws_conn,
                etl_identifier="test",
                destination_batch_size=100,
                etl_state_manager_connection=cassandra_conn_setting,
                etl_state_manager_keyspace="test",
                transformation_column_mapping={
                    'google_click_id': 'googleClickId',
                    'conversion_name': 'conversionName',
                    'conversion_time': 'conversionTime',
                    'conversion_value': 'conversionValue',
                    'conversion_currency_code': 'conversionCurrencyCode'
                },
                destination_connection_settings=GoogleAdWordsConnectionSettings(
                    client_id=os.getenv("adwords_client_id"),
                    user_agent="Tester",
                    client_customer_id=os.getenv("adwords_client_customer_id"),
                    secrets_manager=GoogleAdWordsSecretsManager()),
            )
            etl = AthenaToAdWordsOfflineConversion(settings)
            source_data = [
                {
                    'google_click_id': 'theFirst',
                    'conversion_name': 'claim_attempts_testing',
                    'conversion_time': '20200309 074357 UTC',
                    'conversion_value': 17.0,
                    'conversion_currency_code': 'AUD',
                },
                {
                    'google_click_id': 'failedSecond',
                    'conversion_name': 'claim_attempts_testing',
                    'conversion_time': '20200309 074357 UTC',
                    'conversion_value': 17.0,
                    'conversion_currency_code': 'AUD',
                },
            ]
            test_df = DataFrame(source_data)

            # Stub out the upload step so no actual data is transmitted. The
            # stub returns an (uploaded, failed) pair: 'theFirst' succeeds
            # and 'failedSecond' comes back with an UNPARSEABLE_GCLID error.
            mocked_uploaded = [
                {
                    'googleClickId': 'theFirst',
                    'conversionName': 'claim_attempts_testing',
                    'conversionTime': '20200309 074357 UTC',
                    'conversionValue': 17.0,
                    'conversionCurrencyCode': 'AUD',
                },
            ]
            mocked_failed = [
                {
                    'fieldPath': 'operations[0].operand',
                    'fieldPathElements': [{
                        'field': 'operations',
                        'index': 0
                    }, {
                        'field': 'operand',
                        'index': None
                    }],
                    'trigger': None,
                    'errorString': 'OfflineConversionError.UNPARSEABLE_GCLID',
                    'ApiError.Type': 'OfflineConversionError',
                    'reason': 'UNPARSEABLE_GCLID',
                    'data': {
                        'googleClickId': 'failedSecond',
                        'conversionName': 'claim_attempts_testing',
                        'conversionTime': '20200309 074357 UTC',
                        'conversionValue': 17.0,
                        'conversionCurrencyCode': 'AUD',
                    },
                },
            ]
            etl._upload_conversions = MagicMock(
                return_value=(mocked_uploaded, mocked_failed))

            first_actual = etl._process_data_frame(test_df)
            self.assertListEqual(first_actual, [])

            # Repeat process to cause Duplicates
            actual = etl._process_data_frame(test_df)
            expected = [{
                'data': {
                    'conversionCurrencyCode': 'AUD',
                    'conversionName': 'claim_attempts_testing',
                    'conversionTime': '20200309 074357 UTC',
                    'conversionValue': 17.0,
                    'googleClickId': 'theFirst'
                },
                'error': 'Current state is not Ready'
            }, {
                'data': {
                    'conversionCurrencyCode': 'AUD',
                    'conversionName': 'claim_attempts_testing',
                    'conversionTime': '20200309 074357 UTC',
                    'conversionValue': 17.0,
                    'googleClickId': 'failedSecond'
                },
                'error': 'Current state is not Ready'
            }]

            self.assertListEqual(actual, expected)