def test__should__get_correct_estimations__with__etl_get_parallel_payloads(
            self):
        """Check the per-worker payload split for AdGroupAdService and create the Athena table.

        Asks the ETL to split the source into payloads for 3 workers with a
        page size of 1000 and compares them to the expected split. It then
        creates the Athena table and prints its Glue metadata.

        NOTE(review): the expected page counts (393 per worker) presumably
        reflect the size of the AdWords test account when the test was
        written -- confirm before relying on them.
        """
        # Load secrets via env vars. This runs first, as in the sibling
        # tests, so the secrets managers and os.getenv calls below are
        # evaluated after the secrets file has been executed.
        execfile("../../secrets.py")

        aws_setting = AwsConnectionSettings(
            region="us-east-1",
            secrets_manager=AwsSecretsManager(),
            profile=None)
        target_bucket = os.getenv('S3_TEST_BUCKET')
        target_key_prefix = "something/test"

        adwords_settings = GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id=os.getenv("adwords_client_customer_id"),
            secrets_manager=GoogleAdWordsSecretsManager())
        target_table = "test_adwords_to_athena_table_creation"
        etl_settings = AdWordsToAthenaSettings(
            source_query_fragment=ServiceQueryBuilder().Select('Id').OrderBy(
                'Id'),
            source_service="AdGroupAdService",
            source_service_version="v201809",
            source_connection_settings=adwords_settings,
            target_bucket=target_bucket,
            target_key_prefix=target_key_prefix,
            target_connection_settings=aws_setting,
            target_database="dev",
            target_table=target_table,
            target_table_ddl_progress=True,
            is_partitioned_table=True,
            partition_values=[("abc", "def"), ("pqr", 123)])
        etl = AdWordsToAthena(etl_settings)

        actual_payloads = etl.get_parallel_payloads(page_size=1000,
                                                    number_of_workers=3)
        # Each worker starts where the previous one ends:
        # start_index = worker * number_of_pages * page_size.
        expected_payloads = [{
            'number_of_pages': 393,
            'page_size': 1000,
            'start_index': 0,
            'worker': 0
        }, {
            'number_of_pages': 393,
            'page_size': 1000,
            'start_index': 393000,
            'worker': 1
        }, {
            'number_of_pages': 393,
            'page_size': 1000,
            'start_index': 786000,
            'worker': 2
        }]
        self.assertListEqual(expected_payloads, actual_payloads)

        # Table creation is only smoke-tested: the metadata is printed for
        # manual inspection and not asserted on.
        etl.create_athena_table()
        conn = AwsConnectionManager(aws_setting)
        au = AthenaUtil("dev", conn)
        actual = au.get_glue_table_metadata(target_table)
        print(actual)
Esempio n. 2
0
    def test__should__create_s3_file_for_the_given_indices(self):
        """Transfer one 1000-row page starting at index 35000 and check the resulting S3 key.

        The target prefix is emptied first so the key listing at the end
        only contains what this test wrote.
        """
        # Load secrets via env vars
        execfile("../../secrets.py")

        aws_setting = AwsConnectionSettings(
            region="us-east-1",
            secrets_manager=AwsSecretsManager(),
            profile=None)
        target_bucket = os.getenv('S3_TEST_BUCKET')
        target_key_prefix = "something/test"
        conn = AwsConnectionManager(aws_setting)
        s3u = S3Util(conn=conn, bucket=target_bucket)
        # Start from a clean prefix so leftovers from earlier runs cannot
        # show up in the final listing.
        s3u.delete_recursive(target_key_prefix)
        adwords_settings = GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id=os.getenv("adwords_client_customer_id"),
            secrets_manager=GoogleAdWordsSecretsManager())

        adword_to_s3_util = AdWordsToS3(settings=AdWordsToS3Settings(
            source_query_fragment=ServiceQueryBuilder().Select(
                # Attributes
                'BaseAdGroupId',
                'Id',
                'CampaignId',
                'CampaignName',
                'Name',
                'Status',
                'ContentBidCriterionTypeGroup',
                'BaseCampaignId',
                'TrackingUrlTemplate',
                'FinalUrlSuffix',
                'UrlCustomParameters',
                'AdGroupType').OrderBy('Id'),
            source_service="AdGroupService",
            source_service_version="v201809",
            source_connection_settings=adwords_settings,
            target_bucket=target_bucket,
            target_key_prefix=target_key_prefix,
            target_file_prefix=None,
            target_connection_settings=aws_setting))
        adword_to_s3_util.build_query(start_index=35000,
                                      page_size=1000,
                                      num_iterations=1)
        adword_to_s3_util.transfer_all()
        actual = s3u.get_keys(target_key_prefix)
        # The expected key is built from the prefix the test actually wrote
        # to. The previous literal pointed at a different prefix
        # ('tmp/test/hip_data_tools/adwords_to_s3/test'), which could never
        # match a listing of `target_key_prefix`.
        expected = [target_key_prefix + '/index_35000__35999.parquet']

        self.assertListEqual(expected, actual)
Esempio n. 3
0
    def test__should__get_correct_estimations__with__etl_get_parallel_payloads(
            self):
        """Check how AdWordsToS3 splits AdGroupAdService across three workers.

        With a page size of 1000 the ETL is expected to give every worker
        435 pages, each worker starting where the previous one ends.
        """
        # Load secrets via env vars
        execfile("../../secrets.py")

        aws_connection_settings = AwsConnectionSettings(
            region="us-east-1",
            secrets_manager=AwsSecretsManager(),
            profile=None)
        bucket = os.getenv('S3_TEST_BUCKET')
        key_prefix = "something/test"

        google_connection_settings = GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id=os.getenv("adwords_client_customer_id"),
            secrets_manager=GoogleAdWordsSecretsManager())

        ids_in_order = ServiceQueryBuilder().Select('Id').OrderBy('Id')
        transfer = AdWordsToS3(
            AdWordsToS3Settings(
                source_query_fragment=ids_in_order,
                source_service="AdGroupAdService",
                source_service_version="v201809",
                source_connection_settings=google_connection_settings,
                target_bucket=bucket,
                target_key_prefix=key_prefix,
                target_file_prefix=None,
                target_connection_settings=aws_connection_settings))

        rows_per_page = 1000
        worker_count = 3
        pages_per_worker = 435

        actual_payloads = transfer.get_parallel_payloads(
            page_size=rows_per_page, number_of_workers=worker_count)

        # Worker k begins at k * pages_per_worker * rows_per_page,
        # i.e. 0, 435000 and 870000.
        expected_payloads = [{
            'worker': worker,
            'start_index': worker * pages_per_worker * rows_per_page,
            'number_of_pages': pages_per_worker,
            'page_size': rows_per_page
        } for worker in range(worker_count)]

        self.assertListEqual(expected_payloads, actual_payloads)
    def test__should__create_table__with__a_general_report(self):
        """Load a campaign negative-keywords report into Athena and query it back.

        Transfers the report, creates the table, adds the partitions and
        then runs a `limit 10` query against the new table, asserting on
        the number of rows in the result set.
        """
        # Load secrets via env vars. This runs first, as in the sibling
        # tests, so the secrets managers and os.getenv calls below are
        # evaluated after the secrets file has been executed.
        execfile("../../secrets.py")

        aws_setting = AwsConnectionSettings(
            region="us-east-1",
            secrets_manager=AwsSecretsManager(),
            profile=None)
        target_bucket = os.getenv('S3_TEST_BUCKET')
        target_key_prefix = "something/test"

        adwords_settings = GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id=os.getenv("adwords_client_customer_id"),
            secrets_manager=GoogleAdWordsSecretsManager())
        target_table = "test_adwords_negative_report"
        etl_settings = AdWordsReportsToAthenaSettings(
            source_query=(ReportQueryBuilder().Select(
                'AccountDescriptiveName', 'CampaignId', 'CampaignName',
                'CampaignStatus', 'Id', 'KeywordMatchType', 'Criteria').From(
                    'CAMPAIGN_NEGATIVE_KEYWORDS_PERFORMANCE_REPORT').Build()),
            source_include_zero_impressions=True,
            source_connection_settings=adwords_settings,
            target_bucket=target_bucket,
            target_key_prefix=target_key_prefix,
            target_connection_settings=aws_setting,
            target_database="dev",
            target_table=target_table,
            target_table_ddl_progress=True,
            is_partitioned_table=True,
            partition_values=[("abc", "def"), ("pqr", 123)],
            target_file_prefix="data",
            transformation_field_type_mask=None)
        etl = AdWordsReportsToAthena(etl_settings)
        etl.transfer()
        etl.create_athena_table()
        etl.add_partitions()

        # Reuse the bucket already resolved above instead of reading the
        # environment a second time.
        au = AthenaUtil(database="dev",
                        conn=AwsConnectionManager(aws_setting),
                        output_bucket=target_bucket)
        actual = au.run_query(query_string="""
        select * from dev.test_adwords_negative_report limit 10
        """,
                              return_result=True)
        print(actual)
        # NOTE(review): 11 rows for a `limit 10` query -- presumably the
        # result set carries a header row in addition to the 10 data rows;
        # confirm against AthenaUtil.run_query.
        expected = 11

        self.assertEqual(expected, len(actual["ResultSet"]["Rows"]))
Esempio n. 5
0
    def test__should__transfer_correct_amount_of_files__with__one_parallel_fragment(
            self):
        """Transfer two 5-row pages from index 0 and expect exactly two parquet keys.

        The target prefix is wiped beforehand so the final listing only
        reflects this run.
        """
        # Load secrets via env vars
        execfile("../../secrets.py")
        aws_connection_settings = AwsConnectionSettings(
            region="us-east-1",
            secrets_manager=AwsSecretsManager(),
            profile=None)
        bucket = os.getenv('S3_TEST_BUCKET')
        key_prefix = "tmp/test/hip_data_tools/adwords_to_s3/test"
        s3 = S3Util(conn=AwsConnectionManager(aws_connection_settings),
                    bucket=bucket)
        s3.delete_recursive(key_prefix)
        google_connection_settings = GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id=os.getenv("adwords_client_customer_id"),
            secrets_manager=GoogleAdWordsSecretsManager())

        ids_in_order = ServiceQueryBuilder().Select('Id').OrderBy('Id')
        transfer = AdWordsToS3(
            AdWordsToS3Settings(
                source_query_fragment=ids_in_order,
                source_service="AdGroupAdService",
                source_service_version="v201809",
                source_connection_settings=google_connection_settings,
                target_bucket=bucket,
                target_key_prefix=key_prefix,
                target_file_prefix=None,
                target_connection_settings=aws_connection_settings))
        transfer.build_query(start_index=0, page_size=5, num_iterations=2)

        transfer.transfer_all()

        written_keys = s3.get_keys(key_prefix)
        print(written_keys)
        # One file per page: rows 0-4 and rows 5-9.
        expected_keys = [
            'tmp/test/hip_data_tools/adwords_to_s3/test/index_0__4.parquet',
            'tmp/test/hip_data_tools/adwords_to_s3/test/index_5__9.parquet'
        ]
        self.assertListEqual(expected_keys, written_keys)
Esempio n. 6
0
# Convert the downloaded TypeScript definition module into a Python module:
# swap the TS header for a Python one, drop the TS export, and rewrite the
# TS-only syntax (comments, computed keys) into Python equivalents.
data = response.read()  # a `bytes` object
text = data.decode('utf-8')

# Exact header of the TypeScript source; it is replaced wholesale below.
ts_head = "\n".join(
    ("import {Subsystem, Type as CommandType} from '../unpi/constants';",
     "import ParameterType from './parameterType';",
     "import {MtCmd} from './tstype';", "", "const Definition: {",
     "    [s: number]: MtCmd[];", "}", ""))

# Python replacement header. The bare names (name, ID, request, ...) let the
# unquoted TS object keys evaluate as plain strings in Python. It ends with
# "Definition " so the text that follows the TS header completes the
# assignment.
py_head = "\n".join(
    ('"""', 'GENERATED BY get_definitions.py', '"""',
     "from zigpy_cc.types import Subsystem, CommandType, ParameterType", "",
     "name = 'name'", "ID = 'ID'", "request = 'request'",
     "response = 'response'", "parameterType = 'parameterType'",
     "type = 'type'", "", "Definition "))

ts_export = "export default Definition;"

# Order matters: the header and export are matched against the untouched TS
# text before the generic syntax rewrites run.
text = text.replace(ts_head, py_head)
text = text.replace(ts_export, "")
text = text.replace('//', '#')
text = text.replace('    [Subsystem', '    Subsystem')
text = text.replace(']: [', ': [')

# The text was decoded as UTF-8 above, so write it back as UTF-8 explicitly
# rather than with the platform default encoding, which may not be able to
# represent every character.
with open(target_path, "w", encoding="utf-8") as text_file:
    text_file.write(text)

print("Check syntax...")
# NOTE(security): this executes code derived from downloaded content. Only
# run this script against a trusted source.
execfile(target_path)

print("Success")
    def test__should__create_table__with__geo_performance_report(self):
        """Load a GEO_PERFORMANCE_REPORT into Athena and query it back.

        Transfers the report for 2020-06-01 to 2020-07-01, creates the
        table, adds the partitions and then runs a `limit 10` query against
        the new table, asserting on the number of rows in the result set.
        """
        # Load secrets via env vars. This runs first, as in the sibling
        # tests, so the secrets managers and os.getenv calls below are
        # evaluated after the secrets file has been executed.
        execfile("../../secrets.py")

        aws_setting = AwsConnectionSettings(
            region="ap-southeast-2",
            secrets_manager=AwsSecretsManager(
                access_key_id_var="SOME_CUSTOM_AWS_ACCESS_KEY_ID",
                secret_access_key_var="SOME_CUSTOM_AWS_SECRET_ACCESS_KEY",
                use_session_token=True,
                aws_session_token_var="SOME_CUSTOM_AWS_SESSION_TOKEN"),
            profile=None)
        target_bucket = "test-bucket"
        target_key_prefix = "something/test"

        adwords_settings = GoogleAdWordsConnectionSettings(
            client_id=os.getenv("adwords_client_id"),
            user_agent="Tester",
            client_customer_id="1111111111",
            secrets_manager=GoogleAdWordsSecretsManager())
        target_table = "test_adwords_geo_performance_report"
        etl_settings = AdWordsReportsToAthenaSettings(
            source_query=(
                ReportQueryBuilder().Select(
                    # Attributes
                    'AccountDescriptiveName',
                    'CampaignId',
                    'CityCriteriaId',
                    'CountryCriteriaId',
                    'CustomerDescriptiveName',
                    'ExternalCustomerId',
                    'IsTargetingLocation',
                    'MetroCriteriaId',
                    'MostSpecificCriteriaId',
                    'RegionCriteriaId',

                    # Segments
                    'Date',

                    # Metrics
                    'Impressions',
                    'Clicks',
                    'ConversionRate',
                    'Conversions',
                    'ConversionValue',
                    'Cost',
                    'CostPerConversion').From('GEO_PERFORMANCE_REPORT').During(
                        start_date="20200601", end_date="20200701").Build()),
            source_include_zero_impressions=False,
            source_connection_settings=adwords_settings,
            target_bucket=target_bucket,
            target_key_prefix=target_key_prefix,
            target_connection_settings=aws_setting,
            target_database="dev",
            target_table=target_table,
            target_table_ddl_progress=True,
            is_partitioned_table=True,
            partition_values=[("abc", "def"), ("pqr", 123)],
            target_file_prefix="data",
            # `np.int` was only an alias for the builtin `int` and was
            # removed in NumPy 1.24; the builtin is the same object on
            # older releases and keeps working on newer ones.
            transformation_field_type_mask={
                "country__territory": int,
                "region": int,
                "most_specific_location": int
            })
        etl = AdWordsReportsToAthena(etl_settings)
        etl.transfer()
        etl.create_athena_table()
        etl.add_partitions()

        au = AthenaUtil(database="dev",
                        conn=AwsConnectionManager(aws_setting),
                        output_bucket=os.environ["S3_TEST_BUCKET"])
        actual = au.run_query(query_string="""
            select * from dev.test_adwords_geo_performance_report limit 10
            """,
                              return_result=True)
        print(actual)
        # NOTE(review): 11 rows for a `limit 10` query -- presumably the
        # result set carries a header row in addition to the 10 data rows;
        # confirm against AthenaUtil.run_query.
        expected = 11

        self.assertEqual(expected, len(actual["ResultSet"]["Rows"]))
Esempio n. 8
0
# Stream up to `num` recent English tweets matching SEARCH and append one
# log-style line per tweet to `f`:
#   <screen_name> [<created_at>] "<text without newlines>" <followers> <retweets>
# `i`, `num`, `f`, `api`, SEARCH, FROM and TO are set up earlier in the script.
try:
    for res in tweepy.Cursor(api.search,
                             q=SEARCH,
                             rpp=100,
                             count=20,
                             result_type="recent",
                             since=FROM,
                             until=TO,
                             include_entities=True,
                             lang="en").items(num):
        i += 1
        # Build the whole record first and write it in one call, so a
        # failure while formatting never leaves a half-written line behind.
        record = "".join((
            res.user.screen_name,
            ' ',
            '[',
            res.created_at.strftime("%d/%b/%Y:%H:%M:%S %Z"),
            ']',
            " ",
            '"',
            res.text.replace('\n', ''),
            '"',
            " ",
            str(res.user.followers_count),
            " ",
            str(res.retweet_count),
            '\n',
        ))
        f.write(record)
        if i % 50 == 0:
            # NOTE(review): if this runs under Python 2 without true
            # division, `i / num` truncates to 0 -- confirm the interpreter.
            print("Tweets retrieved %", ((i / num) * 100))
finally:
    # Close the output even if the API call or a write raises part-way
    # through, so records already written are flushed and the handle is
    # not leaked.
    f.close()
print("Tweets retrieved ", i)

# Run the analyser script over the file that was just written.
execfile(analyser.__file__)