def test__should__get_correct_estimations__with__etl_get_parallel_payloads(
        self):
    """Integration test: AdWordsToAthena should split the AdWords extraction
    into equal parallel worker payloads, and then create the Athena table.

    Requires live AWS and AdWords credentials (S3_TEST_BUCKET plus the
    adwords_* env vars loaded from ../../secrets.py).
    """
    aws_setting = AwsConnectionSettings(
        region="us-east-1",
        secrets_manager=AwsSecretsManager(),
        profile=None)
    target_bucket = os.getenv('S3_TEST_BUCKET')
    target_key_prefix = "something/test"
    # Load secrets via env vars. execfile() is Python 2 only and was
    # removed in Python 3; read and exec the script instead.
    with open("../../secrets.py") as secrets_file:
        exec(secrets_file.read())
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    target_table = "test_adwords_to_athena_table_creation"
    etl_settings = AdWordsToAthenaSettings(
        source_query_fragment=ServiceQueryBuilder().Select('Id').OrderBy(
            'Id'),
        source_service="AdGroupAdService",
        source_service_version="v201809",
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_connection_settings=aws_setting,
        target_database="dev",
        target_table=target_table,
        target_table_ddl_progress=True,
        is_partitioned_table=True,
        partition_values=[("abc", "def"), ("pqr", 123)])
    etl = AdWordsToAthena(etl_settings)
    actual_payloads = etl.get_parallel_payloads(page_size=1000,
                                                number_of_workers=3)
    # 3 workers, each covering 393 pages of 1000 rows, with contiguous
    # start indices — values depend on the current size of the live
    # AdGroupAd data set.
    expected_payloads = [{
        'number_of_pages': 393,
        'page_size': 1000,
        'start_index': 0,
        'worker': 0
    }, {
        'number_of_pages': 393,
        'page_size': 1000,
        'start_index': 393000,
        'worker': 1
    }, {
        'number_of_pages': 393,
        'page_size': 1000,
        'start_index': 786000,
        'worker': 2
    }]
    self.assertListEqual(expected_payloads, actual_payloads)
    etl.create_athena_table()
    conn = AwsConnectionManager(aws_setting)
    au = AthenaUtil("dev", conn)
    # Printed for manual inspection only; no assertion on the metadata.
    actual = au.get_glue_table_metadata(target_table)
    print(actual)
def test__should__create_s3_file_for_the_given_indices(self):
    """Integration test: AdWordsToS3 should extract one page of AdGroup
    rows starting at index 35000 and land exactly one parquet file in S3.

    Requires live AWS and AdWords credentials (S3_TEST_BUCKET plus the
    adwords_* env vars loaded from ../../secrets.py).
    """
    # Load secrets via env vars. execfile() is Python 2 only and was
    # removed in Python 3; read and exec the script instead.
    with open("../../secrets.py") as secrets_file:
        exec(secrets_file.read())
    aws_setting = AwsConnectionSettings(
        region="us-east-1",
        secrets_manager=AwsSecretsManager(),
        profile=None)
    target_bucket = os.getenv('S3_TEST_BUCKET')
    target_key_prefix = "something/test"
    conn = AwsConnectionManager(aws_setting)
    s3u = S3Util(conn=conn, bucket=target_bucket)
    # Start from a clean prefix so the final key listing is deterministic.
    s3u.delete_recursive(target_key_prefix)
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    adword_to_s3_util = AdWordsToS3(settings=AdWordsToS3Settings(
        source_query_fragment=ServiceQueryBuilder().Select(
            # Attributes
            'BaseAdGroupId',
            'Id',
            'CampaignId',
            'CampaignName',
            'Name',
            'Status',
            'ContentBidCriterionTypeGroup',
            'BaseCampaignId',
            'TrackingUrlTemplate',
            'FinalUrlSuffix',
            'UrlCustomParameters',
            'AdGroupType').OrderBy('Id'),
        source_service="AdGroupService",
        source_service_version="v201809",
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_file_prefix=None,
        target_connection_settings=aws_setting))
    # One iteration of 1000 rows from index 35000 -> keys 35000..35999.
    adword_to_s3_util.build_query(start_index=35000,
                                  page_size=1000,
                                  num_iterations=1)
    adword_to_s3_util.transfer_all()
    actual = s3u.get_keys(target_key_prefix)
    # NOTE(review): expected key uses the 'tmp/test/...' prefix while the
    # transfer wrote under 'something/test' — confirm this expectation
    # against the actual key layout.
    expected = [
        'tmp/test/hip_data_tools/adwords_to_s3/test/index_35000__35999.parquet'
    ]
    self.assertListEqual(expected, actual)
def test__should__get_correct_estimations__with__etl_get_parallel_payloads(
        self):
    """Integration test: AdWordsToS3 should split the AdGroupAd extraction
    into equal parallel worker payloads.

    NOTE(review): this method name duplicates the AdWordsToAthena
    estimation test — if both live in the same TestCase the later
    definition silently shadows the earlier one; confirm they are in
    separate classes or rename one.
    """
    # Load secrets via env vars. execfile() is Python 2 only and was
    # removed in Python 3; read and exec the script instead.
    with open("../../secrets.py") as secrets_file:
        exec(secrets_file.read())
    aws_setting = AwsConnectionSettings(
        region="us-east-1",
        secrets_manager=AwsSecretsManager(),
        profile=None)
    target_bucket = os.getenv('S3_TEST_BUCKET')
    target_key_prefix = "something/test"
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    etl_settings = AdWordsToS3Settings(
        source_query_fragment=ServiceQueryBuilder().Select('Id').OrderBy(
            'Id'),
        source_service="AdGroupAdService",
        source_service_version="v201809",
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_file_prefix=None,
        target_connection_settings=aws_setting)
    etl = AdWordsToS3(etl_settings)
    actual_payloads = etl.get_parallel_payloads(page_size=1000,
                                                number_of_workers=3)
    # 3 workers, each covering 435 pages of 1000 rows — values depend on
    # the current size of the live AdGroupAd data set.
    expected_payloads = [{
        'worker': 0,
        'start_index': 0,
        'number_of_pages': 435,
        'page_size': 1000
    }, {
        'worker': 1,
        'start_index': 435000,
        'number_of_pages': 435,
        'page_size': 1000
    }, {
        'worker': 2,
        'start_index': 870000,
        'number_of_pages': 435,
        'page_size': 1000
    }]
    self.assertListEqual(expected_payloads, actual_payloads)
def test__should__create_table__with__a_general_report(self):
    """Integration test: AdWordsReportsToAthena should transfer the
    campaign negative keywords report to S3, create a partitioned Athena
    table over it, and return rows when queried.

    Requires live AWS and AdWords credentials (S3_TEST_BUCKET plus the
    adwords_* env vars loaded from ../../secrets.py).
    """
    aws_setting = AwsConnectionSettings(
        region="us-east-1",
        secrets_manager=AwsSecretsManager(),
        profile=None)
    target_bucket = os.getenv('S3_TEST_BUCKET')
    target_key_prefix = "something/test"
    # Load secrets via env vars. execfile() is Python 2 only and was
    # removed in Python 3; read and exec the script instead.
    with open("../../secrets.py") as secrets_file:
        exec(secrets_file.read())
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    target_table = "test_adwords_negative_report"
    etl_settings = AdWordsReportsToAthenaSettings(
        source_query=(ReportQueryBuilder().Select(
            'AccountDescriptiveName', 'CampaignId', 'CampaignName',
            'CampaignStatus', 'Id', 'KeywordMatchType', 'Criteria').From(
                'CAMPAIGN_NEGATIVE_KEYWORDS_PERFORMANCE_REPORT').Build()),
        source_include_zero_impressions=True,
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_connection_settings=aws_setting,
        target_database="dev",
        target_table=target_table,
        target_table_ddl_progress=True,
        is_partitioned_table=True,
        partition_values=[("abc", "def"), ("pqr", 123)],
        target_file_prefix="data",
        transformation_field_type_mask=None)
    etl = AdWordsReportsToAthena(etl_settings)
    etl.transfer()
    etl.create_athena_table()
    etl.add_partitions()
    au = AthenaUtil(database="dev",
                    conn=AwsConnectionManager(aws_setting),
                    output_bucket=os.environ["S3_TEST_BUCKET"])
    actual = au.run_query(query_string="""
        select * from dev.test_adwords_negative_report limit 10
        """, return_result=True)
    print(actual)
    # 10 data rows + 1 header row in the Athena result set.
    expected = 11
    self.assertEqual(expected, len(actual["ResultSet"]["Rows"]))
def test__should__transfer_correct_amount_of_files__with__one_parallel_fragment(
        self):
    """Integration test: AdWordsToS3 should produce one parquet file per
    iteration (2 iterations of page_size 5 -> two files) under the
    target prefix.

    Requires live AWS and AdWords credentials (S3_TEST_BUCKET plus the
    adwords_* env vars loaded from ../../secrets.py).
    """
    # Load secrets via env vars. execfile() is Python 2 only and was
    # removed in Python 3; read and exec the script instead.
    with open("../../secrets.py") as secrets_file:
        exec(secrets_file.read())
    aws_setting = AwsConnectionSettings(
        region="us-east-1",
        secrets_manager=AwsSecretsManager(),
        profile=None)
    target_bucket = os.getenv('S3_TEST_BUCKET')
    target_key_prefix = "tmp/test/hip_data_tools/adwords_to_s3/test"
    conn = AwsConnectionManager(aws_setting)
    s3u = S3Util(conn=conn, bucket=target_bucket)
    # Start from a clean prefix so the final key listing is deterministic.
    s3u.delete_recursive(target_key_prefix)
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id=os.getenv("adwords_client_customer_id"),
        secrets_manager=GoogleAdWordsSecretsManager())
    etl_settings = AdWordsToS3Settings(
        source_query_fragment=ServiceQueryBuilder().Select('Id').OrderBy(
            'Id'),
        source_service="AdGroupAdService",
        source_service_version="v201809",
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_file_prefix=None,
        target_connection_settings=aws_setting)
    etl = AdWordsToS3(etl_settings)
    # Two iterations of 5 rows each: indices 0..4 and 5..9.
    etl.build_query(start_index=0, page_size=5, num_iterations=2)
    etl.transfer_all()
    actual = s3u.get_keys(target_key_prefix)
    print(actual)
    expected = [
        'tmp/test/hip_data_tools/adwords_to_s3/test/index_0__4.parquet',
        'tmp/test/hip_data_tools/adwords_to_s3/test/index_5__9.parquet'
    ]
    self.assertListEqual(expected, actual)
# Convert the fetched TypeScript definitions file into a Python module:
# swap the TS header/export boilerplate for a Python header, translate
# comment markers, and strip TS-specific index syntax. `response` and
# `target_path` are defined earlier in this script.
data = response.read()  # a `bytes` object
text = data.decode('utf-8')
# Exact TypeScript preamble expected at the top of the source file.
ts_head = "\n".join(
    ("import {Subsystem, Type as CommandType} from '../unpi/constants';",
     "import ParameterType from './parameterType';",
     "import {MtCmd} from './tstype';", "", "const Definition: {",
     "    [s: number]: MtCmd[];", "}", ""))
# Python preamble that replaces it in the generated module.
py_head = "\n".join(
    ('"""', 'GENERATED BY get_definitions.py', '"""',
     "from zigpy_cc.types import Subsystem, CommandType, ParameterType", "",
     "name = 'name'", "ID = 'ID'", "request = 'request'",
     "response = 'response'", "parameterType = 'parameterType'",
     "type = 'type'", "", "Definition "))
ts_export = "export default Definition;"
text = text.replace(ts_head, py_head)
text = text.replace(ts_export, "")
text = text.replace('//', '#')  # TS line comments -> Python comments
text = text.replace('    [Subsystem', '    Subsystem')
text = text.replace(']: [', ': [')  # drop TS index-signature brackets
with open(target_path, "w") as text_file:
    text_file.write(text)
print("Check syntax...")
# Syntax-check the generated module by executing it. execfile() is
# Python 2 only and was removed in Python 3.
with open(target_path) as check_file:
    exec(compile(check_file.read(), target_path, "exec"))
print("Success")
def test__should__create_table__with__geo_performance_report(self):
    """Integration test: AdWordsReportsToAthena should transfer the GEO
    performance report to S3 (with an integer type mask on three
    location columns), create a partitioned Athena table, and return
    rows when queried.

    Uses custom-named AWS credential env vars via AwsSecretsManager and
    AdWords secrets loaded from ../../secrets.py.
    """
    aws_setting = AwsConnectionSettings(
        region="ap-southeast-2",
        secrets_manager=AwsSecretsManager(
            access_key_id_var="SOME_CUSTOM_AWS_ACCESS_KEY_ID",
            secret_access_key_var="SOME_CUSTOM_AWS_SECRET_ACCESS_KEY",
            use_session_token=True,
            aws_session_token_var="SOME_CUSTOM_AWS_SESSION_TOKEN"),
        profile=None)
    # NOTE(review): transfer writes to the hard-coded "test-bucket" but the
    # Athena query output goes to S3_TEST_BUCKET — confirm this is intended.
    target_bucket = "test-bucket"
    target_key_prefix = "something/test"
    # Load secrets via env vars. execfile() is Python 2 only and was
    # removed in Python 3; read and exec the script instead.
    with open("../../secrets.py") as secrets_file:
        exec(secrets_file.read())
    adwords_settings = GoogleAdWordsConnectionSettings(
        client_id=os.getenv("adwords_client_id"),
        user_agent="Tester",
        client_customer_id="1111111111",
        secrets_manager=GoogleAdWordsSecretsManager())
    target_table = "test_adwords_geo_performance_report"
    etl_settings = AdWordsReportsToAthenaSettings(
        source_query=(
            ReportQueryBuilder().Select(
                # Attributes
                'AccountDescriptiveName',
                'CampaignId',
                'CityCriteriaId',
                'CountryCriteriaId',
                'CustomerDescriptiveName',
                'ExternalCustomerId',
                'IsTargetingLocation',
                'MetroCriteriaId',
                'MostSpecificCriteriaId',
                'RegionCriteriaId',
                # Segments
                'Date',
                # Metrics
                'Impressions',
                'Clicks',
                'ConversionRate',
                'Conversions',
                'ConversionValue',
                'Cost',
                'CostPerConversion').From('GEO_PERFORMANCE_REPORT').During(
                    start_date="20200601", end_date="20200701").Build()),
        source_include_zero_impressions=False,
        source_connection_settings=adwords_settings,
        target_bucket=target_bucket,
        target_key_prefix=target_key_prefix,
        target_connection_settings=aws_setting,
        target_database="dev",
        target_table=target_table,
        target_table_ddl_progress=True,
        is_partitioned_table=True,
        partition_values=[("abc", "def"), ("pqr", 123)],
        target_file_prefix="data",
        # `np.int` was deprecated in NumPy 1.20 and removed in 1.24; it
        # was an alias for the builtin `int`, so use `int` directly.
        transformation_field_type_mask={
            "country__territory": int,
            "region": int,
            "most_specific_location": int
        })
    etl = AdWordsReportsToAthena(etl_settings)
    etl.transfer()
    etl.create_athena_table()
    etl.add_partitions()
    au = AthenaUtil(database="dev",
                    conn=AwsConnectionManager(aws_setting),
                    output_bucket=os.environ["S3_TEST_BUCKET"])
    actual = au.run_query(query_string="""
        select * from dev.test_adwords_geo_performance_report limit 10
        """, return_result=True)
    print(actual)
    # 10 data rows + 1 header row in the Athena result set.
    expected = 11
    self.assertEqual(expected, len(actual["ResultSet"]["Rows"]))
# Fetch up to `num` recent English tweets matching SEARCH between FROM and
# TO, writing each as an access-log-style line to the already-open file
# `f`, then run the analyser script over the output. `api`, `f`, `i`,
# `num`, `SEARCH`, `FROM`, `TO` and `analyser` are defined earlier.
for res in tweepy.Cursor(api.search,
                         q=SEARCH,
                         rpp=100,
                         count=20,
                         result_type="recent",
                         since=FROM,
                         until=TO,
                         include_entities=True,
                         lang="en").items(num):
    i += 1
    # Format: screen_name [timestamp] "text" followers retweets
    f.write(res.user.screen_name)
    f.write(' ')
    f.write('[')
    f.write(res.created_at.strftime("%d/%b/%Y:%H:%M:%S %Z"))
    f.write(']')
    f.write(" ")
    f.write('"')
    f.write(res.text.replace('\n', ''))  # keep each tweet on one line
    f.write('"')
    f.write(" ")
    f.write(str(res.user.followers_count))
    f.write(" ")
    f.write(str(res.retweet_count))
    f.write('\n')
    # Progress indicator every 50 tweets.
    if i % 50 == 0:
        print("Tweets retrieved %", ((i / num) * 100))
f.close()
print("Tweets retrieved ", i)
# Run the analyser module as a script. execfile() is Python 2 only and
# was removed in Python 3; read and exec the file instead.
with open(analyser.__file__) as analyser_source:
    exec(analyser_source.read())