def step_show_result_button(context):
    """Fill the travel insurance search form from the step's data table.

    Converts the behave table to a criteria dict, applies each provided
    criterion to the page, and records what was searched in
    ``context.travel_insurance_searched_options`` for later verification
    steps.
    """
    input_criteria = DataHelper.exclude_dictionary(
        DataHelper.convert_key_value_table_to_dictionary(context.table))
    context.travel_insurance_searched_options = {}
    # Single page object reused for every interaction; it only wraps the
    # browser handle.
    page = TravelInsurancePage(context.browser)

    # Dropdown criteria all follow the same select-then-record pattern,
    # so dispatch through a table instead of five copy-pasted branches.
    dropdown_actions = {
        'Policy Type': page.click_and_select_policy_dropdown,
        'Whos Going': page.click_and_select_whos_going_dropdown,
        'Destination': page.click_and_select_destination_dropdown,
    }
    for criterion, select in dropdown_actions.items():
        if criterion in input_criteria:
            select(input_criteria[criterion])
            context.travel_insurance_searched_options[criterion] = \
                input_criteria[criterion]

    # Date criteria: parse 'dd-mm-YYYY' and click only the day-of-month
    # in the date picker (the original `.date().day` was redundant —
    # datetime exposes `.day` directly).
    date_actions = {
        'Start Date': page.click_and_select_start_date,
        'End Date': page.click_and_select_end_date,
    }
    for criterion, select in date_actions.items():
        if criterion in input_criteria:
            day = datetime.strptime(input_criteria[criterion], '%d-%m-%Y').day
            select(day)
            context.travel_insurance_searched_options[criterion] = \
                input_criteria[criterion]
def execute(self, context):
    """Join two TSV files from S3 and write the joined result back to S3.

    Reads the left and right inputs into dataframes, joins them on the
    configured columns with the configured suffix, and writes the result
    as TSV to ``self.s3_key_out``.
    """
    aws = AwsHook(aws_conn_id=self.aws_conn_id)
    s3_client = aws.get_client_type('s3', region_name=self.s3_region)
    s3_resource = aws.get_resource_type('s3', region_name=self.s3_region)

    data_left = DataHelper.read_tsv_from_s3_to_df(s3_client, self.s3_bucket,
                                                  self.s3_key_in_left)
    # Bug fix: log the key actually read; the old message referenced
    # self.s3_key_in, which this operator does not define.
    self.log.info(
        f"Read tsv file s3://{self.s3_bucket}/{self.s3_key_in_left} into dataframe."
    )
    data_right = DataHelper.read_tsv_from_s3_to_df(s3_client, self.s3_bucket,
                                                   self.s3_key_in_right)
    self.log.info(
        f"Read tsv file s3://{self.s3_bucket}/{self.s3_key_in_right} into dataframe."
    )

    joined_data = DataHelper.get_joined_data_from_dfs(
        data_left, data_right, self.left_on_column, self.right_on_column,
        self.suffix_name, self.output_columns)
    DataHelper.write_df_to_tsv_in_s3(s3_resource, joined_data,
                                     self.s3_bucket, self.s3_key_out)
    self.log.info(
        f"Wrote tsv file with joined columns {self.output_columns} dropped to s3://{self.s3_bucket}/{self.s3_key_out}."
    )
def execute(self, context):
    """Unstack one column of a TSV file in S3 and write the result back.

    Reads ``self.s3_key_in`` into a dataframe, unstacks
    ``self.unstack_column`` keyed by ``self.id_column``, and writes the
    reshaped data as TSV to ``self.s3_key_out``.
    """
    hook = AwsHook(aws_conn_id=self.aws_conn_id)
    client = hook.get_client_type('s3', region_name=self.s3_region)
    resource = hook.get_resource_type('s3', region_name=self.s3_region)

    frame = DataHelper.read_tsv_from_s3_to_df(client, self.s3_bucket,
                                              self.s3_key_in)
    self.log.info(
        f"Read tsv file s3://{self.s3_bucket}/{self.s3_key_in} into dataframe.")

    unstacked = DataHelper.unstack_df_column(frame, self.id_column,
                                             self.unstack_column)
    DataHelper.write_df_to_tsv_in_s3(resource, unstacked, self.s3_bucket,
                                     self.s3_key_out)
    self.log.info(
        f"Wrote tsv file with unstacked {self.unstack_column} to s3://{self.s3_bucket}/{self.s3_key_out}.")
def execute(self, context):
    """Combine all engagement TSVs under one S3 prefix into a single file.

    Lists every object under ``{engagement_type}/``, reads each into a
    dataframe, combines them keyed on (user_id, engaged_with_id) with a
    constant-1 indicator, and writes the combined TSV to
    ``self.s3_key_out``.
    """
    aws = AwsHook(aws_conn_id=self.aws_conn_id)
    s3_client = aws.get_client_type('s3', region_name=self.s3_region)
    s3_resource = aws.get_resource_type('s3', region_name=self.s3_region)

    engagement_dfs = []
    for key in DataHelper.generate_all_keys_from_s3_with_prefix(
            s3_client, self.s3_bucket, f"{self.engagement_type}/"):
        df = DataHelper.read_tsv_from_s3_to_df(s3_client, self.s3_bucket, key)
        self.log.info(
            f"Read tsv file s3://{self.s3_bucket}/{key} into dataframe.")
        engagement_dfs.append(df)

    # Every engagement is reduced to a binary indicator (lambda x: 1).
    all_engagement_df = DataHelper.combine_engagement_dfs(
        engagement_dfs, ['user_id', 'engaged_with_id'], lambda x: 1)
    DataHelper.write_df_to_tsv_in_s3(s3_resource, all_engagement_df,
                                     self.s3_bucket, self.s3_key_out)
    # Bug fix: the file is written to s3_key_out directly; the old log
    # message wrongly claimed an {engagement_type}/ prefix on the output.
    self.log.info(
        f"Wrote combined engagement tsv file to s3://{self.s3_bucket}/{self.s3_key_out}."
    )
def execute(self, context):
    """Parse activity events from a JSON object in S3 and write them as TSV.

    Buffers the S3 object as a file, parses ``self.activity`` events into
    a dataframe, and writes the result to ``self.s3_key_out``.
    """
    aws = AwsHook(aws_conn_id=self.aws_conn_id)
    s3_client = aws.get_client_type('s3', region_name=self.s3_region)
    s3_resource = aws.get_resource_type('s3', region_name=self.s3_region)

    self.log.info(
        f"Parsing {self.activity} events from s3://{self.s3_bucket}/{self.s3_key_in}."
    )
    with DataHelper.buffer_s3_object_as_file(s3_client, self.s3_bucket,
                                             self.s3_key_in) as f:
        # Bug fix: the buffered handle is bound to `f`; the old code
        # passed an undefined name `json_file`, raising NameError.
        data = DataHelper.parse_activity_json_to_df(f, self.activity)

    DataHelper.write_df_to_tsv_in_s3(s3_resource, data, self.s3_bucket,
                                     self.s3_key_out)
    # Bug fix: corrected "eveents" typo in the log message.
    self.log.info(
        f"Wrote {self.activity} events to s3://{self.s3_bucket}/{self.s3_key_out}."
    )
def execute(self, context):
    """Keep only the top-n strongest connections from a TSV file in S3.

    Reads ``self.s3_key_in`` into a dataframe, selects the
    ``self.n_strongest`` closest connections, and writes them as TSV to
    ``self.s3_key_out``.
    """
    hook = AwsHook(aws_conn_id=self.aws_conn_id)
    client = hook.get_client_type('s3', region_name=self.s3_region)
    resource = hook.get_resource_type('s3', region_name=self.s3_region)

    connections = DataHelper.read_tsv_from_s3_to_df(client, self.s3_bucket,
                                                    self.s3_key_in)
    self.log.info(
        f"Read tsv file s3://{self.s3_bucket}/{self.s3_key_in} into dataframe."
    )

    strongest = RecommendationHelper.get_top_n_closest(connections,
                                                       self.n_strongest)
    DataHelper.write_df_to_tsv_in_s3(resource, strongest, self.s3_bucket,
                                     self.s3_key_out)
    self.log.info(
        f"Wrote strongest connections tsv file to s3://{self.s3_bucket}/{self.s3_key_out}."
    )
def execute(self, context):
    """Produce top-n recommendations from second-degree connection data.

    Reads the second-degree connections TSV from S3, selects the top
    ``self.n_recs`` recommendations, and writes them as TSV to
    ``self.s3_key_out``.
    """
    hook = AwsHook(aws_conn_id=self.aws_conn_id)
    client = hook.get_client_type('s3', region_name=self.s3_region)
    resource = hook.get_resource_type('s3', region_name=self.s3_region)

    second_degree = DataHelper.read_tsv_from_s3_to_df(
        client, self.s3_bucket, self.s3_key_in_sec_deg)
    self.log.info(
        f"Read tsv file s3://{self.s3_bucket}/{self.s3_key_in_sec_deg} into dataframe."
    )

    recommendations = RecommendationHelper.get_top_n_recommendations(
        second_degree, self.n_recs)
    DataHelper.write_df_to_tsv_in_s3(resource, recommendations,
                                     self.s3_bucket, self.s3_key_out)
    self.log.info(
        f"Wrote recommendations tsv file to s3://{self.s3_bucket}/{self.s3_key_out}."
    )
def execute(self, context):
    """Expand an indicator column of a CSV in S3 into dummy columns.

    Reads ``self.s3_key_in`` into a dataframe, creates dummy fields for
    ``self.indicator_column`` (split on ``self.sep``), and writes the
    result as CSV to ``self.s3_key_out``.
    """
    aws = AwsHook(aws_conn_id=self.aws_conn_id)
    s3_client = aws.get_client_type('s3', region_name=self.s3_region)
    s3_resource = aws.get_resource_type('s3', region_name=self.s3_region)

    data = DataHelper.read_csv_from_s3_to_df(s3_client, self.s3_bucket,
                                             self.s3_key_in)
    self.log.info(
        f"Read csv file s3://{self.s3_bucket}/{self.s3_key_in} into dataframe."
    )
    data_with_dummies = DataHelper.get_dummy_colums(data,
                                                    self.indicator_column,
                                                    sep=self.sep)
    self.log.info(
        f"Created dummy fields for column {self.indicator_column}.")
    # Bug fix: write to the configured output key; the old code wrote to
    # s3_key_in (silently overwriting the input) while the log message
    # claimed s3_key_out.
    DataHelper.write_df_to_csv_in_s3(s3_resource, data_with_dummies,
                                     self.s3_bucket, self.s3_key_out)
    self.log.info(
        f"Wrote updated data back to s3://{self.s3_bucket}/{self.s3_key_out}."
    )
def execute(self, context):
    """Filter already-existing connections out of second-degree candidates.

    Reads the second-degree connection TSV and the existing-connection TSV
    from S3, removes candidates that are already connections, and writes
    the valid remainder as TSV to ``self.s3_key_out``.
    """
    aws = AwsHook(aws_conn_id=self.aws_conn_id)
    s3_client = aws.get_client_type('s3', region_name=self.s3_region)
    s3_resource = aws.get_resource_type('s3', region_name=self.s3_region)

    data_sec_deg = DataHelper.read_tsv_from_s3_to_df(
        s3_client, self.s3_bucket, self.s3_key_in_sec_deg)
    self.log.info(
        f"Read tsv file s3://{self.s3_bucket}/{self.s3_key_in_sec_deg} into dataframe."
    )
    data_existing_conn = DataHelper.read_tsv_from_s3_to_df(
        s3_client, self.s3_bucket, self.s3_key_existing_conn)
    self.log.info(
        f"Read tsv file s3://{self.s3_bucket}/{self.s3_key_existing_conn} into dataframe."
    )

    # Bug fix: the old code referenced a bare `conn_type`, which is not
    # defined in this scope (NameError); all other configuration here is
    # read off self, so the connection type must be too.
    sec_deg_conn_valid = RecommendationHelper.remove_invalid_recommendations(
        data_sec_deg, data_existing_conn, self.conn_type)
    DataHelper.write_df_to_tsv_in_s3(s3_resource, sec_deg_conn_valid,
                                     self.s3_bucket, self.s3_key_out)
    self.log.info(
        f"Wrote valid second degree connections tsv file to s3://{self.s3_bucket}/{self.s3_key_out}."
    )
def execute(self, context):
    """Bulk-load a file from S3 into a Postgres table.

    Buffers the S3 object as a local file and hands it to
    ``PostgresHook.bulk_load`` for ``self.table``.
    """
    aws = AwsHook(aws_conn_id=self.aws_conn_id)
    s3_client = aws.get_client_type('s3', region_name=self.s3_region)
    postgres = PostgresHook(postgres_conn_id=self.postgres_conn_id)

    self.log.info(
        f"Loading file s3://{self.s3_bucket}/{self.s3_key} into table {self.table}."
    )
    with DataHelper.buffer_s3_object_as_file(s3_client, self.s3_bucket,
                                             self.s3_key) as f:
        postgres.bulk_load(self.table, f)
    # Bug fix: corrected "sucesfully" typo in the log message.
    self.log.info(
        f"s3://{self.s3_bucket}/{self.s3_key} loaded into table {self.table} successfully."
    )
def create_metaphor_loaders(
        argument_parser: ArgumentParserHelper, glove_vectors: Vectors):
    """Build train/validation dataloaders for the metaphor task.

    Loads the metaphor datasets per the parsed arguments, then returns
    ``(train_dataloader, validation_dataloader, pos_weight)``. The test
    dataset is loaded but its dataloader is discarded.
    """
    datasets = MetaphorLoader.get_metaphor_datasets(
        metaphor_dataset_folder=argument_parser.metaphor_dataset_folder,
        concat_glove=argument_parser.concat_glove,
        glove_vectors=glove_vectors,
        elmo_model=argument_parser.elmo_model,
        lowercase_sentences=argument_parser.lowercase,
        tokenize_sentences=argument_parser.tokenize,
        only_news=argument_parser.only_news)
    train_dataset, validation_dataset, test_dataset = datasets

    # Class-imbalance weight comes from the training split only.
    pos_weight = train_dataset.pos_weight

    train_loader, validation_loader, _ = DataHelper.create_dataloaders(
        train_dataset=train_dataset,
        validation_dataset=validation_dataset,
        test_dataset=test_dataset,
        batch_size=argument_parser.metaphor_batch_size,
        shuffle=True)
    return train_loader, validation_loader, pos_weight
def step_verify_3_card_brands(context, result_list):
    """Assert the top three card brand names on the results page match.

    ``result_list`` is a string-encoded list that is parsed into the
    expected brand names before comparison.
    """
    top_three = TravelInsuranceResultPage(
        context.browser).get_all_card_brand_names()[:3]
    expected = DataHelper.get_list_from_string_list(result_list)
    assert_that(top_three, equal_to(expected),
                'Verify top 3 card brand names')