def step_with(name, start, stop, status):
    return has_properties(
        name=name,
        title=name,
        attrib=all_of(
            has_entry('start', has_float(greater_than_or_equal_to(start))),
            has_entry('stop', has_float(less_than_or_equal_to(stop))),
            has_entry('status', status)))
def test_hourly_team_score_it(self):
    state_verifier = PipelineStateMatcher(PipelineState.DONE)
    query = ('SELECT COUNT(*) FROM [%s:%s.%s]' %
             (self.project, self.dataset.name, self.OUTPUT_TABLE))
    bigquery_verifier = BigqueryMatcher(self.project, query,
                                        self.DEFAULT_EXPECTED_CHECKSUM)

    extra_opts = {'input': self.DEFAULT_INPUT_FILE,
                  'dataset': self.dataset.name,
                  'window_duration': 1,
                  'on_success_matcher': all_of(state_verifier,
                                               bigquery_verifier)}

    # Register cleanup before pipeline execution.
    # Note that actual execution happens in reverse order.
    self.addCleanup(self._cleanup_dataset)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    hourly_team_score.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
def test_multiple_destinations_transform(self):
    output_table_1 = '%s%s' % (self.output_table, 1)
    output_table_2 = '%s%s' % (self.output_table, 2)
    output_table_3 = '%s%s' % (self.output_table, 3)
    output_table_4 = '%s%s' % (self.output_table, 4)
    pipeline_verifiers = [
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_1,
            data=[(d['name'], d['language'])
                  for d in _ELEMENTS if 'language' in d]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_2,
            data=[(d['name'], d['foundation'])
                  for d in _ELEMENTS if 'foundation' in d]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_3,
            data=[(d['name'], d['language'])
                  for d in _ELEMENTS if 'language' in d]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_4,
            data=[(d['name'], d['foundation'])
                  for d in _ELEMENTS if 'foundation' in d])]

    args = self.test_pipeline.get_full_options_as_args(
        on_success_matcher=all_of(*pipeline_verifiers))

    with beam.Pipeline(argv=args) as p:
        input = p | beam.Create(_ELEMENTS)

        # Gather all input onto the same worker.
        input = (input
                 | beam.Map(lambda x: (None, x))
                 | beam.GroupByKey()
                 | beam.FlatMap(lambda elm: elm[1]))

        _ = (input
             | "WriteWithMultipleDestsFreely" >> bigquery.WriteToBigQuery(
                 table=lambda x: (output_table_1
                                  if 'language' in x
                                  else output_table_2),
                 create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY))

        _ = (input
             | "WriteWithMultipleDests" >> bigquery.WriteToBigQuery(
                 table=lambda x: (output_table_3
                                  if 'language' in x
                                  else output_table_4),
                 create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
                 write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY,
                 max_file_size=20,
                 max_files_per_bundle=-1))
def test_bigquery_tornadoes_it(self):
    test_pipeline = TestPipeline(is_integration_test=True)

    # Set extra options to the pipeline for test purposes.
    project = test_pipeline.get_option('project')
    dataset = 'BigQueryTornadoesIT'
    table = 'monthly_tornadoes_%s' % int(round(time.time() * 1000))
    output_table = '.'.join([dataset, table])
    query = 'SELECT month, tornado_count FROM `%s`' % output_table
    pipeline_verifiers = [
        PipelineStateMatcher(),
        BigqueryMatcher(project=project,
                        query=query,
                        checksum=self.DEFAULT_CHECKSUM)]
    extra_opts = {'output': output_table,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    # Register cleanup before pipeline execution.
    # Note that actual execution happens in reverse order.
    self.addCleanup(utils.delete_bq_table, project, dataset, table)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    bigquery_tornadoes.run(
        test_pipeline.get_full_options_as_args(**extra_opts))
def run_bigquery_io_read_pipeline(self, input_size):
    test_pipeline = TestPipeline(is_integration_test=True)
    pipeline_verifiers = [PipelineStateMatcher()]
    extra_opts = {
        'input_table': self.DEFAULT_DATASET + "." +
                       self.DEFAULT_TABLE_PREFIX + input_size,
        'num_records': self.NUM_RECORDS[input_size],
        'on_success_matcher': all_of(*pipeline_verifiers)}
    bigquery_io_read_pipeline.run(
        test_pipeline.get_full_options_as_args(**extra_opts))
def test_many_elements():
    Box = xmlfied('box', foos=Many(Element(name='foo')))

    box = Box(foos=['a', 'b', 'c'])

    assert_that(etree.tostring(box.toxml()), all_of(
        string_contains_in_order('<box>', '<foos>', '<foo>', 'a', '</foo>',
                                 '</foos>', '</box>'),
        string_contains_in_order('<box>', '<foos>', '<foo>', 'b', '</foo>',
                                 '</foos>', '</box>'),
        string_contains_in_order('<box>', '<foos>', '<foo>', 'c', '</foo>',
                                 '</foos>', '</box>')))
def test_step_attach(timed_report_for):
    report, start, stop = timed_report_for("""
    import pytest

    def test_ololo_pewpew():
        with pytest.allure.step(title='withattach'):
            pytest.allure.attach('myattach', 'abcdef')
    """)

    assert_that(report.findall('.//test-case/steps/step'),
                contains(all_of(
                    step_with('withattach', start, stop, Status.PASSED),
                    has_property('attachments',
                                 has_property('attachment',
                                              has_entry('title',
                                                        'myattach'))))))
def test_one_job_fails_all_jobs_fail(self):
    # If one of the import jobs fails, then other jobs must not be performed.
    # This is to avoid reinsertion of some records when a pipeline fails and
    # is rerun.
    output_table_1 = '%s%s' % (self.output_table, 1)
    output_table_2 = '%s%s' % (self.output_table, 2)

    self.bigquery_client.get_or_create_table(
        self.project, self.dataset_id, output_table_1.split('.')[1],
        bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA),
        None, None)
    self.bigquery_client.get_or_create_table(
        self.project, self.dataset_id, output_table_2.split('.')[1],
        bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA_2),
        None, None)

    pipeline_verifiers = [
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT name, language FROM %s" % output_table_1,
            data=[]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT name, foundation FROM %s" % output_table_2,
            data=[])]

    args = self.test_pipeline.get_full_options_as_args(
        experiments='use_beam_bq_sink')

    with self.assertRaises(Exception):
        with beam.Pipeline(argv=args) as p:
            input = p | beam.Create(_ELEMENTS)
            input2 = p | "Broken record" >> beam.Create(
                ['language_broken_record'])

            input = (input, input2) | beam.Flatten()

            _ = (input
                 | "WriteWithMultipleDests" >> bigquery.WriteToBigQuery(
                     table=lambda x: (output_table_1
                                      if 'language' in x
                                      else output_table_2),
                     create_disposition=(
                         beam.io.BigQueryDisposition.CREATE_IF_NEEDED),
                     write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND))

    hamcrest_assert(p, all_of(*pipeline_verifiers))
def test_leader_board_it(self):
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    success_condition = 'total_score=5000 LIMIT 1'
    users_query = ('SELECT total_score FROM [%s:%s.%s] WHERE %s' %
                   (self.project, self.dataset.name,
                    self.OUTPUT_TABLE_USERS, success_condition))
    bq_users_verifier = BigqueryMatcher(self.project,
                                        users_query,
                                        self.DEFAULT_EXPECTED_CHECKSUM)
    teams_query = ('SELECT total_score FROM [%s:%s.%s] WHERE %s' %
                   (self.project, self.dataset.name,
                    self.OUTPUT_TABLE_TEAMS, success_condition))
    bq_teams_verifier = BigqueryMatcher(self.project,
                                        teams_query,
                                        self.DEFAULT_EXPECTED_CHECKSUM)

    extra_opts = {
        'subscription': self.input_sub.full_name,
        'dataset': self.dataset.name,
        'topic': self.input_topic.full_name,
        'team_window_duration': 1,
        'wait_until_finish_duration': self.WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(state_verifier,
                                     bq_users_verifier,
                                     bq_teams_verifier)}

    # Register cleanup before pipeline execution.
    # Note that actual execution happens in reverse order.
    self.addCleanup(self._cleanup_pubsub)
    self.addCleanup(self._cleanup_dataset)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE_USERS)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE_TEAMS)

    # Generate input data and inject it into PubSub.
    test_utils.wait_for_subscriptions_created(
        [self.input_topic, self.input_sub])
    self._inject_pubsub_game_events(self.input_topic,
                                    self.DEFAULT_INPUT_COUNT)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    leader_board.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
def testNoMatchIfArgumentFailsToSatisfyAllOfManyOtherMatchers(self):
    self.assert_does_not_match(
        "matcher in the middle",
        all_of(
            equal_to("good"),
            equal_to("good"),
            equal_to("good"),
            equal_to("bad"),
            equal_to("good"),
            equal_to("good"),
        ),
        "good",
    )
def test_cyrillic_exc(report_for):
    report = report_for(u"""
    # -*- coding: utf-8 -*-
    def test_foo():
        raise Exception(u'русские буквы')
    """)

    assert_that(report.findall('.//failure'), contains(
        all_of(has_property('message', u'Exception: русские буквы'),
               has_property('stack-trace', u'''def test_foo():
>       raise Exception(u'русские буквы')
E       Exception: русские буквы

test_cyrillic_exc.py:3: Exception'''))))
def run_datastore_write(self, limit=None):
    test_pipeline = TestPipeline(is_integration_test=True)
    current_time = datetime.now().strftime("%m%d%H%M%S")
    seed = random.randint(0, 100000)
    kind = 'testkind%s%d' % (current_time, seed)
    pipeline_verifiers = [PipelineStateMatcher()]
    extra_opts = {'kind': kind,
                  'num_entities': self.NUM_ENTITIES,
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    if limit is not None:
        extra_opts['limit'] = limit

    datastore_write_it_pipeline.run(
        test_pipeline.get_full_options_as_args(**extra_opts))
def test_unicode_labels(report_for):
    report = report_for(u"""
    # -*- coding: utf-8 -*-
    import allure

    @allure.feature(u'русские буквы')
    @allure.story(u'еще русские буквы')
    def test_a():
        pass
    """)

    assert_that(report, all_of(
        has_label('test_a', 'feature', u'русские буквы'),
        has_label('test_a', 'story', u'еще русские буквы')))
def _test_streaming(self, with_attributes):
    """Runs IT pipeline with message verifier.

    Args:
      with_attributes: False - Reads and writes message data only.
        True - Reads and writes message data and attributes. Also
        verifies id_label and timestamp_attribute features.
    """
    # Set on_success_matcher to verify pipeline state and pubsub output. These
    # verifications run on a (remote) worker.
    # Expect the state to be RUNNING, since a streaming pipeline is usually
    # never DONE. The test runner will cancel the pipeline after verification.
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    expected_messages = self.EXPECTED_OUTPUT_MESSAGES[self.runner_name]
    if not with_attributes:
        expected_messages = [pubsub_msg.data.decode('utf-8')
                             for pubsub_msg in expected_messages]
    if self.runner_name == 'TestDirectRunner':
        strip_attributes = None
    else:
        strip_attributes = [self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE]
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project,
        self.output_sub.name,
        expected_messages,
        timeout=MESSAGE_MATCHER_TIMEOUT_S,
        with_attributes=with_attributes,
        strip_attributes=strip_attributes)
    extra_opts = {
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
        'wait_until_finish_duration': TEST_PIPELINE_DURATION_MS,
        'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier)}

    # Generate input data and inject it into PubSub.
    for msg in self.INPUT_MESSAGES[self.runner_name]:
        self.pub_client.publish(self.input_topic.name, msg.data,
                                **msg.attributes)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    pubsub_it_pipeline.run_pipeline(
        argv=self.test_pipeline.get_full_options_as_args(**extra_opts),
        with_attributes=with_attributes,
        id_label=self.ID_LABEL,
        timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)
def test_big_query_standard_sql(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [
        PipelineStateMatcher(),
        BigqueryMatcher(project=self.project,
                        query=verify_query,
                        checksum=expected_checksum)]
    extra_opts = {'query': STANDARD_QUERY,
                  'output': self.output_table,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': True,
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
def test_nested_steps(timed_report_for):
    report, start, stop = timed_report_for("""
    import pytest

    def test_ololo_pewpew():
        with pytest.allure.step(title='outer'):
            with pytest.allure.step(title='inner'):
                assert False
    """)

    assert_that(report.findall('.//test-case/steps/step'),
                contains(all_of(
                    step_with('outer', start, stop, Status.FAILED),
                    has_property('steps',
                                 has_property('step',
                                              step_with('inner', start, stop,
                                                        Status.FAILED))))))
def test_add_environment_in_testcase(report_for, result, environment_xml):
    report_for("""
    import pytest
    import allure

    def test_dummy():
        allure.environment(foo='bar')
        assert %s
    """ % result)

    assert_that(environment_xml().findall('.//parameter')[0],
                all_of(has_property('name', 'foo'),
                       has_property('key', 'foo'),
                       has_property('value', 'bar')))
def test_many_elements():
    Box = xmlfied('box', foos=WrappedMany(Element(name='foo')))

    box = Box(foos=['a', 'b', 'c'])

    assert_that(
        etree.tostring(box.toxml()),
        all_of(
            string_contains_in_order('<box>', '<foos>', '<foo>', 'a',
                                     '</foo>', '</foos>', '</box>'),
            string_contains_in_order('<box>', '<foos>', '<foo>', 'b',
                                     '</foo>', '</foos>', '</box>'),
            string_contains_in_order('<box>', '<foos>', '<foo>', 'c',
                                     '</foo>', '</foos>', '</box>')))
def _run_wordcount_it(self, run_wordcount, **opts):
    test_pipeline = TestPipeline(is_integration_test=True)
    extra_opts = {}

    # Set extra options to the pipeline for test purposes.
    test_output = '/'.join([test_pipeline.get_option('output'),
                            str(int(time.time() * 1000)),
                            'results'])
    extra_opts['output'] = test_output

    test_input = test_pipeline.get_option('input')
    if test_input:
        extra_opts['input'] = test_input

    arg_sleep_secs = test_pipeline.get_option('sleep_secs')
    sleep_secs = int(arg_sleep_secs) if arg_sleep_secs is not None else None
    expect_checksum = (test_pipeline.get_option('expect_checksum') or
                       self.DEFAULT_CHECKSUM)
    pipeline_verifiers = [
        PipelineStateMatcher(),
        FileChecksumMatcher(test_output + '*-of-*',
                            expect_checksum,
                            sleep_secs)]
    extra_opts['on_success_matcher'] = all_of(*pipeline_verifiers)
    extra_opts.update(opts)

    # Register cleanup before pipeline execution.
    self.addCleanup(delete_files, [test_output + '*'])

    publish_to_bq = bool(
        test_pipeline.get_option('publish_to_big_query') or False)

    # Start measuring time for the performance test.
    start_time = time.time()

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    run_wordcount(
        test_pipeline.get_full_options_as_args(**extra_opts),
        save_main_session=False)

    end_time = time.time()
    run_time = end_time - start_time

    if publish_to_bq:
        self._publish_metrics(test_pipeline, run_time)
def test_streaming_wordcount_it(self):
    # Set extra options to the pipeline for test purposes.
    pipeline_verifiers = [PipelineStateMatcher(PipelineState.RUNNING)]
    extra_opts = {'input_sub': self.input_sub.full_name,
                  'output_topic': self.output_topic.full_name,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    # Generate input data and inject it into PubSub.
    test_utils.wait_for_subscriptions_created([self.input_sub])
    self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    streaming_wordcount.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
def test_big_query_new_types(self):
    expected_checksum = test_utils.compute_hash(NEW_TYPES_OUTPUT_EXPECTED)
    verify_query = NEW_TYPES_OUTPUT_VERIFY_QUERY % self.output_table
    pipeline_verifiers = [
        PipelineStateMatcher(),
        BigqueryMatcher(project=self.project,
                        query=verify_query,
                        checksum=expected_checksum)]
    self._setup_new_types_env()
    extra_opts = {
        'query': NEW_TYPES_QUERY % (self.dataset_id, NEW_TYPES_INPUT_TABLE),
        'output': self.output_table,
        'output_schema': NEW_TYPES_OUTPUT_SCHEMA,
        'use_standard_sql': False,
        'on_success_matcher': all_of(*pipeline_verifiers)}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
def test_run_example_with_setup_file(self):
    pipeline = TestPipeline(is_integration_test=True)
    coordinate_output = FileSystems.join(
        pipeline.get_option('output'),
        'juliaset-{}'.format(str(uuid.uuid4())),
        'coordinates.txt')
    extra_args = {
        'coordinate_output': coordinate_output,
        'grid_size': self.GRID_SIZE,
        'setup_file': os.path.normpath(
            os.path.join(os.path.dirname(__file__), '..', 'setup.py')),
        'on_success_matcher': all_of(PipelineStateMatcher(PipelineState.DONE)),
    }
    args = pipeline.get_full_options_as_args(**extra_args)

    juliaset.run(args)
def test_big_query_legacy_sql(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [
        PipelineStateMatcher(),
        BigqueryMatcher(project=self.project,
                        query=verify_query,
                        checksum=expected_checksum)]
    extra_opts = {'query': LEGACY_QUERY,
                  'output': self.output_table,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': False,
                  'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION_MS,
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
def test_wordcount_it(self):
    test_pipeline = TestPipeline(is_integration_test=True)

    # Set extra options to the pipeline for test purposes.
    output = '/'.join([test_pipeline.get_option('output'),
                       test_pipeline.get_option('job_name'),
                       'results'])
    pipeline_verifiers = [
        PipelineStateMatcher(),
        FileChecksumMatcher(output + '*-of-*', self.DEFAULT_CHECKSUM)]
    extra_opts = {'output': output,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    wordcount.run(test_pipeline.get_full_options_as_args(**extra_opts))
def test_many_nested():
    Item = xmlfied('item', value=Element())
    Box = xmlfied('box', items=WrappedMany(Nested()))

    box = Box(items=[])
    box.items.append(Item('a'))
    box.items.append(Item('a'))
    box.items.append(Item('a'))

    assert_that(
        etree.tostring(box.toxml()),
        all_of(
            string_contains_in_order('<box>', '<items>',
                                     '<item>', 'a', '</item>',
                                     '<item>', 'a', '</item>',
                                     '<item>', 'a', '</item>',
                                     '</items>', '</box>')))
def test_one_job_fails_all_jobs_fail(self):
    # If one of the import jobs fails, then other jobs must not be performed.
    # This is to avoid reinsertion of some records when a pipeline fails and
    # is rerun.
    output_table_1 = '%s%s' % (self.output_table, 1)
    output_table_2 = '%s%s' % (self.output_table, 2)

    self.bigquery_client.get_or_create_table(
        self.project, self.dataset_id, output_table_1.split('.')[1],
        bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA),
        None, None)
    self.bigquery_client.get_or_create_table(
        self.project, self.dataset_id, output_table_2.split('.')[1],
        bigquery_tools.parse_table_schema_from_json(self.BIG_QUERY_SCHEMA_2),
        None, None)

    pipeline_verifiers = [
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_1,
            data=[]),
        BigqueryFullResultMatcher(
            project=self.project,
            query="SELECT * FROM %s" % output_table_2,
            data=[])]

    args = self.test_pipeline.get_full_options_as_args(
        experiments='use_beam_bq_sink')

    with self.assertRaises(Exception):
        with beam.Pipeline(argv=args) as p:
            input = p | beam.Create(_ELEMENTS)
            input2 = p | "Broken record" >> beam.Create(
                ['language_broken_record'])

            input = (input, input2) | beam.Flatten()

            _ = (input
                 | "WriteWithMultipleDests" >> bigquery.WriteToBigQuery(
                     table=lambda x: (output_table_1
                                      if 'language' in x
                                      else output_table_2),
                     create_disposition=(
                         beam.io.BigQueryDisposition.CREATE_IF_NEEDED),
                     write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND))

    hamcrest_assert(p, all_of(*pipeline_verifiers))
def test_big_query_legacy_sql(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [
        PipelineStateMatcher(),
        BigqueryMatcher(project=self.project,
                        query=verify_query,
                        checksum=expected_checksum)]
    gs_location = 'gs://temp-storage-for-upload-tests/%s' % self.output_table
    extra_opts = {'query': LEGACY_QUERY,
                  'output': self.output_table,
                  'bq_temp_location': gs_location,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': False,
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
def test_bigquery_side_input_it(self):
    state_verifier = PipelineStateMatcher(PipelineState.DONE)
    NUM_GROUPS = 3

    extra_opts = {'output': self.output,
                  'num_groups': str(NUM_GROUPS),
                  'on_success_matcher': all_of(state_verifier)}

    # Register cleanup before pipeline execution.
    self.addCleanup(delete_files, [self.output + '*'])

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    bigquery_side_input.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
def test_user_score_it(self):
    state_verifier = PipelineStateMatcher(PipelineState.DONE)
    file_verifier = FileChecksumMatcher(self.output + '*-of-*',
                                        self.DEFAULT_EXPECTED_CHECKSUM)

    extra_opts = {'input': self.DEFAULT_INPUT_FILE,
                  'output': self.output + '/user-score',
                  'on_success_matcher': all_of(state_verifier,
                                               file_verifier)}

    # Register cleanup before pipeline execution.
    self.addCleanup(delete_files, [self.output + '*'])

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    user_score.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
def test_many_nested():
    Item = xmlfied('item', value=Element())
    Box = xmlfied('box', items=Many(Nested()))

    box = Box(items=[])
    box.items.append(Item('a'))
    box.items.append(Item('a'))
    box.items.append(Item('a'))

    assert_that(etree.tostring(box.toxml()), all_of(
        string_contains_in_order('<box>', '<items>',
                                 '<item>', 'a', '</item>',
                                 '<item>', 'a', '</item>',
                                 '<item>', 'a', '</item>',
                                 '</items>', '</box>')))
def test_bigquery_tornadoes_it(self):
    test_pipeline = TestPipeline(is_integration_test=True)

    # Set extra options to the pipeline for test purposes.
    output_table = ('BigQueryTornadoesIT'
                    '.monthly_tornadoes_%s' % int(round(time.time() * 1000)))
    query = 'SELECT month, tornado_count FROM [%s]' % output_table
    pipeline_verifiers = [
        PipelineStateMatcher(),
        BigqueryMatcher(project=test_pipeline.get_option('project'),
                        query=query,
                        checksum=self.DEFAULT_CHECKSUM)]
    extra_opts = {'output': output_table,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    bigquery_tornadoes.run(
        test_pipeline.get_full_options_as_args(**extra_opts))
def run_pipeline(self):
    # Waits for messages to appear in the output topic.
    expected_msg = [msg.encode('utf-8') for msg in MESSAGES_TO_PUBLISH]
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project, self.output_sub.name, expected_msg, timeout=600)

    # Checks that the pipeline initializes to the RUNNING state.
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)

    extra_opts = {
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier),
        'experiment': 'beam_fn_api',
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
    }

    argv = self.test_pipeline.get_full_options_as_args(**extra_opts)
    return dataflow_exercise_streaming_metrics_pipeline.run(argv)
def _test_streaming(self, with_attributes):
    """Runs IT pipeline with message verifier.

    Args:
      with_attributes: False - Reads and writes message data only.
        True - Reads and writes message data and attributes. Also
        verifies id_label and timestamp_attribute features.
    """
    # Build the expected dataset.
    # Set extra options to the pipeline for test purposes.
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    expected_messages = self.EXPECTED_OUTPUT_MESSAGES
    if not with_attributes:
        expected_messages = [pubsub_msg.data
                             for pubsub_msg in expected_messages]
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project,
        OUTPUT_SUB + self.uuid,
        expected_messages,
        timeout=MESSAGE_MATCHER_TIMEOUT_S,
        with_attributes=with_attributes,
        strip_attributes=[self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE])
    extra_opts = {
        'input_subscription': self.input_sub.full_name,
        'output_topic': self.output_topic.full_name,
        'wait_until_finish_duration': TEST_PIPELINE_DURATION_MS,
        'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier)}

    # Generate input data and inject it into PubSub.
    test_utils.wait_for_subscriptions_created([self.input_sub])
    for msg in self.INPUT_MESSAGES:
        self.input_topic.publish(msg.data, **msg.attributes)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    pubsub_it_pipeline.run_pipeline(
        argv=self.test_pipeline.get_full_options_as_args(**extra_opts),
        with_attributes=with_attributes,
        id_label=self.ID_LABEL,
        timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)
def test_bqfl_streaming(self):
    if isinstance(self.test_pipeline.runner, TestDataflowRunner):
        self.skipTest("TestStream is not supported on TestDataflowRunner")
    output_table = '%s_%s' % (self.output_table, 'ints')
    _SIZE = 100
    schema = self.BIG_QUERY_STREAMING_SCHEMA
    l = [{'Integr': i} for i in range(_SIZE)]

    state_matcher = PipelineStateMatcher(PipelineState.RUNNING)
    bq_matcher = BigqueryFullResultStreamingMatcher(
        project=self.project,
        query="SELECT Integr FROM %s" % output_table,
        data=[(i,) for i in range(100)])

    args = self.test_pipeline.get_full_options_as_args(
        on_success_matcher=all_of(state_matcher, bq_matcher),
        experiments='use_beam_bq_sink',
        streaming=True)

    with beam.Pipeline(argv=args) as p:
        stream_source = (
            TestStream()
            .advance_watermark_to(0)
            .advance_processing_time(100)
            .add_elements(l[:_SIZE // 4])
            .advance_processing_time(100)
            .advance_watermark_to(100)
            .add_elements(l[_SIZE // 4:2 * _SIZE // 4])
            .advance_processing_time(100)
            .advance_watermark_to(200)
            .add_elements(l[2 * _SIZE // 4:3 * _SIZE // 4])
            .advance_processing_time(100)
            .advance_watermark_to(300)
            .add_elements(l[3 * _SIZE // 4:])
            .advance_processing_time(100)
            .advance_watermark_to_infinity())
        _ = (p
             | stream_source
             | bigquery.WriteToBigQuery(
                 output_table,
                 schema=schema,
                 method=bigquery.WriteToBigQuery.Method.FILE_LOADS,
                 triggering_frequency=100))
def test_big_query_standard_sql_kms_key(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [
        PipelineStateMatcher(),
        BigqueryMatcher(project=self.project,
                        query=verify_query,
                        checksum=expected_checksum)]
    extra_opts = {'query': STANDARD_QUERY,
                  'output': self.output_table,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': True,
                  'on_success_matcher': all_of(*pipeline_verifiers),
                  'kms_key': KMS_KEY}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)

    table = self.bigquery_client.get_table(
        self.project, self.dataset_id, 'output_table')
    self.assertEqual(KMS_KEY, table.encryptionConfiguration.kmsKeyName)
def setUp(self):
    super(GroupByKeyStreamingTest, self).setUp()
    self.topic_short_name = self.pipeline.get_option('pubsub_topic_name')
    self.setup_pubsub()

    timeout = self.pipeline.get_option('timeout') or DEFAULT_TIMEOUT
    expected_num_of_records = self.pipeline.get_option('num_of_records')
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project_id,
        self.output_sub.name,
        expected_msg_len=int(expected_num_of_records),
        timeout=int(timeout))
    self.extra_opts = {
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
        'metrics_namespace': self.metrics_namespace,
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(pubsub_msg_verifier),
    }
def test_game_stats_it(self):
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    success_condition = 'mean_duration=300 LIMIT 1'
    sessions_query = ('SELECT mean_duration FROM [%s:%s.%s] WHERE %s' %
                      (self.project, self.dataset.name,
                       self.OUTPUT_TABLE_SESSIONS, success_condition))
    bq_sessions_verifier = BigqueryMatcher(self.project,
                                           sessions_query,
                                           self.DEFAULT_EXPECTED_CHECKSUM)

    # TODO(mariagh): Add teams table verifier once game_stats.py is fixed.

    extra_opts = {
        'subscription': self.input_sub.name,
        'dataset': self.dataset.name,
        'topic': self.input_topic.name,
        'fixed_window_duration': 1,
        'user_activity_window_duration': 1,
        'wait_until_finish_duration': self.WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(state_verifier, bq_sessions_verifier)}

    # Register cleanup before pipeline execution.
    # Note that actual execution happens in reverse order.
    self.addCleanup(self._cleanup_pubsub)
    self.addCleanup(self._cleanup_dataset)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE_SESSIONS)
    self.addCleanup(utils.delete_bq_table, self.project,
                    self.dataset.name, self.OUTPUT_TABLE_TEAMS)

    # Generate input data and inject it into PubSub.
    self._inject_pubsub_game_events(self.input_topic,
                                    self.DEFAULT_INPUT_COUNT)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    game_stats.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
def test_streaming_wordcount_debugging_it(self):
    # Set extra options to the pipeline for test purposes.
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project, self.output_sub.name, EXPECTED_MESSAGE, timeout=400)
    extra_opts = {
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier)}

    # Generate input data and inject it into PubSub.
    self._inject_data(self.input_topic, SAMPLE_MESSAGES)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    streaming_wordcount_debugging.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts),
        save_main_session=False)
def test_datastore_wordcount_it(self):
    test_pipeline = TestPipeline(is_integration_test=True)
    dataset = test_pipeline.get_option("project")
    kind = self.DATASTORE_WORDCOUNT_KIND
    output = '/'.join([test_pipeline.get_option('output'),
                       str(int(time.time() * 1000)),
                       'datastore_wordcount_results'])

    arg_sleep_secs = test_pipeline.get_option('sleep_secs')
    sleep_secs = int(arg_sleep_secs) if arg_sleep_secs is not None else None
    pipeline_verifiers = [
        PipelineStateMatcher(),
        FileChecksumMatcher(output + '*-of-*',
                            self.EXPECTED_CHECKSUM,
                            sleep_secs)]
    extra_opts = {'dataset': dataset,
                  'kind': kind,
                  'output': output,
                  'read_only': True,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    datastore_wordcount.run(
        test_pipeline.get_full_options_as_args(**extra_opts))
def test_wordcount_it(self):
    test_pipeline = TestPipeline(is_integration_test=True)

    # Set extra options to the pipeline for test purposes.
    output = '/'.join([test_pipeline.get_option('output'),
                       str(int(time.time())),
                       'results'])
    arg_sleep_secs = test_pipeline.get_option('sleep_secs')
    sleep_secs = int(arg_sleep_secs) if arg_sleep_secs is not None else None
    pipeline_verifiers = [
        PipelineStateMatcher(),
        FileChecksumMatcher(output + '*-of-*',
                            self.DEFAULT_CHECKSUM,
                            sleep_secs)]
    extra_opts = {'output': output,
                  'on_success_matcher': all_of(*pipeline_verifiers)}

    # Register cleanup before pipeline execution.
    self.addCleanup(delete_files, [output + '*'])

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    wordcount.run(test_pipeline.get_full_options_as_args(**extra_opts))
def test_streaming_wordcount_it(self):
    # Build the expected dataset.
    expected_msg = [('%d: 1' % num) for num in range(DEFAULT_INPUT_NUMBERS)]

    # Set extra options to the pipeline for test purposes.
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    pubsub_msg_verifier = PubSubMessageMatcher(self.project,
                                               self.output_sub.name,
                                               expected_msg,
                                               timeout=400)
    extra_opts = {
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier)}

    # Generate input data and inject it into PubSub.
    self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    streaming_wordcount.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts))
def test_return_next_cell_in_valid_range(self):
    for _ in range(999):
        assert_that(
            self.next_cell_calculator.next(),
            all_of(greater_than_or_equal_to(0), less_than_or_equal_to(100)))
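# A minimal sketch (an assumption, not taken from the original sources) of a
# next_cell_calculator that would satisfy the range test above; the class
# name and bounds are hypothetical.
import random

class NextCellCalculator(object):
    def __init__(self, low=0, high=100):
        self.low = low
        self.high = high

    def next(self):
        # random.randint is inclusive on both ends, matching the
        # greater_than_or_equal_to(0) / less_than_or_equal_to(100) bounds.
        return random.randint(self.low, self.high)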
def __init__(self, *element_matchers):
    delegates = [has_item(e) for e in element_matchers]
    self.matcher = all_of(*delegates)
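# A minimal usage sketch (not from the original sources) of the matcher
# composed in the __init__ above: assert_that/all_of/has_item are real
# PyHamcrest APIs; the sample data is illustrative.
from hamcrest import assert_that, all_of, has_item

matcher = all_of(*[has_item(e) for e in ('a', 'b')])
assert_that(['a', 'b', 'c'], matcher)  # passes: every expected element is present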
def has_properties(*keys_valuematchers, **kv_args):
    """Matches if an object has properties satisfying all of a dictionary
    of string property names and corresponding value matchers.

    :param matcher_dict: A dictionary mapping keys to associated value
        matchers, or to expected values for
        :py:func:`~hamcrest.core.core.isequal.equal_to` matching.

    Note that the keys must be actual keys, not matchers. Any value argument
    that is not a matcher is implicitly wrapped in an
    :py:func:`~hamcrest.core.core.isequal.equal_to` matcher to check for
    equality.

    Examples::

        has_properties({'foo': equal_to(1), 'bar': equal_to(2)})
        has_properties({'foo': 1, 'bar': 2})

    ``has_properties`` also accepts a list of keyword arguments:

    .. function:: has_properties(keyword1=value_matcher1[, keyword2=value_matcher2[, ...]])

    :param keyword1: A keyword to look up.
    :param valueMatcher1: The matcher to satisfy for the value, or an expected
        value for :py:func:`~hamcrest.core.core.isequal.equal_to` matching.

    Examples::

        has_properties(foo=equal_to(1), bar=equal_to(2))
        has_properties(foo=1, bar=2)

    Finally, ``has_properties`` also accepts a list of alternating keys and
    their value matchers:

    .. function:: has_properties(key1, value_matcher1[, ...])

    :param key1: A key (not a matcher) to look up.
    :param valueMatcher1: The matcher to satisfy for the value, or an expected
        value for :py:func:`~hamcrest.core.core.isequal.equal_to` matching.

    Examples::

        has_properties('foo', equal_to(1), 'bar', equal_to(2))
        has_properties('foo', 1, 'bar', 2)
    """
    if len(keys_valuematchers) == 1:
        try:
            base_dict = keys_valuematchers[0].copy()
            for key in base_dict:
                base_dict[key] = wrap_shortcut(base_dict[key])
        except AttributeError:
            raise ValueError('single-argument calls to has_properties '
                             'must pass a dict as the argument')
    else:
        if len(keys_valuematchers) % 2:
            raise ValueError('has_properties requires key-value pairs')
        base_dict = {}
        for index in range(int(len(keys_valuematchers) / 2)):
            base_dict[keys_valuematchers[2 * index]] = \
                wrap_shortcut(keys_valuematchers[2 * index + 1])

    for key, value in kv_args.items():
        base_dict[key] = wrap_shortcut(value)

    return all_of(*[has_property(property_name, property_value_matcher)
                    for property_name, property_value_matcher
                    in base_dict.items()])
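# A minimal usage sketch (not from the original sources) exercising the three
# call styles of has_properties above; the _Point class is a hypothetical
# stand-in for any object with attributes.
from hamcrest import assert_that

class _Point(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y

assert_that(_Point(1, 2), has_properties(x=1, y=2))           # keyword style
assert_that(_Point(1, 2), has_properties('x', 1, 'y', 2))     # alternating style
assert_that(_Point(1, 2), has_properties({'x': 1, 'y': 2}))   # dict style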