def _run_wordcount_it(self, **opts):
  test_pipeline = TestPipeline(is_integration_test=True)

  # Set extra options to the pipeline for test purposes
  output = '/'.join([
      test_pipeline.get_option('output'),
      str(int(time.time() * 1000)),
      'results'
  ])
  arg_sleep_secs = test_pipeline.get_option('sleep_secs')
  sleep_secs = int(arg_sleep_secs) if arg_sleep_secs is not None else None
  pipeline_verifiers = [
      PipelineStateMatcher(),
      FileChecksumMatcher(output + '*-of-*', self.DEFAULT_CHECKSUM, sleep_secs)
  ]
  extra_opts = {
      'output': output,
      'on_success_matcher': all_of(*pipeline_verifiers)
  }
  extra_opts.update(opts)

  # Register cleanup before pipeline execution
  self.addCleanup(delete_files, [output + '*'])

  # Get pipeline options from the command argument --test-pipeline-options,
  # and start the pipeline job by calling the pipeline's main function.
  wordcount.run(test_pipeline.get_full_options_as_args(**extra_opts))

def _run_wordcount_it(self, run_wordcount, **opts):
  test_pipeline = TestPipeline(is_integration_test=True)
  extra_opts = {}

  # Set extra options to the pipeline for test purposes
  test_output = '/'.join([
      test_pipeline.get_option('output'),
      str(int(time.time() * 1000)),
      'results'
  ])
  extra_opts['output'] = test_output

  test_input = test_pipeline.get_option('input')
  if test_input:
    extra_opts['input'] = test_input

  arg_sleep_secs = test_pipeline.get_option('sleep_secs')
  sleep_secs = int(arg_sleep_secs) if arg_sleep_secs is not None else None
  expect_checksum = (
      test_pipeline.get_option('expect_checksum') or self.DEFAULT_CHECKSUM)
  pipeline_verifiers = [
      PipelineStateMatcher(),
      FileChecksumMatcher(test_output + '*-of-*', expect_checksum, sleep_secs)
  ]
  extra_opts['on_success_matcher'] = all_of(*pipeline_verifiers)
  extra_opts.update(opts)

  # Register cleanup before pipeline execution
  self.addCleanup(delete_files, [test_output + '*'])

  # Get pipeline options from the command argument --test-pipeline-options,
  # and start the pipeline job by calling the pipeline's main function.
  run_wordcount(
      test_pipeline.get_full_options_as_args(**extra_opts),
      save_main_session=False)

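# A hypothetical caller, shown only as a sketch: a concrete integration test
# would pass the example's run() callable (e.g. wordcount.run, as used in the
# earlier helper) plus any extra pipeline options. The test name below is
# illustrative, not part of the original listing.
#
#   def test_wordcount_it(self):
#     self._run_wordcount_it(wordcount.run)
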
def test_datastore_wordcount_it(self):
  test_pipeline = TestPipeline(is_integration_test=True)
  dataset = test_pipeline.get_option('project')
  kind = self.DATASTORE_WORDCOUNT_KIND
  output = '/'.join([
      test_pipeline.get_option('output'),
      str(int(time.time() * 1000)),
      'datastore_wordcount_results'
  ])
  arg_sleep_secs = test_pipeline.get_option('sleep_secs')
  sleep_secs = int(arg_sleep_secs) if arg_sleep_secs is not None else None
  pipeline_verifiers = [
      PipelineStateMatcher(),
      FileChecksumMatcher(output + '*-of-*', self.EXPECTED_CHECKSUM, sleep_secs)
  ]
  extra_opts = {
      'dataset': dataset,
      'kind': kind,
      'output': output,
      'read_only': True,
      'on_success_matcher': all_of(*pipeline_verifiers)
  }

  datastore_wordcount.run(
      test_pipeline.get_full_options_as_args(**extra_opts))

def test_datastore_wordcount_it(self):
  test_pipeline = TestPipeline(is_integration_test=True)
  kind = self.DATASTORE_WORDCOUNT_KIND
  output = '/'.join([
      test_pipeline.get_option('output'),
      str(int(time.time() * 1000)),
      'datastore_wordcount_results'
  ])
  arg_sleep_secs = test_pipeline.get_option('sleep_secs')
  sleep_secs = int(arg_sleep_secs) if arg_sleep_secs is not None else None
  pipeline_verifiers = [
      PipelineStateMatcher(),
      FileChecksumMatcher(output + '*-of-*', self.EXPECTED_CHECKSUM, sleep_secs)
  ]
  extra_opts = {
      'kind': kind,
      'output': output,
      # Comment this out to regenerate input data on Datastore (delete
      # existing data first using the bulk delete Dataflow template).
      'read_only': True,
      'on_success_matcher': all_of(*pipeline_verifiers)
  }

  datastore_wordcount.run(
      test_pipeline.get_full_options_as_args(**extra_opts))

def test_user_score_it(self):
  state_verifier = PipelineStateMatcher(PipelineState.DONE)
  file_verifier = FileChecksumMatcher(
      self.output + '*-of-*', self.DEFAULT_EXPECTED_CHECKSUM)

  extra_opts = {
      'input': self.DEFAULT_INPUT_FILE,
      'output': self.output + '/user-score',
      'on_success_matcher': all_of(state_verifier, file_verifier)
  }

  # Register cleanup before pipeline execution
  self.addCleanup(delete_files, [self.output + '*'])

  # Get pipeline options from the command argument --test-pipeline-options,
  # and start the pipeline job by calling the pipeline's main function.
  user_score.run(
      self.test_pipeline.get_full_options_as_args(**extra_opts))

def test_userscore_output_checksum_on_small_input(self):
  # Use a small dataset to avoid out-of-memory errors on local runners
  INPUT_FILE = 'gs://apache-beam-samples/game/small/gaming_data.csv'
  EXPECTED_CHECKSUM = '5b1bc0e8080e3c0f162809ac4c0f49acab23854e'
  state_verifier = PipelineStateMatcher(PipelineState.DONE)
  arg_sleep_secs = self.test_pipeline.get_option('sleep_secs')
  sleep_secs = int(arg_sleep_secs) if arg_sleep_secs is not None else None
  file_verifier = FileChecksumMatcher(
      self.output + '/*-of-*', EXPECTED_CHECKSUM, sleep_secs)

  extra_opts = {
      'input': INPUT_FILE,
      'output': self.output + '/user-score',
      'on_success_matcher': all_of(state_verifier, file_verifier)
  }

  # Register cleanup before pipeline execution
  self.addCleanup(delete_files, [self.output + '*'])

  # Get pipeline options from the command argument --test-pipeline-options,
  # and start the pipeline job by calling the pipeline's main function.
  user_score.run(
      self.test_pipeline.get_full_options_as_args(**extra_opts))