Example No. 1
def test_big_query_new_types_native(self):
    expected_checksum = test_utils.compute_hash(NEW_TYPES_OUTPUT_EXPECTED)
    verify_query = NEW_TYPES_OUTPUT_VERIFY_QUERY % self.output_table
    pipeline_verifiers = [
        PipelineStateMatcher(),
        BigqueryMatcher(
            project=self.project,
            query=verify_query,
            checksum=expected_checksum,
            timeout_secs=30,
        )
    ]
    self._setup_new_types_env()
    extra_opts = {
        'query': NEW_TYPES_QUERY % (self.dataset_id, NEW_TYPES_INPUT_TABLE),
        'output': self.output_table,
        'output_schema': NEW_TYPES_OUTPUT_SCHEMA,
        'use_standard_sql': False,
        'native': True,
        'use_json_exports': True,
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION_MS,
        'on_success_matcher': all_of(*pipeline_verifiers),
        'experiments': 'use_legacy_bq_sink',
    }
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
Example No. 2
    def test_big_query_standard_sql_kms_key_native(self):
        if isinstance(self.test_pipeline.runner, TestDirectRunner):
            self.skipTest("This test doesn't work on DirectRunner.")
        verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
        expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
        pipeline_verifiers = [
            PipelineStateMatcher(),
            BigqueryMatcher(project=self.project,
                            query=verify_query,
                            checksum=expected_checksum)
        ]
        kms_key = self.test_pipeline.get_option('kms_key_name')
        self.assertTrue(kms_key)
        extra_opts = {
            'query': STANDARD_QUERY,
            'output': self.output_table,
            'output_schema': DIALECT_OUTPUT_SCHEMA,
            'use_standard_sql': True,
            'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION_MS,
            'on_success_matcher': all_of(*pipeline_verifiers),
            'kms_key': kms_key,
            'native': True,
            'experiments': 'use_legacy_bq_sink',
        }
        options = self.test_pipeline.get_full_options_as_args(**extra_opts)
        big_query_query_to_table_pipeline.run_bq_pipeline(options)

        table = self.bigquery_client.get_table(self.project, self.dataset_id,
                                               'output_table')
        self.assertIsNotNone(table.encryptionConfiguration,
                             'No encryption configuration found: %s' % table)
        self.assertEqual(kms_key, table.encryptionConfiguration.kmsKeyName)
Example No. 3
    def _matches(self, _):
        if self.checksum is None:
            response = self._query_with_retry()
            _LOGGER.info('Read from given query (%s), total rows %d',
                         self.query, len(response))
            self.checksum = compute_hash(response)
            _LOGGER.info('Generate checksum: %s', self.checksum)

        return self.checksum == self.expected_checksum
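These verifiers follow the PyHamcrest matcher contract: any BaseMatcher subclass that implements _matches (and describe_to for failure messages) can be combined with all_of and handed to the pipeline as an on_success_matcher. A minimal sketch of that contract follows; ChecksumMatcher and fetch_rows are illustrative names, not part of Beam.

from apache_beam.testing.test_utils import compute_hash
from hamcrest.core.base_matcher import BaseMatcher

class ChecksumMatcher(BaseMatcher):
  """Illustrative matcher: hashes rows produced by a callable."""

  def __init__(self, expected_checksum, fetch_rows):
    self.expected_checksum = expected_checksum
    self.fetch_rows = fetch_rows  # callable returning the rows to hash
    self.checksum = None

  def _matches(self, _):
    # The matched item is ignored; like the matchers above, this one
    # fetches the actual data itself and compares digests.
    self.checksum = compute_hash(self.fetch_rows())
    return self.checksum == self.expected_checksum

  def describe_to(self, description):
    description.append_text(
        'Expected checksum is %s' % self.expected_checksum)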
Example No. 4
def get_checksum():
    # Nested helper: closes over ``self`` from the enclosing matcher method.
    response = self._query_with_retry()
    _LOGGER.info('Read from given query (%s), total rows %d',
                 self.query, len(response))
    self.checksum = compute_hash(response)
    _LOGGER.info('Generate checksum: %s', self.checksum)
    if self.checksum != self.expected_checksum:
        # This exception is never raised beyond the enclosing method.
        raise ValueError(
            'Checksums do not match. Expected: %s, got: %s' %
            (self.expected_checksum, self.checksum))
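The comment above holds because the enclosing method typically runs get_checksum through a retry helper that swallows the ValueError and tries again until the queried data settles or retries run out. A generic sketch of that pattern; retry_on_mismatch, MAX_RETRIES, and the fixed delay are illustrative, not Beam's actual retry policy.

import time

MAX_RETRIES = 4  # illustrative bound

def retry_on_mismatch(fn, num_retries=MAX_RETRIES, delay_secs=5.0):
  """Re-invoke fn until it stops raising ValueError or retries run out."""
  for attempt in range(num_retries + 1):
    try:
      return fn()
    except ValueError:
      if attempt == num_retries:
        raise  # give up: the checksums never converged
      time.sleep(delay_secs)

# Usage inside the enclosing method:
#   retry_on_mismatch(get_checksum)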
Example No. 5
    def test_autocomplete_it(self):
        with TestPipeline(is_integration_test=True) as p:
            words = p | beam.io.ReadFromText(self.KINGLEAR_INPUT)
            result = words | autocomplete.TopPerPrefix(10)
            # values must be hashable for now
            result = result | beam.Map(
                lambda k_vs: [k_vs[0], k_vs[1][0][0], k_vs[1][0][1]])
            checksum = (result
                        | beam.Map(lambda x: int(compute_hash(x)[:8], 16))
                        | beam.CombineGlobally(sum))

            assert_that(checksum, equal_to([self.KINGLEAR_HASH_SUM]))
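The assertion works because compute_hash maps each flattened result row to a stable hex digest; taking the first eight hex characters as an integer and summing them globally yields an order-insensitive fingerprint of the whole collection. A stand-in written from this observed usage, as an assumption rather than Beam's actual implementation:

import hashlib

def compute_hash(content):
  # Hash the string form of each element in a deterministic order so the
  # digest does not depend on how the input happened to be ordered.
  m = hashlib.md5()
  for elem in sorted(str(x).encode('utf-8') for x in content):
    m.update(elem)
  return m.hexdigest()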
Example No. 6
  def test_autocomplete_it(self):
    with TestPipeline(is_integration_test=True) as p:
      words = p | beam.io.ReadFromText(self.KINGLEAR_INPUT)
      result = words | autocomplete.TopPerPrefix(10)
      # values must be hashable for now
      result = result | beam.Map(lambda k_vs: [k_vs[0],
                                               k_vs[1][0][0], k_vs[1][0][1]])
      checksum = (result
                  | beam.Map(lambda x: int(compute_hash(x)[:8], 16))
                  | beam.CombineGlobally(sum))

      assert_that(checksum, equal_to([self.KINGLEAR_HASH_SUM]))
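Example No. 7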
def test_big_query_standard_sql(self):
  verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
  expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
  pipeline_verifiers = [PipelineStateMatcher(), BigqueryMatcher(
      project=self.project,
      query=verify_query,
      checksum=expected_checksum)]
  extra_opts = {'query': STANDARD_QUERY,
                'output': self.output_table,
                'output_schema': DIALECT_OUTPUT_SCHEMA,
                'use_standard_sql': True,
                'on_success_matcher': all_of(*pipeline_verifiers)}
  options = self.test_pipeline.get_full_options_as_args(**extra_opts)
  big_query_query_to_table_pipeline.run_bq_pipeline(options)
Example No. 9
    def _matches(self, _):
        logging.info('Start verify Bigquery data.')
        # Run query
        bigquery_client = bigquery.Client(project=self.project)
        response = self._query_with_retry(bigquery_client)
        logging.info('Read from given query (%s), total rows %d', self.query,
                     len(response))

        # Compute checksum
        self.checksum = compute_hash(response)
        logging.info('Generate checksum: %s', self.checksum)

        # Verify result
        return self.checksum == self.expected_checksum
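A sketch of what _query_with_retry might do with the google-cloud-bigquery client, with the retry policy itself elided; query_rows is an illustrative name.

def query_rows(bigquery_client, query):
  # Run the query synchronously and flatten each row to a tuple of values,
  # matching what compute_hash consumes above.
  query_job = bigquery_client.query(query)
  return [row.values() for row in query_job.result()]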
Example No. 10
  def _matches(self, _):
    if self.sleep_secs:
      # Wait to have output file ready on FS
      logging.info('Wait %d seconds...', self.sleep_secs)
      time.sleep(self.sleep_secs)

    # Read from given file(s) path
    read_lines = self._read_with_retry()

    # Compute checksum
    self.checksum = utils.compute_hash(read_lines)
    logging.info('Read from given path %s, %d lines, checksum: %s.',
                 self.file_path, len(read_lines), self.checksum)
    return self.checksum == self.expected_checksum
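Here _read_with_retry is expected to expand self.file_path (possibly a glob) and return the matched files' lines. A minimal sketch against Beam's FileSystems API, without the retry wrapper; read_lines is an illustrative name.

from apache_beam.io.filesystems import FileSystems

def read_lines(file_pattern):
  # Expand the pattern, then collect every line from every matched file.
  lines = []
  for metadata in FileSystems.match([file_pattern])[0].metadata_list:
    with FileSystems.open(metadata.path) as f:
      lines.extend(f.read().decode('utf-8').splitlines())
  return lines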
Example No. 11
  def _matches(self, _):
    logging.info('Start verify Bigquery data.')
    # Run query
    bigquery_client = bigquery.Client(project=self.project)
    response = self._query_with_retry(bigquery_client)
    logging.info('Read from given query (%s), total rows %d',
                 self.query, len(response))

    # Compute checksum
    self.checksum = compute_hash(response)
    logging.info('Generate checksum: %s', self.checksum)

    # Verify result
    return self.checksum == self.expected_checksum
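Example No. 12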
def test_big_query_new_types(self):
  expected_checksum = test_utils.compute_hash(NEW_TYPES_OUTPUT_EXPECTED)
  verify_query = NEW_TYPES_OUTPUT_VERIFY_QUERY % self.output_table
  pipeline_verifiers = [PipelineStateMatcher(), BigqueryMatcher(
      project=self.project,
      query=verify_query,
      checksum=expected_checksum)]
  self._setup_new_types_env()
  extra_opts = {
      'query': NEW_TYPES_QUERY % (self.dataset_id, NEW_TYPES_INPUT_TABLE),
      'output': self.output_table,
      'output_schema': NEW_TYPES_OUTPUT_SCHEMA,
      'use_standard_sql': False,
      'on_success_matcher': all_of(*pipeline_verifiers)}
  options = self.test_pipeline.get_full_options_as_args(**extra_opts)
  big_query_query_to_table_pipeline.run_bq_pipeline(options)
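Example No. 14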
  def test_big_query_legacy_sql(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [PipelineStateMatcher(), BigqueryMatcher(
        project=self.project,
        query=verify_query,
        checksum=expected_checksum)]

    extra_opts = {'query': LEGACY_QUERY,
                  'output': self.output_table,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': False,
                  'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION_MS,
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
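Example No. 15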
  def test_big_query_legacy_sql(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [PipelineStateMatcher(), BigqueryMatcher(
        project=self.project,
        query=verify_query,
        checksum=expected_checksum)]

    gs_location = 'gs://temp-storage-for-upload-tests/%s' % self.output_table
    extra_opts = {'query': LEGACY_QUERY,
                  'output': self.output_table,
                  'bq_temp_location': gs_location,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': False,
                  'on_success_matcher': all_of(*pipeline_verifiers)}
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)
Example No. 17
  def test_big_query_standard_sql_kms_key(self):
    verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
    expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
    pipeline_verifiers = [PipelineStateMatcher(), BigqueryMatcher(
        project=self.project,
        query=verify_query,
        checksum=expected_checksum)]
    extra_opts = {'query': STANDARD_QUERY,
                  'output': self.output_table,
                  'output_schema': DIALECT_OUTPUT_SCHEMA,
                  'use_standard_sql': True,
                  'on_success_matcher': all_of(*pipeline_verifiers),
                  'kms_key': KMS_KEY
                 }
    options = self.test_pipeline.get_full_options_as_args(**extra_opts)
    big_query_query_to_table_pipeline.run_bq_pipeline(options)

    table = self.bigquery_client.get_table(
        self.project, self.dataset_id, 'output_table')
    self.assertEqual(KMS_KEY, table.encryptionConfiguration.kmsKeyName)
Example No. 18
    def test_big_query_legacy_sql(self):
        verify_query = DIALECT_OUTPUT_VERIFY_QUERY % self.output_table
        expected_checksum = test_utils.compute_hash(DIALECT_OUTPUT_EXPECTED)
        pipeline_verifiers = [
            PipelineStateMatcher(),
            BigqueryMatcher(project=self.project,
                            query=verify_query,
                            checksum=expected_checksum)
        ]

        gs_location = 'gs://temp-storage-for-upload-tests/%s' % self.output_table
        extra_opts = {
            'query': LEGACY_QUERY,
            'output': self.output_table,
            'bq_temp_location': gs_location,
            'output_schema': DIALECT_OUTPUT_SCHEMA,
            'use_standard_sql': False,
            'on_success_matcher': all_of(*pipeline_verifiers)
        }
        options = self.test_pipeline.get_full_options_as_args(**extra_opts)
        big_query_query_to_table_pipeline.run_bq_pipeline(options)