def __init__(self,
             file_pattern,
             min_bundle_size=0,
             compression_type=CompressionTypes.AUTO,
             splittable=True,
             validate=True):
  """Initializes :class:`FileBasedSource`.

  Args:
    file_pattern (str): the file glob to read, a string or a
      :class:`~apache_beam.options.value_provider.ValueProvider`
      (placeholder to inject a runtime value).
    min_bundle_size (int): minimum size of bundles that should be generated
      when performing initial splitting on this source.
    compression_type (str): Used to handle compressed output files.
      Typical value is :attr:`CompressionTypes.AUTO
      <apache_beam.io.filesystem.CompressionTypes.AUTO>`, in which case the
      final file path's extension will be used to detect the compression.
    splittable (bool): whether :class:`FileBasedSource` should try to
      logically split a single file into data ranges so that different
      parts of the same file can be read in parallel. If set to
      :data:`False`, :class:`FileBasedSource` will prevent both initial and
      dynamic splitting of sources for single files. File patterns that
      represent multiple files may still get split into sources for
      individual files. Even if set to :data:`True` by the user,
      :class:`FileBasedSource` may choose to not split the file, for
      example, for compressed files where currently it is not possible to
      efficiently read a data range without decompressing the whole file.
    validate (bool): Boolean flag to verify that the files exist during the
      pipeline creation time.

  Raises:
    ~exceptions.TypeError: when **compression_type** is not valid or if
      **file_pattern** is not a :class:`str` or a
      :class:`~apache_beam.options.value_provider.ValueProvider`.
    ~exceptions.ValueError: when compression and splittable files are
      specified.
    ~exceptions.IOError: when the file pattern specified yields an empty
      result.
  """
  if not isinstance(file_pattern, (basestring, ValueProvider)):
    raise TypeError('%s: file_pattern must be of type string'
                    ' or ValueProvider; got %r instead' %
                    (self.__class__.__name__, file_pattern))

  if isinstance(file_pattern, basestring):
    file_pattern = StaticValueProvider(str, file_pattern)
  self._pattern = file_pattern

  self._concat_source = None
  self._min_bundle_size = min_bundle_size
  if not CompressionTypes.is_valid_compression_type(compression_type):
    raise TypeError('compression_type must be CompressionType object but '
                    'was %s' % type(compression_type))
  self._compression_type = compression_type
  self._splittable = splittable
  if validate and file_pattern.is_accessible():
    self._validate()
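A minimal usage sketch, not from the source: `LineSource` is a hypothetical `FileBasedSource` subclass assumed to implement `read_records()`. It shows that the constructor accepts either a plain glob string or a deferred ValueProvider.

# Sketch only; `LineSource` is an assumed FileBasedSource subclass.
from apache_beam.options.value_provider import StaticValueProvider

# A plain string is wrapped into a StaticValueProvider internally...
source = LineSource('gs://my-bucket/logs/*.txt')

# ...so passing a ValueProvider directly is equivalent, and lets the
# pattern be injected at template runtime instead of pipeline build time.
source = LineSource(StaticValueProvider(str, 'gs://my-bucket/logs/*.txt'))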
def __init__(self, load_time, group_by):
  if isinstance(load_time, string_types):
    load_time = StaticValueProvider(str, load_time)
  if isinstance(group_by, string_types):
    group_by = StaticValueProvider(str, group_by)
  self.load_time = load_time
  self.group_by = group_by
def _add_argparse_args(cls, parser):
  parser.add_value_provider_argument(
      '--group_by',
      default=StaticValueProvider(str, 'ASSET_TYPE'),
      choices=['ASSET_TYPE', 'ASSET_TYPE_VERSION'],
      help='How to group exported resources into BigQuery tables.')
  parser.add_value_provider_argument(
      '--write_disposition',
      default=StaticValueProvider(str, 'WRITE_APPEND'),
      choices=['WRITE_APPEND', 'WRITE_EMPTY'],
      help='To append to or overwrite BigQuery tables.')
  parser.add_value_provider_argument(
      '--input', help='A glob of all input asset json files to process.')
  parser.add_value_provider_argument(
      '--stage',
      help='GCS location to write intermediary BigQuery load files.')
  parser.add_value_provider_argument(
      '--load_time',
      default=StaticValueProvider(str, datetime.now().isoformat()),
      help='Load time of the data (YYYY-MM-DD[HH:MM:SS]).')
  parser.add_value_provider_argument(
      '--dataset', help='BigQuery dataset to load to.')
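A hedged sketch of driving these flags, assuming the method above lives on a PipelineOptions subclass here called `ImportPipelineOptions` (the class name is an assumption, not from the source).

# Sketch only: `ImportPipelineOptions` stands in for the options class
# that defines _add_argparse_args above.
options = ImportPipelineOptions([
    '--group_by', 'ASSET_TYPE_VERSION',
    '--write_disposition', 'WRITE_EMPTY',
    '--input', 'gs://my-bucket/assets/*.json',
    '--stage', 'gs://my-bucket/stage',
    '--dataset', 'asset_inventory',
])
# Each flag is exposed as a ValueProvider; .get() is safe once a value
# is bound (here, at construction time via StaticValueProvider).
print(options.group_by.get())  # 'ASSET_TYPE_VERSION'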
def __init__(self,
             aggregator_dict=None,
             user_project_id="",
             user_job_id="",
             tags=tag_constants.SERVING,
             signature_name=(
                 signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY),
             skip_preprocessing=False,
             target="",
             config=None):
  """Constructor of Prediction beam.DoFn class.

  Args:
    aggregator_dict: A dict of aggregators containing maps from counter name
      to the aggregator.
    user_project_id: A string. The project to which the logs will be sent.
    user_job_id: A string. The job to which the logs will be sent.
    tags: A comma-separated string that contains a list of tags for the
      serving graph.
    signature_name: A string to map into the signature map to get the
      serving signature.
    skip_preprocessing: bool whether to skip preprocessing even when the
      metadata.yaml/metadata.json file exists.
    target: The execution engine to connect to. See target in tf.Session().
      In most cases, users should not set the target.
    config: A ConfigProto proto with configuration options. See config in
      tf.Session().

  Side Inputs:
    model_dir: The directory containing the model to load and the
      checkpoint files to restore the session.
  """
  self._target = target

  # TODO(user): Remove the "if" section when the direct use of
  # PredictionDoFn() is retired from ml_transform.
  if isinstance(user_project_id, basestring):
    user_project_id = StaticValueProvider(str, user_project_id)
  if isinstance(user_job_id, basestring):
    user_job_id = StaticValueProvider(str, user_job_id)
  if isinstance(tags, basestring):
    tags = StaticValueProvider(str, tags)
  if isinstance(signature_name, basestring):
    signature_name = StaticValueProvider(str, signature_name)

  self._user_project_id = user_project_id
  self._user_job_id = user_job_id
  self._tags = tags
  self._signature_name = signature_name
  self._skip_preprocessing = skip_preprocessing
  self._config = config
  self._aggregator_dict = aggregator_dict
  self._model_state = None
  self._cloud_logger = None
  self._tag_list = []

  # Metrics.
  self._model_load_seconds_distribution = beam.metrics.Metrics.distribution(
      _METRICS_NAMESPACE, "model_load_seconds")
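The docstring above declares `model_dir` as a side input; a sketch of wiring it in follows. The pipeline shape and argument values are assumptions, not code from the source.

# Sketch under assumptions: `examples` is a PCollection of serialized
# instances and `model_dir_pc` a single-element PCollection holding the
# model directory, passed as the side input the docstring describes.
predictions = examples | 'Predict' >> beam.ParDo(
    PredictionDoFn(aggregator_dict={},
                   user_project_id='my-project',  # assumed value
                   user_job_id='my-job'),         # assumed value
    beam.pvalue.AsSingleton(model_dir_pc))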
def uploader(mocker):
  credential_id = StaticValueProvider(str, 'id')
  secret = StaticValueProvider(str, 'secret')
  access = StaticValueProvider(str, 'access')
  refresh = StaticValueProvider(str, 'refresh')
  credentials = OAuthCredentials(credential_id, secret, access, refresh)
  return CampaignManagerConversionUploaderDoFn(credentials)
def __init__(self, group_by, num_shards):
  if isinstance(group_by, string_types):
    group_by = StaticValueProvider(str, group_by)
  if isinstance(num_shards, string_types):
    num_shards = StaticValueProvider(str, num_shards)
  self.num_shards = num_shards
  self.group_by = group_by
  self.shard_map = None
def uploader(mocker):
  mocker.patch('googleads.oauth2.GoogleRefreshTokenClient')
  mocker.patch('googleads.adwords.AdWordsClient')
  client_id = StaticValueProvider(str, 'id')
  secret = StaticValueProvider(str, 'secret')
  access = StaticValueProvider(str, 'access')
  refresh = StaticValueProvider(str, 'refresh')
  credentials = OAuthCredentials(client_id, secret, access, refresh)
  return GoogleAnalyticsDataImportUploaderDoFn(credentials)
def __init__(self, stage_dir, load_time):
  if isinstance(stage_dir, string_types):
    stage_dir = StaticValueProvider(str, stage_dir)
  if isinstance(load_time, string_types):
    load_time = StaticValueProvider(str, load_time)
  self.stage_dir = stage_dir
  self.load_time = load_time
  self.open_files = {}
def __init__(self,
             file_pattern,
             min_bundle_size=0,
             compression_type=CompressionTypes.AUTO,
             splittable=True,
             validate=True):
  """Initializes ``FileBasedSource``.

  Args:
    file_pattern: the file glob to read, a string or a ValueProvider
      (placeholder to inject a runtime value).
    min_bundle_size: minimum size of bundles that should be generated when
      performing initial splitting on this source.
    compression_type: compression type to use
    splittable: whether FileBasedSource should try to logically split a
      single file into data ranges so that different parts of the same file
      can be read in parallel. If set to False, FileBasedSource will prevent
      both initial and dynamic splitting of sources for single files. File
      patterns that represent multiple files may still get split into
      sources for individual files. Even if set to True by the user,
      FileBasedSource may choose to not split the file, for example, for
      compressed files where currently it is not possible to efficiently
      read a data range without decompressing the whole file.
    validate: Boolean flag to verify that the files exist during the
      pipeline creation time.

  Raises:
    TypeError: when compression_type is not valid or if file_pattern is not
      a string or a ValueProvider.
    ValueError: when compression and splittable files are specified.
    IOError: when the file pattern specified yields an empty result.
  """
  if not isinstance(file_pattern, (basestring, ValueProvider)):
    raise TypeError('%s: file_pattern must be of type string'
                    ' or ValueProvider; got %r instead' %
                    (self.__class__.__name__, file_pattern))

  if isinstance(file_pattern, basestring):
    file_pattern = StaticValueProvider(str, file_pattern)
  self._pattern = file_pattern

  self._concat_source = None
  self._min_bundle_size = min_bundle_size
  if not CompressionTypes.is_valid_compression_type(compression_type):
    raise TypeError(
        'compression_type must be CompressionType object but '
        'was %s' % type(compression_type))
  self._compression_type = compression_type
  if compression_type in (CompressionTypes.UNCOMPRESSED,
                          CompressionTypes.AUTO):
    self._splittable = splittable
  else:
    # We can't split compressed files efficiently so turn off splitting.
    self._splittable = False
  if validate and file_pattern.is_accessible():
    self._validate()
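Unlike the first variant above, this version forces `_splittable = False` for explicitly compressed files. A small sketch of the resulting behavior, again using the assumed `LineSource` subclass:

# Sketch: `LineSource` is again a hypothetical FileBasedSource subclass.
from apache_beam.io.filesystem import CompressionTypes

s1 = LineSource('data.txt', compression_type=CompressionTypes.UNCOMPRESSED)
s2 = LineSource('data.txt.gz', compression_type=CompressionTypes.GZIP)
# s1 keeps the caller's `splittable` flag; s2 is forced non-splittable,
# since a gzip stream cannot be read from an arbitrary byte offset.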
def eraser(mocker):
  mocker.patch('googleads.oauth2.GoogleRefreshTokenClient')
  mocker.patch('googleads.adwords.AdWordsClient')
  client_id = StaticValueProvider(str, "id")
  secret = StaticValueProvider(str, "secret")
  access = StaticValueProvider(str, "access")
  refresh = StaticValueProvider(str, "refresh")
  credentials = OAuthCredentials(client_id, secret, access, refresh)
  return GoogleAnalyticsDataImportEraser(credentials)
def uploader(mocker):
  mocker.patch('googleads.oauth2.GoogleRefreshTokenClient')
  mocker.patch('googleads.adwords.AdWordsClient')
  credential_id = StaticValueProvider(str, 'id')
  secret = StaticValueProvider(str, 'secret')
  access = StaticValueProvider(str, 'access')
  refresh = StaticValueProvider(str, 'refresh')
  credentials = OAuthCredentials(credential_id, secret, access, refresh)
  return GoogleAdsOfflineUploaderDoFn(credentials,
                                      StaticValueProvider(str, 'devtoken'))
def __init__(self, dataset, write_disposition):
  if isinstance(dataset, string_types):
    dataset = StaticValueProvider(str, dataset)
  if isinstance(write_disposition, string_types):
    write_disposition = StaticValueProvider(str, write_disposition)
  self.write_disposition = write_disposition
  self.dataset = dataset
  self.bigquery_client = None
  self.dataset_location = None
  self.load_jobs = {}
def test_init():
  id = StaticValueProvider(str, "id")
  secret = StaticValueProvider(str, "secret")
  access = StaticValueProvider(str, "access")
  refresh = StaticValueProvider(str, "refresh")
  credentials = OAuthCredentials(id, secret, access, refresh)
  assert credentials.get_client_id() == "id"
  assert credentials.get_client_secret() == "secret"
  assert credentials.get_access_token() == "access"
  assert credentials.get_refresh_token() == "refresh"
def expand(self, pcoll):
  p = pcoll.pipeline
  if not self._temp_directory:
    temp_location = (
        p.options.view_as(GoogleCloudOptions).temp_location or
        self.path.get())
    dir_uid = str(uuid.uuid4())
    self._temp_directory = StaticValueProvider(
        str,
        filesystems.FileSystems.join(temp_location, '.temp%s' % dir_uid))
    _LOGGER.info('Added temporary directory %s', self._temp_directory.get())

  output = (
      pcoll
      | beam.ParDo(
          _WriteUnshardedRecordsFn(
              base_path=self._temp_directory,
              destination_fn=self.destination_fn,
              sink_fn=self.sink_fn,
              max_writers_per_bundle=self._max_num_writers_per_bundle)
      ).with_outputs(
          _WriteUnshardedRecordsFn.SPILLED_RECORDS,
          _WriteUnshardedRecordsFn.WRITTEN_FILES))

  written_files_pc = output[_WriteUnshardedRecordsFn.WRITTEN_FILES]
  spilled_records_pc = output[_WriteUnshardedRecordsFn.SPILLED_RECORDS]

  more_written_files_pc = (
      spilled_records_pc
      | beam.ParDo(
          _AppendShardedDestination(self.destination_fn, self.shards))
      | "GroupRecordsByDestinationAndShard" >> beam.GroupByKey()
      | beam.ParDo(
          _WriteShardedRecordsFn(self._temp_directory, self.sink_fn,
                                 self.shards)))

  files_by_destination_pc = (
      (written_files_pc, more_written_files_pc)
      | beam.Flatten()
      | beam.Map(lambda file_result: (file_result.destination, file_result))
      | "GroupTempFilesByDestination" >> beam.GroupByKey())

  # Now we should take the temporary files, and write them to the final
  # destination, with their proper names.
  file_results = (
      files_by_destination_pc
      | beam.ParDo(
          _MoveTempFilesIntoFinalDestinationFn(
              self.path, self.file_naming_fn, self._temp_directory)))

  return file_results
def test_not_active(mocker, caplog):
  credential_id = StaticValueProvider(str, 'id')
  secret = StaticValueProvider(str, 'secret')
  access = StaticValueProvider(str, 'access')
  refresh = StaticValueProvider(str, 'refresh')
  credentials = OAuthCredentials(credential_id, secret, access, refresh)

  uploader_dofn = GoogleAdsOfflineUploaderDoFn(credentials, None)
  mocker.patch.object(uploader_dofn, '_get_oc_service')
  uploader_dofn.process(Batch(None, []))
  uploader_dofn._get_oc_service.assert_not_called()
  assert 'Skipping upload, parameters not configured.' in caplog.text
def test_static_value_provider_empty_write(self):
  temp_path = StaticValueProvider(
      value_type=str, value=tempfile.NamedTemporaryFile().name)
  sink = MyFileBasedSink(
      temp_path,
      file_name_suffix=StaticValueProvider(value_type=str, value='.output'),
      coder=coders.ToStringCoder())
  with TestPipeline() as p:
    p | beam.Create([]) | beam.io.Write(sink)  # pylint: disable=expression-not-assigned
  self.assertEqual(
      open(temp_path.get() + '-00000-of-00001.output').read(),
      '[start][end]')
def __init__(
    self,
    file_path_prefix,
    coder,
    file_name_suffix='',
    num_shards=0,
    shard_name_template=None,
    mime_type='application/octet-stream',
    compression_type=CompressionTypes.AUTO,
    skip_if_empty=False):
  """
  Raises:
    TypeError: if file path parameters are not a :class:`str` or
      :class:`~apache_beam.options.value_provider.ValueProvider`, or if
      **compression_type** is not member of
      :class:`~apache_beam.io.filesystem.CompressionTypes`.
    ValueError: if **shard_name_template** is not of expected format.
  """
  if not isinstance(file_path_prefix, (str, ValueProvider)):
    raise TypeError(
        'file_path_prefix must be a string or ValueProvider; '
        'got %r instead' % file_path_prefix)
  if not isinstance(file_name_suffix, (str, ValueProvider)):
    raise TypeError(
        'file_name_suffix must be a string or ValueProvider; '
        'got %r instead' % file_name_suffix)
  if not CompressionTypes.is_valid_compression_type(compression_type):
    raise TypeError(
        'compression_type must be CompressionType object but '
        'was %s' % type(compression_type))

  if shard_name_template is None:
    shard_name_template = DEFAULT_SHARD_NAME_TEMPLATE
  elif shard_name_template == '':
    num_shards = 1
  if isinstance(file_path_prefix, str):
    file_path_prefix = StaticValueProvider(str, file_path_prefix)
  if isinstance(file_name_suffix, str):
    file_name_suffix = StaticValueProvider(str, file_name_suffix)
  self.file_path_prefix = file_path_prefix
  self.file_name_suffix = file_name_suffix
  self.num_shards = num_shards
  self.coder = coder
  self.shard_name_format = self._template_to_format(shard_name_template)
  self.shard_name_glob_format = self._template_to_glob_format(
      shard_name_template)
  self.compression_type = compression_type
  self.mime_type = mime_type
  self.skip_if_empty = skip_if_empty
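A construction sketch for this sink, reusing `MyFileBasedSink` from the test above; both path parameters accept plain strings or ValueProviders interchangeably.

# Sketch: MyFileBasedSink is the test subclass used earlier in this section.
sink = MyFileBasedSink(
    StaticValueProvider(str, '/tmp/out/result'),
    coder=coders.ToStringCoder(),
    file_name_suffix='.txt',   # a plain string; wrapped into a
                               # StaticValueProvider by the constructor
    shard_name_template='',    # empty template forces num_shards = 1
    skip_if_empty=True)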
def test_value_provider_options(self):
  class UserOptions(PipelineOptions):
    @classmethod
    def _add_argparse_args(cls, parser):
      parser.add_value_provider_argument(
          '--pot_vp_arg1', help='This flag is a value provider')
      parser.add_value_provider_argument(
          '--pot_vp_arg2', default=1, type=int)
      parser.add_argument('--pot_non_vp_arg1', default=1, type=int)

  # Provide values: if not provided, the option becomes of the type
  # runtime vp.
  options = UserOptions(['--pot_vp_arg1', 'hello'])
  self.assertIsInstance(options.pot_vp_arg1, StaticValueProvider)
  self.assertIsInstance(options.pot_vp_arg2, RuntimeValueProvider)
  self.assertIsInstance(options.pot_non_vp_arg1, int)

  # Values can be overwritten.
  options = UserOptions(
      pot_vp_arg1=5,
      pot_vp_arg2=StaticValueProvider(value_type=str, value='bye'),
      pot_non_vp_arg1=RuntimeValueProvider(
          option_name='foo', value_type=int, default_value=10))
  self.assertEqual(options.pot_vp_arg1, 5)
  self.assertTrue(
      options.pot_vp_arg2.is_accessible(),
      '%s is not accessible' % options.pot_vp_arg2)
  self.assertEqual(options.pot_vp_arg2.get(), 'bye')
  self.assertFalse(options.pot_non_vp_arg1.is_accessible())
  with self.assertRaises(RuntimeError):
    options.pot_non_vp_arg1.get()
def test_iobase_source(self):
  query = StaticValueProvider(str, self.query)
  with beam.Pipeline(argv=self.args) as p:
    result = (
        p
        | 'read with value provider query' >> beam.io.ReadFromBigQuery(
            query=query, use_standard_sql=True, project=self.project))
    assert_that(result, equal_to(self.TABLE_DATA))
def __init__(self, dataset):
  if isinstance(dataset, string_types):
    dataset = StaticValueProvider(str, dataset)
  self.dataset = dataset
  self.bigquery_client = None
  self.dataset_location = None
  self.load_jobs = {}
def __init__(self, dataset, write_disposition):
  # Can't use super().
  # https://issues.apache.org/jira/browse/BEAM-6158?focusedCommentId=16919945
  # super(DeleteDataSetTables, self).__init__(dataset)
  BigQueryDoFn.__init__(self, dataset)
  if isinstance(write_disposition, string_types):
    write_disposition = StaticValueProvider(str, write_disposition)
  self.write_disposition = write_disposition
def __init__(self, dataset, load_time):
  # Can't use super().
  # https://issues.apache.org/jira/browse/BEAM-6158?focusedCommentId=16919945
  # super(LoadToBigQuery, self).__init__(dataset)
  BigQueryDoFn.__init__(self, dataset)
  if isinstance(load_time, string_types):
    load_time = StaticValueProvider(str, load_time)
  self.load_time = load_time
def __init__(self, **kwargs):
  self._conn = None
  self._max_id = None
  self._current_id = 0
  for k, v in kwargs.items():
    if isinstance(v, (str, unicode)):
      # StaticValueProvider takes (value_type, value); wrap plain strings
      # so every stored attribute is uniformly a ValueProvider.
      v = StaticValueProvider(str, v)
    setattr(self, '_' + k, v)
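A usage sketch for the kwargs wrapper above, assuming the class is a source-like helper here called `SqlSource` (the name and keyword set are assumptions).

# Sketch: `SqlSource` stands in for the class defining __init__ above.
src = SqlSource(
    table='events',                              # plain string, gets wrapped
    query=StaticValueProvider(str, 'SELECT 1'))  # already a ValueProvider,
                                                 # stored as-is
# Both attributes are ValueProviders afterwards, so .get() works uniformly:
assert src._table.get() == 'events'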
def __init__(self, aggregator_dict=None, user_project_id="", user_job_id="", skip_preprocessing=False, target="", config=None): """Constructor of Prediction beam.DoFn class. Args: aggregator_dict: A dict of aggregators containing maps from counter name to the aggregator. user_project_id: A string. The project to which the logs will be sent. user_job_id: A string. The job to which the logs will be sent. skip_preprocessing: bool whether to skip preprocessing even when the metadata.yaml/metadata.json file exists. target: The execution engine to connect to. See target in tf.Session(). In most cases, users should not set the target. config: A ConfigProto proto with configuration options. See config in tf.Session() Side Inputs: model_dir: The directory containing the model to load and the checkpoint files to restore the session. """ self._target = target # TODO(user): Remove the "if" section when the direct use of # PredictionDoFn() is retired from ml_transform. if isinstance(user_project_id, basestring): user_project_id = StaticValueProvider(str, user_project_id) if isinstance(user_job_id, basestring): user_job_id = StaticValueProvider(str, user_job_id) self._user_project_id = user_project_id self._user_job_id = user_job_id self._skip_preprocessing = skip_preprocessing self._config = config self._aggregator_dict = aggregator_dict self._model_state = None self._cloud_logger = None # Metrics. self._model_load_seconds_distribution = beam.metrics.Metrics.distribution( self.__class__, "model_load_seconds")
def testValueProviderNamespace(self):
  self.vp_namespace = StaticValueProvider(str, 'vp_namespace')
  self.expected_namespace = 'vp_namespace'

  q = Query(kind='kind', project=self._PROJECT, namespace=self.vp_namespace)
  cq = q._to_client_query(self._test_client)
  self.assertEqual(self.expected_namespace, cq.namespace)

  _LOGGER.info('query: %s', q)  # Test __repr__()
def __init__(self,
             file_path_prefix,
             coder,
             file_name_suffix='',
             num_shards=0,
             shard_name_template=None,
             mime_type='application/octet-stream',
             compression_type=CompressionTypes.AUTO):
  """
  Raises:
    TypeError: if file path parameters are not a string or ValueProvider,
      or if compression_type is not member of CompressionTypes.
    ValueError: if shard_name_template is not of expected format.
  """
  if not isinstance(file_path_prefix, (basestring, ValueProvider)):
    raise TypeError(
        'file_path_prefix must be a string or ValueProvider; '
        'got %r instead' % file_path_prefix)
  if not isinstance(file_name_suffix, (basestring, ValueProvider)):
    raise TypeError(
        'file_name_suffix must be a string or ValueProvider; '
        'got %r instead' % file_name_suffix)
  if not CompressionTypes.is_valid_compression_type(compression_type):
    raise TypeError(
        'compression_type must be CompressionType object but '
        'was %s' % type(compression_type))

  if shard_name_template is None:
    shard_name_template = DEFAULT_SHARD_NAME_TEMPLATE
  elif shard_name_template == '':
    num_shards = 1
  if isinstance(file_path_prefix, basestring):
    file_path_prefix = StaticValueProvider(str, file_path_prefix)
  if isinstance(file_name_suffix, basestring):
    file_name_suffix = StaticValueProvider(str, file_name_suffix)
  self.file_path_prefix = file_path_prefix
  self.file_name_suffix = file_name_suffix
  self.num_shards = num_shards
  self.coder = coder
  self.shard_name_format = self._template_to_format(shard_name_template)
  self.compression_type = compression_type
  self.mime_type = mime_type
def __init__(
    self,
    # gcs_location=None,
    get_destination_uri=None,
    table=None,
    dataset=None,
    project=None,
    query=None,
    validate=False,
    coder=None,
    use_standard_sql=False,
    flatten_results=True,
    kms_key=None):
  if table is not None and query is not None:
    raise ValueError(
        'Both a BigQuery table and a query were specified.'
        ' Please specify only one of these.')
  elif table is None and query is None:
    raise ValueError('A BigQuery table or a query must be specified')
  elif table is not None:
    self.table_reference = bigquery_tools.parse_table_reference(
        table, dataset, project)
    self.query = None
    self.use_legacy_sql = True
  else:
    if isinstance(query, (str, unicode)):
      query = StaticValueProvider(str, query)
    self.query = query
    # TODO(BEAM-1082): Change the internal flag to be standard_sql
    self.use_legacy_sql = not use_standard_sql
    self.table_reference = None

  self.get_destination_uri = get_destination_uri
  # self.gcs_location = gcs_location
  if isinstance(project, (str, unicode)):
    project = StaticValueProvider(str, project)
  self.project = project
  self.validate = validate
  self.flatten_results = flatten_results
  self.coder = coder or _JsonToDictCoder
  self.kms_key = kms_key
  self.split_result = None
def __init__(self, file_patterns, **kwargs):
  # Handle the templated values.
  if not isinstance(file_patterns, (basestring, ValueProvider)):
    raise TypeError('%s: file_patterns must be of type string'
                    ' or ValueProvider; got %r instead' %
                    (self.__class__.__name__, file_patterns))

  if isinstance(file_patterns, basestring):
    file_patterns = StaticValueProvider(str, file_patterns)
  self._file_patterns = file_patterns
  self._sources = []
  self._kwargs = kwargs
def test_iobase_source_with_query_and_filters(self):
  EXPECTED_TABLE_DATA = [{'string': u'привет'}]
  query = StaticValueProvider(str, self.query)
  with beam.Pipeline(argv=self.args) as p:
    result = (
        p
        | 'Direct read with query' >> beam.io.ReadFromBigQuery(
            method=beam.io.ReadFromBigQuery.Method.DIRECT_READ,
            row_restriction='number > 2',
            selected_fields=['string'],
            use_standard_sql=True,
            project=self.project,
            query=query))
    assert_that(result, equal_to(EXPECTED_TABLE_DATA))
def testValueProviderFilters(self):
  self.vp_filters = [
      [(StaticValueProvider(str, 'property_name'),
        StaticValueProvider(str, '='),
        StaticValueProvider(str, 'value'))],
      [(StaticValueProvider(str, 'property_name'),
        StaticValueProvider(str, '='),
        StaticValueProvider(str, 'value')),
       ('property_name', '=', 'value')],
  ]
  self.expected_filters = [
      [('property_name', '=', 'value')],
      [('property_name', '=', 'value'), ('property_name', '=', 'value')],
  ]

  for vp_filter, exp_filter in zip(self.vp_filters, self.expected_filters):
    q = Query(
        kind='kind',
        project=self._PROJECT,
        namespace=self._NAMESPACE,
        filters=vp_filter)
    cq = q._to_client_query(self._test_client)
    self.assertEqual(exp_filter, cq.filters)
    logging.info('query: %s', q)  # Test __repr__()