Example #1
def model_datastoreio():
  """Using a Read and Write transform to read/write to Cloud Datastore."""

  import uuid
  from google.datastore.v1 import entity_pb2
  from google.datastore.v1 import query_pb2
  import googledatastore
  import apache_beam as beam
  from apache_beam.utils.pipeline_options import PipelineOptions
  from apache_beam.io.datastore.v1.datastoreio import ReadFromDatastore
  from apache_beam.io.datastore.v1.datastoreio import WriteToDatastore

  project = 'my_project'
  kind = 'my_kind'
  query = query_pb2.Query()
  query.kind.add().name = kind

  # [START model_datastoreio_read]
  p = beam.Pipeline(options=PipelineOptions())
  entities = p | 'Read From Datastore' >> ReadFromDatastore(project, query)
  # [END model_datastoreio_read]
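
  # Not part of the original snippet: a minimal sketch of consuming the
  # entities read above, e.g. counting them with a standard Beam combiner.
  entity_count = entities | 'Count Entities' >> beam.combiners.Count.Globally()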

  # [START model_datastoreio_write]
  p = beam.Pipeline(options=PipelineOptions())
  musicians = p | 'Musicians' >> beam.Create(
      ['Mozart', 'Chopin', 'Beethoven', 'Vivaldi'])

  def to_entity(content):
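    # Build an entity keyed by (kind, random UUID) and store the given text
    # in a 'content' property.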
    entity = entity_pb2.Entity()
    googledatastore.helper.add_key_path(entity.key, kind, str(uuid.uuid4()))
    googledatastore.helper.add_properties(entity, {'content': unicode(content)})
    return entity

  entities = musicians | 'To Entity' >> beam.Map(to_entity)
  entities | 'Write To Datastore' >> WriteToDatastore(project)
  # [END model_datastoreio_write]

def write_to_datastore(project, user_options, pipeline_options):
    """Creates a pipeline that writes entities to Cloud Datastore."""
    p = beam.Pipeline(options=pipeline_options)

    # pylint: disable=expression-not-assigned
    (p
     | 'read' >> ReadFromText(user_options.input)
     | 'create entity' >> beam.Map(
         EntityWrapper(user_options.namespace, user_options.kind,
                       user_options.ancestor).make_entity)
     | 'write to datastore' >> WriteToDatastore(project))

    # Actually run the pipeline (all operations above are deferred).
    p.run().wait_until_finish()
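

# A hedged sketch (not part of the original example) of how write_to_datastore
# might be invoked. The flag names (--input, --kind, --namespace, --ancestor)
# and the 'my_project' id are illustrative assumptions only.
def main(argv=None):
    import argparse
    # Import path matches the example above; newer Beam releases expose
    # PipelineOptions from apache_beam.options.pipeline_options instead.
    from apache_beam.utils.pipeline_options import PipelineOptions

    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True,
                        help='Input text file to read lines from.')
    parser.add_argument('--kind', required=True,
                        help='Datastore kind for the written entities.')
    parser.add_argument('--namespace', default=None,
                        help='Optional Datastore namespace.')
    parser.add_argument('--ancestor', default='root',
                        help='Ancestor key name used by EntityWrapper.')
    user_options, pipeline_args = parser.parse_known_args(argv)

    write_to_datastore('my_project', user_options,
                       PipelineOptions(pipeline_args))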