Example #1
 def _flush_batch(self):
     # Flush the current batch of mutations to Cloud Datastore.
     helper.write_mutations(self._datastore, self._project,
                            self._mutations)
     logging.debug("Successfully wrote %d mutations.",
                   len(self._mutations))
     self._mutations = []
Example #2
def monitor_migration_status(migration_status, migration_history_obj):
    """
    Takes a pipeline status result and uses it to update MigrationHistory
    status as the migration runs.

    Note: migration_history_obj must be pickled!

    Calling forget() on this result will free up the worker to work on
    other tasks.
    """
    # Set up a datastore client
    project = PROJECT or 'meridianedit-staging'
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000, overload_ratio=1.25)

    if migration_status == 'DONE':
        migration_history_obj.properties.get('status').string_value = Status.success
    elif migration_status in ['FAILED', 'CANCELLED', 'CANCELLING']:
        migration_history_obj.properties.get('status').string_value = Status.failed
    elif migration_status in ['STARTING', 'RUNNING', 'UPDATED', 'DRAINING', 'DRAINED']:
        migration_history_obj.properties.get('status').string_value = Status.running
    elif migration_status in ['PENDING', 'STOPPED']:
        migration_history_obj.properties.get('status').string_value = Status.waiting
    elif migration_status == 'UNKNOWN':
        migration_history_obj.properties.get('status').string_value = Status.unknown
    else:
        # Sometimes the migration status matches none of these values. Assume
        # success so we can kick off post-migration work; this is based on
        # observation in the wild.
        migration_history_obj.properties.get('status').string_value = Status.success

    # Write the mutated entity to the datastore
    mutations = [Mutation(update=migration_history_obj)]
    apache_helper.write_mutations(client, project, mutations, throttler, rpc_stats_callback)
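Both example #2 above and example #7 below pass an rpc_stats_callback to apache_helper.write_mutations without defining it. Below is a minimal sketch of such a callback, assuming (as the _update_rpc_stats callbacks in examples #3 to #5 suggest) that the helper reports outcomes through keyword counters such as successes, errors, and throttled_secs; the counter names and the logging are assumptions, not part of the examples.

import logging

def rpc_stats_callback(successes=0, errors=0, throttled_secs=0, **kwargs):
    # Record how many Datastore write RPCs succeeded, failed, or were throttled.
    # A real callback might increment pipeline metrics instead of logging.
    if successes:
        logging.info("Datastore write RPCs succeeded: %d", successes)
    if errors:
        logging.warning("Datastore write RPCs failed: %d", errors)
    if throttled_secs:
        logging.info("Datastore writes throttled for %d seconds", throttled_secs)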
Example #3
    def _flush_batch(self):
      # Flush the current batch of mutations to Cloud Datastore.
      _, latency_ms = helper.write_mutations(
          self._datastore, self._project, self._mutations,
          self._update_rpc_stats)
      logging.debug("Successfully wrote %d mutations in %dms.",
                    len(self._mutations), latency_ms)

      if not self._fixed_batch_size:
        now = time.time()
        self._batch_sizer.report_latency(now, latency_ms, len(self._mutations))
        self._target_batch_size = self._batch_sizer.get_batch_size(now)

      self._mutations = []
      self._mutations_size = 0
Example #4
    def _flush_batch(self):
      # Flush the current batch of mutations to Cloud Datastore.
      _, latency_ms = helper.write_mutations(
          self._datastore, self._project, self._mutations,
          self._throttler, self._update_rpc_stats,
          throttle_delay=_Mutate._WRITE_BATCH_TARGET_LATENCY_MS//1000)
      logging.debug("Successfully wrote %d mutations in %dms.",
                    len(self._mutations), latency_ms)

      if not self._fixed_batch_size:
        now = time.time()*1000
        self._batch_sizer.report_latency(now, latency_ms, len(self._mutations))
        self._target_batch_size = self._batch_sizer.get_batch_size(now)

      self._mutations = []
      self._mutations_size = 0
Example #5
    def _flush_batch(self):
      # Flush the current batch of mutations to Cloud Datastore.
      _, latency_ms = helper.write_mutations(
          self._datastore, self._project, self._mutations,
          self._throttler, self._update_rpc_stats,
          throttle_delay=util.WRITE_BATCH_TARGET_LATENCY_MS//1000)
      _LOGGER.debug("Successfully wrote %d mutations in %dms.",
                    len(self._mutations), latency_ms)

      if not self._fixed_batch_size:
        now = time.time()*1000
        self._batch_sizer.report_latency(now, latency_ms, len(self._mutations))
        self._target_batch_size = self._batch_sizer.get_batch_size(now)

      self._mutations = []
      self._mutations_size = 0
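Examples #3 to #5 only show the flush step; the surrounding DoFn is expected to accumulate mutations and their size until the target batch size is reached. The rough sketch below shows that driver code under the assumption that a hypothetical self._mutation_builder turns each element into a Mutation; it is only the shape implied by the fields used above, not the actual Beam DatastoreWriteFn.

    def process(self, element):
      # Convert the element into a Mutation (self._mutation_builder is a
      # hypothetical helper, not part of the examples above).
      mutation = self._mutation_builder(element)
      self._mutations.append(mutation)
      self._mutations_size += mutation.ByteSize()
      if len(self._mutations) >= self._target_batch_size:
        self._flush_batch()

    def finish_bundle(self):
      # Write out whatever is left at the end of the bundle.
      if self._mutations:
        self._flush_batch()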
Example #6
 def _flush_batch(self):
   # Flush the current batch of mutations to Cloud Datastore.
   helper.write_mutations(self._datastore, self._project, self._mutations)
   logging.debug("Successfully wrote %d mutations.", len(self._mutations))
   self._mutations = []
Example #7
def run_data_migration():
    request_data = json.loads(request.get_data())

    # Required fields
    fields = [
        'name',
        'function_kwargs',
        'user'
    ]

    # Some basic validation
    for f in fields:
        if f not in request_data:
            resp_data = json.dumps(
                {
                    'error': 'The ' + f + ' field is required.'
                }
            )
            resp = Response(resp_data, status=400, mimetype='application/json')
            return resp

    if request_data['name'] not in migration.choices:
        resp_data = json.dumps(
            {
                'error': 'The migration name is not valid.'
            }
        )
        resp = Response(resp_data, status=400, mimetype='application/json')
        return resp

    migration_name = request_data['name']
    function_kwargs = request_data['function_kwargs'] or {}
    user = request_data['user']

    function_kwargs.update({'name': migration_name})

    # Create a MigrationHistory entity to keep track of the migration status.
    # Set the project.
    project = PROJECT or 'meridianedit-staging'

    # Create entity key
    partition_id = entity_pb2.PartitionId(project_id=project, namespace_id="")
    migration_history_obj_id = datetime.now().strftime("%Y%m%d%H%M%S")
    path_element = entity_pb2.Key.PathElement(kind="MigrationHistory", name=migration_history_obj_id)
    key = entity_pb2.Key(partition_id=partition_id, path=[path_element])

    # Create entity and give it properties
    entity = entity_pb2.Entity(key=key)
    property_dict = {
        'name': migration_name,
        'function_kwargs': json.dumps(function_kwargs),
        'started_by': user,
        'status': 'running',
        'created': datetime.now()
    }
    datastore_helper.add_properties(entity, property_dict)

    # Add entity to datastore
    mutations = [Mutation(insert=entity)]
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000, overload_ratio=1.25)
    apache_helper.write_mutations(client, project, mutations, throttler, rpc_stats_callback)

    # Call the migration with any given function kwargs
    migration_kwargs = {
        'migration_history_obj': migration_history_obj_id,
    }
    migration_kwargs.update(function_kwargs)

    # Run the migration in a Celery task worker so that it does not time out
    # this request. Also monitor the task so we can update the migration
    # status.
    run_dataflow_migration.delay(pickle.dumps(entity), **migration_kwargs)

    resp_data = {
        'migration_history_obj_id': migration_history_obj_id
    }

    # A default 500 error message is returned if any of this breaks
    return Response(json.dumps(resp_data), status=200, mimetype='application/json')
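The run_dataflow_migration task that example #7 dispatches is not shown in any of these examples. Below is a hedged sketch of what it might look like, assuming it unpickles the MigrationHistory entity, starts the migration, and reports the final pipeline status through monitor_migration_status from example #2; celery_app, run_migration, and get_pipeline_status are assumed names, not taken from the examples.

import pickle

@celery_app.task
def run_dataflow_migration(pickled_entity, **migration_kwargs):
    # Recover the MigrationHistory entity pickled by run_data_migration.
    migration_history_obj = pickle.loads(pickled_entity)

    # Kick off the migration with the caller-supplied kwargs
    # (run_migration is an assumed helper that starts the Dataflow job).
    result = run_migration(**migration_kwargs)

    # Mirror the job's final status onto the MigrationHistory entity
    # (get_pipeline_status is an assumed helper returning e.g. 'DONE' or 'FAILED').
    migration_status = get_pipeline_status(result)
    monitor_migration_status(migration_status, migration_history_obj)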