Beispiel #1
0
def monitor_migration_status(migration_status, migration_history_obj):
    """
    Record a pipeline state on a MigrationHistory entity and persist it.

    The pipeline state name is translated to one of the ``Status`` values,
    stored in the entity's ``status`` property, and the entity is written
    back to the datastore as an update mutation.

    migration_status: pipeline state name, e.g. 'DONE' or 'RUNNING'. Any
        value not listed below is treated as success.
    migration_history_obj: the MigrationHistory entity to update.
        NOTE(review): the earlier docstring said this "must be pickled!",
        but the body reads ``.properties`` directly -- presumably the
        pickling applies to how the caller ships it; confirm.

    NOTE(review): the earlier docstring also said that calling forget() on
    this result frees up the worker for other tasks -- presumably this
    refers to the task result of the caller; confirm.
    """
    # Datastore client and throttler used for the final write
    project = PROJECT or 'meridianedit-staging'
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000, overload_ratio=1.25)

    # Pipeline states grouped by the Status attribute they map to
    state_groups = (
        (('DONE',), 'success'),
        (('FAILED', 'CANCELLED', 'CANCELLING'), 'failed'),
        (('STARTING', 'RUNNING', 'UPDATED', 'DRAINING', 'DRAINED'), 'running'),
        (('PENDING', 'STOPPED'), 'waiting'),
        (('UNKNOWN',), 'unknown'),
    )

    # Sometimes migration status equals none of these things. Just assume
    # success so we can kick off post-migration work. This is based on
    # observation in the wild.
    status_attr = 'success'
    for states, attr_name in state_groups:
        if migration_status in states:
            status_attr = attr_name
            break

    # Look up only the Status attribute that is actually needed
    new_status = getattr(Status, status_attr)
    migration_history_obj.properties.get('status').string_value = new_status

    # Write the mutated entity to the datastore
    pending = [Mutation(update=migration_history_obj)]
    apache_helper.write_mutations(client, project, pending, throttler, rpc_stats_callback)
Beispiel #2
0
    def get_namespaces(self):
        """
        Return the distinct namespace names/ids found in the datastore.

        When ``self.argv`` is falsy (test operations) no datastore access
        happens and a single fixed namespace is returned instead. Otherwise a
        ``__namespace__`` kind query is run against PROJECT and the last key
        path element of every returned entity supplies the namespace id (when
        the ``id`` field is set) or name.

        Identifiers whose string form is one character or shorter are
        dropped, as are repeats; first-seen order is kept.
        """
        # Skip auth-ing to db in test operations
        if not self.argv:
            return ['4952435991248896_1']

        namespace_query = query_pb2.Query()
        helper.set_kind(namespace_query, "__namespace__")
        datastore = apache_helper.get_datastore(PROJECT)
        entities = apache_helper.fetch_entities(
            PROJECT, '', namespace_query, datastore)

        found = []
        for entity in entities:
            # Get namespace name or id from the final path element
            last_element = entity.key.path[-1]
            identifier = last_element.id if last_element.HasField('id') else last_element.name

            # Avoid test namespaces and duplicates
            if len(str(identifier)) <= 1 or identifier in found:
                continue
            found.append(identifier)

        return found
Beispiel #3
0
 def start_bundle(self):
   """Reset the mutation buffer, get a datastore client, pick a batch size.

   The target batch size is ``self._fixed_batch_size`` when that is truthy;
   otherwise a new ``_Mutate._DynamicBatchSizer`` is stored on the instance
   and asked for a batch size at the current time.
   """
   self._mutations, self._mutations_size = [], 0
   self._datastore = helper.get_datastore(self._project)
   if not self._fixed_batch_size:
     # No fixed size configured: let the dynamic sizer decide
     self._batch_sizer = _Mutate._DynamicBatchSizer()
     self._target_batch_size = self._batch_sizer.get_batch_size(time.time())
   else:
     self._target_batch_size = self._fixed_batch_size
Beispiel #4
0
 def start_bundle(self):
     """Obtain a datastore client for ``self._project`` and keep it on the instance."""
     project = self._project
     self._datastore = helper.get_datastore(project)
Beispiel #5
0
 def start_bundle(self):
   """Obtain a datastore client for ``self._project`` and keep it on the instance."""
   project = self._project
   self._datastore = helper.get_datastore(project)
Beispiel #6
0
 def start_bundle(self):
   """Start with an empty mutation buffer and a datastore client for ``self._project``."""
   # Buffer is reset first so it is empty even if the client lookup raises
   self._mutations = list()
   project = self._project
   self._datastore = helper.get_datastore(project)
Beispiel #7
0
 def start_bundle(self):
     """Start with an empty mutation buffer and a datastore client for ``self._project``."""
     # Buffer is reset first so it is empty even if the client lookup raises
     self._mutations = list()
     project = self._project
     self._datastore = helper.get_datastore(project)
Beispiel #8
0
def run_data_migration():
    """
    Validate a migration request, record it, and queue the migration.

    Reads a JSON body from the current request. The body must contain
    ``name`` (which must be in ``migration.choices``), ``function_kwargs``
    and ``user``; otherwise a 400 JSON response with an ``error`` message is
    returned, naming the first missing field.

    On success a MigrationHistory entity keyed by the current timestamp
    (``%Y%m%d%H%M%S``) is inserted into the datastore with status
    ``'running'``, ``run_dataflow_migration`` is queued with the pickled
    entity plus the migration kwargs, and a 200 JSON response carrying
    ``migration_history_obj_id`` is returned.
    """
    payload = json.loads(request.get_data())

    def bad_request(message):
        # Build the 400 JSON error response shared by the validation steps
        body = json.dumps({'error': message})
        return Response(body, status=400, mimetype='application/json')

    # Some basic validation: report the first required field that is absent
    required = ('name', 'function_kwargs', 'user')
    absent = next((field for field in required if field not in payload), None)
    if absent is not None:
        return bad_request('The ' + absent + ' field is required.')

    if payload['name'] not in migration.choices:
        return bad_request('The migration name is not valid.')

    migration_name = payload['name']
    function_kwargs = payload['function_kwargs'] or {}
    user = payload['user']

    # The migration name travels with the kwargs
    function_kwargs.update(name=migration_name)

    # Create a MigrationHistory entity to keep track of the migration status
    project = PROJECT or 'meridianedit-staging'

    # Entity key: default namespace, timestamp-based name
    partition = entity_pb2.PartitionId(project_id=project, namespace_id="")
    migration_history_obj_id = datetime.now().strftime("%Y%m%d%H%M%S")
    element = entity_pb2.Key.PathElement(kind="MigrationHistory", name=migration_history_obj_id)
    entity_key = entity_pb2.Key(partition_id=partition, path=[element])

    # Create entity and give it properties
    entity = entity_pb2.Entity(key=entity_key)
    datastore_helper.add_properties(
        entity,
        {
            'name': migration_name,
            'function_kwargs': json.dumps(function_kwargs),
            'started_by': user,
            'status': 'running',
            'created': datetime.now()
        },
    )

    # Add entity to datastore
    inserts = [Mutation(insert=entity)]
    client = apache_helper.get_datastore(project)
    throttler = AdaptiveThrottler(window_ms=120000, bucket_ms=1000, overload_ratio=1.25)
    apache_helper.write_mutations(client, project, inserts, throttler, rpc_stats_callback)

    # Call the migration with any given function kwargs; they are applied
    # after (and so may override) the history id entry
    migration_kwargs = dict(migration_history_obj=migration_history_obj_id)
    migration_kwargs.update(function_kwargs)

    # Run the migration in a celery task worker to prevent it timing
    # out this connection. Also monitor the task so we can update
    # migration status.
    run_dataflow_migration.delay(pickle.dumps(entity), **migration_kwargs)

    # A default 500 error message is returned if any of this breaks
    success_body = json.dumps({'migration_history_obj_id': migration_history_obj_id})
    return Response(success_body, status=200, mimetype='application/json')