Ejemplo n.º 1
0
            config.SOURCES[chunk_config.source_shard]['config']['queue_key']
        ].delay(chunk_config)

    # Create one migration task per distinct source queue_key. Sources that share a queue_key
    # reuse the task registered for the first source seen with that key.
    for _source_name, _source_config in config.SOURCES.items():
        _queue_key = _source_config['config']['queue_key']
        if _queue_key not in MIGRATE_TASKS:
            # Choose the chunk-queueing worker class from the source's declared type.
            if _source_config['type'] == 'crate':
                _task_class = QueueChunksWorkerCrate
            elif _source_config['type'] == 'mysql':
                _task_class = QueueChunksWorkerMysql
            else:
                raise UnrecoverableError('Source type %r unknown' % (_source_config['type'],))
            # The task name embeds the queue_key, so each key gets its own named task.
            MIGRATE_TASKS[_queue_key] = celery_app.task(
                name='shinkansen.worker.start_migration_' + _queue_key,
                bind=True
            )(migration_task_wrapper(_task_class))


# TODO(jpatrin): Apparently unused, remove?
def migrate_partition(
    partition_val, base_namespace, base_source_shard, base_destination_shard,
    force=False, requeue=False
):
    start = datetime.now()
    log.info('Migrating partition partition_val=%s shard=%s', partition_val, base_source_shard)
    for _, suffix in config.SHARD_SUFFIXES.items():
        namespace = base_namespace + suffix if base_namespace is not None and len(base_namespace) > 0 else ''
        source_shard = base_source_shard + suffix
        destination_shard = base_destination_shard + suffix
Ejemplo n.º 2
0
                '-i %s -p %r %s@%s "sudo bash -c \'rm -f %s\'"' % (
                    config.SSH_PRIVKEY, self.c.destination_ssh_port, config.SSH_USER, self.c.destination_host,
                    ssh.escape_double_quotes(ssh.escape_single_quotes(self.c.import_filename))))
            rm_cmd = subprocess.Popen(
                cmd,
                shell=True,
                stdin=subprocess.PIPE)
            rm_cmd.stdin.close()
            rm_cmd.wait()
            if rm_cmd.returncode != 0:
                raise CommandException('Removing file on destination server failed with exit code %r' % (
                    rm_cmd.returncode,))
        except Exception, e:
            # We catch and log all exceptions here to make this task idempotent. We DO NOT want this task to be
            # retried at this point as duplicate imports can fail on mysql and would cause us to corrupt the crate
            # chunk import records.
            self.log('Exception during removal of destination file, removal will not be retried %r import_filename=%s',
                     e, self.c.import_filename)


# Bind ``queue_import_chunk`` to the enqueue callable for the configured queue system.
if config.QUEUE_SYSTEM == 'celery':
    # TODO(jpatrin): Need import queues per-shard for mysql and crate
    # TODO(jpatrin): Once we split this queue, keep track of the queues, their names, and their shards
    # for use in the health API
    # A single task is registered for all imports; callers enqueue through its ``delay``.
    queue_import_chunk = celery_app.task(
        name='shinkansen.worker.import_chunk',
        bind=True
    )(
        migration_task_wrapper(ImportChunkWorker)
    ).delay
elif config.QUEUE_SYSTEM == 'multiprocessing':
    # Enqueue by putting the chunk directly on the import queue.
    queue_import_chunk = import_queue.put
Ejemplo n.º 3
0
        self.log('Pipe worker finished elapsed=%s', datetime.now() - start)


if config.QUEUE_SYSTEM == 'multiprocessing':
    # One JoinableQueue per distinct source queue_key; sources sharing a key share a queue.
    PIPE_QUEUES = {}
    for _shard_config in config.SOURCES.values():
        _queue_key = _shard_config['config']['queue_key']
        if _queue_key not in PIPE_QUEUES:
            PIPE_QUEUES[_queue_key] = multiprocessing.JoinableQueue()

    def queue_pipe_chunk(chunk_config):
        """Put ``chunk_config`` on the pipe queue keyed by its source shard's queue_key."""
        queue_key = config.SOURCES[chunk_config.source_shard]['config']['queue_key']
        PIPE_QUEUES[queue_key].put(chunk_config)

elif config.QUEUE_SYSTEM == 'celery':
    # One task per distinct source queue_key; the key is embedded in the task name.
    PIPE_TASKS = {}
    for _shard_config in config.SOURCES.values():
        _queue_key = _shard_config['config']['queue_key']
        if _queue_key not in PIPE_TASKS:
            PIPE_TASKS[_queue_key] = celery_app.task(
                name='shinkansen.worker.pipe_' + _queue_key,
                bind=True
            )(migration_task_wrapper(PipeChunkWorker))

    def queue_pipe_chunk(chunk_config):
        """Queue ``chunk_config`` on the pipe task keyed by its source shard's queue_key."""
        queue_key = config.SOURCES[chunk_config.source_shard]['config']['queue_key']
        PIPE_TASKS[queue_key].delay(chunk_config)
Ejemplo n.º 4
0
if config.QUEUE_SYSTEM == 'multiprocessing':
    # One JoinableQueue per distinct destination queue_key; destinations sharing a key share a queue.
    VERIFY_QUEUES = {}
    for _shard_config in config.DESTINATIONS.values():
        _queue_key = _shard_config['config']['queue_key']
        if _queue_key not in VERIFY_QUEUES:
            VERIFY_QUEUES[_queue_key] = multiprocessing.JoinableQueue()

    def queue_verification(chunk_config):
        """Put ``chunk_config`` on the verify queue keyed by its destination shard's queue_key."""
        queue_key = config.DESTINATIONS[chunk_config.destination_shard]['config']['queue_key']
        VERIFY_QUEUES[queue_key].put(chunk_config)

elif config.QUEUE_SYSTEM == 'celery':
    # One task per distinct destination queue_key; the key is embedded in the task name.
    VERIFY_TASKS = {}
    for _shard_config in config.DESTINATIONS.values():
        _queue_key = _shard_config['config']['queue_key']
        if _queue_key not in VERIFY_TASKS:
            VERIFY_TASKS[_queue_key] = celery_app.task(
                name='shinkansen.worker.verify_' + _queue_key,
                bind=True
            )(migration_task_wrapper(VerifyWorker))

    def queue_verification(chunk_config):
        """Schedule verification of ``chunk_config`` after ``config.VERIFICATION_DELAY``.

        The task is looked up by the queue_key of the chunk's destination shard.
        """
        # TODO(jpatrin): Add a unique identifier to this verification task and store this as a "key" for the lock
        # the verifier acquires. If the id doesn't match then we know a verification task has been queued after us.
        # We want the last-queued verify task to win as the countdown below is meant to allow the destination to
        # catch up (become eventually consistent).
        queue_key = config.DESTINATIONS[chunk_config.destination_shard]['config']['queue_key']
        verify_task = VERIFY_TASKS[queue_key]
        verify_task.apply_async([chunk_config], countdown=config.VERIFICATION_DELAY)