config.SOURCES[chunk_config.source_shard]['config']['queue_key'] ].delay(chunk_config) for _source_name, _source_config in config.SOURCES.items(): _queue_key = _source_config['config']['queue_key'] if _queue_key in MIGRATE_TASKS: continue if _source_config['type'] == 'mysql': _task_class = QueueChunksWorkerMysql elif _source_config['type'] == 'crate': _task_class = QueueChunksWorkerCrate else: raise UnrecoverableError('Source type %r unknown' % (_source_config['type'],)) MIGRATE_TASKS[_queue_key] = celery_app.task( bind=True, name='shinkansen.worker.start_migration_' + _queue_key )(migration_task_wrapper( _task_class )) # TODO(jpatrin): Apparently unused, remove? def migrate_partition( partition_val, base_namespace, base_source_shard, base_destination_shard, force=False, requeue=False ): start = datetime.now() log.info('Migrating partition partition_val=%s shard=%s', partition_val, base_source_shard) for _, suffix in config.SHARD_SUFFIXES.items(): namespace = base_namespace + suffix if base_namespace is not None and len(base_namespace) > 0 else '' source_shard = base_source_shard + suffix destination_shard = base_destination_shard + suffix
'-i %s -p %r %s@%s "sudo bash -c \'rm -f %s\'"' % ( config.SSH_PRIVKEY, self.c.destination_ssh_port, config.SSH_USER, self.c.destination_host, ssh.escape_double_quotes(ssh.escape_single_quotes(self.c.import_filename)))) rm_cmd = subprocess.Popen( cmd, shell=True, stdin=subprocess.PIPE) rm_cmd.stdin.close() rm_cmd.wait() if rm_cmd.returncode != 0: raise CommandException('Removing file on destination server failed with exit code %r' % ( rm_cmd.returncode,)) except Exception, e: # We catch and log all exceptions here to make this task idempotent. We DO NOT want this task to be # retried at this point as duplicate imports can fail on mysql and would cause us to corrupt the crate # chunk import records. self.log('Exception during removal of destination file, removal will not be retried %r import_filename=%s', e, self.c.import_filename) if config.QUEUE_SYSTEM == 'multiprocessing': queue_import_chunk = import_queue.put elif config.QUEUE_SYSTEM == 'celery': # TODO(jpatrin): Need import queues per-shard for mysql and crate # TODO(jpatrin): Once we split this queue, keep track of the queues, their names, and their shards # for use in the health API queue_import_chunk = celery_app.task( bind=True, name='shinkansen.worker.import_chunk' )(migration_task_wrapper(ImportChunkWorker)).delay
self.log('Pipe worker finished elapsed=%s', datetime.now() - start)


# Routing of pipe work. Depending on config.QUEUE_SYSTEM, chunks are handed to
# either an in-process JoinableQueue or a celery task. In both cases the target
# is selected by the ``queue_key`` of the chunk's source shard, so sources that
# are configured with the same queue_key share one queue/task.
if config.QUEUE_SYSTEM == 'multiprocessing':
    PIPE_QUEUES = {}
    for _shard_config in config.SOURCES.values():
        # Only the first source seen for a given queue_key creates the queue.
        if _shard_config['config']['queue_key'] not in PIPE_QUEUES:
            PIPE_QUEUES[_shard_config['config']['queue_key']] = multiprocessing.JoinableQueue()

    def queue_pipe_chunk(chunk_config):
        """Put ``chunk_config`` on the pipe queue belonging to its source shard.

        The queue is found through the ``queue_key`` configured in
        ``config.SOURCES`` for ``chunk_config.source_shard``. A ``KeyError``
        propagates if the shard or its queue is not configured.
        """
        source = config.SOURCES[chunk_config.source_shard]
        target_queue = PIPE_QUEUES[source['config']['queue_key']]
        target_queue.put(chunk_config)

elif config.QUEUE_SYSTEM == 'celery':
    PIPE_TASKS = {}
    for _shard_config in config.SOURCES.values():
        # Register one celery task per distinct queue_key; skip keys that
        # already have a task so the same task name is never registered twice.
        if _shard_config['config']['queue_key'] not in PIPE_TASKS:
            PIPE_TASKS[_shard_config['config']['queue_key']] = celery_app.task(
                bind=True,
                name='shinkansen.worker.pipe_' + _shard_config['config']['queue_key']
            )(
                migration_task_wrapper(PipeChunkWorker)
            )

    def queue_pipe_chunk(chunk_config):
        """Dispatch ``chunk_config`` to the celery pipe task of its source shard.

        The task is found through the ``queue_key`` configured in
        ``config.SOURCES`` for ``chunk_config.source_shard`` and invoked with
        ``.delay``. A ``KeyError`` propagates if the shard or its task is not
        configured.
        """
        source = config.SOURCES[chunk_config.source_shard]
        pipe_task = PIPE_TASKS[source['config']['queue_key']]
        pipe_task.delay(chunk_config)
# Routing of verification work. Depending on config.QUEUE_SYSTEM, chunks are
# handed to either an in-process JoinableQueue or a celery task. In both cases
# the target is selected by the ``queue_key`` of the chunk's destination shard,
# so destinations configured with the same queue_key share one queue/task.
if config.QUEUE_SYSTEM == 'multiprocessing':
    VERIFY_QUEUES = {}
    for _shard_config in config.DESTINATIONS.values():
        # Only the first destination seen for a given queue_key creates the queue.
        if _shard_config['config']['queue_key'] not in VERIFY_QUEUES:
            VERIFY_QUEUES[_shard_config['config']['queue_key']] = multiprocessing.JoinableQueue()

    def queue_verification(chunk_config):
        """Put ``chunk_config`` on the verify queue of its destination shard.

        The queue is found through the ``queue_key`` configured in
        ``config.DESTINATIONS`` for ``chunk_config.destination_shard``. A
        ``KeyError`` propagates if the shard or its queue is not configured.
        """
        destination = config.DESTINATIONS[chunk_config.destination_shard]
        target_queue = VERIFY_QUEUES[destination['config']['queue_key']]
        target_queue.put(chunk_config)

elif config.QUEUE_SYSTEM == 'celery':
    VERIFY_TASKS = {}
    for _shard_config in config.DESTINATIONS.values():
        # Register one celery task per distinct queue_key; skip keys that
        # already have a task so the same task name is never registered twice.
        if _shard_config['config']['queue_key'] not in VERIFY_TASKS:
            VERIFY_TASKS[_shard_config['config']['queue_key']] = celery_app.task(
                bind=True,
                name='shinkansen.worker.verify_' + _shard_config['config']['queue_key']
            )(
                migration_task_wrapper(VerifyWorker)
            )

    def queue_verification(chunk_config):
        """Schedule verification of ``chunk_config`` on its destination's task.

        The task is found through the ``queue_key`` configured in
        ``config.DESTINATIONS`` for ``chunk_config.destination_shard`` and is
        submitted with a countdown of ``config.VERIFICATION_DELAY``.
        """
        # TODO(jpatrin): Add a unique identifier to this verification task and store this as a "key" for the
        # lock the verifier acquires. If the id doesn't match then we know a verification task has been queued
        # after us. We want the last-queued verify task to win as the countdown below is meant to allow the
        # destination to catch up (become eventually consistent).
        destination = config.DESTINATIONS[chunk_config.destination_shard]
        verify_task = VERIFY_TASKS[destination['config']['queue_key']]
        verify_task.apply_async([chunk_config], countdown=config.VERIFICATION_DELAY)