def handle(self, files, migrate=False, num_workers=10, **options):
    """Check blob records listed in JSON-lines files against the migrating db.

    Each input file contains one JSON record per line. Records are fed to
    ``process`` via a worker pool; results are accumulated in ``Stats`` and a
    per-type-code summary is printed at the end.

    :param files: iterable of paths to JSON-lines files.
    :param migrate: when True, workers also migrate blobs (passed through to
        ``process``).
    :param num_workers: worker pool size; also used to raise the max
        connection limit via ``set_max_connections``.
    :raises CommandError: if the configured blob db backend is not a
        ``MigratingBlobDB``.
    """
    set_max_connections(num_workers)
    blob_db = get_blob_db()
    if not isinstance(blob_db, MigratingBlobDB):
        raise CommandError(
            "Expected to find migrating blob db backend (got %r)" % blob_db)
    old_db = blob_db.old_db
    new_db = blob_db.new_db
    ignored = 0
    try:
        pool = Pool(size=num_workers)
        for filepath in files:
            print("Processing {}".format(filepath))
            with open(filepath, encoding='utf-8') as fh:
                for line in fh:
                    if not line:
                        continue
                    try:
                        rec = json.loads(line)
                    except ValueError:
                        ignored += 1
                        # BUG FIX: was print(("Ignore {}", line)), which
                        # printed the tuple itself instead of the offending
                        # line interpolated into the message.
                        print("Ignore {}".format(line))
                        continue
                    pool.spawn(process, rec, old_db, new_db, migrate)
        print("CTRL+C to abort")
        # pool.join(timeout=...) returns falsy until all workers finish;
        # report progress every 10 seconds while waiting.
        while not pool.join(timeout=10):
            print("waiting for {} workers to finish...".format(len(pool)))
    except KeyboardInterrupt:
        # Operator abort: fall through and print whatever stats we have.
        pass
    if ignored:
        print("Ignored {} malformed records".format(ignored))
    for type_code, stats in sorted(Stats.items.items()):
        try:
            group = BLOB_MIXIN_MODELS[type_code].__name__
        except KeyError:
            group = CODES.name_of(type_code, "type_code %s" % type_code)
        total = stats.new + stats.old + stats.noref + stats.lost
        print("{}: checked {} records".format(group, total))
        print(" Found in new db: {}".format(stats.new))
        print(" Found in old db: {}".format(stats.old))
        print(" Not referenced: {}".format(stats.noref))
        print(" Not found: {}".format(stats.lost))
def handle(self, slug, log_dir=None, **options):
    """Run the blob migration registered under ``slug``.

    Normalizes command options (worker pool size, optional ``date_range``),
    optionally tees output to timestamped log files under ``log_dir``, and
    either runs the migration once or, with ``--process-day-by-day`` and a
    bounded date range, runs it once per day in the range.

    :param slug: key into ``MIGRATIONS`` selecting the migrator.
    :param log_dir: directory for the migration log and summary files;
        when None, no log files are written.
    :raises CommandError: for an unknown slug or a malformed date range.
    """
    try:
        migrator = MIGRATIONS[slug]
    except KeyError:
        raise CommandError(USAGE)
    # Drop options not added by this command (e.g. Django's own defaults).
    for name in list(options):
        if name not in self.option_names:
            options.pop(name)
    if not migrator.has_worker_pool:
        num_workers = options.pop("num_workers")
        if num_workers != DEFAULT_WORKER_POOL_SIZE:
            print("--num-workers={} ignored because this migration "
                  "does not use a worker pool".format(num_workers))
    elif options["num_workers"] > DEFAULT_BOTOCORE_MAX_POOL_CONNECTIONS:
        set_max_connections(options["num_workers"])
    if "date_range" in options:
        rng = options["date_range"]
        if rng is None:
            options.pop("date_range")
        else:
            if "-" not in rng:
                # A single date means "everything up to this date".
                rng = (None, get_date(rng))
            else:
                rng = rng.split("-")
                if len(rng) != 2:
                    raise CommandError("bad date range: {}".format(rng))
                rng = tuple(get_date(v) for v in rng)
            # date_range is a tuple containing two date values;
            # a value of None means that side of the range is unbounded.
            options["date_range"] = rng
    if log_dir is None:
        summary_file = log_file = None
    else:
        now = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
        summary_file = os.path.join(
            log_dir, "{}-blob-migration-{}-summary.txt".format(slug, now))
        log_file = os.path.join(
            log_dir, "{}-blob-migration-{}.txt".format(slug, now))
        # The timestamp makes collisions unexpected; fail loudly if one occurs.
        assert not os.path.exists(summary_file), summary_file
        assert not os.path.exists(log_file), log_file

    def _migrate():
        # Run one migration pass, exiting with the skip count (non-zero)
        # if any records were skipped, or 1 on operator interrupt.
        with tee_output(summary_file):
            try:
                total, skips = migrator.migrate(log_file, **options)
                if skips:
                    sys.exit(skips)
            except KeyboardInterrupt:
                print("stopped by operator")
                if options.get('date_range'):
                    print("while processing date range {}".format(
                        options['date_range']))
                sys.exit(1)

    process_day_by_day = options.pop('process_day_by_day')
    if 'date_range' in options and process_day_by_day:
        # Run the migration once per day in the (inclusive) range.
        start, end = options.pop('date_range')
        num_days = (end - start).days
        for day in range(num_days + 1):
            date = start + timedelta(days=day)
            options['date_range'] = (date, date)
            # FIX: the original string literal contained a raw embedded
            # newline ("Migrating for date {} <newline>"), which is invalid
            # Python; reconstructed as a single well-formed literal.
            print("Migrating for date {} ".format(date))
            _migrate()
            print("Finished migration for date {} ".format(date))
    else:
        _migrate()