Ejemplo n.º 1
0
    def handle(self, files, migrate=False, num_workers=10, **options):
        """Check (and optionally migrate) blob records listed in JSON-lines files.

        :param files: iterable of file paths, each containing one JSON record
            per line.
        :param migrate: if true, the ``process`` worker is asked to migrate
            records as well as check them (passed through to ``process``).
        :param num_workers: pool size; also passed to ``set_max_connections``.
        :raises CommandError: if the configured blob db is not a
            ``MigratingBlobDB`` (both old and new backends are required).
        """
        set_max_connections(num_workers)
        blob_db = get_blob_db()
        if not isinstance(blob_db, MigratingBlobDB):
            raise CommandError(
                "Expected to find migrating blob db backend (got %r)" %
                blob_db)
        old_db = blob_db.old_db
        new_db = blob_db.new_db
        ignored = 0

        try:
            pool = Pool(size=num_workers)
            for filepath in files:
                print("Processing {}".format(filepath))
                with open(filepath, encoding='utf-8') as fh:
                    for line in fh:
                        # Fix: lines read from a file keep their trailing
                        # newline, so a bare `if not line` never skipped
                        # blank lines; they fell through to json.loads and
                        # were miscounted as malformed records.
                        if not line.strip():
                            continue
                        try:
                            rec = json.loads(line)
                        except ValueError:
                            ignored += 1
                            # Fix: was `print(("Ignore {}", line))`, which
                            # printed the tuple itself instead of the
                            # formatted message.
                            print("Ignore {}".format(line))
                            continue
                        pool.spawn(process, rec, old_db, new_db, migrate)

            print("CTRL+C to abort")
            # poll so a KeyboardInterrupt can be delivered while waiting
            while not pool.join(timeout=10):
                print("waiting for {} workers to finish...".format(len(pool)))
        except KeyboardInterrupt:
            pass

        if ignored:
            print("Ignored {} malformed records".format(ignored))
        # Summarize the per-type-code counters accumulated by the workers.
        for type_code, stats in sorted(Stats.items.items()):
            try:
                group = BLOB_MIXIN_MODELS[type_code].__name__
            except KeyError:
                group = CODES.name_of(type_code, "type_code %s" % type_code)
            total = stats.new + stats.old + stats.noref + stats.lost
            print("{}: checked {} records".format(group, total))
            print("  Found in new db: {}".format(stats.new))
            print("  Found in old db: {}".format(stats.old))
            print("  Not referenced: {}".format(stats.noref))
            print("  Not found: {}".format(stats.lost))
Ejemplo n.º 2
0
    def handle(self, files, migrate=False, num_workers=10, **options):
        """Check (and optionally migrate) blob records listed in JSON-lines files.

        :param files: iterable of file paths, each containing one JSON record
            per line.
        :param migrate: if true, the ``process`` worker is asked to migrate
            records as well as check them (passed through to ``process``).
        :param num_workers: pool size; also passed to ``set_max_connections``.
        :raises CommandError: if the configured blob db is not a
            ``MigratingBlobDB`` (both old and new backends are required).
        """
        set_max_connections(num_workers)
        blob_db = get_blob_db()
        if not isinstance(blob_db, MigratingBlobDB):
            raise CommandError(
                "Expected to find migrating blob db backend (got %r)" % blob_db)
        old_db = blob_db.old_db
        new_db = blob_db.new_db
        ignored = 0

        try:
            pool = Pool(size=num_workers)
            for filepath in files:
                print("Processing {}".format(filepath))
                with open(filepath, encoding='utf-8') as fh:
                    for line in fh:
                        # Fix: lines read from a file keep their trailing
                        # newline, so a bare `if not line` never skipped
                        # blank lines; they fell through to json.loads and
                        # were miscounted as malformed records.
                        if not line.strip():
                            continue
                        try:
                            rec = json.loads(line)
                        except ValueError:
                            ignored += 1
                            # Fix: was `print(("Ignore {}", line))`, which
                            # printed the tuple itself instead of the
                            # formatted message.
                            print("Ignore {}".format(line))
                            continue
                        pool.spawn(process, rec, old_db, new_db, migrate)

            print("CTRL+C to abort")
            # poll so a KeyboardInterrupt can be delivered while waiting
            while not pool.join(timeout=10):
                print("waiting for {} workers to finish...".format(len(pool)))
        except KeyboardInterrupt:
            pass

        if ignored:
            print("Ignored {} malformed records".format(ignored))
        # Summarize the per-type-code counters accumulated by the workers.
        for type_code, stats in sorted(Stats.items.items()):
            try:
                group = BLOB_MIXIN_MODELS[type_code].__name__
            except KeyError:
                group = CODES.name_of(type_code, "type_code %s" % type_code)
            total = stats.new + stats.old + stats.noref + stats.lost
            print("{}: checked {} records".format(group, total))
            print("  Found in new db: {}".format(stats.new))
            print("  Found in old db: {}".format(stats.old))
            print("  Not referenced: {}".format(stats.noref))
            print("  Not found: {}".format(stats.lost))
Ejemplo n.º 3
0
    def handle(self, slug, log_dir=None, **options):
        """Run the blob migration registered under ``slug``.

        :param slug: key into ``MIGRATIONS`` selecting the migrator.
        :param log_dir: directory for the migration log and summary files;
            when ``None``, no log files are written.
        :param options: command options; anything not in ``self.option_names``
            is discarded before being forwarded to ``migrator.migrate``.
        :raises CommandError: for an unknown slug or a malformed date range.
        """
        try:
            migrator = MIGRATIONS[slug]
        except KeyError:
            raise CommandError(USAGE)
        # Keep only the options this command declares; Django adds extras.
        for key in list(options):
            if key not in self.option_names:
                del options[key]
        if migrator.has_worker_pool:
            if options["num_workers"] > DEFAULT_BOTOCORE_MAX_POOL_CONNECTIONS:
                set_max_connections(options["num_workers"])
        else:
            num_workers = options.pop("num_workers")
            if num_workers != DEFAULT_WORKER_POOL_SIZE:
                print("--num-workers={} ignored because this migration "
                      "does not use a worker pool".format(num_workers))

        if "date_range" in options:
            raw_range = options["date_range"]
            if raw_range is None:
                del options["date_range"]
            elif "-" in raw_range:
                parts = raw_range.split("-")
                if len(parts) != 2:
                    raise CommandError("bad date range: {}".format(parts))
                # two-sided range: both endpoints given
                options["date_range"] = tuple(get_date(value) for value in parts)
            else:
                # single date: unbounded start (None means open-ended)
                options["date_range"] = (None, get_date(raw_range))

        if log_dir is None:
            summary_file = log_file = None
        else:
            timestamp = datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
            summary_file = os.path.join(
                log_dir, "{}-blob-migration-{}-summary.txt".format(slug, timestamp))
            log_file = os.path.join(
                log_dir, "{}-blob-migration-{}.txt".format(slug, timestamp))
            # refuse to clobber logs from a previous run
            assert not os.path.exists(summary_file), summary_file
            assert not os.path.exists(log_file), log_file

        def _run_migration():
            # Closure over `options` so the day-by-day loop below can swap
            # in a fresh one-day date_range before each run.
            with tee_output(summary_file):
                try:
                    total, skips = migrator.migrate(log_file, **options)
                    if skips:
                        sys.exit(skips)
                except KeyboardInterrupt:
                    print("stopped by operator")
                    if options.get('date_range'):
                        print("while processing date range {}".format(options['date_range']))
                    sys.exit(1)

        day_by_day = options.pop('process_day_by_day')
        if day_by_day and 'date_range' in options:
            start, end = options.pop('date_range')
            # run one single-day migration per day in the (inclusive) range
            for offset in range((end - start).days + 1):
                current = start + timedelta(days=offset)
                options['date_range'] = (current, current)
                print("Migrating for date {} ".format(current))
                _run_migration()
                print("Finished migration for date {} ".format(current))
        else:
            _run_migration()