Example #1
import os
import time

import bson.errors

# Project-local dependencies (app, echo, S3Storage, LocalFSStorage, the
# Shreds/Batches models, SheetIO and its feature extractors) are assumed
# to be importable from the surrounding package.


def load_new_batch(flt, batch):
    # Pick the storage backend according to the app configuration.
    if app.config["S3_ENABLED"]:
        strg = S3Storage(app.config)
    else:
        strg = LocalFSStorage(app.config)

    pages_processed = 0
    shreds_created = 0
    import_started = time.time()

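    # Each batch gets its own output directory; stale results for this
    # batch are cleared from storage and from MongoDB before importing.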
    out_dir = os.path.join(app.config["SPLIT_OUT_DIR"], "batch_%s" % batch)
    strg.clear(out_dir)
    Shreds.objects(batch=batch).delete()

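    # Walk every source file in storage that matches the filter, fetching
    # each one locally for processing.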
    for src_key in strg.list(flt):
        fname = strg.get_file(src_key)
        sheet_name = os.path.splitext(os.path.basename(fname))[0]

        echo("\n\nProcessing sheet %s from file %s" % (sheet_name, fname))

        # SheetIO splits the sheet image into individual shreds and runs
        # the configured feature extractors over each one.
        sheet = SheetIO(fname, sheet_name, [GeometryFeatures, ColourFeatures],
                        out_dir, "png")

        pages_processed += 1

        for c in sheet.get_shreds():
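            # Shreds come back as namedtuples; convert each to a dict so
            # extra fields can be attached and the result passed to the
            # MongoEngine model as kwargs.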
            c = c._asdict()
            c["id"] = "%s:%s_%s" % (batch, c["sheet"], c["name"])
            c["usersCount"] = 0
            c["batch"] = batch
            shreds_created += 1

            # The simplified contour is dropped, and the full contour (a
            # numpy array) is converted to a plain list so BSON can store it.
            del c["simplified_contour"]
            c["contour"] = c["contour"].tolist()

            # Upload the rendered images to storage and replace the local
            # paths with the keys returned by the storage backend.
            img_keys = ("piece_fname", "features_fname", "piece_in_context_fname")

            for k in img_keys:
                if k in c:
                    c[k] = strg.put_file(c[k])

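            # If Mongo refuses the document, dump it for debugging before
            # re-raising.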
            try:
                Shreds.objects.create(**c)
            except bson.errors.InvalidDocument:
                echo(c)
                raise

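    # Record batch-level statistics, including the import duration in ms.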
    Batches(
        _id=batch,
        name=batch,
        shreds_created=shreds_created,
        pages_processed=pages_processed,
        import_took=int((time.time() - import_started) * 1000)
    ).save()

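    # Compound indexes covering the query patterns used when serving
    # shreds to users.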
    Shreds.ensure_index(["name", "sheet", "batch"])
    Shreds.ensure_index(["users_processed", "users_count", "batch"])
    Shreds.ensure_index(["users_skipped", "users_count", "batch"])
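
For context, here is a minimal sketch of how a command like this might be
wired up and invoked. The click registration, option names, and defaults
are assumptions for illustration; the original listing does not show how
load_new_batch is called.

import click

@click.command()
@click.option("--flt", default="*",
              help="Filter for source keys in storage (assumed default).")
@click.option("--batch", required=True,
              help="Batch identifier, e.g. a date stamp (assumed).")
def import_batch(flt, batch):
    """Hypothetical CLI entry point wrapping load_new_batch()."""
    load_new_batch(flt, batch)

if __name__ == "__main__":
    import_batch()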