import os
import time

import bson.errors

# The remaining names used below (app, echo, Shreds, Batches, S3Storage,
# LocalFSStorage, SheetIO, GeometryFeatures, ColourFeatures) are provided
# elsewhere in this project; only the standard-library and bson imports are
# added here.


def load_new_batch(flt, batch):
    # Pick the storage backend according to the app config.
    if app.config["S3_ENABLED"]:
        strg = S3Storage(app.config)
    else:
        strg = LocalFSStorage(app.config)

    pages_processed = 0
    shreds_created = 0
    import_started = time.time()

    # Start from a clean slate: drop the previous split output and any shreds
    # already stored for this batch.
    out_dir = os.path.join(app.config["SPLIT_OUT_DIR"], "batch_%s" % batch)
    strg.clear(out_dir)
    Shreds.objects(batch=batch).delete()

    for src_key in strg.list(flt):
        fname = strg.get_file(src_key)
        sheet_name = os.path.splitext(os.path.basename(fname))[0]
        echo("\n\nProcessing file %s (sheet %s)" % (fname, sheet_name))

        sheet = SheetIO(fname, sheet_name,
                        [GeometryFeatures, ColourFeatures], out_dir, "png")
        pages_processed += 1

        for c in sheet.get_shreds():
            c = c._asdict()
            c["id"] = "%s:%s_%s" % (batch, c["sheet"], c["name"])
            c["usersCount"] = 0
            c["batch"] = batch
            shreds_created += 1

            # The simplified contour is not persisted; the full contour is a
            # numpy array and must be converted to a plain list for BSON.
            del c["simplified_contour"]
            c["contour"] = c["contour"].tolist()

            # Upload the rendered images and replace the local paths with the
            # keys returned by the storage backend.
            imgs = ("piece_fname", "features_fname", "piece_in_context_fname")
            for k in imgs:
                if k in c:
                    c[k] = strg.put_file(c[k])

            try:
                Shreds.objects.create(**c)
            except bson.errors.InvalidDocument:
                # Dump the offending document before re-raising to make
                # non-serialisable fields easier to spot.
                echo(c)
                raise

    # Record import statistics for the batch (duration in milliseconds).
    Batches(
        _id=batch,
        name=batch,
        shreds_created=shreds_created,
        pages_processed=pages_processed,
        import_took=int((time.time() - import_started) * 1000),
    ).save()

    # Compound indexes used by the lookup and task-assignment queries.
    Shreds.ensure_index(["name", "sheet", "batch"])
    Shreds.ensure_index(["users_processed", "users_count", "batch"])
    Shreds.ensure_index(["users_skipped", "users_count", "batch"])
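
# A minimal usage sketch with hypothetical values (in the real project this
# function is more likely wired up as a management/CLI command than called
# directly):
#
#     with app.app_context():
#         load_new_batch("sheets/2015-06/", "2015-06")
#
# Here "sheets/2015-06/" is an illustrative prefix filter passed through to
# strg.list(), and "2015-06" an illustrative batch id.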