Example #1
0
def load_new_batch(flt, batch):
    """
    Imports sheets from storage as shreds of ``batch`` and records the batch.

    The batch output directory is cleared and all existing ``Shreds`` of the
    batch are deleted first, so running the import again replaces the
    previous result. Every key returned by the storage ``list(flt)`` call is
    fetched, split with ``SheetIO`` and stored as one ``Shreds`` document per
    shred. Finally a ``Batches`` record with the counters and the elapsed
    time (milliseconds) is saved and the ``Shreds`` indexes are ensured.

    :param flt: handed to the storage ``list()`` call; presumably a
        filter/pattern selecting the source sheets -- confirm against the
        storage implementations.
    :param batch: batch name; used in shred ids, in the output directory
        name and as both ``_id`` and ``name`` of the ``Batches`` record.
    :raises bson.errors.InvalidDocument: re-raised after echoing the
        offending shred dict.
    """
    storage_cls = S3Storage if app.config["S3_ENABLED"] else LocalFSStorage
    storage = storage_cls(app.config)

    page_count = 0
    shred_count = 0
    started_at = time.time()

    out_dir = os.path.join(app.config["SPLIT_OUT_DIR"], "batch_%s" % batch)
    storage.clear(out_dir)
    Shreds.objects(batch=batch).delete()

    # Keys whose values are local image paths that get replaced with
    # whatever the storage returns from put_file().
    image_keys = ("piece_fname", "features_fname", "piece_in_context_fname")

    for src_key in storage.list(flt):
        local_path = storage.get_file(src_key)
        sheet_name, _ext = os.path.splitext(os.path.basename(local_path))

        echo("\n\nProcessing file %s from %s" % (local_path, sheet_name))
        sheet = SheetIO(local_path, sheet_name,
                        [GeometryFeatures, ColourFeatures], out_dir, "png")

        page_count += 1

        for raw_shred in sheet.get_shreds():
            doc = raw_shred._asdict()
            doc["id"] = "%s:%s_%s" % (batch, doc["sheet"], doc["name"])
            # NOTE(review): indexes and queries use "users_count"; confirm
            # that "usersCount" is what the Shreds model expects here.
            doc["usersCount"] = 0
            doc["batch"] = batch
            shred_count += 1

            # The simplified contour is not stored; the full contour is
            # converted with tolist() before saving.
            del doc["simplified_contour"]
            doc["contour"] = doc["contour"].tolist()

            for key in image_keys:
                if key not in doc:
                    continue
                doc[key] = storage.put_file(doc[key])

            try:
                Shreds.objects.create(**doc)
            except bson.errors.InvalidDocument:
                # Dump the document that could not be encoded, then fail.
                echo(doc)
                raise

    elapsed_ms = int((time.time() - started_at) * 1000)
    Batches(
        _id=batch,
        name=batch,
        shreds_created=shred_count,
        pages_processed=page_count,
        import_took=elapsed_ms
    ).save()

    for index_fields in (["name", "sheet", "batch"],
                         ["users_processed", "users_count", "batch"],
                         ["users_skipped", "users_count", "batch"]):
        Shreds.ensure_index(index_fields)
Example #2
0
def next():
    """
    Stores the tags submitted for a shred (on POST) and renders the next one.

    On POST the form must contain ``_id`` (the shred id) and
    ``tagging_start``; ``tags`` may be repeated, ``recognizable_chars`` and
    ``angle`` are optional. The handler pushes a ``ShredTags`` entry onto
    the shred, updates the user's counters and tag set, bumps the
    per-session counter, upserts every tag in ``Tags`` and records how long
    tagging took in ``TaggingSpeed``.

    For any method it then renders ``_shred.html`` with the shred returned
    by ``get_next_shred()``.

    :raises ValueError: if ``angle`` is not an integer or ``tagging_start``
        matches neither accepted timestamp format.
    """
    if request.method == "POST":
        # TODO: helper
        tags = set(map(unicode.lower, request.form.getlist("tags")))

        Shreds.objects(pk=request.form["_id"]).update_one(
            push__tags=ShredTags(
                user=g.user.id,
                tags=list(tags),
                recognizable_chars=request.form.get("recognizable_chars", ""),
                angle=int(request.form.get("angle", 0))),
            inc__users_count=1,
            add_to_set__users_processed=g.user.id)

        User.objects(pk=g.user.id).update_one(
            inc__processed=1, inc__tags_count=len(tags),
            add_to_set__tags=list(tags))

        session["processed"] = session.get("processed", 0) + 1

        for tag in tags:
            # Upsert: the set_on_insert__* values only apply when the tag
            # document is created by this update.
            Tags.objects(pk=tag).update_one(
                set_on_insert__is_base=False,
                set_on_insert__created_by=g.user.id,
                set_on_insert__created_at=Tags().created_at,
                inc__usages=1,
                add_to_set__shreds=request.form["_id"],
                upsert=True)

        raw_start = request.form["tagging_start"]
        try:
            start = datetime.strptime(raw_start, '%Y-%m-%d %H:%M:%S.%f')
        except ValueError:
            # str(datetime) omits the ".ffffff" part when microsecond == 0,
            # so a timestamp taken on an exact second has no fraction.
            # Presumably the template echoes tagging_start back via str();
            # accept that form too instead of failing the request.
            start = datetime.strptime(raw_start, '%Y-%m-%d %H:%M:%S')
        end = datetime.utcnow()
        TaggingSpeed.objects.create(
            user=g.user.id,
            shred=request.form["_id"],
            tags_count=len(tags),
            msec=(end - start).total_seconds() * 1000)

    shred = get_next_shred()
    return render_template(
        "_shred.html",
        shred=shred,
        auto_tags=get_auto_tags(shred),
        all_tags=get_tags(),
        tagging_start=datetime.utcnow(),

        # TODO: move to context processor
        processed_per_session=session.get("processed", 0),
        processed_total=User.objects(id=g.user.id).first()["processed"],
        # 1-based position of the current user among all users ordered by
        # descending "processed" count.
        rating=list(User.objects.order_by(
            "-processed").values_list("id")).index(g.user.id) + 1
    )
Example #3
0
def pages():
    """
    Assigns shreds to a page (on POST) and renders the current user's pages.

    On POST the form provides the ``shreds`` ids plus either ``page_id`` (an
    existing page) or ``page_name`` (a page looked up or created for the
    current user). The shreds are added to the page, and the page is added
    to the current user's tags entry of each of those shreds, when the user
    has one.
    """
    if request.method == "POST":
        form = request.form
        shred_ids = set(form.getlist("shreds"))
        page_name = form.get("page_name")
        page_id = form.get("page_id")

        if not page_id:
            page, _created = Pages.objects.get_or_create(
                created_by=g.user.id, name=page_name)
        else:
            page = Pages.objects.get(pk=page_id)

        page.update(add_to_set__shreds=shred_ids)

        for shred in Shreds.objects(id__in=shred_ids):
            user_tags = shred.get_user_tags(g.user)
            if user_tags is not None:
                # Going through a set drops duplicates of the page.
                user_tags.pages = list(set(user_tags.pages + [page]))
            # TODO: else 404?

            # Saved whether or not the user had tags on this shred.
            shred.save()

    user_pages = Pages.objects(created_by=g.user.id)
    return render_template("_pages.html", pages=user_pages)
Example #4
0
def get_next_shred():
    """
    Picks the next shred to show to the current user.

    Shreds the user has neither processed nor skipped are preferred. When
    none is left, a shred the user skipped earlier is returned instead and
    the user is pulled from its ``users_skipped`` list. Candidates are
    ordered by ``batch`` and then ``users_count``, and the first one is
    taken.

    Returns the result of ``first()`` of the last query that was run, which
    is falsy when nothing matches.
    """
    user_id = g.user.id
    # NOTE(review): "lte" also admits shreds whose users_count already
    # equals USERS_PER_SHRED -- confirm this is the intended cap.
    max_users = app.config["USERS_PER_SHRED"]
    ordering = ("batch", "users_count")

    untouched = Shreds.objects(
        users_processed__ne=user_id,
        users_skipped__ne=user_id,
        users_count__lte=max_users)
    candidate = untouched.order_by(*ordering).first()
    if candidate:
        return candidate

    previously_skipped = Shreds.objects(
        users_skipped=user_id,
        users_count__lte=max_users)
    candidate = previously_skipped.order_by(*ordering).first()
    if candidate:
        # The shred is being served again, so drop the skip mark.
        Shreds.objects(id=candidate.id).update_one(
            pull__users_skipped=user_id)

    return candidate
Example #5
0
    def test_skipping(self):
        """
        Skipping every shred eventually brings the first skipped one back.

        After nine skips the first shred must still be marked as skipped
        and must not have been served again; the tenth skip must serve it
        again and clear the mark. The user's ``skipped`` counter ends at 10
        while ``processed`` stays 0.
        """
        self.create_user_and_login("user")
        # NOTE(review): the username literal looks masked; confirm it
        # matches the user created by create_user_and_login() above.
        user = User.objects.get(username="******")
        self.assertEqual(user.skipped, 0)
        self.assertEqual(user.processed, 0)

        self.client.post(url_for("fixtures.create_shreds"))

        def skip_and_get_next(shred_id):
            # Skip the given shred, follow the redirect and return the id
            # of the shred rendered afterwards.
            response = self.client.post(url_for("skip"),
                                        data={"_id": shred_id},
                                        follow_redirects=True)
            page = response.get_data(as_text=True)
            self.assert200(response)
            return self.parse_shred_id(page)

        def skip_marks(shred_id):
            # Number of entries in the shred's users_skipped list.
            return len(Shreds.objects(id=shred_id).first().users_skipped)

        res = self.client.get(url_for("next"))
        self.assert200(res)
        first_shred_id = self.parse_shred_id(res.get_data(as_text=True))

        current_shred_id = first_shred_id
        for _ in xrange(9):
            current_shred_id = skip_and_get_next(current_shred_id)
            self.assertNotEqual(current_shred_id, first_shred_id)

        self.assertEqual(skip_marks(first_shred_id), 1)

        # The tenth skip is expected to serve the first shred again.
        current_shred_id = skip_and_get_next(current_shred_id)
        self.assertEqual(current_shred_id, first_shred_id)

        self.assertEqual(skip_marks(first_shred_id), 0)

        user.reload()
        self.assertEqual(user.skipped, 10)
        self.assertEqual(user.processed, 0)
Example #6
0
def fetch_normalized_shreds_tags(repeats):
    """
    Maps shred ids to the tags returned by ``get_repeated_tags(repeats)``.

    At most ``SHREDS_CAP`` shreds are read, loading only their ``id`` and
    ``tags.tags`` fields. Shreds for which ``get_repeated_tags`` returns a
    falsy value are left out of the result.

    :param repeats: passed through to ``Shreds.get_repeated_tags``.
    :return: dictionary keyed by shred id; per the original documentation
        the values are sets of filtered normalized tags.
    """
    capped_shreds = Shreds.objects().only('id', 'tags.tags')[:SHREDS_CAP]
    tagged = ((shred.id, shred.get_repeated_tags(repeats))
              for shred in capped_shreds)
    return dict((shred_id, tags) for shred_id, tags in tagged if tags)
Example #7
0
def skip():
    """
    Marks the posted shred as skipped by the current user.

    Adds the user to the shred's ``users_skipped`` set, increments the
    user's ``skipped`` counter and redirects to the ``next`` view.
    """
    skipped_shred = Shreds.objects(pk=request.form["_id"])
    skipped_shred.update_one(add_to_set__users_skipped=g.user.id)

    current_user = User.objects(pk=g.user.id)
    current_user.update_one(inc__skipped=1)

    next_url = url_for("next")
    return redirect(next_url)