def next():
    """Store a submitted tagging (POST only) and render the next cluster.

    On POST the submitted tags are lower-cased and de-duplicated, then:

    * a ``ShredTags`` entry is pushed onto the cluster named by the ``_id``
      form field and its per-user counters are updated;
    * the current user's ``processed`` / ``tags_count`` / ``tags`` are
      updated, and the per-session ``processed`` counter is incremented;
    * every tag is upserted into ``Tags`` with its usage count incremented;
    * a ``TaggingSpeed`` record is created with the time elapsed since the
      ``tagging_start`` form value.

    For every request method the next cluster for the current user is then
    fetched and the ``_shred.html`` template is rendered.
    """
    if request.method == "POST":
        # TODO: helper
        tags = {unicode.lower(raw) for raw in request.form.getlist("tags")}
        cluster_id = request.form["_id"]

        user_tags = ShredTags(
            user=g.user.id,
            tags=list(tags),
            recognizable_chars=request.form.get("recognizable_chars", ""),
            angle=int(request.form.get("angle", 0)))
        Cluster.objects(pk=cluster_id).update_one(
            push__tags=user_tags,
            inc__users_count=1,
            add_to_set__users_processed=g.user.id)

        User.objects(pk=g.user.id).update_one(
            inc__processed=1,
            inc__tags_count=len(tags),
            add_to_set__tags=list(tags))

        session["processed"] = 1 + session.get("processed", 0)

        for tag in tags:
            # set_on_insert__* values only apply when the upsert creates the
            # tag document; the usage counter and shred set are always
            # updated.
            Tags.objects(pk=tag).update_one(
                set_on_insert__is_base=False,
                set_on_insert__created_by=g.user.id,
                set_on_insert__created_at=Tags().created_at,
                inc__usages=1,
                add_to_set__shreds=request.form["_id"],
                upsert=True)

        started_at = datetime.strptime(
            request.form["tagging_start"], '%Y-%m-%d %H:%M:%S.%f')
        elapsed = datetime.utcnow() - started_at
        TaggingSpeed.objects.create(
            user=g.user.id,
            cluster=request.form["_id"],
            tags_count=len(tags),
            msec=elapsed.total_seconds() * 1000)

    cluster = Cluster.next_for_user(g.user, app.config['USERS_PER_SHRED'])

    # Fall back to an empty list both when there is no cluster and when the
    # cluster reports no auto tags.
    auto_tags = []
    if cluster:
        auto_tags = cluster.get_auto_tags() or []

    all_tags = get_tags()
    tagging_start = datetime.utcnow()

    # TODO: move to context processor
    processed_per_session = session.get("processed", 0)
    processed_total = User.objects(id=g.user.id).first()["processed"]
    # 1-based position of the current user among all user ids ordered by
    # "processed", descending.
    ranked_user_ids = list(
        User.objects.order_by("-processed").values_list("id"))
    rating = ranked_user_ids.index(g.user.id) + 1

    return render_template(
        "_shred.html",
        cluster=cluster,
        auto_tags=auto_tags,
        all_tags=all_tags,
        tagging_start=tagging_start,
        processed_per_session=processed_per_session,
        processed_total=processed_total,
        rating=rating)
def pages():
    """Attach shreds to a page (POST only) and render the user's pages.

    On POST the ``shreds`` form values are added to the page identified by
    ``page_id``; when no ``page_id`` is given, a page owned by the current
    user with the name ``page_name`` is fetched or created. The page is
    also added to the current user's tags on each matching cluster.

    For every request method the pages created by the current user are
    rendered with ``_pages.html``.
    """
    if request.method == "POST":
        form = request.form
        shred_ids = set(form.getlist("shreds"))
        page_name = form.get("page_name")
        page_id = form.get("page_id")

        if not page_id:
            page, _ = Pages.objects.get_or_create(
                created_by=g.user.id, name=page_name)
        else:
            page = Pages.objects.get(pk=page_id)

        page.update(add_to_set__shreds=shred_ids)

        for cluster in Cluster.objects(id__in=shred_ids):
            user_tags = cluster.get_user_tags(g.user)
            if user_tags is not None:
                # Round-trip through a set to drop duplicate page entries.
                with_page = user_tags.pages + [page]
                user_tags.pages = list(set(with_page))
            # TODO: else 404?
            cluster.save()

    user_pages = Pages.objects(created_by=g.user.id)
    return render_template("_pages.html", pages=user_pages)
def create(self, payload):
    """Create a new cluster from ``payload``.

    Args:
        payload: mapping read with ``.get``; ``cluster_name`` is required
            and ``cluster_id`` is passed through to the new ``Cluster``.

    Returns:
        A dict with ``msg`` and ``code`` keys. ``code`` is 1 on success
        (``msg`` is then the primary key of the inserted cluster) and 0 on
        any failure (``msg`` is then a description of the problem).
    """
    try:
        cluster_name = payload.get('cluster_name')
        # A missing name (None) is rejected the same way as an empty one
        # instead of being sent to the database.
        if not cluster_name:
            return {'msg': "Cluster name is requerid", 'code': 0}

        existing = Cluster.objects(cluster_name__exact=cluster_name)
        if existing.count() > 0:
            return {'msg': 'Cluster Aready Exists', 'code': 0}

        cluster = Cluster(
            cluster_name=cluster_name,
            cluster_id=payload.get('cluster_id'),
        )
        inserted = Cluster.objects.insert(cluster)
        return {'msg': inserted.pk, 'code': 1}
    except Exception as err:
        print(err)
        # Keep the same {'msg', 'code'} shape as every other return path
        # rather than handing the raw exception object to the caller.
        return {'msg': str(err), 'code': 0}
def test_skipping(self):
    """Skipping must cycle through unseen shreds and be counted per user.

    After creating fixture shreds, nine consecutive skips must each return
    a shred not seen before, and the first shred must record exactly one
    skipping user. The tenth skip must return an already seen shred whose
    ``users_skipped`` list is empty. The user ends with ``skipped == 10``
    and ``processed == 0``.
    """
    def skip_and_parse(shred_id):
        # POST a skip for shred_id, check the followed redirect succeeded
        # and return the id of the shred rendered next.
        response = self.client.post(
            url_for("skip"),
            data={"_id": shred_id},
            follow_redirects=True)
        html = response.get_data(as_text=True)
        self.assert200(response)
        return self.parse_shred_id(html)

    self.create_user_and_login("user")
    # NOTE(review): this username literal looks masked; confirm it matches
    # the user created by create_user_and_login above.
    user = User.objects.get(username="******")
    self.assertEqual(user.skipped, 0)
    self.assertEqual(user.processed, 0)

    self.client.post(url_for("fixtures.create_shreds"))

    first_page = self.client.get(url_for("next"))
    self.assert200(first_page)
    first_shred_id = self.parse_shred_id(first_page.get_data(as_text=True))
    current_shred_id = first_shred_id
    seen_shreds = {current_shred_id}

    for _ in xrange(9):
        current_shred_id = skip_and_parse(current_shred_id)
        self.assertNotIn(current_shred_id, seen_shreds)
        seen_shreds.add(current_shred_id)

    first_shred = Cluster.objects(id=first_shred_id).first()
    self.assertEqual(len(first_shred.users_skipped), 1)

    current_shred_id = skip_and_parse(current_shred_id)
    self.assertIn(current_shred_id, seen_shreds)

    current_shred = Cluster.objects(id=current_shred_id).first()
    self.assertEqual(len(current_shred.users_skipped), 0)

    user.reload()
    self.assertEqual(user.skipped, 10)
    self.assertEqual(user.processed, 0)
def update(self, payload):
    """Rename an existing cluster.

    Args:
        payload: mapping with the cluster's primary key under ``_id`` and
            the new name under ``cluster_name``.

    Returns:
        A dict with ``msg`` and ``code`` keys; ``code`` is 1 when the
        update call succeeded and 0 when any step raised.
    """
    try:
        # The original looked up ``_id=id``, i.e. the builtin ``id``
        # function, so no document could ever match. The key comes from
        # the payload instead.
        # NOTE(review): the ``_id`` payload key is an assumption based on
        # the name used in the original query; confirm against callers.
        cluster = Cluster.objects(pk=payload['_id']).get()
        cluster.update(cluster_name=payload['cluster_name'])
        return {'msg': "User updated!", 'code': 1}
    except Exception as err:
        print(err)
        return {'msg': "User not updated!", 'code': 0}
def _fetch_normalized_shreds_tags(repeats):
    """Collects the repeated tags of every cluster.

    Args:
        repeats: minimum number of tag occurences to be included in the
            result (passed to each cluster's ``get_repeated_tags``).

    Returns:
        Dict {obj_id: frozenset(tags)} mapping cluster ids to their tags.
        Clusters for which no tags are returned are left out.
    """
    query = Cluster.objects().timeout(False).only(
        'id', 'tags.tags', 'members.shred')
    clusters = query[:SHREDS_CAP]

    # TODO: on every iteration queries mongodb for
    # cluster->member->shred->auto_tags. That's too slow.
    tagged = (
        (cluster, cluster.get_repeated_tags(repeats)) for cluster in clusters)
    return {
        cluster.id: frozenset(found)
        for cluster, found in tagged
        if found
    }
def skip():
    """Mark the submitted cluster as skipped by the current user.

    Adds the user's id to the ``users_skipped`` set of the cluster named by
    the ``_id`` form field, increments the user's ``skipped`` counter and
    redirects to the ``next`` view.
    """
    skipped_cluster = Cluster.objects(pk=request.form["_id"])
    skipped_cluster.update_one(add_to_set__users_skipped=g.user.id)

    current_user = User.objects(pk=g.user.id)
    current_user.update_one(inc__skipped=1)

    next_url = url_for("next")
    return redirect(next_url)
def load_new_batch(fname_glob, batch):
    """Split every sheet matching ``fname_glob`` and store it as ``batch``.

    Storage is S3 or the local filesystem depending on the ``S3_ENABLED``
    config value. The batch output directory is cleared and all existing
    clusters of the batch are deleted first. Then, for every shred of every
    listed sheet, a ``Shred`` document and a single-member ``Cluster``
    wrapping it are created. Finally two compound indexes are ensured on
    ``Cluster``.

    Args:
        fname_glob: pattern handed to the storage's ``list``.
        batch: batch name; used in the output directory name, in shred ids
            and on the created clusters.

    Raises:
        bson.errors.InvalidDocument: re-raised after echoing the offending
            shred dict.
    """
    def _convert_opencv_contour(contour):
        """Converts opencv contour to a list of pairs."""
        return contour.reshape((len(contour), 2)).tolist()

    image_path_fields = ["piece_fname", "mask_fname",
                         "piece_in_context_fname"]
    # TODO: Remove when all field names match unshred's.
    field_name_map = {
        # Unshred-tag name: unshred name.
        "mask_fname": "features_fname",
    }
    drop_fields = ['simplified_contour', 'img_roi']
    drop_features = ['on_sheet_height', 'on_sheet_width', 'on_sheet_angle',
                     'bottommost', 'topmost', 'on_sheet_x', 'on_sheet_y']

    config = app.config
    storage = (S3Storage(config) if config["S3_ENABLED"]
               else LocalFSStorage(config))

    pages_processed = 0
    shreds_created = 0

    out_dir = os.path.join(config["SPLIT_OUT_DIR"], "batch_%s" % batch)
    storage.clear(out_dir)
    Cluster.objects(batch=batch).delete()

    for src_key in storage.list(fname_glob):
        fname = storage.get_file(src_key)
        sheet_name = os.path.splitext(os.path.basename(fname))[0]

        echo("\n\nProcessing file %s from %s" % (fname, sheet_name))
        sheet = SheetIO(fname, sheet_name,
                        [GeometryFeatures, ColourFeatures], out_dir, "png")
        pages_processed += 1

        for raw_shred in sheet.get_shreds():
            shred = raw_shred._asdict()
            shred["id"] = "%s:%s_%s" % (batch, shred["sheet"], shred["name"])
            shreds_created += 1

            # The contour must be converted before 'simplified_contour' is
            # dropped below.
            shred["contour"] = _convert_opencv_contour(
                shred["simplified_contour"])
            shred['tags'] = shred.pop('tags_suggestions')

            for dropped in drop_fields:
                del shred[dropped]
            features = shred['features']
            for dropped in drop_features:
                del features[dropped]

            cluster = {
                "id": shred["id"],
                "users_count": 0,
                "batch": batch,
            }

            # Upload every image and store the result under the model's
            # field name, reading it from the (possibly different) name
            # used by the importer.
            for model_field_name in image_path_fields:
                import_field_name = field_name_map.get(
                    model_field_name, model_field_name)
                shred[model_field_name] = storage.put_file(
                    shred.pop(import_field_name))

            cluster["parents"] = []

            try:
                shred_obj = Shred.objects.create(**shred)
                cluster["members"] = [
                    ClusterMember(shred=shred_obj, position=[0, 0], angle=0)]
                Cluster.objects.create(**cluster)
            except bson.errors.InvalidDocument:
                echo(shred)
                raise

    for user_set_field in ("users_processed", "users_skipped"):
        Cluster.ensure_index([user_set_field, "users_count", "batch"])