def index_v2_page(doc_type):
    """Render one page of the document index for ``doc_type``.

    The page number is read from the ``p`` query-string argument and
    defaults to 1. Each listed document is annotated with its sentence count
    (``sent_total``) and the number of ``'sentence'``-type annotations the
    current user (``g.user``) has made on it (``progress``).

    Args:
        doc_type: Value of ``Doc.type`` to list.

    Returns:
        The rendered ``index.html`` template.
    """
    item_per_page = 50

    # A malformed ``p`` (e.g. "?p=abc") must not crash the view; fall back to
    # the first page instead of letting int() raise.
    try:
        page = int(request.args.get('p', 1))
    except (TypeError, ValueError):
        page = 1

    total = Doc.objects.filter(type=doc_type).count()
    # Integer ceiling division: gives the same answer whether or not ``/``
    # performs true division in the running interpreter.
    total_page = (total + item_per_page - 1) // item_per_page

    paginator = Pagination(
        Doc.objects(type=doc_type).order_by('seq'), page, item_per_page)

    docs_data = []
    for doc in paginator.items:
        item = doc.dump()
        item['sent_total'] = Sent.objects(doc=doc).count()
        item['progress'] = Annotation.objects(
            doc=doc, user=g.user, type='sentence').count()
        docs_data.append(item)

    # ``left``/``right`` bound the window of page links shown around the
    # current page (five on each side, clamped to the valid range).
    pagination = {
        'page': page,
        'total_page': total_page,
        'left': max(1, page - 5),
        'right': min(page + 5, total_page),
    }

    return render_template(
        'index.html', type=doc_type, docs=docs_data, g=g,
        pagination=pagination)
def generate_encrypted_file(seq_id):
    """Write an XOR-obfuscated JSON dump of one document to disk.

    The document with ``seq == seq_id`` and its sentences (ordered by
    ``index``) are serialised to JSON, XOR-ed character by character against
    ``config.Config.ENCRYPTION_KEY`` (the key repeats as needed), and written
    as comma-separated decimal codes to
    ``<this dir>/../data/encrypted/#<seq_id>_<title>``.

    Returns ``None``; if the lookup raises, nothing is written.
    """
    from itertools import cycle

    def str_xor(s1, s2):
        # XOR each character of s1 with the (cycled) key s2 and render the
        # resulting code points as a comma-separated string.
        return ",".join(
            str(ord(plain) ^ ord(key)) for plain, key in zip(s1, cycle(s2)))

    try:
        doc = Doc.objects().get(seq=seq_id)
        sents = Sent.objects(doc=doc).order_by('index')
    except Exception:
        return

    payload = {
        'doc_id': str(doc.id),
        'title': doc.title,
        'seq': doc.seq,
        'sents': [sent.dump() for sent in sents],
    }
    encrypted = str_xor(json.dumps(payload), config.Config.ENCRYPTION_KEY)

    module_dir = os.path.dirname(__file__)
    relative_name = '/../data/encrypted/#{}_{}'.format(seq_id, doc.title)
    file_path = os.path.abspath(module_dir + relative_name)

    with open(file_path, 'w') as out:
        out.write(encrypted)
def doc_migration():
    """Backfill ``Doc.type`` on every document.

    Documents whose ``mturk`` attribute is truthy become ``'mturk'``; all
    others become ``'v1'``. Each document is saved after the update.
    """
    for doc in tqdm(Doc.objects().all()):
        doc.type = 'mturk' if doc.mturk else 'v1'
        doc.save()
def delete_doc(doc_id):
    """Delete a document together with its annotations and sentences.

    Args:
        doc_id: Value of the ``id`` field of the ``Doc`` to remove.

    Raises:
        Whatever ``Doc.objects().get`` raises when the lookup fails.
    """
    doc = Doc.objects().get(id=doc_id)

    # This must query Annotation: querying Sent here would leave the
    # document's annotations behind as orphans. Annotations go first because
    # they reference the sentences removed below.
    for annotation in Annotation.objects(doc=doc):
        annotation.delete()

    for sent in Sent.objects(doc=doc):
        sent.delete()

    doc.delete()
def target_migration():
    """Rewrite the ``<<TARGET>>`` marker as ``(TARGET)`` in stored text.

    Applies the replacement to the ``text`` of every ``Doc`` and then of
    every ``Sent``, saving each record after it is updated.
    """
    old_marker, new_marker = '<<TARGET>>', '(TARGET)'

    for doc in tqdm(Doc.objects().all()):
        doc.text = doc.text.replace(old_marker, new_marker)
        doc.save()

    for sent in tqdm(Sent.objects()):
        sent.text = sent.text.replace(old_marker, new_marker)
        sent.save()
def post_annotation():
    """Create or update the current user's annotation from a JSON request.

    The request body must be a JSON object with the keys ``doc``,
    ``target_text``, ``index``, ``anchor_offset``, ``focus_offset``,
    ``type`` and ``basket``. An existing annotation matching the request is
    updated in place; otherwise a new one is created.

    Returns:
        A JSON string ``{"annotation": <annotation.dump()>}``.

    Raises:
        KeyError: If a required key is missing from the request body.
        Whatever the ``Doc`` / ``Sent`` ``get`` lookups raise when no
        matching record exists.
    """
    data = request.get_json()
    doc_id = data['doc']
    target_text = data['target_text']
    index = data['index']
    anchor_offset = data['anchor_offset']
    focus_offset = data['focus_offset']
    # Named ``annotation_type`` so the ``type`` builtin is not shadowed.
    annotation_type = data['type']
    basket = data['basket']

    doc = Doc.objects().get(id=doc_id)
    # One lookup serves both the filter below and the ``entire_text`` copy.
    sent = Sent.objects().get(doc=doc, index=index)
    user = g.user

    # Sentence-level annotations are matched per (doc, sent, index, user);
    # every other type is additionally keyed by its anchor offset.
    criteria = {
        'doc': doc,
        'sent': sent,
        'index': index,
        'user': user,
        'type': annotation_type,
    }
    if annotation_type != 'sentence':
        criteria['anchor_offset'] = anchor_offset

    # ``first()`` returns None when nothing matches, avoiding a separate
    # count query before fetching the row.
    annotation = Annotation.objects.filter(**criteria).first()
    if annotation is None:
        annotation = Annotation(
            doc=doc, sent=sent, user=user, index=index,
            type=annotation_type, anchor_offset=anchor_offset)

    annotation.anchor_offset = anchor_offset
    annotation.focus_offset = focus_offset
    annotation.entire_text = sent.text
    annotation.target_text = target_text
    annotation.basket = basket
    annotation.ip = request.remote_addr
    annotation.save()

    return json.dumps({
        'annotation': annotation.dump(),
    })
def get_annotation(doc_id):
    """Return the current user's annotations on a document as JSON.

    Args:
        doc_id: Value of the ``id`` field of the ``Doc`` to look up.

    Returns:
        A JSON string ``{"annotations": [...]}`` built from each
        annotation's ``dump()``, or a 404 ``Response`` with body
        ``'not found'`` if the lookup raises.
    """
    try:
        doc = Doc.objects().get(id=doc_id)
        annotations = Annotation.objects(doc=doc, user=g.user)
    except Exception:
        return Response('not found', status=404)

    payload = {
        'annotations': [annotation.dump() for annotation in annotations],
    }
    return json.dumps(payload)
def duplicate_doc(from_type='v2', to_type='v3'):
    """Copy every document of ``from_type`` into a new one of ``to_type``.

    Each copy keeps the source's ``text`` and ``source``, gets a title with
    ``'TARGET_ONLY'`` replaced by ``to_type``, and is assigned ``seq`` equal
    to the current ``Doc`` count plus one. The source's sentences are copied
    (``index`` and ``text``) and attached to the new document.
    """
    source_docs = Doc.objects(type=from_type).all()
    for source in tqdm(source_docs):
        clone = Doc(
            title=source.title.replace('TARGET_ONLY', to_type),
            text=source.text,
            source=source.source,
            type=to_type,
        )
        clone.seq = Doc.objects.count() + 1
        clone.save()

        for original_sent in Sent.objects(doc=source).all():
            copied_sent = Sent(
                index=original_sent.index,
                text=original_sent.text,
                doc=clone,
            )
            copied_sent.save()
def delete_doc_type(doc_type='v3'):
    """Delete every document whose ``type`` equals ``doc_type``.

    Each matching document is removed through ``delete_doc``.
    """
    matching_docs = Doc.objects(type=doc_type).all()
    for matched in tqdm(matching_docs):
        delete_doc(matched.id)
def generate_encrypted_files():
    """Export an encrypted file for every ``v1``/``v2``/``v3`` document.

    Documents of any other type are skipped; the rest are exported through
    ``generate_encrypted_file`` using their ``seq``.
    """
    exportable_types = ('v1', 'v2', 'v3')
    for doc in tqdm(Doc.objects().all()):
        if doc.type in exportable_types:
            generate_encrypted_file(seq_id=doc.seq)