def main(args):
    """Import documents from stdin into a project, one JSON object per line.

    Every non-blank input line is parsed with ``json.loads`` and must yield
    an object with a ``text`` key and a ``labels`` key; ``meta`` is optional
    and defaults to an empty dict. Blank lines are ignored. Each document is
    added with ``priority=1000`` and the same ``updated`` timestamp, and the
    number of added documents is printed at the end.

    Args:
        args: parsed command-line arguments. Reads ``args.project_id`` and
            ``args.nodups``. When ``nodups`` is truthy, a document is skipped
            if its text is already returned by ``client.get_doc_texts()`` or
            was already imported earlier in this same run.

    Raises:
        json.JSONDecodeError: if a non-blank input line is not valid JSON.
        KeyError: if a document lacks the ``text`` or ``labels`` key.
    """
    init_django()
    client = Client(get_project_by_id(id=args.project_id))

    # Skip blank lines (e.g. a trailing newline) instead of letting
    # json.loads fail on an empty document and abort the whole import.
    docs = [json.loads(line) for line in sys.stdin if line.strip()]

    # None means "duplicate checking is off"; a set gives O(1) membership.
    seen_texts = set(client.get_doc_texts()) if args.nodups else None

    # One timestamp for the whole batch so all imported docs share it.
    now = datetime.datetime.utcnow()
    added = 0
    for doc in tqdm(docs):
        text = doc['text']
        if seen_texts is not None and text in seen_texts:
            # Already stored in the project or imported earlier in this run.
            continue
        client.add_doc(text, doc.get('meta', {}), doc['labels'], priority=1000, updated=now)
        if seen_texts is not None:
            # Remember the text so a repeat later in the input is skipped too.
            seen_texts.add(text)
        added += 1
    print("Added:", added)
def main(args):
    """Print the project's approved documents to stdout, one JSON line each.

    Args:
        args: parsed command-line arguments; reads ``args.project_id``.
    """
    init_django()
    client = Client(get_project_by_id(id=args.project_id))
    for approved in client.get_approved_docs():
        # ensure_ascii=False keeps non-ASCII characters unescaped in the output.
        line = json.dumps(approved, ensure_ascii=False)
        print(line)
class Learner:
    """Train on a project's stored documents and queue predictions for new texts."""

    def __init__(self, project):
        """Wrap ``project`` in a ``Client`` and keep it as ``self.client``."""
        self.client = Client(project)

    def run(self, texts, max_add=None):
        """Train a model, predict on ``texts`` and store the ranked results.

        The predictions are ordered by their ``'unsure'`` value, highest
        first. Unapproved documents are then removed via
        ``del_unapproved(max_add)`` and the ranked predictions are passed to
        ``add_docs``.

        Args:
            texts: inputs handed to ``get_predictions``.
            max_add: forwarded to both ``del_unapproved`` and ``add_docs``.
        """
        client = self.client
        label_set = client.get_labels()
        training_docs = client.get_docs()
        print("Docs to train:", len(training_docs))

        model = train_model(label_set, training_docs)
        ranked = list(get_predictions(model, texts))
        # Most uncertain predictions first (stable for equal scores).
        ranked.sort(key=lambda item: item['unsure'], reverse=True)

        client.del_unapproved(max_add)
        client.add_docs(ranked, max_add=max_add)
def main(args):
    """Train, predict on unapproved documents and write the results back.

    Each round builds a fresh ``Client``, trains a model, fetches up to
    ``args.max_update`` unapproved documents, predicts on them and updates
    every document with its predicted labels, a priority and a back-dated
    ``updated`` timestamp. Without ``args.watch`` the function returns after
    one round; with it, the function polls once per second until the
    unapproved-document count changes and then starts another round.

    Args:
        args: parsed command-line arguments; reads ``args.project_id``,
            ``args.max_update`` and ``args.watch``.
    """
    init_django()
    project = get_project_by_id(id=args.project_id)

    while True:
        client = Client(project)
        model = train(client)
        pending = client.get_unapproved_docs(with_anno=False, limit=args.max_update)

        predictions = list(get_predictions(model, pending))
        # Process the most uncertain predictions first.
        predictions.sort(key=lambda item: item['unsure'], reverse=True)

        now = datetime.datetime.utcnow()
        for pred in tqdm(predictions, desc="Updating DB"):
            doc = pred['document']
            # Higher uncertainty gives a smaller number; per the original
            # note, smaller values are the more urgent ones.
            priority = int(1000 - pred['unsure'] * 1000)
            # Back-date the timestamp by `priority` seconds.
            updated = now - datetime.timedelta(seconds=priority)
            updated_ok = client.update_doc(
                id=doc['id'],
                labels=pred['labels'],
                priority=priority,
                updated=updated,
            )
            if not updated_ok:
                print("Document", doc['id'], "was already marked as annotated. Skipping.")

        if not args.watch:
            return

        # Poll until the number of unapproved documents differs from the
        # value observed right after this round's updates.
        baseline = client.get_unapproved_doc_count()
        time.sleep(1)
        current = client.get_unapproved_doc_count()
        while current == baseline:
            time.sleep(1)
            current = client.get_unapproved_doc_count()
        print("Unapproved documents count changed, now we have:", current)
def __init__(self, project):
    """Wrap ``project`` in a ``Client`` and store it as ``self.client``."""
    project_client = Client(project)
    self.client = project_client
def main(args):
    """Call ``fix_unapproved()`` on the client of the selected project.

    Args:
        args: parsed command-line arguments; reads ``args.project_id``.
    """
    init_django()
    project = get_project_by_id(id=args.project_id)
    Client(project).fix_unapproved()
def main(args):
    """Delete unapproved documents from the project and print the result.

    Args:
        args: parsed command-line arguments; reads ``args.project_id`` and
            ``args.delete_count`` (passed straight to ``del_unapproved``).
    """
    init_django()
    project = get_project_by_id(id=args.project_id)
    removed = Client(project).del_unapproved(args.delete_count)
    print("Deleted:", removed)