def upload_task(self, passage):
    """Create a passage plus its tokenization and annotation tasks, submitting both.

    The passage text is the first element returned by
    ``to_text(passage, sentences=False)``. A tokenization task is created and
    submitted first; the annotation task is then created with the tokenization
    task as its parent and submitted with the passage's JSON form.

    :param passage: object exposing ``ID``; handed to ``to_text`` and ``to_json``
    :return: whatever ``self.submit_annotation_task`` returns
    """
    created_passage = self.create_passage(
        text=to_text(passage, sentences=False)[0],
        type="PUBLIC",
        source=self.source,
    )
    task_spec = {
        "type": "TOKENIZATION",
        "status": "SUBMITTED",
        "project": self.project,
        "user": self.user,
        "passage": created_passage,
        "manager_comment": passage.ID,
        "user_comment": passage.ID,
        "parent": None,
        "is_demo": False,
        "is_active": True,
    }

    # Tokenization: create the task, then submit a copy enriched with the tokens.
    tok_task = self.create_tokenization_task(**task_spec)
    tok_submission = dict(tok_task)
    tok_submission.update(to_json(passage, return_dict=True, tok_task=True))
    submitted_tok = self.submit_tokenization_task(**tok_submission)

    # Annotation: same spec, re-typed and parented on the tokenization task.
    task_spec["parent"] = tok_task
    task_spec["type"] = "ANNOTATION"
    ann_submission = self.create_annotation_task(**task_spec)
    ann_submission.update(
        to_json(
            passage,
            return_dict=True,
            tok_task=submitted_tok,
            all_categories=self.layer["categories"],
        )
    )
    return self.submit_annotation_task(**ann_submission)
def upload_task(self, passage, log=None, submit=True, ids=None, upload=True):
    """Build (and optionally upload) the tokenization and annotation tasks for a passage.

    :param passage: object exposing ``ID``; handed to ``to_text`` and ``to_json``
    :param log: if truthy, used as the ``file=`` target of ``print`` for one
                tab-separated line of IDs
    :param submit: forwarded as ``submit=`` to ``self.submit_task`` for the annotation task
    :param ids: if truthy, indexed by ``passage.ID`` to get a
                ``(passage_id, tok_id, ann_id)`` triple; the existing passage and
                user tasks are then fetched instead of being created
    :param upload: if falsy, no ``create_*``/``submit_task`` call is made and the
                   locally built dicts are used in their place
    :return: the result of submitting the annotation task, or the locally built
             annotation task dict when ``upload`` is falsy
    """
    if ids:
        # Reuse what is already on the server for this passage.
        passage_id, tok_id, ann_id = ids[passage.ID]
        passage_out = self.get_passage(passage_id)
        tok_user_task_out = tok_task_out = self.get_user_task(tok_id)
        ann_user_task_in = self.get_user_task(ann_id)
    else:
        passage_out = self.create_passage(
            text=to_text(passage, sentences=False)[0], type="PUBLIC",
            source=self.source, external_id=passage.ID) if upload else passage
        task_in = dict(type="TOKENIZATION", status="ONGOING", project=self.project,
                       user=self.user, passage=passage_out, manager_comment=passage.ID,
                       user_comment=passage.ID, parent=None, is_demo=False, is_active=True)
        # With upload falsy, tok_task_out IS task_in (same object, not a copy).
        tok_task_out = self.create_task(**task_in) if upload else task_in
        # Copy taken here, so it keeps type="TOKENIZATION" even after task_in is mutated below.
        tok_user_task_in = dict(tok_task_out)
        tok_user_task_in.update(
            to_json(passage, return_dict=True, tok_task=True))
        tok_user_task_out = self.submit_task(
            **tok_user_task_in) if upload else tok_user_task_in
        # Reuse the same spec for the annotation task.
        # NOTE(review): with upload falsy this also mutates tok_task_out (same dict),
        # making it its own "parent" -- confirm this is intended.
        task_in.update(parent=tok_task_out, type="ANNOTATION")
        ann_user_task_in = self.create_task(
            **task_in) if upload else task_in
    # Applies to both branches: attach the passage's annotation JSON to the task.
    ann_user_task_in.update(
        to_json(passage, return_dict=True, tok_task=tok_user_task_out,
                all_categories=self.layer["categories"]))
    ann_user_task_out = self.submit_task(
        **ann_user_task_in, submit=submit) if upload else ann_user_task_in
    if log:
        # NOTE(review): with upload falsy and no ids, passage_out is `passage` itself and
        # the task dicts are built locally -- confirm they all support ["id"] lookups.
        print(passage.ID, passage_out["id"], tok_task_out["id"], ann_user_task_out["id"],
              file=log, sep="\t", flush=True)
    return ann_user_task_out
def upload_task(self, passage):
    """Upload ``passage`` and submit a tokenization task followed by an annotation task.

    :param passage: object exposing ``ID``; handed to ``to_text`` and ``to_json``
    :return: the value returned by ``self.submit_annotation_task``
    """
    plain_text = to_text(passage, sentences=False)[0]
    server_passage = self.create_passage(text=plain_text, type="PUBLIC", source=self.source)

    common = dict(
        type="TOKENIZATION",
        status="SUBMITTED",
        project=self.project,
        user=self.user,
        passage=server_passage,
        manager_comment=passage.ID,
        user_comment=passage.ID,
        parent=None,
        is_demo=False,
        is_active=True,
    )

    # Step 1: tokenization task, submitted with the passage's tokens.
    tokenization = self.create_tokenization_task(**common)
    tokenization_payload = dict(tokenization)
    tokens_json = to_json(passage, return_dict=True, tok_task=True)
    tokenization_payload.update(tokens_json)
    tokenization_result = self.submit_tokenization_task(**tokenization_payload)

    # Step 2: annotation task, child of the tokenization task.
    common.update(parent=tokenization, type="ANNOTATION")
    annotation_payload = self.create_annotation_task(**common)
    annotation_json = to_json(
        passage,
        return_dict=True,
        tok_task=tokenization_result,
        all_categories=self.layer["categories"],
    )
    annotation_payload.update(annotation_json)
    return self.submit_annotation_task(**annotation_payload)
def upload_passage(self, external_id, tokens):
    """Upload a pre-tokenized passage, submit its tokenization and open an annotation task.

    The passage text is the tokens joined by single spaces. The tokenization
    task is created and submitted for ``self.user``; the annotation task is
    created (not submitted) for ``self.annotation_user`` with status
    ``NOT_STARTED`` and the tokenization task as parent.

    :param external_id: non-empty string identifying the passage
    :param tokens: non-empty sequence of token strings
    :raises AssertionError: if ``external_id`` or ``tokens`` is empty
    """
    # The value is wrapped in a 1-tuple so that a tuple of tokens is formatted as
    # one value instead of being unpacked as several % arguments (a TypeError).
    # NOTE: assert is kept so callers still see AssertionError; it is skipped under -O.
    assert external_id, "Missing external ID for passage %s" % (tokens,)
    assert tokens, "Empty passage %s" % (external_id,)

    passage_out = self.create_passage(text=" ".join(tokens), external_id=external_id,
                                      type="PUBLIC", source=self.source)
    task_in = dict(type="TOKENIZATION", status="SUBMITTED", project=self.project,
                   user=self.user, passage=passage_out,
                   manager_comment="External ID: " + external_id, user_comment="",
                   parent=None, is_demo=False, is_active=True)
    tok_task_out = self.create_tokenization_task(**task_in)

    # Submit a copy of the created task enriched with the token layer.
    tok_user_task_in = dict(tok_task_out)
    passage = list(from_text(tokens, tokenized=True))[0]
    tok_user_task_in.update(to_json(passage, return_dict=True, tok_task=True))
    self.submit_tokenization_task(**tok_user_task_in)

    # The annotation task is only created here; it is left for the annotator to start.
    task_in = dict(type="ANNOTATION", status="NOT_STARTED", project=self.project,
                   user=self.annotation_user, passage=tok_task_out["passage"],
                   manager_comment="External ID: " + external_id, user_comment="",
                   parent=tok_task_out, is_demo=False, is_active=True)
    self.create_annotation_task(**task_in)
    print("Uploaded passage " + external_id + " successfully")
def main(args):
    """Write the ``convert.to_json`` output of every input passage to ``<outdir>/<ID>.json``.

    :param args: object with ``outdir``, ``filenames`` and ``verbose`` attributes
    """
    os.makedirs(args.outdir, exist_ok=True)
    for passage in get_passages_with_progress_bar(args.filenames):
        target_path = os.path.join(args.outdir, passage.ID + ".json")
        with open(target_path, "w", encoding="utf-8") as out:
            # One newline-joined document followed by a trailing newline.
            out.write("\n".join(convert.to_json(passage)))
            out.write("\n")
        if not args.verbose:
            continue
        # Report through external_write_mode so the message does not clash with the progress bar.
        with external_write_mode():
            print("Wrote '%s'" % target_path)
def upload_streussel_passage_file(self, filenames, log=None, **kwargs):
    """Upload every passage file listed in ``filenames`` and submit its tokenization task.

    ``filenames`` is the path of a text file holding one passage file path per
    line; blank lines are skipped. In each passage file, blank lines are
    ignored, lines starting with ``#`` are treated as metadata and all other
    lines are joined with single spaces to form the passage text. A metadata
    line with exactly four whitespace-separated fields whose second field is
    ``sent_id`` supplies the external ID (the part of the fourth field between
    the first and second ``-``); any other ``#`` line is reported as a format
    error on stderr.

    :param filenames: path of the listing file
    :param log: optional path; if given, one tab-separated line (second-to-last
                dot-separated part of the file name, passage id, tokenization
                task id) is written per uploaded passage
    :param kwargs: accepted and discarded
    """
    del kwargs
    log_h = open(log, "w", encoding="utf-8") if log else None
    try:
        with open(filenames) as f_all:
            for filename in f_all:
                filename = filename.strip()
                if not filename:
                    # A blank line (e.g. a trailing newline) would otherwise be opened as "".
                    continue
                text_lines = []
                external_id = "None given"
                with open(filename, encoding="utf-8") as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        if line.startswith("#"):
                            fields = line.split()
                            if len(fields) != 4 or fields[1] != "sent_id":
                                print("FORMAT ERROR in " + filename, file=sys.stderr)
                            else:
                                external_id = fields[3].split("-")[1]
                        else:
                            text_lines.append(line)
                # Lines are already stripped and non-empty, so a plain join needs no extra strip.
                passage_text = " ".join(text_lines)
                passage_out = self.create_passage(text=passage_text, external_id=external_id,
                                                  type="PUBLIC", source=self.source)
                task_in = dict(type="TOKENIZATION", status="SUBMITTED", project=self.project,
                               user=self.user, passage=passage_out,
                               manager_comment="External ID: " + external_id, user_comment="",
                               parent=None, is_demo=False, is_active=True)
                tok_task_out = self.create_task(**task_in)
                tok_user_task_in = dict(tok_task_out)
                passage = list(from_text(passage_text.split(), tokenized=True))[0]
                tok_user_task_in.update(to_json(passage, return_dict=True, tok_task=True))
                self.submit_task(**tok_user_task_in)
                print("Uploaded passage " + filename + " successfully.", file=sys.stderr)
                if log_h:
                    print(filename.split(".")[-2], passage_out["id"], tok_task_out["id"],
                          file=log_h, sep="\t")
    finally:
        # Close the log even when an upload fails midway, so written entries are not lost.
        if log_h:
            log_h.close()
def upload_passage(self, external_id, tokens):
    """Upload a pre-tokenized passage, submit its tokenization and create an annotation task.

    The passage text is the tokens joined by single spaces. The tokenization
    task is created and submitted for ``self.user``. The annotation task is
    created (not submitted) for ``self.annotation_user`` with status
    ``NOT_STARTED``, the tokenization task as parent and the external ID as
    user comment.

    :param external_id: non-empty string identifying the passage
    :param tokens: non-empty sequence of token strings
    :raises AssertionError: if ``external_id`` or ``tokens`` is empty
    """
    # Wrapping the value in a 1-tuple keeps a tuple of tokens from being unpacked
    # as several % arguments, which raised TypeError instead of AssertionError.
    # NOTE: assert is kept so callers still see AssertionError; it is skipped under -O.
    assert external_id, "Missing external ID for passage %s" % (tokens,)
    assert tokens, "Empty passage %s" % (external_id,)

    passage_out = self.create_passage(text=" ".join(tokens), external_id=external_id,
                                      type="PUBLIC", source=self.source)
    task_in = dict(type="TOKENIZATION", status="SUBMITTED", project=self.project,
                   user=self.user, passage=passage_out,
                   manager_comment="External ID: " + external_id, user_comment="",
                   parent=None, is_demo=False, is_active=True)
    tok_task_out = self.create_task(**task_in)

    # Submit a copy of the created task enriched with the token layer.
    tok_user_task_in = dict(tok_task_out)
    passage = list(from_text(tokens, tokenized=True))[0]
    tok_user_task_in.update(to_json(passage, return_dict=True, tok_task=True))
    self.submit_task(**tok_user_task_in)

    # The annotation task is only created here, not submitted.
    task_in = dict(type="ANNOTATION", status="NOT_STARTED", project=self.project,
                   user=self.annotation_user, passage=tok_task_out["passage"],
                   manager_comment="External ID: " + external_id, user_comment=external_id,
                   parent=tok_task_out, is_demo=False, is_active=True)
    self.create_task(**task_in)
    print("Uploaded passage " + external_id + " successfully")
def tokenize_and_upload(self, filename, log=None, lang=None, **kwargs):
    """Split a text file into passages and upload each with its tokenization and annotation tasks.

    Passages come from ``from_text`` applied to the open file, with the file's
    base name (extension removed, spaces replaced by underscores) as the
    passage ID prefix. For each passage a tokenization task is created and
    submitted, and an annotation task is created with the tokenization task as
    parent (it is not submitted).

    :param filename: path of the UTF-8 text file to read
    :param log: optional path; if given, one tab-separated line (passage ID,
                passage id, tokenization task id, annotation task id) is
                written and flushed per passage
    :param lang: forwarded to ``from_text`` as ``lang=``
    :param kwargs: accepted and discarded
    """
    del kwargs
    log_h = open(log, "w", encoding="utf-8") if log else None
    try:
        prefix = os.path.splitext(os.path.basename(filename))[0].replace(" ", "_")
        with open(filename, encoding="utf-8") as f:
            for passage, text in from_text(f, passage_id=prefix, lang=lang, return_text=True):
                passage_out = self.create_passage(text=text, type="PUBLIC", source=self.source)
                task_in = dict(type="TOKENIZATION", status="SUBMITTED", project=self.project,
                               user=self.user, passage=passage_out, manager_comment=passage.ID,
                               user_comment="", parent=None, is_demo=False, is_active=True)
                tok_task_out = self.create_task(**task_in)
                tok_user_task_in = dict(tok_task_out)
                tok_user_task_in.update(to_json(passage, return_dict=True, tok_task=True))
                self.submit_task(**tok_user_task_in)
                # Same spec, re-typed and parented on the tokenization task.
                task_in.update(parent=tok_task_out, type="ANNOTATION")
                ann_user_task_out = self.create_task(**task_in)
                print("Uploaded passage " + filename + " successfully.", file=sys.stderr)
                if log_h:
                    print(passage.ID, passage_out["id"], tok_task_out["id"],
                          ann_user_task_out["id"], file=log_h, sep="\t", flush=True)
    finally:
        # Close the log even if reading or an upload fails midway.
        if log_h:
            log_h.close()
def upload_streussel_passage_file(self, filenames, log=None, **kwargs):
    """Upload every passage file listed in ``filenames`` and submit its tokenization task.

    ``filenames`` is the path of a text file holding one passage file path per
    line; blank lines are skipped. In each passage file, blank lines are
    ignored, lines starting with ``#`` are treated as metadata and all other
    lines are joined with single spaces to form the passage text. A metadata
    line with exactly four whitespace-separated fields whose second field is
    ``sent_id`` supplies the external ID (the part of the fourth field between
    the first and second ``-``); any other ``#`` line is reported as a format
    error on stderr.

    :param filenames: path of the listing file
    :param log: optional path; if given, one tab-separated line (second-to-last
                dot-separated part of the file name, passage id, tokenization
                task id) is written per uploaded passage
    :param kwargs: accepted and discarded
    """
    del kwargs
    log_h = open(log, "w", encoding="utf-8") if log else None
    try:
        with open(filenames) as f_all:
            for filename in f_all:
                filename = filename.strip()
                if not filename:
                    # A blank line (e.g. a trailing newline) would otherwise be opened as "".
                    continue
                text_lines = []
                external_id = "None given"
                with open(filename, encoding="utf-8") as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        if line.startswith("#"):
                            fields = line.split()
                            if len(fields) != 4 or fields[1] != "sent_id":
                                print("FORMAT ERROR in " + filename, file=sys.stderr)
                            else:
                                external_id = fields[3].split("-")[1]
                        else:
                            text_lines.append(line)
                # Lines are already stripped and non-empty, so a plain join needs no extra strip.
                passage_text = " ".join(text_lines)
                passage_out = self.create_passage(text=passage_text, external_id=external_id,
                                                  type="PUBLIC", source=self.source)
                task_in = dict(type="TOKENIZATION", status="SUBMITTED", project=self.project,
                               user=self.user, passage=passage_out,
                               manager_comment="External ID: " + external_id, user_comment="",
                               parent=None, is_demo=False, is_active=True)
                tok_task_out = self.create_task(**task_in)
                tok_user_task_in = dict(tok_task_out)
                passage = list(from_text(passage_text.split(), tokenized=True))[0]
                tok_user_task_in.update(to_json(passage, return_dict=True, tok_task=True))
                self.submit_task(**tok_user_task_in)
                print("Uploaded passage " + filename + " successfully.", file=sys.stderr)
                if log_h:
                    print(filename.split(".")[-2], passage_out["id"], tok_task_out["id"],
                          file=log_h, sep="\t")
    finally:
        # Close the log even when an upload fails midway, so written entries are not lost.
        if log_h:
            log_h.close()