Example #1
0
def main():
    """Load a sample text annotation from JSON, print it, and visualize it.

    Reads the serialized annotation from a fixed relative path, builds a
    ``TextAnnotation`` from the raw JSON string, prints the annotation and
    its ``get_views`` attribute, then hands it to ``TextVizualization.do``
    for the "SRL_VERB" view.
    """
    sample_path = '../../tests/sample_text_annotation2.json'
    with open(sample_path, 'r') as sample_file:
        raw_json = sample_file.read()

    ta = TextAnnotation(json_str=raw_json)
    print(ta)
    print(ta.get_views)

    # Alternative view to visualize:
    # TextVizualization.do(ta, "MENTION")
    # NOTE(review): meaning of the third positional argument is not visible here.
    TextVizualization.do(ta, "SRL_VERB", True)
Example #2
0
def load_ta_from_jsons(json_dir):
    """Load every file in *json_dir* as a ``TextAnnotation``.

    :param json_dir: directory whose direct entries are serialized text
        annotations (one JSON document per file).
    :return: dict mapping each file's base name (used as the document id)
        to the ``TextAnnotation`` built from its contents. Empty when the
        directory has no entries or does not exist.
    """
    import os  # local import: only the base-name helper is needed here

    doc2ta = {}
    for tafile in glob.glob(json_dir + "/*"):
        # Use a context manager so each file handle is closed promptly
        # instead of being left for the garbage collector.
        with open(tafile) as ta_handle:
            ta = TextAnnotation(json_str=ta_handle.read())
        # basename copes with whichever path separator glob returns.
        doc2ta[os.path.basename(tafile)] = ta
    return doc2ta
Example #3
0
 def inference_on_text(self, text: str) -> TextAnnotation:
     """Annotate raw *text* and attach this annotator's view to the result.

     The mention detector held by ``self.inf_reader`` turns the text into a
     dict, which is serialized to JSON and wrapped in a ``TextAnnotation``.
     A view built by ``self.create_view`` is then named after
     ``self.provided_view`` and stored in the annotation's view dictionary
     under that same name.

     :param text: the raw document text.
     :return: the ``TextAnnotation`` with the new view added.
     """
     detector = self.inf_reader.mention_detector
     mentions_json = json.dumps(detector.get_mentions_from_text(text))
     annotation = TextAnnotation(mentions_json)
     # return self.inference_on_ta(ccgdoc)
     linked_view = self.create_view(annotation)
     view_name = self.provided_view
     linked_view.view_name = view_name
     annotation.view_dictionary[view_name] = linked_view
     return annotation
Example #4
0
 def get_text_annotation_for_model(self, text: str, required_views: List[str]) -> TextAnnotation:
     """Build a ``TextAnnotation`` for *text* containing the requested views.

     The text is split on spaces and sent to the pipeline as a single
     pre-tokenized sentence.

     :param text: raw input text; may contain newlines.
     :param required_views: names of the views the pipeline should produce.
     :return: the ``TextAnnotation`` parsed from the pipeline's JSON reply.
     """
     # TODO This is a problem with ccg_nlpy text annotation, it does not like newlines (e.g., marking paragraphs)
     # Replace newlines with a space rather than deleting them, so the last
     # word of one line is not fused with the first word of the next.
     flattened = text.replace("\n", " ")
     # Drop the empty strings that split(" ") yields for repeated spaces.
     tokens = [token for token in flattened.split(" ") if token]
     views_arg = ",".join(required_views)
     ta_json = self.pipeline.call_server_pretokenized(pretokenized_text=[tokens], views=views_arg)
     return TextAnnotation(json_str=ta_json)
Example #5
0
    def get_view_from_model(self, docta:TextAnnotation) -> View:
        """
        Build the new view that will be added to the text annotation.

        This example implementation deep-copies the "TOKENS" view of the
        input and sets each constituent's "label" to the upper-cased token
        paired with it, leaving the original view untouched.
        :param docta: text annotation that already holds the views needed here
        :return: the copied view with upper-cased labels
        """
        # Upper-case every token label. Test for TokenLabelView
        upcased_view = copy.deepcopy(docta.get_view("TOKENS"))
        for word, constituent in zip(docta.get_tokens, upcased_view.cons_list):
            constituent["label"] = word.upper()
        return upcased_view

        # # Alternative: replace each NER with its upcased tokens. Test for SpanLabelView
        # new_view = copy.deepcopy(docta.get_view("NER_CONLL"))
        # for nercons in new_view.cons_list:
        #     nercons["label"] = nercons["tokens"].upper()
Example #6
0
                                 debug=True)
    wiki_cg.load_probs("data/{}wiki/probmap/{}wiki-{}".format(
        args["lang"], args["lang"], args["date"]))

    # pipeline = remote_pipeline.RemotePipeline(server_api='http://macniece.seas.upenn.edu:4001')
    pipeline = local_pipeline.LocalPipeline()

    md = SpacyNER_Annotator(lang=args["lang"], pipeline=pipeline)
    # md = PoormanMentionDetector(cg=wiki_cg, pipeline=pipeline)

    inf_reader = InferenceReader(args=args,
                                 mention_detector=md,
                                 cg=wiki_cg,
                                 batch_size=args["batch_size"],
                                 loader=loader,
                                 num_cands=args["ncands"],
                                 strict_context=False,
                                 usecoh=args["usecoh"])

    test_file = args["test_doc"]
    # print("[#] Test Mentions File : {}".format(test_file))
    with open(test_file, 'r') as f:
        lines = f.read().strip().split("\n")
    assert len(lines) == 1, "Only support inference for single doc"
    doctext = lines[0].strip()

    ccgdoc_dict = inf_reader.mention_detector.get_mentions_from_text(doctext)
    ccgdoc = TextAnnotation(json.dumps(ccgdoc_dict))

    inf_reader.set_test_doc(ccgdoc=ccgdoc)
if __name__ == '__main__':
    # Demo: run the Chinese mention detector over two sample passages and
    # print, for each, the number of constituents in the detector's provided
    # view followed by the constituents themselves.
    print('---')
    print('input text')
    print('')

    # text = "Chris Manning is a nice person. Chris wrote a simple sentence. He also gives oranges to people."
    sample_texts = (
        "奧巴馬的母親斯坦利·安·鄧納姆,在1942年11月29日,生於堪薩斯州威奇托圣方濟各医院,主要是英國血統。他的父親老巴拉克·奧巴馬,在1936年6月18" \
        "日,生於東非肯尼亞西部維多利亞湖邊夏亞郡科蓋若村,盧歐族人,肯亞政治家、多國政府顧問,也是學者 ",
        "青年時期,奧巴馬因為自己的多種族背景,很難取得社會認同,十分自卑。十幾歲的他成了癮君子,他和任何絕望的黑人青年一樣,不知道生命的意義何在。家境貧窮,膚色經常遭人嘲笑,前途無望,成功的道路曲折得連路都找不著。他過了一段荒唐的日子,做了很多愚蠢的事,比如翹課、吸毒、泡妞等,成了不折不扣的“迷途叛逆少年”,曾以吸食大麻和可卡因來“將‘我是誰’的問題擠出腦袋”[7]。有媒體撰文認為,給青年的他帶來深刻影響的不是他的父母親,而是他的外祖父斯坦利·埃默·鄧漢姆和外祖母斯坦利·安·鄧漢姆[8];媒體同時還披露著名黑人詩人、記者和美國共產黨、左翼活動家法蘭克·米歇爾·大衛斯也是深刻影響青年奧巴馬的人物,1960年代大衛斯就成為奧巴馬家裡的常客 ",
    )

    # set up the client
    print('---')
    print('starting up Java Stanford CoreNLP Server...')
    pipeline = local_pipeline.LocalPipeline()
    md = StanfordNLPMentionDetector(pipeline=pipeline, lang="zh")

    # Each passage is processed exactly once; the earlier version ran the
    # detector on the first passage twice and threw the first result away.
    for text in sample_texts:
        ccgdoc_dict = md.get_mentions_from_text(text)
        ccgdoc = TextAnnotation(json.dumps(ccgdoc_dict))
        ner_cons_list = ccgdoc.get_view(md.get_provided_view()).cons_list
        print(len(ner_cons_list))
        print(list(ner_cons_list))
 def setUp(self):
     """Load the sample text annotation fixture into ``self.ta``."""
     fixture_path = 'tests/sample_text_annotation.json'
     with open(fixture_path, 'r') as fixture:
         raw_json = fixture.read()
     self.ta = TextAnnotation(json_str=raw_json)