Example #1
    def __init__(self, host=None, ontonotes=True):
        super().__init__()
        self.host = host
        self.connl = not ontonotes
        self.ontonotes = ontonotes
        self.pipeline = remote_pipeline.RemotePipeline(server_api=host)
        self.add_detector(self.annotate)
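The wrapper above only stores its host and registers an annotate callback; for orientation, a minimal sketch of driving ccg_nlpy's RemotePipeline directly might look like this (the sample sentence is mine, and it assumes the default public server is reachable):

# Minimal sketch, assuming the default ccg_nlpy remote server is reachable.
from ccg_nlpy import remote_pipeline

pipeline = remote_pipeline.RemotePipeline()  # or RemotePipeline(server_api=host), as above
doc = pipeline.doc("Stephen Curry plays basketball.")
for constituent in doc.get_pos:  # view accessors as used in the examples below
    print(constituent['label'])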
Example #2
# Imports assumed by this snippet (not shown in the original example):
import logging
from os import listdir
from os.path import isfile, join

from ccg_nlpy import core, remote_pipeline
from ccg_nlpy.core import text_annotation  # makes core.text_annotation resolvable


def get_ta_dir(directory):
    """
        Returns a list of TextAnnotation objects which are instantiated
        using the serialized json data in the directory parameter.

        @param directory path to directory with serialized TAs
        @return tas a list of TextAnnotations
    """
    #pipeline = local_pipeline.LocalPipeline()
    pipeline = remote_pipeline.RemotePipeline()
    serialized_tas = [
        join(directory, f) for f in listdir(directory)
        if isfile(join(directory, f))
    ]
    tas = []

    for ser_ta in serialized_tas:
        with open(ser_ta, mode='r', encoding='utf-8') as f:
            try:
                tas.append(
                    core.text_annotation.TextAnnotation(f.read(), pipeline))
            except Exception:
                logging.info(ser_ta + " could not be parsed.")
                continue
    return tas
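A short usage sketch for the helper above; the directory name is a placeholder:

# Usage sketch; "serialized_tas" is a placeholder directory of json-serialized TextAnnotations.
tas = get_ta_dir("serialized_tas")
print("{} TextAnnotations loaded".format(len(tas)))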
Example #3
def get_ta(path_to_file):
    """
        Returns a TextAnnotation object which has been deserialized from
        a json-serialized TextAnnotation at the given path.

        @param path_to_file json serialized TextAnnotation
        @return ta deserialized TA
    """
    #pipeline = local_pipeline.LocalPipeline()
    pipeline = remote_pipeline.RemotePipeline()
    ta = None
    with open(path_to_file, "r", encoding='utf-8') as f:
        ta = core.text_annotation.TextAnnotation(f.read(), pipeline)
    return ta
Example #4
    def test_user_config(self):
        test_config_folder = os.path.dirname(os.path.realpath(__file__))

        with codecs.open(test_config_folder + '/config.cfg',
                         mode='w',
                         encoding='utf-8') as f:
            f.write('''
[remote_pipeline_setting]
api = someaddress
''')

        rp = remote_pipeline.RemotePipeline(file_name=test_config_folder +
                                            '/config.cfg')
        self.assertEqual("someaddress", rp.url)
Example #5
    def __init__(self,
                 config,
                 vocabloader,
                 test_mens_file,
                 num_cands,
                 batch_size,
                 strict_context=True,
                 pretrain_wordembed=True,
                 coherence=True):
        self.pipeline = remote_pipeline.RemotePipeline(
            server_api='http://austen.cs.illinois.edu:5800')
        self.typeOfReader = "inference"
        self.start_word = start_word
        self.end_word = end_word
        self.unk_word = 'unk'  # In tune with word2vec
        self.unk_wid = "<unk_wid>"
        self.tr_sup = 'tr_sup'
        self.tr_unsup = 'tr_unsup'
        self.pretrain_wordembed = pretrain_wordembed
        self.coherence = coherence

        # Word Vocab
        (self.word2idx, self.idx2word) = vocabloader.getGloveWordVocab()
        self.num_words = len(self.idx2word)

        # Label Vocab
        (self.label2idx, self.idx2label) = vocabloader.getLabelVocab()
        self.num_labels = len(self.idx2label)

        # Known WID Vocab
        (self.knwid2idx, self.idx2knwid) = vocabloader.getKnwnWidVocab()
        self.num_knwn_entities = len(self.idx2knwid)

        # Wid2Wikititle Map
        self.wid2WikiTitle = vocabloader.getWID2Wikititle()

        # Coherence String Vocab
        print("Loading Coherence Strings Dicts ... ")
        (self.cohG92idx,
         self.idx2cohG9) = utils.load(config.cohstringG9_vocab_pkl)
        self.num_cohstr = len(self.idx2cohG9)

        # Crosswikis
        print("Loading Crosswikis dict. (takes ~2 mins to load)")
        self.crosswikis = utils.load(config.crosswikis_pruned_pkl)
        print("Crosswikis loaded. Size: {}".format(len(self.crosswikis)))

        if self.pretrain_wordembed:
            stime = time.time()
            self.word2vec = vocabloader.loadGloveVectors()
            print("[#] Glove Vectors loaded!")
            ttime = (time.time() - stime) / float(60)
            print("[#] Time to load vectors : {} mins".format(ttime))

        print("[#] Test Mentions File : {}".format(test_mens_file))

        print("[#] Loading test file and preprocessing ... ")
        self.processTestDoc(test_mens_file)
        self.mention_lines = self.convertSent2NerToMentionLines()
        self.mentions = []
        for line in self.mention_lines:
            m = Mention(line)
            self.mentions.append(m)

        self.men_idx = 0
        self.num_mens = len(self.mentions)
        self.epochs = 0
        print("[#] Test Mentions : {}".format(self.num_mens))

        self.batch_size = batch_size
        print("[#] Batch Size: %d" % self.batch_size)
        self.num_cands = num_cands
        self.strict_context = strict_context

        print("\n[#]LOADING COMPLETE")
Example #6
import requests
import json

from django.http import HttpResponseRedirect, JsonResponse

from ccg_nlpy import remote_pipeline

def availableViews(request):
    return JsonResponse({"images": []})

pipeline = remote_pipeline.RemotePipeline()

def annotate(request):
    if request.method == "POST":
        text = request.POST['text']
        views = request.POST['views']
    else:
        text = request.GET['text']
        views = request.GET['views']

    doc = pipeline.doc(text)

    if 'NITISH_VIEW' in views:
        pass  # if the view is requested add it to the document

    return JsonResponse(doc.as_json)
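The example imports requests but never uses it; assuming the annotate view is routed at /annotate, a client-side call might look like the following sketch (host, port, and route are assumptions, not part of the example):

# Client-side sketch; the URL and the requested view names are assumptions.
import requests

resp = requests.post("http://localhost:8000/annotate",
                     data={"text": "Stephen Curry plays basketball.",
                           "views": "POS"})
print(resp.json())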
    def setUp(self):
        self.rp = remote_pipeline.RemotePipeline()
def read_inputs():
    pipeline = remote_pipeline.RemotePipeline(
        server_api="http://macniece.seas.upenn.edu:4001")

    output_json = []
    import json
    import math  # used below for math.exp, missing from the original snippet
    # load_model() and intersection() are helper functions defined elsewhere in the original module
    file = "/home/danielk/perspectives/data/perspectives.json"

    with open(file, encoding='utf-8') as data_file:
        predictor = load_model()
        data = json.loads(data_file.read())

    # pre-process the lemmas
    for i, item1 in enumerate(data):
        try:
            doc = pipeline.doc(item1["title"])
            lemmas = [x["label"].lower() for x in doc.get_lemma]
            data[i]["lemmas"] = lemmas
        except ValueError:
            data[i]["lemmas"] = []
            print("Something happened . . . ")

    print("Done with pre-processing the lemmas . . . ")

    def save():
        with open("/home/danielk/perspectives/data/perspectives_pairs.json",
                  'a') as fp:
            json.dump(output_json, fp)

    for i, item1 in enumerate(data):
        print(f" - Processes {i} out of {len(data)}")
        # filter the perspectives
        filtered_list = []
        split1 = item1["lemmas"]
        for item2 in data:
            split2 = item2["lemmas"]
            shared = intersection(split1, split2)
            if len(shared) > 1:
                filtered_list.append(item2)

        print(f"Size of selected list: {len(filtered_list)}")
        if i % 100 == 1:
            save()
        for item2 in filtered_list:
            # print("item2: " + str(item2["title"]))
            inputs = {
                "premise": str(item1["title"]),
                "hypothesis": str(item2["title"])
            }
            try:
                json1 = predictor.predict_json(inputs)
                label_logits = json1["label_logits"]
                exps = [math.exp(x) for x in label_logits]
                sumexps = sum(exps)
                prob = [e / sumexps for e in exps]
                output_json.append({
                    "id1": item1["id"],
                    "id2": item2["id"],
                    "prob": prob
                })
            except ValueError:
                print("Something happened . . . ")
    save()
Example #9
    def get_pipeline_instance(self):
        return remote_pipeline.RemotePipeline()
    # (snippet truncated above: the argparse setup and the --kbfile / --lang arguments are cut off)
    parser.add_argument('--date',
                        type=str,
                        default="20170520",
                        help='wikidump date')
    parser.add_argument('--numcands',
                        type=int,
                        default=10,
                        help='max # of cands')
    parser.add_argument(
        '--nofallback',
        action="store_true",
        help='whether to fallback to word level cand gen or not')
    parser.add_argument('--interactive',
                        action="store_true",
                        help='interactive candgen mode for debug')
    args = parser.parse_args()
    args = vars(args)
    wiki_cg = CandidateGenerator(kbfile=args["kbfile"],
                                 K=args["numcands"],
                                 lang=args["lang"],
                                 fallback=False,
                                 debug=True)
    wiki_cg.load_probs("data/{}wiki/probmap/{}wiki-{}".format(
        args["lang"], args["lang"], args["date"]))
    # pipeline = local_pipeline.LocalPipeline()
    pipeline = remote_pipeline.RemotePipeline(
        server_api='http://macniece.seas.upenn.edu:4001')
    md = PoormanMentionDetector(cg=wiki_cg, pipeline=pipeline)
    md.get_mentions_from_file("testdoc.txt")
    def checkPosWithCogComp(self, word):
        pipeline = remote_pipeline.RemotePipeline()
        doc = pipeline.doc(word)
        pos_with_cc = list(doc.get_pos)
        return pos_with_cc[0]['label']
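A usage sketch for the method above; obj stands in for a hypothetical instance of the class that owns it, and the default remote server must be reachable:

# Usage sketch; "obj" is a hypothetical instance of the class defining checkPosWithCogComp.
tag = obj.checkPosWithCogComp("running")
print(tag)  # a POS label such as "VBG"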