def __init__(self, host=None, ontonotes=True):
    super().__init__()
    self.host = host
    self.connl = not ontonotes
    self.ontonotes = ontonotes
    self.pipeline = remote_pipeline.RemotePipeline(server_api=host)
    self.add_detector(self.annotate)
def get_ta_dir(directory):
    """
    Returns a list of TextAnnotation objects which are instantiated using the
    serialized json data in the directory parameter.

    @param directory path to directory with serialized TAs
    @return tas a list of TextAnnotations
    """
    # pipeline = local_pipeline.LocalPipeline()
    pipeline = remote_pipeline.RemotePipeline()
    serialized_tas = [
        join(directory, f) for f in listdir(directory)
        if isfile(join(directory, f))
    ]
    tas = []
    for ser_ta in serialized_tas:
        with open(ser_ta, mode='r', encoding='utf-8') as f:
            try:
                tas.append(
                    core.text_annotation.TextAnnotation(f.read(), pipeline))
            except Exception:
                logging.info(ser_ta + " could not be parsed.")
                continue
    return tas
def get_ta(path_to_file):
    """
    Returns a TextAnnotation object which has been deserialized from a
    json-serialized TextAnnotation at the given path.

    @param path_to_file json serialized TextAnnotation
    @return ta deserialized TA
    """
    # pipeline = local_pipeline.LocalPipeline()
    pipeline = remote_pipeline.RemotePipeline()
    ta = None
    with open(path_to_file, "r", encoding='utf-8') as f:
        ta = core.text_annotation.TextAnnotation(f.read(), pipeline)
    return ta
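# Hedged usage sketch for the two helpers above. The file and directory names
# are placeholders, and views such as POS are only available if they were
# included when the TextAnnotation was serialized.
single_ta = get_ta("serialized_ta.json")
print(single_ta.get_pos)                      # POS view, if present in the JSON
all_tas = get_ta_dir("serialized_tas")
print(len(all_tas), "annotations loaded")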
def test_user_config(self):
    test_config_folder = os.path.dirname(os.path.realpath(__file__))
    with codecs.open(test_config_folder + '/config.cfg',
                     mode='w', encoding='utf-8') as f:
        f.write('''
[remote_pipeline_setting]
api = someaddress
''')
    rp = remote_pipeline.RemotePipeline(file_name=test_config_folder +
                                        '/config.cfg')
    self.assertEqual("someaddress", rp.url)
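# For comparison, other snippets below point the client at a server directly
# through the server_api argument instead of a config file; the address here
# is just a placeholder.
rp = remote_pipeline.RemotePipeline(server_api="http://localhost:8080")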
def __init__(self, config, vocabloader, test_mens_file, num_cands,
             batch_size, strict_context=True, pretrain_wordembed=True,
             coherence=True):
    self.pipeline = remote_pipeline.RemotePipeline(
        server_api='http://austen.cs.illinois.edu:5800')
    self.typeOfReader = "inference"
    self.start_word = start_word
    self.end_word = end_word
    self.unk_word = 'unk'  # In tune with word2vec
    self.unk_wid = "<unk_wid>"
    self.tr_sup = 'tr_sup'
    self.tr_unsup = 'tr_unsup'
    self.pretrain_wordembed = pretrain_wordembed
    self.coherence = coherence

    # Word Vocab
    (self.word2idx, self.idx2word) = vocabloader.getGloveWordVocab()
    self.num_words = len(self.idx2word)

    # Label Vocab
    (self.label2idx, self.idx2label) = vocabloader.getLabelVocab()
    self.num_labels = len(self.idx2label)

    # Known WID Vocab
    (self.knwid2idx, self.idx2knwid) = vocabloader.getKnwnWidVocab()
    self.num_knwn_entities = len(self.idx2knwid)

    # Wid2Wikititle Map
    self.wid2WikiTitle = vocabloader.getWID2Wikititle()

    # Coherence String Vocab
    print("Loading Coherence Strings Dicts ... ")
    (self.cohG92idx, self.idx2cohG9) = utils.load(config.cohstringG9_vocab_pkl)
    self.num_cohstr = len(self.idx2cohG9)

    # Crosswikis
    print("Loading Crosswikis dict. (takes ~2 mins to load)")
    self.crosswikis = utils.load(config.crosswikis_pruned_pkl)
    print("Crosswikis loaded. Size: {}".format(len(self.crosswikis)))

    if self.pretrain_wordembed:
        stime = time.time()
        self.word2vec = vocabloader.loadGloveVectors()
        print("[#] Glove Vectors loaded!")
        ttime = (time.time() - stime) / float(60)
        print("[#] Time to load vectors : {} mins".format(ttime))

    print("[#] Test Mentions File : {}".format(test_mens_file))
    print("[#] Loading test file and preprocessing ... ")
    self.processTestDoc(test_mens_file)
    self.mention_lines = self.convertSent2NerToMentionLines()
    self.mentions = []
    for line in self.mention_lines:
        m = Mention(line)
        self.mentions.append(m)

    self.men_idx = 0
    self.num_mens = len(self.mentions)
    self.epochs = 0
    print("[#] Test Mentions : {}".format(self.num_mens))

    self.batch_size = batch_size
    print("[#] Batch Size: %d" % self.batch_size)
    self.num_cands = num_cands
    self.strict_context = strict_context

    print("\n[#]LOADING COMPLETE")
import requests
import json

from django.http import HttpResponseRedirect, JsonResponse

from ccg_nlpy import remote_pipeline


def availableViews(request):
    return JsonResponse({"images": []})


pipeline = remote_pipeline.RemotePipeline()


def annotate(request):
    if request.method == "POST":
        text = request.POST['text']
        views = request.POST['views']
    else:
        text = request.GET['text']
        views = request.GET['views']
    doc = pipeline.doc(text)
    if 'NITISH_VIEW' in views:
        pass  # if the view is requested, add it to the document.
    return JsonResponse(doc.as_json)
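# Hedged usage sketch for exercising the Django view above from a client.
# The /annotate route and host are assumptions; adjust them to match the
# project's urls.py.
import requests

resp = requests.get("http://localhost:8000/annotate",
                    params={"text": "Hello, how are you.", "views": "POS"})
print(resp.json())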
def setUp(self):
    self.rp = remote_pipeline.RemotePipeline()
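# Hedged sketch of a test method that could sit alongside this setUp; the
# example sentence and the POS view follow the ccg_nlpy README, and the test
# assumes the remote pipeline server is reachable.
def test_doc_pos_view(self):
    doc = self.rp.doc("Hello, how are you.")
    self.assertIsNotNone(doc.get_pos)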
def read_inputs():
    pipeline = remote_pipeline.RemotePipeline(
        server_api="http://macniece.seas.upenn.edu:4001")
    output_json = []

    import json
    import math

    file = "/home/danielk/perspectives/data/perspectives.json"
    with open(file, encoding='utf-8') as data_file:
        predictor = load_model()
        data = json.loads(data_file.read())

    # pre-process the lemmas
    for i, item1 in enumerate(data):
        try:
            doc = pipeline.doc(item1["title"])
            lemmas = [x["label"].lower() for x in doc.get_lemma]
            data[i]["lemmas"] = lemmas
        except ValueError:
            data[i]["lemmas"] = []
            print("Something happened . . . ")

    print("Done with pre-processing the lemmas . . . ")

    def save():
        with open("/home/danielk/perspectives/data/perspectives_pairs.json",
                  'a') as fp:
            json.dump(output_json, fp)

    for i, item1 in enumerate(data):
        print(f" - Processing {i} out of {len(data)}")

        # filter the perspectives to those sharing more than one lemma
        filtered_list = []
        split1 = item1["lemmas"]
        for item2 in data:
            split2 = item2["lemmas"]
            shared = intersection(split1, split2)
            if len(shared) > 1:
                filtered_list.append(item2)

        print(f"Size of selected list: {len(filtered_list)}")

        if i % 100 == 1:
            save()

        for item2 in filtered_list:
            # print("item2: " + str(item2["title"]))
            inputs = {
                "premise": str(item1["title"]),
                "hypothesis": str(item2["title"])
            }
            try:
                json1 = predictor.predict_json(inputs)
                label_logits = json1["label_logits"]
                # softmax over the entailment logits
                exps = [math.exp(x) for x in label_logits]
                sumexps = sum(exps)
                prob = [e / sumexps for e in exps]
                output_json.append({
                    "id1": item1["id"],
                    "id2": item2["id"],
                    "prob": prob
                })
            except ValueError:
                print("Something happened . . . ")

    save()
def get_pipeline_instance(self):
    return remote_pipeline.RemotePipeline()
                    'Wikipedia')
parser.add_argument('--date', type=str, default="20170520",
                    help='wikidump date')
parser.add_argument('--numcands', type=int, default=10, help='max # of cands')
parser.add_argument(
    '--nofallback',
    action="store_true",
    help='whether to fallback to word level cand gen or not')
parser.add_argument('--interactive',
                    action="store_true",
                    help='interactive candgen mode for debug')
args = parser.parse_args()
args = vars(args)

wiki_cg = CandidateGenerator(kbfile=args["kbfile"],
                             K=args["numcands"],
                             lang=args["lang"],
                             fallback=False,
                             debug=True)
wiki_cg.load_probs("data/{}wiki/probmap/{}wiki-{}".format(
    args["lang"], args["lang"], args["date"]))

# pipeline = local_pipeline.LocalPipeline()
pipeline = remote_pipeline.RemotePipeline(
    server_api='http://macniece.seas.upenn.edu:4001')
md = PoormanMentionDetector(cg=wiki_cg, pipeline=pipeline)
md.get_mentions_from_file("testdoc.txt")
def checkPosWithCogComp(self, word):
    pipeline = remote_pipeline.RemotePipeline()
    doc = pipeline.doc(word)
    pos_with_cc = list(doc.get_pos)
    return pos_with_cc[0]['label']
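# Hedged variant of the helper above: caching one RemotePipeline at class level
# avoids re-creating the client on every call. The class and method names here
# are illustrative only.
class _PosTagger:
    _pipeline = remote_pipeline.RemotePipeline()

    def tag_first(self, word):
        doc = self._pipeline.doc(word)
        return list(doc.get_pos)[0]['label']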