def loadCrosswikis(self):
    """Return the crosswikis object, loading it from disk on first use.

    The loaded object is cached on ``self.crosswikis``; later calls return
    the cached value without touching the filesystem.

    Returns:
        Whatever ``utils.load`` yields for ``self.config.crosswikis_pkl``.

    Raises:
        SystemExit: with status 1 if ``self.config.crosswikis_pkl`` does
            not exist on disk.
    """
    # Identity check: a cached object that overrides ``__eq__`` must not be
    # mistaken for "not loaded yet".
    if self.crosswikis is None:
        if not os.path.exists(self.config.crosswikis_pkl):
            print("Crosswikis pkl missing")
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        self.crosswikis = utils.load(self.config.crosswikis_pkl)
    return self.crosswikis
def getCrosswikisSlice(self):
    """Return the crosswikis slice, loading it from disk on first use.

    The loaded object is cached on ``self.cwikis_slice``; later calls
    return the cached value directly.

    Returns:
        Whatever ``utils.load`` yields for ``self.config.crosswikis_slice``.

    Raises:
        SystemExit: with status 1 if ``self.config.crosswikis_slice`` does
            not exist on disk.
    """
    # Identity check: a cached object that overrides ``__eq__`` must not be
    # mistaken for "not loaded yet".
    if self.cwikis_slice is None:
        if not os.path.exists(self.config.crosswikis_slice):
            print("CWikis Slice Dict missing")
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        print("Loading CWIKI Slice")
        self.cwikis_slice = utils.load(self.config.crosswikis_slice)
    return self.cwikis_slice
def loadGloveVectors(self):
    """Return the GloVe vectors object, loading it from disk on first use.

    The loaded object is cached on ``self.glove2vec``; later calls return
    the cached value directly.

    Returns:
        Whatever ``utils.load`` yields for ``self.config.glove_pkl``.

    Raises:
        SystemExit: with status 1 if ``self.config.glove_pkl`` does not
            exist on disk.
    """
    # Identity check: ``== None`` on an array-like or other object with a
    # custom ``__eq__`` would not reliably mean "not loaded yet".
    if self.glove2vec is None:
        if not os.path.exists(self.config.glove_pkl):
            print("Glove_Vectors_PKL doesnot exist")
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        print("Loading Glove Word Vectors")
        self.glove2vec = utils.load(self.config.glove_pkl)
    return self.glove2vec
def getWID2Wikititle(self):
    """Return the wid2Wikititle object, loading it from disk on first use.

    The loaded object is cached on ``self.wid2Wikititle``; later calls
    return the cached value directly.

    Returns:
        Whatever ``utils.load`` yields for ``self.config.widWiktitle_pkl``.

    Raises:
        SystemExit: with status 1 if ``self.config.widWiktitle_pkl`` does
            not exist on disk.
    """
    # Identity check: a cached object that overrides ``__eq__`` must not be
    # mistaken for "not loaded yet".
    if self.wid2Wikititle is None:
        if not os.path.exists(self.config.widWiktitle_pkl):
            print("wid2Wikititle pkl missing")
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        print("Loading wid2Wikititle")
        self.wid2Wikititle = utils.load(self.config.widWiktitle_pkl)
    return self.wid2Wikititle
def getTestAllEnCwiki(self):
    """Return the test all-entity crosswikis object, loading it on first use.

    The loaded object is cached on ``self.test_allen_cwikis``; later calls
    return the cached value directly.

    Returns:
        Whatever ``utils.load`` yields for
        ``self.config.test_allen_cwikis_pkl``.

    Raises:
        SystemExit: with status 1 if ``self.config.test_allen_cwikis_pkl``
            does not exist on disk.
    """
    # Identity check: a cached object that overrides ``__eq__`` must not be
    # mistaken for "not loaded yet".
    if self.test_allen_cwikis is None:
        if not os.path.exists(self.config.test_allen_cwikis_pkl):
            print("Test All Entity CWikis Dict missing")
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        print("Loading Test Data All Entity CWIKI")
        self.test_allen_cwikis = utils.load(
            self.config.test_allen_cwikis_pkl)
    return self.test_allen_cwikis
def getKnwnWidVocab(self):
    """Return the known-entity vocabulary pair, loading it on first use.

    The pair is cached on ``self.knwid2idx`` and ``self.idx2knwid``. A load
    is triggered whenever either half is still ``None``.

    Returns:
        tuple: ``(self.knwid2idx, self.idx2knwid)``, as unpacked from
        ``utils.load(self.config.kwnwid_vocab_pkl)``.

    Raises:
        SystemExit: with status 1 if ``self.config.kwnwid_vocab_pkl`` does
            not exist on disk.
    """
    # Identity checks: cached objects that override ``__eq__`` must not be
    # mistaken for "not loaded yet".
    if self.knwid2idx is None or self.idx2knwid is None:
        if not os.path.exists(self.config.kwnwid_vocab_pkl):
            print("Known Entities Vocab PKL missing")
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        print("Loading Known Entity Vocabulary ... ")
        (self.knwid2idx, self.idx2knwid) = utils.load(
            self.config.kwnwid_vocab_pkl)
    return (self.knwid2idx, self.idx2knwid)
def getLabelVocab(self):
    """Return the type-label vocabulary pair, loading it on first use.

    The pair is cached on ``self.label2idx`` and ``self.idx2label``. A load
    is triggered whenever either half is still ``None``.

    Returns:
        tuple: ``(self.label2idx, self.idx2label)``, as unpacked from
        ``utils.load(self.config.label_vocab_pkl)``.

    Raises:
        SystemExit: with status 1 if ``self.config.label_vocab_pkl`` does
            not exist on disk.
    """
    # Identity checks: cached objects that override ``__eq__`` must not be
    # mistaken for "not loaded yet".
    if self.label2idx is None or self.idx2label is None:
        if not os.path.exists(self.config.label_vocab_pkl):
            print("Label Vocab PKL missing")
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        print("Loading Type Label Vocabulary")
        (self.label2idx, self.idx2label) = utils.load(
            self.config.label_vocab_pkl)
    return (self.label2idx, self.idx2label)
def getWordVocab(self):
    """Return the word vocabulary pair, loading it from disk on first use.

    The pair is cached on ``self.word2idx`` and ``self.idx2word``. A load
    is triggered whenever either half is still ``None``.

    Returns:
        tuple: ``(self.word2idx, self.idx2word)``, as unpacked from
        ``utils.load(self.config.word_vocab_pkl)``.

    Raises:
        SystemExit: with status 1 if ``self.config.word_vocab_pkl`` does
            not exist on disk.
    """
    # Identity checks: cached objects that override ``__eq__`` must not be
    # mistaken for "not loaded yet".
    if self.word2idx is None or self.idx2word is None:
        if not os.path.exists(self.config.word_vocab_pkl):
            print("Word Vocab PKL missing")
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        print("Loading Word Vocabulary")
        (self.word2idx, self.idx2word) = utils.load(
            self.config.word_vocab_pkl)
    return (self.word2idx, self.idx2word)
def getWID2TypeLabels(self):
    """Return the wid2TypeLabels object, loading it from disk on first use.

    The loaded object is cached on ``self.wid2TypeLabels``; later calls
    return the cached value directly.

    Returns:
        Whatever ``utils.load`` yields for
        ``self.config.wid2typelabels_vocab_pkl``.

    Raises:
        SystemExit: with status 1 if
            ``self.config.wid2typelabels_vocab_pkl`` does not exist on disk.
    """
    # Identity check: a cached object that overrides ``__eq__`` must not be
    # mistaken for "not loaded yet".
    if self.wid2TypeLabels is None:
        if not os.path.exists(self.config.wid2typelabels_vocab_pkl):
            print("wid2TypeLabels pkl missing")
            # Non-zero status so calling scripts can detect the failure.
            sys.exit(1)
        print("Loading wid2TypeLabels")
        self.wid2TypeLabels = utils.load(
            self.config.wid2typelabels_vocab_pkl)
    return self.wid2TypeLabels
def __init__(self, config, vocabloader, num_cands, batch_size,
             strict_context=True, pretrain_wordembed=True, coherence=True):
    """Set up the inference reader: constants, vocabularies and lookup dicts.

    Args:
        config: Object providing the ``cohstringG9_vocab_pkl`` and
            ``crosswikis_pruned_pkl`` paths read here via ``utils.load``.
        vocabloader: Object providing ``getGloveWordVocab``,
            ``getLabelVocab``, ``getKnwnWidVocab``, ``getWID2Wikititle``
            and ``loadGloveVectors``.
        num_cands: Stored as ``self.num_cands`` (presumably the number of
            candidates per mention; confirm against the callers).
        batch_size: Stored as ``self.batch_size`` and printed.
        strict_context: Stored as ``self.strict_context``.
        pretrain_wordembed: When truthy, GloVe vectors are loaded into
            ``self.word2vec``.
        coherence: Stored as ``self.coherence``.
    """
    self.typeOfReader = "inference"
    # ``start_word`` / ``end_word`` are names resolved from the enclosing
    # module scope, not parameters.
    self.start_word = start_word
    self.end_word = end_word
    self.unk_word = 'unk'  # In tune with word2vec
    self.unk_wid = "<unk_wid>"
    self.tr_sup = 'tr_sup'
    self.tr_unsup = 'tr_unsup'
    self.pretrain_wordembed = pretrain_wordembed
    self.coherence = coherence
    self.disambiguations = set()

    # Word Vocab
    (self.word2idx, self.idx2word) = vocabloader.getGloveWordVocab()
    self.num_words = len(self.idx2word)

    # Label Vocab
    (self.label2idx, self.idx2label) = vocabloader.getLabelVocab()
    self.num_labels = len(self.idx2label)

    # Known WID Vocab
    (self.knwid2idx, self.idx2knwid) = vocabloader.getKnwnWidVocab()
    self.num_knwn_entities = len(self.idx2knwid)

    # Wid2Wikititle Map, plus its inverse. If two wids share a title, the
    # one iterated last wins in the inverse map.
    self.wid2WikiTitle = vocabloader.getWID2Wikititle()
    self.wikiTitle2Wid = {
        self.wid2WikiTitle[wid]: wid for wid in self.wid2WikiTitle
    }

    # Coherence String Vocab
    print("Loading Coherence Strings Dicts ... ")
    (self.cohG92idx, self.idx2cohG9) = utils.load(
        config.cohstringG9_vocab_pkl)
    self.num_cohstr = len(self.idx2cohG9)

    # Crosswikis
    print("Loading Crosswikis dict. (takes ~2 mins to load)")
    self.crosswikis = utils.load(config.crosswikis_pruned_pkl)
    print("Crosswikis loaded. Size: {}".format(len(self.crosswikis)))

    if self.pretrain_wordembed:
        # The attribute is named ``word2vec`` but holds whatever
        # ``loadGloveVectors`` returns.
        self.word2vec = vocabloader.loadGloveVectors()
        print("[#] Glove Vectors loaded!")

    self.batch_size = batch_size
    print("[#] Batch Size: %d" % self.batch_size)
    self.num_cands = num_cands
    self.strict_context = strict_context

    print("\n[#]LOADING COMPLETE")