Example #1
0
 def loadCrosswikis(self):
     """Return the crosswikis data, loading it on first use.

     The loaded object is cached on ``self.crosswikis``; once it is set,
     later calls return it without touching the file system.

     Returns:
         The object produced by ``utils.load(self.config.crosswikis_pkl)``.

     Raises:
         SystemExit: with status 1 when ``self.config.crosswikis_pkl`` does
             not exist on disk.
     """
     # Identity check: ``== None`` would dispatch to the cached object's
     # ``__eq__``, which can be wrong or ambiguous for custom/array types.
     if self.crosswikis is None:
         if not os.path.exists(self.config.crosswikis_pkl):
             print("Crosswikis pkl missing")
             # Exit non-zero so the failure is visible to the calling shell.
             sys.exit(1)
         self.crosswikis = utils.load(self.config.crosswikis_pkl)
     return self.crosswikis
Example #2
0
 def getCrosswikisSlice(self):
     """Return the crosswikis slice, loading it on first use.

     The loaded object is cached on ``self.cwikis_slice``; once it is set,
     later calls return it directly.

     Returns:
         The object produced by ``utils.load(self.config.crosswikis_slice)``.

     Raises:
         SystemExit: with status 1 when ``self.config.crosswikis_slice``
             does not exist on disk.
     """
     # ``is None`` avoids invoking the cached object's ``__eq__``.
     if self.cwikis_slice is None:
         if not os.path.exists(self.config.crosswikis_slice):
             print("CWikis Slice Dict missing")
             # Exit non-zero so the failure is visible to the calling shell.
             sys.exit(1)
         print("Loading CWIKI Slice")
         self.cwikis_slice = utils.load(self.config.crosswikis_slice)
     return self.cwikis_slice
Example #3
0
 def loadGloveVectors(self):
     """Return the GloVe word vectors, loading them on first use.

     The loaded object is cached on ``self.glove2vec``; once it is set,
     later calls return it directly.

     Returns:
         The object produced by ``utils.load(self.config.glove_pkl)``.

     Raises:
         SystemExit: with status 1 when ``self.config.glove_pkl`` does not
             exist on disk.
     """
     # ``is None`` rather than ``== None``: an equality test on an
     # array-like cache would be evaluated element-wise and has no single
     # truth value.
     if self.glove2vec is None:
         if not os.path.exists(self.config.glove_pkl):
             print("Glove_Vectors_PKL doesnot exist")
             # Exit non-zero so the failure is visible to the calling shell.
             sys.exit(1)
         print("Loading Glove Word Vectors")
         self.glove2vec = utils.load(self.config.glove_pkl)
     return self.glove2vec
Example #4
0
 def getWID2Wikititle(self):
     """Return the wid -> wiki-title data, loading it on first use.

     The loaded object is cached on ``self.wid2Wikititle``; once it is
     set, later calls return it directly.

     Returns:
         The object produced by ``utils.load(self.config.widWiktitle_pkl)``.

     Raises:
         SystemExit: with status 1 when ``self.config.widWiktitle_pkl``
             does not exist on disk.
     """
     # ``is None`` avoids invoking the cached object's ``__eq__``.
     if self.wid2Wikititle is None:
         if not os.path.exists(self.config.widWiktitle_pkl):
             print("wid2Wikititle pkl missing")
             # Exit non-zero so the failure is visible to the calling shell.
             sys.exit(1)
         print("Loading wid2Wikititle")
         self.wid2Wikititle = utils.load(self.config.widWiktitle_pkl)
     return self.wid2Wikititle
Example #5
0
 def getTestAllEnCwiki(self):
     """Return the test-set all-entity crosswikis data, loading on first use.

     The loaded object is cached on ``self.test_allen_cwikis``; once it is
     set, later calls return it directly.

     Returns:
         The object produced by
         ``utils.load(self.config.test_allen_cwikis_pkl)``.

     Raises:
         SystemExit: with status 1 when
             ``self.config.test_allen_cwikis_pkl`` does not exist on disk.
     """
     # ``is None`` avoids invoking the cached object's ``__eq__``.
     if self.test_allen_cwikis is None:
         if not os.path.exists(self.config.test_allen_cwikis_pkl):
             print("Test All Entity CWikis Dict missing")
             # Exit non-zero so the failure is visible to the calling shell.
             sys.exit(1)
         print("Loading Test Data All Entity CWIKI")
         self.test_allen_cwikis = utils.load(
             self.config.test_allen_cwikis_pkl)
     return self.test_allen_cwikis
Example #6
0
 def getKnwnWidVocab(self):
     """Return the known-entity vocabulary pair, loading it on first use.

     Both halves are cached on ``self.knwid2idx`` and ``self.idx2knwid``.
     If either one is still ``None`` the pair is (re)loaded from disk.

     Returns:
         tuple: ``(self.knwid2idx, self.idx2knwid)`` as unpacked from
         ``utils.load(self.config.kwnwid_vocab_pkl)``.

     Raises:
         SystemExit: with status 1 when ``self.config.kwnwid_vocab_pkl``
             does not exist on disk.
     """
     # ``is None`` avoids invoking the cached objects' ``__eq__``.
     if self.knwid2idx is None or self.idx2knwid is None:
         if not os.path.exists(self.config.kwnwid_vocab_pkl):
             print("Known Entities Vocab PKL missing")
             # Exit non-zero so the failure is visible to the calling shell.
             sys.exit(1)
         print("Loading Known Entity Vocabulary ... ")
         (self.knwid2idx,
          self.idx2knwid) = utils.load(self.config.kwnwid_vocab_pkl)
     return (self.knwid2idx, self.idx2knwid)
Example #7
0
 def getLabelVocab(self):
     """Return the type-label vocabulary pair, loading it on first use.

     Both halves are cached on ``self.label2idx`` and ``self.idx2label``.
     If either one is still ``None`` the pair is (re)loaded from disk.

     Returns:
         tuple: ``(self.label2idx, self.idx2label)`` as unpacked from
         ``utils.load(self.config.label_vocab_pkl)``.

     Raises:
         SystemExit: with status 1 when ``self.config.label_vocab_pkl``
             does not exist on disk.
     """
     # ``is None`` avoids invoking the cached objects' ``__eq__``.
     if self.label2idx is None or self.idx2label is None:
         if not os.path.exists(self.config.label_vocab_pkl):
             print("Label Vocab PKL missing")
             # Exit non-zero so the failure is visible to the calling shell.
             sys.exit(1)
         print("Loading Type Label Vocabulary")
         (self.label2idx,
          self.idx2label) = utils.load(self.config.label_vocab_pkl)
     return (self.label2idx, self.idx2label)
Example #8
0
 def getWordVocab(self):
     """Return the word vocabulary pair, loading it on first use.

     Both halves are cached on ``self.word2idx`` and ``self.idx2word``.
     If either one is still ``None`` the pair is (re)loaded from disk.

     Returns:
         tuple: ``(self.word2idx, self.idx2word)`` as unpacked from
         ``utils.load(self.config.word_vocab_pkl)``.

     Raises:
         SystemExit: with status 1 when ``self.config.word_vocab_pkl``
             does not exist on disk.
     """
     # ``is None`` avoids invoking the cached objects' ``__eq__``.
     if self.word2idx is None or self.idx2word is None:
         if not os.path.exists(self.config.word_vocab_pkl):
             print("Word Vocab PKL missing")
             # Exit non-zero so the failure is visible to the calling shell.
             sys.exit(1)
         print("Loading Word Vocabulary")
         (self.word2idx,
          self.idx2word) = utils.load(self.config.word_vocab_pkl)
     return (self.word2idx, self.idx2word)
Example #9
0
 def getWID2TypeLabels(self):
     """Return the wid -> type-labels data, loading it on first use.

     The loaded object is cached on ``self.wid2TypeLabels``; once it is
     set, later calls return it directly.

     Returns:
         The object produced by
         ``utils.load(self.config.wid2typelabels_vocab_pkl)``.

     Raises:
         SystemExit: with status 1 when
             ``self.config.wid2typelabels_vocab_pkl`` does not exist on disk.
     """
     # ``is None`` avoids invoking the cached object's ``__eq__``.
     if self.wid2TypeLabels is None:
         if not os.path.exists(self.config.wid2typelabels_vocab_pkl):
             print("wid2TypeLabels pkl missing")
             # Exit non-zero so the failure is visible to the calling shell.
             sys.exit(1)
         print("Loading wid2TypeLabels")
         self.wid2TypeLabels = utils.load(
             self.config.wid2typelabels_vocab_pkl)
     return self.wid2TypeLabels
Example #10
0
    def __init__(self,
                 config,
                 vocabloader,
                 num_cands,
                 batch_size,
                 strict_context=True,
                 pretrain_wordembed=True,
                 coherence=True):
        """Load the vocabularies and lookup tables used by the reader.

        Args:
            config: Object exposing the ``cohstringG9_vocab_pkl`` and
                ``crosswikis_pruned_pkl`` paths read below.
            vocabloader: Object providing ``getGloveWordVocab``,
                ``getLabelVocab``, ``getKnwnWidVocab``, ``getWID2Wikititle``
                and ``loadGloveVectors``.
            num_cands: Stored unchanged as ``self.num_cands``.
            batch_size: Stored as ``self.batch_size``; printed with ``%d``.
            strict_context: Stored unchanged as ``self.strict_context``.
            pretrain_wordembed: When truthy, the GloVe vectors are loaded
                into ``self.word2vec``; otherwise ``self.word2vec`` is None.
            coherence: Stored unchanged as ``self.coherence``.
        """
        self.typeOfReader = "inference"
        # NOTE(review): start_word / end_word are not parameters or locals;
        # they are expected to be defined at module level -- confirm.
        self.start_word = start_word
        self.end_word = end_word
        self.unk_word = 'unk'  # In tune with word2vec
        self.unk_wid = "<unk_wid>"
        self.tr_sup = 'tr_sup'
        self.tr_unsup = 'tr_unsup'
        self.pretrain_wordembed = pretrain_wordembed
        self.coherence = coherence
        self.disambiguations = set()

        # Word Vocab
        (self.word2idx, self.idx2word) = vocabloader.getGloveWordVocab()
        self.num_words = len(self.idx2word)

        # Label Vocab
        (self.label2idx, self.idx2label) = vocabloader.getLabelVocab()
        self.num_labels = len(self.idx2label)

        # Known WID Vocab
        (self.knwid2idx, self.idx2knwid) = vocabloader.getKnwnWidVocab()
        self.num_knwn_entities = len(self.idx2knwid)

        # Wid2Wikititle Map and its inverse. When several wids share a
        # title, the last one iterated wins (same as plain assignment).
        self.wid2WikiTitle = vocabloader.getWID2Wikititle()
        self.wikiTitle2Wid = {
            title: wid for wid, title in self.wid2WikiTitle.items()
        }

        # Coherence String Vocab
        print("Loading Coherence Strings Dicts ... ")

        (self.cohG92idx,
         self.idx2cohG9) = utils.load(config.cohstringG9_vocab_pkl)
        self.num_cohstr = len(self.idx2cohG9)

        # Crosswikis
        print("Loading Crosswikis dict. (takes ~2 mins to load)")
        self.crosswikis = utils.load(config.crosswikis_pruned_pkl)
        print("Crosswikis loaded. Size: {}".format(len(self.crosswikis)))

        # Always define the attribute so later reads cannot raise
        # AttributeError when pretrained embeddings are disabled.
        self.word2vec = None
        if self.pretrain_wordembed:
            self.word2vec = vocabloader.loadGloveVectors()
            print("[#] Glove Vectors loaded!")

        self.batch_size = batch_size
        print("[#] Batch Size: %d" % self.batch_size)
        self.num_cands = num_cands
        self.strict_context = strict_context

        print("\n[#]LOADING COMPLETE")