Ejemplo n.º 1
0
 def parse(self, fileName, entitySet, vectorMap):
     """Parse a predicate listing file and fill ``vectorMap`` with its vectors.

     The file is read line by line:

     * a line containing ``'inv idx'`` ends parsing;
     * a line containing ``'predicate'`` starts a new, empty vector that is
       registered through ``vectorMap.put(predicate, vector)``;
     * any other line containing ``': '`` (but not ``'num preds'``) is an
       entry line: its entities are extracted, passed through
       ``Linker.cheapLink`` together with the unpickled entity dictionary,
       and an ``(index, count)`` pair is appended to the current vector,
       where ``index`` comes from ``entitySet.getIndex``.

     The entity dictionary is unpickled from ``entityDictionary.dat``, a
     path relative to the current working directory.

     Args:
         fileName: Path of the listing file to parse.
         entitySet: Object providing ``getIndex(entities)``.
         vectorMap: Object providing ``put(predicate, vector)``.

     Returns:
         The tuple ``(vectorMap, entitySet)``; both are the objects passed in.

     Raises:
         ValueError: If an entry line appears before any predicate line.
     """
     # SECURITY: pickle.load can execute arbitrary code while deserializing.
     # Only run this against an entityDictionary.dat from a trusted source.
     with open("entityDictionary.dat", "rb") as r:
         entDict = pickle.load(r)

     # Built once instead of once per entry line.
     # NOTE(review): assumes cheapLink does not rely on a fresh Linker per
     # call -- confirm against the Linker implementation.
     linker = Linker()

     # None until the first predicate line has been seen.
     currentVector = None

     with open(fileName) as f:
         for line in f:
             if 'inv idx' in line:
                 break
             if 'predicate' in line:
                 predicate = self.extractPredicate(line)
                 currentVector = []
                 # The same list object is stored in the map and then
                 # extended in place by the entry lines that follow.
                 vectorMap.put(predicate, currentVector)
             elif ': ' in line and 'num preds' not in line:
                 if currentVector is None:
                     # Fail before touching entitySet so a malformed file
                     # does not leave a half-applied entry behind.
                     raise ValueError(
                         "entry line found before any predicate line "
                         "in %r: %r" % (fileName, line)
                     )
                 origEntities = self.extractEntities(line)
                 entities = linker.cheapLink(origEntities, entDict)
                 count = self.extractCount(line)
                 index = entitySet.getIndex(entities)
                 currentVector.append((index, count))
     print("done parsing")
     return vectorMap, entitySet