def testRef():
    """Run getActualReferences over the wiki entries that match known people.

    Loads 'dicts/15_wiki_full.pkl' and 'dicts/people_ed_15.pkl' through
    ``ser.loadDict``, keeps the entries of the former whose ``'id'`` also
    occurs as an ``'id'`` in the latter, stores that selection on
    ``ser.ed15`` and hands it to ``getActualReferences`` together with the
    name 'mention_references_15.html'.
    """
    wiki_entries = ser.loadDict('dicts/15_wiki_full.pkl')
    people = ser.loadDict('dicts/people_ed_15.pkl')

    # Collect the ids once so each membership test below is a set lookup.
    person_ids = set(person['id'] for person in people)

    def is_known_person(entry):
        return entry['id'] in person_ids

    selected = filter(is_known_person, wiki_entries)

    # The selection is also published on the ``ser`` module for later use.
    ser.ed15 = selected
    getActualReferences(selected, 'mention_references_15.html')
# NOTE(review): this span opens with the tail of a definition whose start is
# outside the visible source. Its tokens are recorded verbatim below and were
# not rewritten, because the enclosing definition cannot be reconstructed
# from this view:
#     break return res


def loadOccupations():
    """Return the set of lower-cased lines found in the occupation lists.

    Reads '../data/occp/occupations' and '../data/occp/nobility' (both
    relative to the current working directory) and gathers every line,
    lower-cased and without its line terminator, into a single set.

    Returns:
        set: the distinct lower-cased lines of both files. A blank line
        contributes the empty string.

    Raises:
        IOError (OSError on Python 3): if either file cannot be opened.
    """
    files = ['../data/occp/occupations', '../data/occp/nobility']
    res = set()
    for name in files:
        # ``with`` closes the handle even if reading fails part-way through.
        with open(name) as f:
            for line in f:
                # Remove only the line terminator: blindly slicing off the
                # last character would truncate a final line that has no
                # trailing newline.
                res.add(line.rstrip('\r\n').lower())
    return res


if __name__ == '__main__':
    dfd = ser.loadDict('dicts/15_dfd.pkl')
    wts = ser.getWikiTitles()
    #arts = ser.loadDict('dicts/newcontext_people_9_v2.pkl')
    arts = ser.loadDict('dicts/newcontext_people_15_v1.pkl')

    # Count the leading articles that already carry a 'candids' key.
    c = 0
    for a in arts:
        if 'candids' not in a:
            break
        c += 1

    # NOTE(review): this hard-coded value discards the count computed just
    # above -- confirm that the override is still intended.
    c = 37000

    # Single-argument print form: same output on Python 2, valid on Python 3.
    print('starting at %s' % (c,))
    matchPickledArticles(arts, wts, dfd, N15,
                         'dicts/newcontext3_people_15.txt', start=c)
    print('shelve hit %s' % (wiki.SHM.hit,))
    print('shelve miss %s' % (wiki.SHM.miss,))
def load_existing_names(self, name):
    """Return the object that ``ser.loadDict`` produces for ``name``.

    Args:
        name: passed through unchanged to ``ser.loadDict``.

    Returns:
        Whatever ``ser.loadDict(name)`` returns.
    """
    existing = ser.loadDict(name)
    return existing
def get_wt():
    """Build and return a two-argument scoring callable.

    Loads '_data/15_dfd.pkl' through ``ser.loadDict`` and the occupation set
    through ``loadOccupations`` once, then returns a function ``fn(x, y)``
    that forwards to ``a_m.getTFScore`` with those two values bound.

    Returns:
        A callable taking ``(x, y)`` and returning the result of
        ``a_m.getTFScore(x, y, <loaded dfd>, 81697, a_m.getWTFIDFd, PARAMS,
        occp=<occupation set>)``.
    """
    loaded_dfd = ser.loadDict('_data/15_dfd.pkl')
    occupation_terms = loadOccupations()

    def fn(x, y):
        # NOTE(review): 81697 is passed as a bare constant; its meaning is
        # not visible from here -- confirm against a_m.getTFScore.
        return a_m.getTFScore(x, y, loaded_dfd, 81697, a_m.getWTFIDFd,
                              PARAMS, occp=occupation_terms)

    return fn
def __init__(self, exist_brit_path, edition, wikidealer, pickle_path):
    """Set up the base ``Dealer`` state, then attach wiki dealer and articles.

    Args:
        exist_brit_path: forwarded to ``Dealer.__init__``.
        edition: forwarded to ``Dealer.__init__``.
        wikidealer: stored as ``self.wikidealer``.
        pickle_path: passed to ``ser.loadDict``; the result is stored as
            ``self.arts``.
    """
    # Base-class initialisation runs before this class adds its attributes.
    Dealer.__init__(self, exist_brit_path, edition)

    self.wikidealer = wikidealer

    loaded_articles = ser.loadDict(pickle_path)
    self.arts = loaded_articles