Example #1
0
def testRef():
    """Run getActualReferences on the wiki entries that match known people.

    Loads 'dicts/15_wiki_full.pkl' and 'dicts/people_ed_15.pkl' through
    ``ser.loadDict``, keeps the entries of the former whose 'id' also
    appears as an 'id' in the latter, stores that selection on
    ``ser.ed15`` and passes it to ``getActualReferences`` together with
    the name 'mention_references_15.html'.
    """
    wiki_entries = ser.loadDict('dicts/15_wiki_full.pkl')
    people = ser.loadDict('dicts/people_ed_15.pkl')

    # Collect the ids once so the membership test below is a set lookup.
    people_ids = set(person['id'] for person in people)

    def is_known_person(entry):
        return entry['id'] in people_ids

    ed15 = filter(is_known_person, wiki_entries)
    ser.ed15 = ed15
    getActualReferences(ed15, 'mention_references_15.html')
Example #2
0
                    break
    return res
        

def loadOccupations():
    """Return the set of lower-cased terms listed in the occupation files.

    Reads '../data/occp/occupations' and '../data/occp/nobility' (paths are
    relative to the current working directory), one term per line. Each line
    has its line terminator removed and is lower-cased before being added.
    Blank lines contribute the empty string, as before.

    Returns:
        set: the lower-cased lines of both files.

    Raises:
        IOError/OSError: if either file cannot be opened or read.
    """
    files = ['../data/occp/occupations', '../data/occp/nobility']
    res = set()
    for name in files:
        # `with` guarantees the handle is closed even if reading raises.
        with open(name) as f:
            for line in f:
                # Strip only the line terminator: slicing off the last
                # character would truncate a final line that lacks a
                # trailing newline (e.g. 'poet' -> 'poe').
                res.add(line.rstrip('\r\n').lower())
    return res

if __name__=='__main__':
    # Script entry point: load the inputs through the project's `ser`
    # helper, then run matchPickledArticles over the loaded articles,
    # starting at index `c`, and print the wiki.SHM hit/miss counters.
    dfd = ser.loadDict('dicts/15_dfd.pkl')
    wts = ser.getWikiTitles()

    # Alternative input kept from an earlier run:
    #arts = ser.loadDict('dicts/newcontext_people_9_v2.pkl')
    arts = ser.loadDict('dicts/newcontext_people_15_v1.pkl')

    # Count the leading articles that already have a 'candids' key; the
    # count stops at the first article without one.
    c = 0
    for a in arts:
        if 'candids' not in a:
            break
        c+=1
    # NOTE(review): this hard-coded value overwrites the count computed by
    # the loop above, so the loop's result is never used. It looks like a
    # manual resume point -- confirm it is still intended.
    c = 37000
    print 'starting at', c

    # N15 is defined elsewhere in the file; the text path is presumably the
    # output destination -- verify against matchPickledArticles.
    matchPickledArticles(arts, wts, dfd, N15, 'dicts/newcontext3_people_15.txt', start=c)
    print 'shelve hit', wiki.SHM.hit
    print 'shelve miss', wiki.SHM.miss
Example #3
0
 def load_existing_names(self, name):
     """Return whatever ``ser.loadDict`` loads for ``name``.

     ``name`` is handed to ``ser.loadDict`` unchanged; no instance state
     is read or modified.
     """
     existing = ser.loadDict(name)
     return existing
Example #4
0
def get_wt():
    """Build and return a two-argument scoring function.

    Loads '_data/15_dfd.pkl' via ``ser.loadDict`` and the occupation set
    via ``loadOccupations`` once, then returns a callable ``fn(x, y)``
    that forwards to ``a_m.getTFScore`` with those values, the constant
    81697, ``a_m.getWTFIDFd`` and the module-level ``PARAMS``.
    """
    doc_freqs = ser.loadDict('_data/15_dfd.pkl')
    occupations = loadOccupations()

    def fn(x, y):
        # 81697 is passed through as-is; presumably a corpus size that
        # belongs with the loaded dictionary -- TODO confirm.
        return a_m.getTFScore(
            x, y, doc_freqs, 81697, a_m.getWTFIDFd, PARAMS,
            occp=occupations)

    return fn
Example #5
0
 def __init__(self, exist_brit_path, edition, wikidealer, pickle_path):
     """Initialise the base ``Dealer`` and attach the wiki dealer and articles.

     ``exist_brit_path`` and ``edition`` are forwarded to
     ``Dealer.__init__``. ``wikidealer`` is stored as ``self.wikidealer``,
     and the result of ``ser.loadDict(pickle_path)`` as ``self.arts``.
     """
     Dealer.__init__(self, exist_brit_path, edition)
     self.wikidealer = wikidealer
     # Loaded last, so a failing load leaves the earlier attributes set.
     loaded_articles = ser.loadDict(pickle_path)
     self.arts = loaded_articles