Exemple #1
0
def getEasyRec():
    """Write the simplified rating file and dump the movie ids as JSON.

    Reads ``Orginal_rating_path`` and writes one
    ``(int(uid), int(iid), int(float(rating)))`` triple per row to
    ``Easy_rating_path``; then reads ``Orginal_movie_path`` and dumps the
    first field of every row to ``Easy_movie_json``.

    The field separator depends on ``fp.ROOT_DIR_NAME``:
    ``'::'`` for ``'ml-1m'``, ``','`` for ``'ml-latest-small'``; for
    ``'ml-100k'`` the rating file is read with ``sep=None`` and the movie
    file with ``'|'``. For ``'ml-latest-small'`` the first row of *each*
    file is skipped (presumably a CSV header -- confirm against the data).
    """
    encoding = 'ISO-8859-1'
    dataset = fp.ROOT_DIR_NAME

    sep = None
    if dataset == 'ml-1m':
        sep = '::'
    elif dataset == 'ml-latest-small':
        sep = ','

    # Track the row index per file. A single shared counter would already be
    # non-zero after the rating file, so the first row of the movie file
    # would never be skipped and would end up in the id list.
    has_header = dataset == 'ml-latest-small'

    with open(Easy_rating_path, 'w+', encoding='utf-8') as f:
        ratings = tqdm(osUtils.readFile(Orginal_rating_path, sep, encoding))
        for row_no, (uid, iid, rating, _) in enumerate(ratings):
            if has_header and row_no == 0:
                continue
            # int(float(...)) drops any fractional part of the rating.
            osUtils.writeTripleLine(int(uid), int(iid), int(float(rating)), f)

    # Only the movie file of this dataset uses '|'; the rating file above
    # was read with sep=None.
    if dataset == 'ml-100k':
        sep = '|'

    moiveids = []
    movies = tqdm(osUtils.readFile(Orginal_movie_path, sep, encoding))
    for row_no, line in enumerate(movies):
        if has_header and row_no == 0:
            continue
        moiveids.append(line[0])
    osUtils.dumpJson(moiveids, Easy_movie_json)
Exemple #2
0
def getAllRelations():
    """Copy the first field of every ``paris_count_tsv`` row into two files.

    ``all_relations_tsv`` and ``all_relations_tsv_chose`` receive the same
    content, written in that order; the source is re-read for each target.
    """
    print('get all relations')
    for target_path in (all_relations_tsv, all_relations_tsv_chose):
        with open(target_path, 'w+') as out:
            for row in osUtils.readFile(paris_count_tsv):
                osUtils.writeSinalLine(row[0], out)
Exemple #3
0
def getKgfile():
    """Write the filtered knowledge-graph triples to ``Kg_tsv``.

    A triple from ``FB_movies`` is kept when its relation is listed (first
    field) in ``ALL_relations_chose`` and its head or tail is one of the
    values stored in ``Link_json``.
    """
    print('scan_kg')

    relations_chose = set()
    for row in osUtils.readFile(ALL_relations_chose):
        relations_chose.add(row[0])

    link_map = osUtils.getJson(Link_json)
    freebase_movies = set(link_map.values())

    with open(Kg_tsv, 'w+') as out:
        for head, rel, tail in tqdm(osUtils.readTriple(FB_movies)):
            touches_movie = head in freebase_movies or tail in freebase_movies
            if rel in relations_chose and touches_movie:
                osUtils.writeTripleLine(head, rel, tail, out)
Exemple #4
0
def scanMovies():
    """Dump to ``Link_json`` the links of the movies listed in ``Easy_movie_json``.

    ``ALL_link_file`` supplies (key, value) pairs; only the pairs whose key
    appears among the ids loaded from ``Easy_movie_json`` are kept.
    """
    print('scan_movie')
    all_links = {src: dst for src, dst in osUtils.readFile(ALL_link_file)}
    mids = osUtils.getJson(Easy_movie_json)
    inner_links = {
        mid: all_links[mid]
        for mid in tqdm(mids)
        if mid in all_links
    }
    osUtils.dumpJson(inner_links, Link_json)
Exemple #5
0
def scanEntitys():
    """Write the names and types of every entity that occurs in ``Kg_tsv``.

    Entities are the heads and tails of the triples in ``Kg_tsv``. For each
    entity with a non-empty entry in ``ALL_names`` a ``entity<TAB>name`` line
    is written to ``E_names``; likewise ``entity<TAB>type`` lines go to
    ``E_types`` for entities with a non-empty entry in ``ALL_types``.
    """
    print('write_entitys')
    a_names, a_types = {}, {}
    entitys = set()
    for h, n in osUtils.readFile(ALL_names):
        a_names[h] = n
    for h, t in osUtils.readFile(ALL_types):
        a_types[h] = t
    for h, _, t in tqdm(osUtils.readTriple(Kg_tsv)):
        entitys.add(h)
        entitys.add(t)

    # Context managers guarantee both outputs are flushed and closed, even
    # if a write fails part-way through.
    with open(E_names, 'w+', encoding='utf-8') as name_file, \
            open(E_types, 'w+', encoding='utf-8') as type_file:
        for e in tqdm(entitys):
            name = a_names.get(e)
            if name:
                name_file.write(e + '\t' + name + '\n')
            # Named entity_type to avoid shadowing the builtin ``type``.
            entity_type = a_types.get(e)
            if entity_type:
                type_file.write(e + '\t' + entity_type + '\n')
Exemple #6
0
def __getDeleteRelations():
    """Return the set of first fields whose third field is below 20000.

    Rows come from ``FB_movie_paris_count_tsv``; the third field is parsed
    with ``int`` (presumably an occurrence count -- confirm against the file).
    """
    return {
        row[0]
        for row in osUtils.readFile(FB_movie_paris_count_tsv)
        if int(row[2]) < 20000
    }