def getEasyRec():
    """Write the simplified rating file and dump the list of movie ids.

    Reads ``Orginal_rating_path`` and writes every row to
    ``Easy_rating_path`` as an integer ``(uid, iid, rating)`` triple, then
    collects the first column of ``Orginal_movie_path`` and dumps it to
    ``Easy_movie_json``.

    The field separator depends on ``fp.ROOT_DIR_NAME``: ``'::'`` for
    ``ml-1m``, ``','`` for ``ml-latest-small``, and for ``ml-100k`` ``None``
    for the rating file and ``'|'`` for the movie file.

    For ``ml-latest-small`` the first row of *each* file is skipped as a
    header. The previous implementation shared one counter between both
    loops, so the movie file's header row was never skipped and its first
    field ended up in the dumped id list.
    """
    encoding = 'ISO-8859-1'
    dataset = fp.ROOT_DIR_NAME
    # Only this dataset is treated as having a header row to skip.
    has_header = dataset == 'ml-latest-small'

    sep = None
    if dataset == 'ml-1m':
        sep = '::'
    elif dataset == 'ml-latest-small':
        sep = ','

    with open(Easy_rating_path, 'w+', encoding='utf-8') as f:
        rating_rows = tqdm(osUtils.readFile(Orginal_rating_path, sep, encoding))
        for index, row in enumerate(rating_rows):
            if has_header and index == 0:
                continue
            uid, iid, rating, _ = row
            # Ratings may be written as floats (e.g. "4.0"); truncate to int.
            osUtils.writeTripleLine(int(uid), int(iid), int(float(rating)), f)

    # The ml-100k movie file uses a different separator than its rating file.
    if dataset == 'ml-100k':
        sep = '|'

    movie_rows = tqdm(osUtils.readFile(Orginal_movie_path, sep, encoding))
    movie_ids = [
        row[0]
        for index, row in enumerate(movie_rows)
        if not (has_header and index == 0)
    ]
    osUtils.dumpJson(movie_ids, Easy_movie_json)
def getAllRelations():
    """Copy the first column of ``paris_count_tsv`` into both relation files.

    ``all_relations_tsv`` and ``all_relations_tsv_chose`` each receive one
    line per row of ``paris_count_tsv``, written via
    ``osUtils.writeSinalLine``. The source file is read once per target.
    """
    print('get all relations')

    def _write_first_column(target_path):
        # Truncate/create the target and emit the first field of every row.
        with open(target_path, 'w+') as out:
            for row in osUtils.readFile(paris_count_tsv):
                osUtils.writeSinalLine(row[0], out)

    _write_first_column(all_relations_tsv)
    _write_first_column(all_relations_tsv_chose)
def getKgfile():
    """Filter the triples of ``FB_movies`` into ``Kg_tsv``.

    A triple ``(h, r, t)`` is kept when its relation appears in the first
    column of ``ALL_relations_chose`` and its head or tail is one of the
    values stored in the ``Link_json`` mapping.
    """
    print('scan_kg')

    relations_chose = set()
    for row in osUtils.readFile(ALL_relations_chose):
        relations_chose.add(row[0])

    link_map = osUtils.getJson(Link_json)
    freebase_movies = set(link_map.values())

    with open(Kg_tsv, 'w+') as out:
        for head, rel, tail in tqdm(osUtils.readTriple(FB_movies)):
            if rel in relations_chose and (
                head in freebase_movies or tail in freebase_movies
            ):
                osUtils.writeTripleLine(head, rel, tail, out)
def scanMovies():
    """Dump the subset of links whose key is a known movie id.

    Builds a mapping from the two-column rows of ``ALL_link_file``, keeps
    only the entries whose key appears in the id list loaded from
    ``Easy_movie_json``, and writes the result to ``Link_json``.
    """
    print('scan_movie')

    # Later rows win when a key is repeated, as with plain assignment.
    all_links = {src: dst for src, dst in osUtils.readFile(ALL_link_file)}

    movie_ids = osUtils.getJson(Easy_movie_json)
    inner_links = {
        movie_id: all_links[movie_id]
        for movie_id in tqdm(movie_ids)
        if movie_id in all_links
    }

    osUtils.dumpJson(inner_links, Link_json)
def scanEntitys():
    """Write name and type tables for every entity found in ``Kg_tsv``.

    Loads the two-column lookups ``ALL_names`` and ``ALL_types``, collects
    every head and tail from the triples in ``Kg_tsv``, and writes
    tab-separated ``entity<TAB>name`` lines to ``E_names`` and
    ``entity<TAB>type`` lines to ``E_types`` (both UTF-8). Entities with a
    missing or empty name/type are omitted from the respective file.

    Both output files are opened in a ``with`` block so they are flushed
    and closed even if writing fails; the previous version never closed
    them.
    """
    print('write_entitys')

    a_names = {h: n for h, n in osUtils.readFile(ALL_names)}
    a_types = {h: t for h, t in osUtils.readFile(ALL_types)}

    entitys = set()
    for h, _, t in tqdm(osUtils.readTriple(Kg_tsv)):
        entitys.add(h)
        entitys.add(t)

    with open(E_names, 'w+', encoding='utf-8') as name_file, \
            open(E_types, 'w+', encoding='utf-8') as type_file:
        for e in tqdm(entitys):
            name = a_names.get(e)
            if name:
                name_file.write(e + '\t' + name + '\n')
            # Named entity_type to avoid shadowing the builtin ``type``.
            entity_type = a_types.get(e)
            if entity_type:
                type_file.write(e + '\t' + entity_type + '\n')
def __getDeleteRelations():
    """Return the set of first-column values whose third column is < 20000.

    Rows come from ``FB_movie_paris_count_tsv``; the third field is parsed
    with ``int`` before the comparison.
    """
    return {
        row[0]
        for row in osUtils.readFile(FB_movie_paris_count_tsv)
        if int(row[2]) < 20000
    }