def song_lysic_to_mysql(self):
    """Save one SongLysic record per line of the song lyrics dump file.

    Each line is stripped and split on tab characters: the first field is
    the song id and the second, when present, is the lyric text. A lyric
    equal to the string 'null', or a missing lyric field, is replaced by a
    placeholder text. When processing a line raises, the exception and song
    id are printed and the line (newline removed) is passed to
    odf.write_to_file together with self.error_lysic_file.
    """
    saved = 0
    # The context manager guarantees the dump file is closed, even when the
    # loop is interrupted part-way through.
    with open('../api/data/song_mess/songs_lysics_all.txt', 'r',
              encoding='utf-8') as lyric_file:
        for line in lyric_file:
            fields = line.strip().split('\t')
            songid = ''
            try:
                songid = fields[0]
                if len(fields) > 1:
                    lysic = fields[1]
                    if lysic == 'null':
                        lysic = '暂无歌词提供!'
                else:
                    lysic = '暂无提供歌词!'
                SongLysic(song_id=songid, song_lysic=lysic).save()
                # Progress counter: only successfully saved lines are counted.
                saved += 1
                print(saved)
            except Exception as e:
                print(e)
                print(songid)
                odf.write_to_file(self.error_lysic_file,
                                  line.replace('\n', ''))
    print('歌词信息写入数据库完成!')
def playlist_tag_mess_to_mysql(self):
    """Save one PlayListToTag record per tag of every playlist line.

    Lines of the playlist dump are split on ' |=| '. Field 0 is used as the
    playlist id and field 10 as the tag list; square brackets are removed
    from the tag list, it is split on commas, and single quotes and spaces
    are removed from each tag before saving. Lines with 10 or fewer
    separators' worth of fields, and lines whose processing raises, are
    passed (newline removed) to odf.write_to_file together with
    self.error_playlist_tag_file.
    """
    processed = 0
    # The context manager closes the dump file once the import finishes.
    with open('../api/data/playlist_mess/pl_mess_all.txt', 'r',
              encoding='utf-8') as playlist_file:
        for line in playlist_file:
            try:
                fields = line.split(' |=| ')
                if len(fields) > 10:
                    pl_id = fields[0]
                    tags = fields[10].replace('[', '').replace(']', '')
                    # Splitting a comma-free string yields that string as
                    # the only element, so one loop handles both the
                    # single-tag and the multi-tag case.
                    for tag in tags.split(','):
                        PlayListToTag(
                            pl_id=pl_id,
                            tag=tag.replace("'", "").replace(" ", "")).save()
                else:
                    odf.write_to_file(self.error_playlist_tag_file,
                                      line.replace('\n', ''))
            except Exception as e:
                print(e)
                odf.write_to_file(self.error_playlist_tag_file,
                                  line.replace('\n', ''))
            # Progress counter: every input line is counted.
            processed += 1
            print(processed)
    print('Over!')
def sing_mess_to_mysql(self):
    """Save one Sing record per distinct singer id found in the singer dump.

    Each line is stripped and split on commas. A line must have exactly six
    fields (id, name, music count, MV count, album count, URL); any other
    line is printed and passed to odf.write_to_file together with
    self.error_sing_file. A line whose first field matches an id that was
    already handled is skipped without being counted. Save failures are
    printed and logged to the same error file; the id is still remembered
    so later duplicates are skipped.
    """
    processed = 0
    # A set gives constant-time duplicate checks; the previous list made the
    # import quadratic in the number of singers.
    seen_ids = set()
    # Iterating the file object streams it line by line and the context
    # manager closes it afterwards.
    with open('../api/data/sing_mess/sings_mess_all.txt', 'r',
              encoding='utf-8') as singer_file:
        for line in singer_file:
            fields = line.strip().split(',')
            if fields[0] in seen_ids:
                continue
            if len(fields) == 6:
                (sing_id, sing_name, sing_music_num, sing_mv_num,
                 sing_album_num, sing_url) = fields
                singer = Sing(sing_id=sing_id,
                              sing_name=sing_name,
                              sing_music_num=sing_music_num,
                              sing_mv_num=sing_mv_num,
                              sing_album_num=sing_album_num,
                              sing_url=sing_url)
                try:
                    singer.save()
                except Exception as e:
                    print(e)
                    print(sing_id)
                    odf.write_to_file(self.error_sing_file,
                                      line.replace('\n', ''))
                seen_ids.add(sing_id)
            else:
                print(fields)
                odf.write_to_file(self.error_sing_file,
                                  line.replace('\n', ''))
            processed += 1
            print(processed)
    print('Over!')
def user_sing_prefer_to_mysql(self):
    """Bulk-insert UserSingRec rows from the user/singer preference file.

    Each line is cut at its first and last comma: the text before the first
    comma is the user, the text after the last comma is converted with
    float() as the similarity, and everything in between is the singer (so
    the singer field may itself contain commas). Rows are accumulated and
    handed to UserSingRec.objects.bulk_create whenever the 1-based line
    number is a multiple of 2000, and once more after the loop for the
    remainder. Lines whose processing raises are printed and passed to
    odf.write_to_file with the file name 'error.txt'.
    """
    pending = []
    # The context manager closes the input file when the import is done.
    with open('../rec/data/user_singer_prefer.txt', 'r',
              encoding='utf-8') as prefer_file:
        for line_no, line in enumerate(prefer_file, start=1):
            try:
                first_comma = line.find(',')
                last_comma = line.rfind(',')
                user_id = line[0:first_comma]
                singer = line[first_comma + 1:last_comma]
                sim = line[last_comma + 1:]
                pending.append(
                    UserSingRec(user=user_id, related=singer, sim=float(sim)))
                # Commit to the database once every 2000 input lines.
                if line_no % 2000 == 0:
                    UserSingRec.objects.bulk_create(pending)
                    print('%s-当前数据:%s' % (line_no, line.replace('\n', '')))
                    pending = []
            except Exception as e:
                print(e)
                odf.write_to_file('error.txt', line.replace('\n', ''))
    # The line count is usually not a multiple of 2000, so the remaining rows
    # still have to be written.
    UserSingRec.objects.bulk_create(pending)
    print("写入usersingrec表完毕 !")
def user_user_prefer_to_mysql(self):
    """Bulk-insert UserUserRec rows from the user/user preference file.

    Each line is cut at its first and last comma: the text before the first
    comma is the user, the text between the two commas is the related user,
    and the text after the last comma is converted with float() as the
    similarity. Rows are accumulated and handed to
    UserUserRec.objects.bulk_create whenever the 1-based line number is a
    multiple of 2000, and once more after the loop for the remainder. Lines
    whose processing raises are printed and passed to odf.write_to_file
    with the file name 'error.txt'.
    """
    pending = []
    # The context manager closes the input file when the import is done.
    with open('../rec/data/user_user_prefer.txt', 'r',
              encoding='utf-8') as prefer_file:
        for line_no, line in enumerate(prefer_file, start=1):
            try:
                first_comma = line.find(',')
                last_comma = line.rfind(',')
                user_id1 = line[0:first_comma]
                user_id2 = line[first_comma + 1:last_comma]
                sim = line[last_comma + 1:]
                pending.append(
                    UserUserRec(user=user_id1, related=user_id2,
                                sim=float(sim)))
                # Commit to the database once every 2000 input lines.
                if line_no % 2000 == 0:
                    UserUserRec.objects.bulk_create(pending)
                    print('%s-当前数据:%s' % (line_no, line.replace('\n', '')))
                    pending = []
            except Exception as e:
                print(e)
                odf.write_to_file('error.txt', line.replace('\n', ''))
    # Write whatever is left over from the last, incomplete batch.
    UserUserRec.objects.bulk_create(pending)
    print("写入useruserrec表完毕 !")
def user_tag_mess_to_mysql(self):
    """Save a UserTag record for every tag of every PlayList object.

    For each playlist, pl_tags is split on commas and spaces are removed
    from each piece; every resulting tag is saved with the u_id of the
    playlist's pl_creator. Playlists are counted only when all of their
    tags were saved. On any exception the error is printed and the playlist
    object is passed to odf.write_to_file with self.error_user_tag_file.
    """
    done = 0
    for playlist in PlayList.objects.all():
        try:
            print(playlist)
            cleaned_tags = [
                raw_tag.replace(' ', '')
                for raw_tag in playlist.pl_tags.split(',')
            ]
            for cleaned in cleaned_tags:
                record = UserTag(user_id=playlist.pl_creator.u_id,
                                 tag=cleaned)
                record.save()
            done = done + 1
            print(done)
        except Exception as e:
            print(e)
            # NOTE(review): the playlist object itself (not a string) is
            # handed to write_to_file here - confirm the helper accepts it.
            odf.write_to_file(self.error_user_tag_file, playlist)
    print('Over!')
def filter_id(self):
    """Write out every id in self.song_ids that is missing from the songs file.

    The first ' |+| '-separated field of each stripped line of
    self.songs_mess_all is collected as an already-fetched song id. Every
    entry of self.song_ids that is not among them is passed to
    odf.write_to_file; the target name is self.pl_sing_id_all_file followed
    by 'not_get_ids_<k>.txt', where k is the running count of missing ids
    (after counting the current one) integer-divided by 50000.
    """
    # Build the lookup set once: membership tests are then constant time
    # instead of scanning a list for every candidate id. The context manager
    # closes the file as soon as the ids are collected.
    with open(self.songs_mess_all, 'r', encoding='utf-8') as songs_file:
        fetched_ids = {
            line.strip().split(' |+| ')[0] for line in songs_file
        }

    missing = 0
    for song_id in self.song_ids:
        if song_id in fetched_ids:
            continue
        # The count is printed before it is incremented, so output starts
        # at 0.
        print(missing)
        missing += 1
        odf.write_to_file(
            self.pl_sing_id_all_file
            + 'not_get_ids_%s.txt' % (missing // 50000),
            song_id)
def playlist_sing_mess_to_mysql(self):
    """Save one PlayListToSongs record per (playlist id, song id) pair.

    Each stripped line is expected to hold exactly two tab-separated
    fields: the playlist id and a comma-separated list of song ids. A
    record is saved for every song id; a failing save is printed and the
    pair 'pid,sid' is passed to odf.write_to_file with
    self.error_playlist_sing_file. Lines that do not have exactly two
    fields (including blank lines) are logged to the same error file and
    skipped instead of aborting the whole import.
    """
    processed = 0
    # The context manager closes the mapping file when the import is done.
    with open('../api/data/playlist_mess/pl_song_id.txt', 'r',
              encoding='utf-8') as mapping_file:
        for line in mapping_file:
            row = line.strip()
            parts = row.split('\t')
            if len(parts) == 2:
                pid, sids = parts
                for sid in sids.split(','):
                    try:
                        PlayListToSongs(pl_id=pid, song_id=sid).save()
                    except Exception as e:
                        print(e, pid, sid)
                        odf.write_to_file(self.error_playlist_sing_file,
                                          pid + ',' + sid)
            else:
                # Previously the tuple unpacking raised ValueError here and
                # stopped the import; record the bad line and carry on.
                odf.write_to_file(self.error_playlist_sing_file, row)
            processed += 1
            print(processed)
    print('歌单和歌曲ID对应信息写入完毕!')
def get_sing_mess(self):
    """Fetch the details of every singer in self.sing_ids and write them out.

    For each id, self.get_json(sing_id) is called and the 'artist' entry's
    id, name, musicSize, mvSize, albumSize and picUrl values are joined
    with commas and passed to odf.write_to_file with self.sings_mess_file.
    Ids whose processing raises are printed and recorded in
    self.error_sing_list; after the loop that list is joined with newlines
    and passed to odf.write_to_file with self.error_sing_file.
    """
    print('开始获取每个歌手的信息。。。')
    artist_keys = ('id', 'name', 'musicSize', 'mvSize', 'albumSize', 'picUrl')
    for position, sing_id in enumerate(self.sing_ids, start=1):
        try:
            print('%s-歌手ID:%s' % (position, sing_id))
            artist = self.get_json(sing_id)['artist']
            artist_list = [str(artist[key]) for key in artist_keys]
            odf.write_to_file(self.sings_mess_file, ','.join(artist_list))
        except Exception as e:
            print(e)
            # Record the failed id so the error file written below actually
            # lists it. The membership check avoids a duplicate entry in
            # case self.get_json already recorded the id itself.
            # NOTE(review): assumes self.error_sing_list is a list - confirm.
            failed_id = str(sing_id)
            if failed_id not in self.error_sing_list:
                self.error_sing_list.append(failed_id)
    print('将获取歌手信息错误的id写入文件:%s' % self.error_sing_file)
    # str() keeps the join from failing if non-string ids were recorded.
    odf.write_to_file(self.error_sing_file,
                      '\n'.join(str(one) for one in self.error_sing_list))
    print('歌手信息获取完成。。。')
def user_sim_to_mysql(self):
    """Bulk-insert UserSim rows from the user similarity file.

    Each line is cut at its first and last comma: the text before the first
    comma is the user id, the text between the two commas is the similar
    user id, and the text after the last comma is converted with float() as
    the similarity. Rows are accumulated and handed to
    UserSim.objects.bulk_create whenever the 1-based line number is a
    multiple of 2000, and once more after the loop for the remainder. Lines
    whose processing raises are printed and passed to odf.write_to_file
    with the file name 'error.txt'.
    """
    pending = []
    # The context manager closes the input file when the import is done.
    with open('../tools/data/user_sim.txt', 'r',
              encoding='utf-8') as sim_file:
        for line_no, line in enumerate(sim_file, start=1):
            try:
                first_comma = line.find(',')
                last_comma = line.rfind(',')
                user_id = line[0:first_comma]
                sim_user_id = line[first_comma + 1:last_comma]
                sim = line[last_comma + 1:]
                pending.append(
                    UserSim(user_id=user_id, sim_user_id=sim_user_id,
                            sim=float(sim)))
                # Commit to the database once every 2000 input lines.
                if line_no % 2000 == 0:
                    UserSim.objects.bulk_create(pending)
                    print('%s-当前数据:%s' % (line_no, line.replace('\n', '')))
                    pending = []
            except Exception as e:
                print(e)
                odf.write_to_file('error.txt', line.replace('\n', ''))
    # Write whatever is left over from the last, incomplete batch.
    UserSim.objects.bulk_create(pending)
    print("写入usersim表完毕 !")
def song_sim_to_mysql(self):
    """Bulk-insert SongSim rows from the song similarity file.

    Each line is cut at its first and last comma: the text before the first
    comma is the song id, the text between the two commas is the similar
    song id, and the text after the last comma is converted with float() as
    the similarity. Rows are accumulated and handed to
    SongSim.objects.bulk_create whenever the 1-based line number is a
    multiple of 2000, and once more after the loop for the remainder. Lines
    whose processing raises are printed and passed to odf.write_to_file
    with the file name 'error.txt'.
    """
    pending = []
    # The context manager closes the input file when the import is done.
    with open('../tools/data/song_sim.txt', 'r',
              encoding='utf-8') as sim_file:
        for line_no, line in enumerate(sim_file, start=1):
            try:
                first_comma = line.find(',')
                last_comma = line.rfind(',')
                song_id = line[0:first_comma]
                sim_song_id = line[first_comma + 1:last_comma]
                sim = line[last_comma + 1:]
                pending.append(
                    SongSim(song_id=song_id, sim_song_id=sim_song_id,
                            sim=float(sim)))
                # Commit to the database once every 2000 input lines.
                if line_no % 2000 == 0:
                    SongSim.objects.bulk_create(pending)
                    print('%s-当前数据:%s' % (line_no, line.replace('\n', '')))
                    pending = []
            except Exception as e:
                print(e)
                odf.write_to_file('error.txt', line.replace('\n', ''))
    # Write whatever is left over from the last, incomplete batch.
    SongSim.objects.bulk_create(pending)
    print("写入songsim表完毕 !")
def get_every_playlist_mess(self):
    """Fetch and store the details of every playlist id in self.ids_list.

    Ids are popped from self.ids_list until it is empty. For each id the
    URL self.url + str(id) is requested and the decoded JSON is passed to
    self.get_format_playlist_mess. When that raises, the error and
    traceback are printed, the id is appended to self.error_id and the loop
    moves on. Afterwards the collected error ids are joined with commas and
    passed to odf.write_to_file with self.error_id_file.
    """
    print('获取每个歌单的具体信息。。。')
    attempt = 0
    while self.ids_list:
        attempt += 1
        pl_id = self.ids_list.pop()
        url = self.url + str(pl_id)
        try:
            print("%s - 歌单ID为:%s" % (attempt, pl_id))
            r = requests.get(url)
            # Parse the response and write the extracted records.
            self.get_format_playlist_mess(r.json())
        except Exception as e:
            # Record the failing id and skip it so one bad playlist does not
            # stop the whole crawl.
            print(e)
            traceback.print_exc()
            print("歌单ID为:%s 获取出错,进行记录" % pl_id)
            self.error_id.append(pl_id)
    # Ids are converted with str() because they are not guaranteed to be
    # strings (the URL above is built with str(pl_id) for the same reason);
    # a plain join would raise TypeError and lose the error record.
    odf.write_to_file(self.error_id_file,
                      ",".join(str(one) for one in self.error_id))
    print("歌单信息获取完毕,写入文件: %s" % self.playlist_file)
def get_song_mess(self):
    """Fetch details, comment counts, file info and lyrics for each song id.

    For every id in self.song_ids four URLs are requested (built from
    self.detail_url, self.comments_url, self.song_url and self.lyric_url).
    The detail, comment and file fields are joined with ' |+| ' and passed
    to odf.write_to_file with self.songs_mess_all; the song id and lyric,
    separated by a tab, go to self.songs_lysics_all. Missing comment data
    falls back to '0', '0' and a missing lyric to 'null'. Any other failure
    is printed and the id is appended to self.error_ids, which is written
    to self.error_ids_file at the end when it is not empty.
    """
    print('开始获取歌曲信息。。。')
    fetched = 0
    for song_id in self.song_ids:
        try:
            print('%s-歌曲ID:%s' % (fetched, song_id))
            # detail => id, name, album id [al], publish time [publishTime],
            # artist info [ar]
            url_1 = self.detail_url + str(song_id)
            res_1_json = requests.get(url_1).json()['songs'][0]
            url_1_list = [
                str(res_1_json['id']),
                str(res_1_json['name']),
                str(res_1_json['al']['id']),
                str(res_1_json['publishTime']),
                '#'.join([str(one['id']) for one in res_1_json['ar']])
            ]
            # comments => total comment count, hot comment count
            url_2 = self.comments_url + str(song_id)
            res_2_json = requests.get(url_2).json()
            try:
                url_2_list = [
                    str(res_2_json['total']),
                    str(len(res_2_json['hotComments']))
                ]
            except Exception:
                # "except Exception" rather than a bare except, so that
                # KeyboardInterrupt / SystemExit still stop the crawl.
                url_2_list = ['0', '0']
            # song => size, song URL
            url_3 = self.song_url + str(song_id)
            res_3_json = requests.get(url_3).json()['data'][0]
            url_3_list = [str(res_3_json['size']), str(res_3_json['url'])]
            # lyric => lyrics text, with newlines escaped so that the lyric
            # stays on a single output line
            try:
                url_4 = self.lyric_url + str(song_id)
                lysic = requests.get(url_4).json()['lrc']['lyric']
                lysic = lysic.replace('\n', '\\n')
            except Exception:
                lysic = 'null'
            odf.write_to_file(
                self.songs_mess_all,
                ' |+| '.join(url_1_list + url_2_list + url_3_list))
            odf.write_to_file(self.songs_lysics_all,
                              str(res_1_json['id']) + '\t' + lysic)
            # Only successfully written songs advance the counter.
            fetched += 1
        except Exception as e:
            print('error:%s' % e)
            self.error_ids.append(song_id)
    # If any songs failed, write their ids to the error file.
    if self.error_ids:
        print('将获取错误的歌曲id写入文件:%s' % self.error_ids_file)
        # str() keeps the join from raising TypeError on non-string ids.
        odf.write_to_file(self.error_ids_file,
                          ','.join(str(one) for one in self.error_ids))
    print('歌曲信息获取完成。。。')
def playlist_mess_to_mysql(self):
    """Save one PlayList record per valid line of the playlist dump file.

    Lines are split on ' |=| ' and must yield exactly 13 fields: id,
    creator id, name, create time, update time, song count, listen count,
    share count, comment count, follow count, tags, image URL and
    description. The creator is looked up as the first User whose u_id
    matches. The two times are converted with int(), abs(), a division by
    1000 and ot.transform_time. Lines with a different field count, a
    'null' create time, or whose processing raises are passed (newline
    removed) to odf.write_to_file with self.error_playlist_file.
    """
    processed = 0
    # Iterating the file object streams it line by line and the context
    # manager closes it afterwards.
    with open('../api/data/playlist_mess/pl_mess_all.txt', 'r',
              encoding='utf-8') as playlist_file:
        for line in playlist_file:
            fields = line.split(' |=| ')
            # Require exactly 13 fields: a line with extra separators used
            # to pass the "< 13" check and then abort the import when the
            # unpacking below raised ValueError outside any try block.
            if len(fields) != 13 or fields[3].lower() == 'null':
                odf.write_to_file(self.error_playlist_file,
                                  line.replace('\n', ''))
            else:
                (pl_id, pl_creator, pl_name, pl_create_time, pl_update_time,
                 pl_songs_num, pl_listen_num, pl_share_num, pl_comment_num,
                 pl_follow_num, pl_tags, pl_img_url, pl_desc) = fields
                try:
                    # Raises IndexError (handled below) when the creator is
                    # not present in the User table.
                    user = User.objects.filter(u_id=pl_creator)[0]
                    pl = PlayList(
                        pl_id=pl_id,
                        pl_creator=user,
                        pl_name=pl_name,
                        pl_create_time=ot.transform_time(
                            abs(int(pl_create_time)) / 1000),
                        pl_update_time=ot.transform_time(
                            abs(int(pl_update_time)) / 1000),
                        pl_songs_num=int(pl_songs_num),
                        pl_listen_num=int(pl_listen_num),
                        pl_share_num=int(pl_share_num),
                        pl_comment_num=int(pl_comment_num),
                        pl_follow_num=int(pl_follow_num),
                        pl_tags=pl_tags.replace("[", "").replace(
                            "]", "").replace("'", ""),
                        pl_img_url=pl_img_url,
                        # NOTE(review): the line is not stripped, so the
                        # description keeps its trailing newline - confirm
                        # that this is intended.
                        pl_desc=pl_desc)
                    pl.save()
                except Exception as e:
                    print(e)
                    odf.write_to_file(self.error_playlist_file,
                                      line.replace('\n', ''))
            # Progress counter: every input line is counted.
            processed += 1
            print(processed)
    print('Over!')
def get_format_playlist_mess(self, json_line):
    """Extract creator, playlist and track-id records from one response.

    Three records are built from json_line['playlist'] and each is passed
    to odf.write_to_file in turn: the creator fields (target
    self.creator_mess + 'user_mess_all.txt'), the playlist fields (target
    self.playlist_mess + 'pl_mess_all.txt'), both joined with ' |=| ', and
    the playlist id followed by a tab and its comma-joined track ids
    (target self.trackid_mess + 'pl_sing_id_1.txt'). A missing key raises
    KeyError; records written before that point are not undone.
    """
    field_sep = ' |=| '

    # Creator record: user id, nickname, birthday, gender, province, city,
    # user type, expert tags, avatar URL, auth status, account status,
    # djStatus, vipType, signature.
    owner = json_line['playlist']['creator']
    owner_keys = (
        'userId', 'nickname', 'birthday', 'gender', 'province', 'city',
        'userType', 'expertTags', 'avatarUrl', 'authStatus',
        'accountStatus', 'djStatus', 'vipType',
    )
    owner_fields = [str(owner[key]) for key in owner_keys]
    # Newlines inside the signature are replaced so the record stays on one
    # line.
    owner_fields.append(str(owner['signature']).replace('\n', '无签名'))
    odf.write_to_file(self.creator_mess + 'user_mess_all.txt',
                      field_sep.join(owner_fields))

    # Playlist record: playlist id, creator id, name, create time, update
    # time, track count, play count, share count, comment count, subscriber
    # count, tags, cover image URL, description.
    playlist = json_line['playlist']
    playlist_fields = [
        str(playlist['id']),
        str(playlist['userId']),
        str(playlist['name']).replace('\n', ''),
    ]
    plain_keys = (
        'createTime', 'updateTime', 'trackCount', 'playCount', 'shareCount',
        'commentCount', 'subscribedCount', 'tags', 'coverImgUrl',
    )
    playlist_fields.extend(str(playlist[key]) for key in plain_keys)
    playlist_fields.append(
        str(playlist['description']).replace('\n', '无描述'))
    odf.write_to_file(self.playlist_mess + 'pl_mess_all.txt',
                      field_sep.join(playlist_fields))

    # Ids of the songs contained in the playlist.
    joined_track_ids = ','.join(
        str(track['id']) for track in json_line['playlist']['trackIds'])
    odf.write_to_file(self.trackid_mess + 'pl_sing_id_1.txt',
                      str(playlist['id']) + '\t' + joined_track_ids)
def song_mess_to_mysql(self):
    """Save one Song record per valid line of the song dump file.

    Lines are split on ' |+| ' and must yield exactly nine fields: song id,
    name, album/playlist id, publish time, singer id(s), total comments,
    hot comments, size and URL (the size is not stored). The publish time
    is converted with int(), abs(), a division by 1000 and
    ot.transform_time. Lines with a different field count, a 'null' publish
    time, or whose record cannot be built or saved are passed (newline
    removed) to odf.write_to_file with self.error_song_file. Lines skipped
    for a 'null' publish time are not counted in the printed progress.
    """
    processed = 0
    # The context manager closes the dump file when the import is done.
    with open('../api/data/song_mess/songs_mess_all.txt', 'r',
              encoding='utf-8') as song_file:
        for line in song_file:
            fields = line.split(' |+| ')
            if len(fields) == 9:
                (song_id, song_name, song_pl_id, song_publish_time,
                 song_sing_id, song_total_comments, song_hot_comments,
                 _size, song_url) = fields
                if song_publish_time.lower() == 'null':
                    odf.write_to_file(self.error_song_file,
                                      line.replace('\n', ''))
                    continue
                try:
                    # Building the record is inside the try so that a
                    # non-numeric publish time is logged instead of aborting
                    # the whole import.
                    # NOTE(review): the line is not stripped, so song_url
                    # keeps its trailing newline - confirm this is intended.
                    Song(song_id=song_id,
                         song_name=song_name,
                         song_pl_id=song_pl_id,
                         song_publish_time=ot.transform_time(
                             abs(int(song_publish_time)) / 1000),
                         song_sing_id=song_sing_id,
                         song_total_comments=song_total_comments,
                         song_hot_comments=song_hot_comments,
                         song_url=song_url).save()
                except Exception as e:
                    print(e)
                    print(song_id)
                    # Failures go to the song error file like every other
                    # error path in this method (they used to be written to
                    # the lyric error file).
                    odf.write_to_file(self.error_song_file,
                                      line.replace('\n', ''))
            else:
                odf.write_to_file(self.error_song_file,
                                  line.replace('\n', ''))
            processed += 1
            print(processed)
    print('Over!')
def user_mess_to_mysql(self):
    """Save one User record per distinct user id in the user dump file.

    Lines are split on ' |=| ' and must yield exactly 14 fields: id, name,
    birthday, gender, province, city, type, tags, image URL, auth status,
    account status, dj status, vip type and signature. Lines with a
    different field count or a 'null' birthday are passed (newline removed)
    to odf.write_to_file with self.error_user_file; repeated user ids are
    skipped silently. A signature consisting only of a newline is replaced
    by a default text. When saving fails, the save is retried once with a
    fallback signature and the line is logged to the error file either way.
    """
    # A set gives constant-time duplicate checks; the previous list made the
    # import quadratic in the number of users.
    seen_uids = set()
    # Iterating the file object streams it line by line and the context
    # manager closes it afterwards.
    with open('../api/data/user_mess/user_mess_all.txt', 'r',
              encoding='utf-8') as user_file:
        for line in user_file:
            fields = line.split(' |=| ')
            # Require exactly 14 fields: a line with extra separators used
            # to pass the "< 14" check and then abort the import when the
            # unpacking below raised ValueError.
            if len(fields) != 14:
                odf.write_to_file(self.error_user_file,
                                  line.replace('\n', ''))
                continue
            (u_id, u_name, u_birthday, u_gender, u_province, u_city, u_type,
             u_tags, u_img_url, u_auth_status, u_account_status,
             u_dj_status, u_vip_type, u_sign) = fields
            if u_birthday.lower() == 'null':
                odf.write_to_file(self.error_user_file,
                                  line.replace('\n', ''))
                continue
            if u_id in seen_uids:
                continue
            seen_uids.add(u_id)

            # Everything except the signature is shared by the first attempt
            # and the retry. A value that cannot be converted is logged
            # here; retrying would only fail in the same way.
            try:
                common = dict(
                    u_id=u_id,
                    u_name=u_name,
                    u_birthday=ot.transform_time(
                        abs(float(int(u_birthday) / 1000))),
                    u_gender=int(u_gender),
                    u_province=u_province,
                    u_city=u_city,
                    u_type=u_type,
                    u_tags=u_tags.replace("[", "").replace("]", ""),
                    u_img_url=u_img_url,
                    u_auth_status=u_auth_status,
                    u_account_status=u_account_status,
                    u_dj_status=u_dj_status,
                    u_vip_type=u_vip_type)
            except Exception as e:
                print('Error:{},{}'.format(u_id, e))
                odf.write_to_file(self.error_user_file,
                                  line.replace('\n', ''))
                continue

            try:
                # The line is not stripped, so an empty signature shows up
                # as a lone newline.
                User(u_sign='我就是我是颜色不一样的花火!'
                     if u_sign == "\n" else u_sign,
                     **common).save()
            except Exception as e:
                # Retry once with a fallback signature. The retry is guarded
                # so that a second failure is reported instead of aborting
                # the whole import.
                try:
                    User(u_sign='纵有诗论满腹,却道不尽这魏巍河山!',
                         **common).save()
                except Exception as retry_error:
                    print('Error:{},{}'.format(u_id, retry_error))
                print('Error:{},{}'.format(u_id, e))
                odf.write_to_file(self.error_user_file,
                                  line.replace('\n', ''))
    print('Over!')