def download_one_playlist(self, playlist):
    """Download the tracks of one playlist.

    Creates ``<playlist.root_dir>/<playlist.id>``, requests the playlist
    detail from ``self.server`` and passes a ``Music`` object for every track
    to ``self.download_one_song``.  ``self.thread_num`` is incremented once
    per track handed over.

    Args:
        playlist: object providing ``root_dir``, ``id`` and ``tag``.

    Returns:
        None.  A failed request is logged and the playlist is skipped.
    """
    pl_dir = "{}/{}".format(playlist.root_dir, playlist.id)
    log.info("mkdir={}".format(pl_dir))
    # Create the directory directly instead of building a shell command:
    # no quoting problems with spaces or shell metacharacters in the path.
    try:
        os.makedirs(pl_dir)
    except OSError as e:
        # An already existing directory is fine (same as `mkdir -p`).
        if not os.path.isdir(pl_dir):
            log.error("mkdir fail dir={} err={}".format(pl_dir, e))

    uri = "{}/playlist/detail?id={}".format(self.server, playlist.id)
    try:
        # A timeout keeps a stalled server from blocking the download forever.
        resp = requests.get(uri, timeout=60)
        response = resp.json()
        print(response)
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C / SystemExit still work.
        log.error("get playlist detail fail uri={} err={}".format(uri, e))
        print(e)
        return None

    if "playlist" not in response or "tracks" not in response["playlist"]:
        return None

    for count, track in enumerate(response["playlist"]["tracks"], 1):
        song_id = track["id"]
        song_name = track["name"]
        m = Music(song_id, song_name, playlist.tag, playlist)
        print("music name={} id={}".format(song_name, song_id))
        self.thread_num += 1
        self.download_one_song(m)
        # NOTE(review): this cut-off lets song_limit + 1 tracks through, as
        # the original did; confirm whether `>=` was intended.
        if count > self.song_limit:
            break
def save_db(self):
    """Write this song to the ``song`` table and register it with its artists.

    At most 30 artist ids are stored (joined with ``;``) and the lyric is
    truncated in place to 1500 characters before the row is written with
    ``replace into``.  Afterwards ``self.save_artist`` is called for every
    artist in ``self.artists``.
    """
    print("save db")
    log.debug("save db id={}".format(self.id))

    aids = [str(x.id) for x in self.artists]
    if len(aids) > 30:
        log.warn("music id={} aids to long={}".format(self.id, aids))
        aids = aids[:30]
    # join() of an empty list already yields "", no special case needed.
    aids_str = ";".join(aids)

    if len(self.lyric) > 1500:
        log.warn("music id={} lyric too long={}".format(
            self.id, self.lyric))
        self.lyric = self.lyric[:1500]

    # Every interpolated value is escaped; the original left id and tag raw,
    # so a quote character in either one broke (or altered) the statement.
    song_id = MySQLdb.escape_string("{}".format(self.id))
    sql = ("replace into song(id, netid, name, artists, tag, lyric) "
           "values('{}', '{}', '{}', '{}', '{}', '{}') ").format(
               song_id,
               song_id,
               MySQLdb.escape_string(self.name),
               MySQLdb.escape_string(aids_str),
               MySQLdb.escape_string("{}".format(self.tag)),
               MySQLdb.escape_string(self.lyric))
    log.info("sql={}".format(sql))
    mydb.exec_write(sql)

    for a in self.artists:
        log.debug("a id={}".format(a.id))
        self.save_artist(a)
        log.debug("write id={}".format(self.id))
    log.debug("save db done id={}".format(self.id))
def do(self, q):
    """Search the song dictionary file for ``q`` and return matching songs.

    Runs ``ag`` over ``dict_file`` (output capped at 2000 lines by ``head``),
    parses every sufficiently long output line as JSON, builds a ``Music``
    from it and keeps songs that have a lyric, an id and a name.  Each kept
    song gets ``highlight(q)`` applied.  The scan stops once more than 30
    songs were collected.

    Args:
        q: the search text.

    Returns:
        list of ``Music`` objects (possibly empty).
    """
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2

    # quote() makes q a single shell word, so quotes or metacharacters in the
    # query cannot break out of the command; `--` stops a query starting
    # with '-' from being parsed as an ag option.
    cmd = "ag --nonumbers -- {} {}|head -n 2000".format(quote(q), dict_file)
    log.info("cmd={}".format(cmd))
    _status, output = commands.getstatusoutput(cmd)

    songs = []
    for line in output.split("\n"):
        log.info("line={}".format(line))
        if len(line) < 10:
            continue
        try:
            song_dict = json.loads(line.strip())
        except ValueError:
            # e.g. an error message printed by the command; one bad line
            # must not abort the whole search.
            log.debug("skip non-json line={}".format(line))
            continue
        song = Music().from_dict(song_dict)
        if len(song.lyric) < 10 or len(song.id) < 1 or len(song.name) < 1:
            log.debug("filter song={}".format(line))
            continue
        song.highlight(q)
        songs.append(song)
        if len(songs) > 30:
            break
    return songs
def download_top_song():
    """Run the top-list download into the fixed artist data directory.

    Builds a ``DownloadMusic`` instance, points its ``root_dir`` at the data
    directory, calls ``download_top_list()`` and then reports completion on
    stdout and in the log.
    """
    downloader = downloadmusic.DownloadMusic()
    downloader.root_dir = "/home/test/data/music/artist"
    downloader.download_top_list()

    done_msg = "download song success"
    print(done_msg)
    log.info(done_msg)
def download_one_song(self, music):
    """Download one song and count it as finished, whatever the outcome.

    Delegates to ``self._download_one_song``.  Any ``Exception`` is logged
    and swallowed so that one bad song does not stop the caller's loop.
    ``self.g_index`` is incremented under the module-level ``lock`` in all
    cases.

    Args:
        music: the song object; must provide ``to_dict()`` for logging.
    """
    try:
        self._download_one_song(music)
    except Exception as e:
        log.error("music={} error={}".format(music.to_dict(), e))
    finally:
        # `with` guarantees the lock is released even if the increment
        # raises; a bare acquire()/release() pair would leave it held.
        with lock:
            self.g_index += 1
def download_artist():
    """Run the artist download into the fixed artist data directory.

    Builds a ``DownloadMusic`` instance with ``limit`` 10 and the data
    directory as ``root_dir``, calls ``get_artist()`` and then reports
    completion on stdout and in the log.
    """
    root_dir = "/home/test/data/music/artist"
    dl = downloadmusic.DownloadMusic()
    dl.limit = 10
    dl.root_dir = root_dir
    dl.get_artist()
    print("download success")
    log.info("download success")
def _download_one_song(self, music):
    """Fetch lyric and artists for one song, then store it on disk and in the DB.

    Steps:
      1. ``<server>/lyric?id=<id>``: copies ``lrc.lyric`` into ``music.lyric``.
      2. ``<server>/song/detail?ids=<id>``: appends an ``Artist`` to
         ``music.artists`` for every ``ar`` entry of the first returned song.
      3. Writes ``music.to_dict()`` as JSON to
         ``<root_dir>/<first artist id or "">/<id>/<id>.json``.
      4. Calls ``music.save_db()``.

    Args:
        music: the song object to complete and persist.

    Returns:
        None.  If either request fails the song is skipped (nothing is
        written).
    """
    import io

    def fetch_json(uri):
        # Decoded JSON body, or None when the request or the decoding fails.
        try:
            return requests.get(uri, timeout=60).json()
        except Exception as e:
            # Exception (not BaseException) so Ctrl-C / SystemExit still work.
            log.error("request fail uri={} err={}".format(uri, e))
            print(e)
            return None

    response = fetch_json("{}/lyric?id={}".format(self.server, music.id))
    if response is None:
        return None
    if "lrc" in response and "lyric" in response["lrc"]:
        music.lyric = response["lrc"]["lyric"]

    response = fetch_json(
        "{}/song/detail?ids={}".format(self.server, music.id))
    if response is None:
        return None
    if "songs" in response:
        for one_music in response["songs"]:
            for a in one_music.get("ar") or []:
                music.artists.append(Artist(a["id"], a["name"]))
            # Only the first entry of "songs" is consulted.
            break

    playlist = music.playlist
    # Guarded so that a song created without a playlist does not fail here.
    if playlist is not None and not playlist.root_dir:
        playlist.root_dir = self.root_dir

    id_dir = music.artists[0].id if music.artists else ""
    music_dir = "{}/{}/{}".format(self.root_dir, id_dir, music.id)
    log.info("mkdir={}".format(music_dir))
    try:
        os.makedirs(music_dir)
    except OSError as e:
        # An already existing directory is fine (same as `mkdir -p`).
        if not os.path.isdir(music_dir):
            log.error("mkdir fail dir={} err={}".format(music_dir, e))

    obj_json = json.dumps(music.to_dict(), ensure_ascii=False)
    # On Python 2 dumps() may hand back a byte string; io.open wants text.
    if isinstance(obj_json, bytes):
        obj_json = obj_json.decode("utf-8")
    # Explicit UTF-8 so non-ASCII lyrics/names are written reliably, and
    # `with` so the handle is closed even if the write fails.
    json_path = '{}/{}.json'.format(music_dir, music.id)
    with io.open(json_path, 'w', encoding='utf-8') as f:
        f.write(obj_json)

    music.save_db()
    print("save {} ok!".format(music.id))
def reconnectDB(self):
    """Re-initialise the database connection through ``self.initDb()``.

    Returns:
        True when ``initDb()`` succeeded, False when it raised.  The
        traceback of a failure is printed and an error is logged.
    """
    import traceback

    try:
        self.initDb()
    except Exception:
        # `except Exception` plus a plain return (instead of a bare except
        # with `return` inside `finally`) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently discarded.
        traceback.print_exc()
        log.error('重连mysql数据库失败')
        return False
    log.info('重连mysql数据库成功')
    return True
def get_top_artist(self):
    """Download every artist of the server's top-artist list and record the list.

    Ensures ``self.root_dir`` exists, requests ``<server>/top/artists``,
    calls ``self.download_one_artist`` for each entry (sleeping one second
    between artists) and finally passes the list, encoded as
    ``id$$name;id$$name;...``, to ``dbops.write_top_artist``.

    Returns:
        None.  Nothing is written when the request fails or yields no
        artists.
    """
    log.info("mkdir={}".format(self.root_dir))
    try:
        os.makedirs(self.root_dir)
    except OSError as e:
        # An already existing directory is fine (same as `mkdir -p`).
        if not os.path.isdir(self.root_dir):
            log.error("mkdir fail dir={} err={}".format(self.root_dir, e))

    url = "{}/top/artists".format(self.server)
    try:
        response = requests.get(url, timeout=120).json()
    except Exception as e:
        print('{}获取artist fail!'.format(url))
        log.fatal("err={} url={}".format(e, url))
        print(e)
        # Without a response there is nothing to process.  The original
        # fell through here and failed on the unbound `response` name.
        return
    print("uri={}".format(url))

    artist_list = []
    if "artists" in response:
        for one_artist in response["artists"]:
            a_id = str(one_artist["id"]).strip()
            a_name = str(one_artist["name"]).strip()
            ar = Artist(a_id, a_name)
            artist_list.append(ar)
            self.download_one_artist(ar)
            time.sleep(1)

    a_num = len(artist_list)
    print("down ok a_num={}".format(a_num))
    log.info("down ok a_num={}".format(a_num))
    if not artist_list:
        log.fatal("too few top artists len={}".format(len(artist_list)))
        return

    log.info("write artist into top list")
    ar_str_list = [str(a.id) + "$$" + str(a.name) for a in artist_list]
    dbops.write_top_artist(";".join(ar_str_list))
    log.info('write artist into top list success')
def exec_write(self, sql):
    """Execute a write statement (DML/DDL) and commit it.

    The statement runs on ``self.cursor`` while ``self.lock`` is held.  On
    any ``Exception`` the error is printed and logged and the transaction is
    rolled back; the exception is not re-raised.

    Args:
        sql: the complete SQL statement to execute.
    """
    log.info("mysql write")
    # The lock is held for execute + commit/rollback and always released.
    with self.lock:
        try:
            self.cursor.execute(sql)
            self.conn.commit()
        except Exception as e:
            # Not every exception carries (code, message); indexing args[1]
            # blindly raised IndexError here and skipped the rollback.
            if len(e.args) >= 2:
                errormsg = 'write db ERROR(%s):%s' % (e.args[0], e.args[1])
            else:
                errormsg = 'write db ERROR:%s' % (e,)
            print(errormsg)
            log.fatal("err={} sql={}".format(errormsg, sql))
            self.conn.rollback()
def exec_write(self, sql):
    """Execute a write statement (DML/DDL) on a pooled connection and commit it.

    A connection is taken from ``self.pool`` for the duration of the call.
    On any ``Exception`` raised by execute/commit the error is logged and
    the transaction is rolled back; the exception is not re-raised.  Cursor
    and connection are always closed.

    Args:
        sql: the complete SQL statement to execute.
    """
    log.info("mysql write")
    conn = self.pool.connection()
    try:
        # cursor() is inside the outer try so the connection is still
        # closed if creating the cursor fails.
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            conn.commit()
        except Exception as e:
            # Not every exception carries (code, message); indexing args[1]
            # blindly raised IndexError here and skipped the rollback.
            if len(e.args) >= 2:
                errormsg = 'write db ERROR(%s):%s' % (e.args[0], e.args[1])
            else:
                errormsg = 'write db ERROR:%s' % (e,)
            log.fatal("err={} sql={}".format(errormsg, sql))
            conn.rollback()
        finally:
            cursor.close()
    finally:
        conn.close()
def _download_one_song_mp3(self, music):
    """Download the audio file of one song with ``wget``.

    Requests ``<server>/music/url?id=<id>`` and, for the first entry of the
    returned ``data`` list, runs ``wget -c`` to store the file as
    ``<data_dir>/<id>.mp3`` (wget's own log goes to
    ``<data_dir>/<id>.wget.log``).

    Args:
        music: the song object; only ``music.id`` is used.

    Returns:
        None.  Failures are logged, not raised.
    """
    import subprocess

    uri = "{}/music/url?id={}".format(self.server, music.id)
    try:
        response = requests.get(uri, timeout=300).json()
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C / SystemExit still work.
        log.error("get music url fail uri={} err={}".format(uri, e))
        print(e)
        return None

    if "data" not in response:
        return None

    for one_music in response["data"]:
        url = one_music["url"]
        if not url:
            # Nothing to fetch; do not start wget with an empty/None URL.
            log.info("no download url for id={}".format(music.id))
            break
        argv = [
            "wget", "-c", url,
            "-O", "{}/{}.mp3".format(self.data_dir, music.id),
            "-o", "{}/{}.wget.log".format(self.data_dir, music.id),
        ]
        log.info("cmd={}".format(" ".join(argv)))
        # An argument list bypasses the shell: '&', ';' or '?' in the
        # server-supplied URL are passed to wget verbatim.
        try:
            subprocess.call(argv)
        except OSError as e:
            log.error("wget start fail id={} err={}".format(music.id, e))
        # Only the first entry of "data" is used.
        break
def get_douyin(self):
    """Search the server for the Douyin hot-song keyword and list the hits.

    Writes one ``<id>\\t<name>`` line per returned song to
    ``<root_dir>/douyin.txt`` (the file is truncated at the start of every
    call) and logs for each song whether it is already known: either
    ``<id>.mp3`` is in ``self.id_map`` or ``<root_dir>/<id>.json`` exists.

    Returns:
        None.  A failed request is printed and the function returns early.
    """
    url = "{}/search?keywords=抖音热歌&limit=100"
    uri = url.format(self.server)
    print("uri={}".format(uri))
    # `with` closes the file on every exit path; the original leaked the
    # handle when the request failed and the function returned early.
    with open('{}/douyin.txt'.format(self.root_dir), 'w') as f:
        try:
            response = requests.get(uri, timeout=120).json()
            print(response)
        except Exception as e:
            print('{}获取artist fail!'.format(uri))
            print(e)
            return

        if "result" not in response or "songs" not in response["result"]:
            return

        for song in response["result"]["songs"]:
            song_id = song["id"]
            name = song["name"]
            f.write("{}\t{}\n".format(song_id, name))
            # The file system is only consulted when the id is not in id_map.
            known = ("{}.mp3".format(song_id) in self.id_map
                     or os.path.exists(
                         '{}/{}.json'.format(self.root_dir, song_id)))
            state = "exist" if known else "not_exist"
            log.info("id={} name={} {}".format(song_id, name, state))
def get_artist(self):
    """Download the artists of several artist-list categories.

    Ensures ``self.root_dir`` exists, then for every type id in
    ``type_ids[3:]`` ("5", "6", "7") and every suffix "001"/"002"/"003"
    requests ``<server>/artist/list?cat=<type id + suffix>&limit=100`` and
    calls ``self.download_one_artist`` for each returned artist, sleeping one
    second between artists and 20 seconds between categories.  A failed
    request skips that category.
    """
    # NOTE(review): builds and runs a shell command; os.makedirs would avoid
    # quoting problems with unusual paths.
    cmd = "mkdir -p {}".format(self.root_dir)
    log.info("cmd={}".format(cmd))
    os.system(cmd)
    type_ids = ["1", "2", "4", "5", "6", "7"]
    # prefix_artist (65..90 plus 0) is only printed below; it is not used
    # for any request in this function.
    prefix_artist = [x for x in range(65, 91)]
    prefix_artist.append(0)
    print("prefix={}".format(prefix_artist))
    # url = "https://music.163.com/#/discover/artist/cat?id={}&initial={}"
    url = "{}/artist/list?cat={}&limit=100"
    # NOTE(review): the [3:] slice skips type ids "1", "2" and "4" -- confirm
    # that this is intended and not a leftover from resuming a partial run.
    for type_id in type_ids[3:]:
        id_list = [type_id + x for x in ["001", "002", "003"]]
        for t_id in id_list:
            a_num = 0
            print("type id={}".format(t_id))
            uri = url.format(self.server, t_id)
            print("uri={}".format(uri))
            try:
                resp = requests.get(uri, timeout=120)
                response = resp.json()
                print response
            except BaseException as e:
                # NOTE(review): BaseException also catches KeyboardInterrupt
                # and SystemExit.
                print '{}获取artist fail!'.format(uri)
                print e
                continue
            print("uri={}".format(uri))
            # Reset the progress counters for this category.
            time_count = 0
            self.g_index = 0
            self.thread_num = 0
            if "artists" in response:
                for one_artist in response["artists"]:
                    a_id = one_artist["id"]
                    a_name = one_artist["name"]
                    ar = Artist(a_id, a_name)
                    self.download_one_artist(ar)
                    a_num += 1
                    time.sleep(1)
                    # break
            # The wait loop between start and end is commented out, so the
            # logged duration is ~0 and time_count stays 0.
            start = time.time()
            # while self.g_index < self.thread_num - 1 and time_count < 3600:
            #     log.info(
            #         "wait for end list g_index={} thread_num={} time={}".format(self.g_index,
            #                                                                     self.thread_num,
            #                                                                     time_count))
            #     time.sleep(1)
            #     time_count += 1
            end = time.time()
            log.info(
                "wait for end list g_index={} thread_num={} time={}s count={}"
                .format(self.g_index, self.thread_num, end - start, time_count))
            # break
            log.info("down success typeid={} t_id={} a num={}".format(
                type_id, t_id, a_num))
            time.sleep(20)
def download_top_list(self, idx=1):
    """Download all songs of one top list and record the list.

    Requests ``<server>/top/list?idx=<idx>``, passes a ``Music`` object for
    every track to ``self.download_one_song`` (incrementing
    ``self.thread_num`` per track) and finally hands the first 200 songs,
    encoded as ``id$$name;id$$name;...``, to ``dbops.write_top_song``.

    Args:
        idx: index of the top list to fetch.  Defaults to 1, the only index
            the previous hard-coded loop ever processed.

    Returns:
        None.  Nothing is written when the request fails or fewer than five
        songs were returned.
    """
    uri = "{}/top/list?idx={}".format(self.server, idx)
    try:
        # A timeout keeps a stalled server from blocking the run forever.
        response = requests.get(uri, timeout=120).json()
        print(response)
    except Exception as e:
        print('top list!')
        log.fatal("toplist fail err={} uri={}".format(e, uri))
        print(e)
        return None

    song_list = []
    if "playlist" in response and "tracks" in response["playlist"]:
        for track in response["playlist"]["tracks"]:
            song_id = str(track["id"])
            song_name = track["name"]
            m = Music(song_id, song_name)
            song_list.append(m)
            print("music name={} id={}".format(song_name, song_id))
            self.thread_num += 1
            self.download_one_song(m)

    log.info("down ok a_num={}".format(len(song_list)))
    if len(song_list) < 5:
        log.fatal("too few top song len={}".format(len(song_list)))
        return

    log.info("write song into top list")
    song_str_list = [
        str(a.id) + "$$" + str(a.name) for a in song_list[:200]
    ]
    dbops.write_top_song(";".join(song_str_list))
    log.info('write song into top list success')
def get_playlist(self, cate, tag):
    """Download the newest and hottest playlists of one tag.

    Ensures ``<root_dir>/<cate>/<escaped tag>`` exists, fetches up to
    ``self.limit`` playlists for each of the orders ``new`` and ``hot`` and
    runs ``self.download_one_playlist`` on every playlist.  After each
    playlist it waits (at most 3600 one-second polls) until ``self.g_index``
    has nearly caught up with ``self.thread_num``.

    Args:
        cate: category name; used as a directory component.
        tag: playlist tag; passed through ``self.special_char`` for the
            directory name and the request URLs.

    Returns:
        None.  If one of the two list requests fails, nothing is downloaded.
    """
    print("tag={}".format(tag))
    tag1 = self.special_char(tag)
    print("tag1={}".format(tag1))

    tag_dir = "{}/{}/{}".format(self.root_dir, cate, tag1)
    if os.path.exists(tag_dir):
        # NOTE(review): the message says "return" but the code has always
        # carried on; kept that way -- confirm which one is intended.
        log.info("{} exist, return".format(tag_dir))
    else:
        log.info("mkdir={}".format(tag_dir))
        try:
            os.makedirs(tag_dir)
        except OSError as e:
            if not os.path.isdir(tag_dir):
                log.error("mkdir fail dir={} err={}".format(tag_dir, e))

    ret_list = []
    for order in ("new", "hot"):
        # Both requests use the escaped tag; the original sent the raw tag
        # for the "hot" request only, unlike the "new" request.
        uri = "{}/top/playlist?limit={}&order={}&cat={}".format(
            self.server, self.limit, order, tag1)
        try:
            # A timeout keeps a stalled server from blocking the run.
            response = requests.get(uri, timeout=120).json()
            print(response)
            if "playlists" in response:
                ret_list += response["playlists"]
        except Exception as e:
            log.error("get playlist list fail uri={} err={}".format(uri, e))
            print(e)
            return None

    # Counters are reset even when no playlist was returned.
    self.g_index = 0
    self.thread_num = 0
    for playlist in ret_list:
        playlist_id = playlist["id"]
        playlist_name = playlist["name"]
        pl = Playlist(playlist_id, playlist_name, tag)
        pl.root_dir = tag_dir
        print("playlist name={} id={}".format(playlist_name, playlist_id))
        log.info("playlist name={} id={}".format(playlist_name,
                                                 playlist_id))
        time_count = 0
        self.g_index = 0
        self.thread_num = 0
        self.download_one_playlist(pl)

        start = time.time()
        while self.g_index < self.thread_num and time_count < 3600:
            # Stop waiting once all but a few songs are done: 3 stragglers
            # are tolerated for large playlists, 1 otherwise.
            slack = 3 if self.thread_num > 30 else 1
            if self.g_index >= self.thread_num - slack:
                break
            log.info(
                "wait for end list id={} g_index={} thread_num={} time={}"
                .format(playlist_id, self.g_index, self.thread_num,
                        time_count))
            time.sleep(1)
            time_count += 1
        end = time.time()
        log.info(
            "wait for end list id={} g_index={} thread_num={} time={}s count={}"
            .format(playlist_id, self.g_index, self.thread_num, end - start,
                    time_count))
log.debug('查询数据:' + sql + '>>>' + str(data)) return data def reconnectDB(self): if self.conn: try: self.conn.close() except Exception, e: log.error('关闭数据库连接失败' + str(e)) try: self.conn = MySQLdb.connect(host=self.serverAddr, port=self.serverPort, user=self.serverUser, passwd=self.serverPwd, db=self.database) log.info('重连mysql数据库成功') except MySQLdb.MySQLError, e: log.error('重连mysql数据库失败') def __del__(self): self.conn.close() @staticmethod def getMysqlConn(conf): if MysqlConn.mysqlConner is not None: return MysqlConn.mysqlConner MysqlConn.mysqlConner = MysqlConn(conf) MysqlConn.mysqlConner.initDb() return MysqlConn.mysqlConner
def save_artist(self, artist):
    """Add this song's id to the artist's row in the ``artist`` table.

    Reads the artist row by ``netid``; if the song id is already in the
    row's ``;``-separated song list nothing is written.  Otherwise the id is
    appended (the list is capped at 200 entries) and the row is written with
    ``replace into``.

    Args:
        artist: object providing ``id`` and ``name``.
    """
    aid = artist.id
    aname = artist.name
    # Work with the id as text: the stored list holds strings, so a numeric
    # id never matched the membership test and broke ";".join().
    sid = "{}".format(self.id)
    # Escaped once, used in both statements.
    safe_aid = MySQLdb.escape_string("{}".format(aid))

    print("aid={}".format(aid))
    sql = """select * from artist where netid='{}' """.format(safe_aid)
    log.info("sql={}".format(sql))
    log.info("song id={} in artists={}".format(self.id, aid))
    # Treat a missing result like "no rows".
    results = mydb.query(sql) or []
    print("aid={} results=len={}".format(aid, len(results)))

    sids = []
    for row in results:
        stored = row[3]  # the row's song-id list column
        if stored:  # also skips a NULL column
            sids = stored.split(";")
        print("songs={} sid={} id={}".format(stored, sids, self.id))
        if sid in sids:
            print("song id={} in artists={}".format(self.id, aid))
            return
        sids.append(sid)
    if not sids:
        # No existing row: the list starts with just this song.
        sids = [sid]

    if len(sids) > 200:
        # NOTE(review): keeps the first 200 ids, so once the cap is reached
        # a newly appended id is dropped again -- confirm this is intended.
        log.warn("aid={} songs too long={}".format(aid, sids))
        sids = sids[:200]
    songs = ";".join(sids)
    print("songs={}, id={}".format(songs, self.id))

    log.info("song id={} in artists={}".format(self.id, aid))
    sql = ("replace into artist(id, netid, name, songs) "
           "values('{}', '{}', '{}', '{}') ").format(
               safe_aid, safe_aid,
               MySQLdb.escape_string(aname),
               MySQLdb.escape_string(songs))
    log.info("sql={}".format(sql))
    mydb.exec_write(sql)
    log.info("write a={} id={}".format(aid, self.id))
    print("write a={} id={}".format(aid, self.id))