def Start(db_, list_):
    """Fetch the 'hot number' page for every artist id in list_.

    For each id: download the artist page, parse the hot number with a
    HotNumParser, store it via db_.set_artist_hot, and push the (cleaned)
    artist record to elsup[0] as JSON.  Failures are logged and skipped so
    one bad artist never aborts the whole crawl (best-effort crawler).

    db_   -- storage object exposing set_artist_hot / get_artist
    list_ -- iterable of artist ids (used to fill the URL template)
    """
    # Typo fixed from ARITIST_...; local constant, so safe to rename.
    ARTIST_URL_TEMPLATE_ = 'http://music.baidu.com/artist/%s'
    # NOTE(review): one parser instance is reused across pages -- assumes
    # HotNumParser.feed tolerates repeated feeds; confirm.
    parser = HotNumParser()
    for artist_id_ in list_:
        raw_content = common.http_read(ARTIST_URL_TEMPLATE_ % artist_id_)
        if raw_content is None:
            # Consistent with the other fetch sites in this file: skip
            # failed downloads instead of feeding None to the parser.
            continue
        try:
            parser.feed(raw_content)
            db_.set_artist_hot(artist_id_, parser.hotnum)
            obj = db_.get_artist(artist_id_)
            if obj:
                obj.pop('_id')  # drop the internal store id before export
                elsup[0].transfer(json.dumps(obj), obj['artist_id'])
            print('artist %s hot num is %s' % (artist_id_, parser.hotnum))
        except Exception as e:
            # Broad on purpose: log and move on to the next artist.
            common.log('HotNumParser.feed: ' + str(e))
artid_ = v[v.find('/artist/') + len('/artist/'):] if (k and k == 'title'): title_ = v if (artid_ != ''): if artid_.isdigit(): #Artist_List_[artid_] = title_ db_.add_artist(artid_, title_) elif Category_List_Switch_: Category_List_.add(PRE_URL_ + href_) except Exception, e: common.log('Find_Artist_Link: ' + str(e)) parser = HTMLParser() parser.handle_starttag = Find_Artist_Link raw_content = common.http_read(URL_) try: parser.feed(raw_content) except Exception, e: common.log('HTMLParser.feed: ' + str(e)) print '"' + URL_ + '" has been processed.' Category_List_Switch_ = False for l_ in Category_List_: raw_content = common.http_read(l_) try: parser.feed(raw_content) except Exception, e: common.log('HTMLParser.feed: ' + str(e))
def Find_Song_Link(tag, attrs):
    """HTMLParser start-tag hook: process every <a href="/song/..."> anchor.

    For each song anchor: extract the song id, fetch the song's metadata
    JSON, and for songs not seen before in this run persist them via
    db_.add_song, push a JSON record to elsup[0], and download the audio
    and (when present) .lrc lyric file with up to 3 attempts each.

    Side effects via enclosing-scope globals: db_, elsup, dwn_music,
    dwn_lrc, SongNameMap, Order_[0] (per-artist counter) and
    Find_Song_Switch_[0] (keeps the caller's pagination loop running).
    All errors are logged and swallowed (best-effort crawler).
    """
    try:
        if tag != 'a':
            return
        for k, v in attrs:
            if not (k and k == 'href' and v and v.find('/song/') != -1):
                continue
            # The song id sits between '/song/' and the next '/'.
            href_ = v[v.find('/song/') + len('/song/'):]
            if href_.find('/') != -1:
                href_ = href_[:href_.find('/')]
            raw_content = common.http_read(SongLink_URL_Template_ % href_)
            if raw_content is None:
                continue
            raw_object = json.loads(raw_content)
            songList = raw_object['data']['songList']
            if len(songList) > 0:
                song_ = songList[0]
                songId = song_['songId']
                songName = song_['songName']
                lrclink = PRE_URL_ + song_['lrcLink']
                songlink = song_['songLink']
                rate = song_['rate']
                size = song_['size']
                artist_id = Artist_Id_
                if songName not in SongNameMap:  # skip duplicate titles
                    SongNameMap[songName] = None
                    # NOTE(review): 'order' is not defined in this function;
                    # presumably a module global -- confirm.  A NameError
                    # here is caught and logged by the except clause below.
                    if order > Order_[0] and songlink and songlink != '':
                        db_.add_song(songId, songName, lrclink, songlink,
                                     rate, size, artist_id, Order_[0])
                        obj = {
                            "songId": songId,
                            "songName": songName,
                            "rate": rate,
                            "size": size,
                            "order": Order_[0],
                            "artistId": artist_id
                        }
                        elsup[0].transfer(json.dumps(obj), songId)
                        # Audio download: up to 3 attempts, record a
                        # permanent failure only after the last one.
                        for i in range(0, 3):
                            if i > 0:
                                common.log('try download music %s again, time: %d' % (songId, i))
                            if dwn_music[0].transfer(songlink, songId, 'audio/mpeg'):
                                break
                            elif i == 2:
                                db_.add_failed(songlink, songId, 'audio/mpeg', 1)
                        # Lyric download, same retry scheme.
                        if lrclink.endswith('.lrc'):
                            for i in range(0, 3):
                                if i > 0:
                                    common.log('try download lrc %s again, time: %d' % (songId, i))
                                if dwn_lrc[0].transfer(lrclink, songId, 'text/plain'):
                                    break
                                elif i == 2:
                                    db_.add_failed(lrclink, songId, 'text/plain', 2)
                        Order_[0] = Order_[0] + 1
                        print('song %d has been saved.' % songId)
                # An anchor was handled -> tell the caller to fetch the
                # next page of this artist's song list.
                Find_Song_Switch_[0] = True
    except Exception as e:
        common.log('Find_Song_Link: ' + str(e))
Find_Song_Switch_[0] = True except Exception, e: common.log('Find_Song_Link: ' + str(e)) parser = HTMLParser() parser.handle_starttag = Find_Song_Link for k_ in artist_list: print 'start process artist %s ...' % k_ Order_[0] = 0 SongNameMap = {} s_ = 0 Find_Song_Switch_[0] = True while (Find_Song_Switch_[0]): Find_Song_Switch_[0] = False raw_content = common.http_read(GetSongs_URL_Template_ % (s_, k_)) s_ = s_ + 25 if raw_content is None: continue try: raw_object = json.loads(raw_content) except Exception, e: common.log('json.loads: ' + str(e)) try: raw_content = raw_object['data']['html'] except Exception, e: common.log('extract html from json object: ' + str(e)) try: raw_content = raw_content.decode('unicode_escape') except Exception, e: common.log('str.decode: ' + str(e))
def Find_Song_Link(tag, attrs):
    """HTMLParser start-tag hook: process every <a href="/song/..."> anchor.

    Duplicate definition of Find_Song_Link (same logic, double-quoted
    variant); kept consistent with the other copy.  For each song anchor:
    extract the song id, fetch the song's metadata JSON, and for songs not
    seen before in this run persist them via db_.add_song, push a JSON
    record to elsup[0], and download the audio and (when present) .lrc
    lyric file with up to 3 attempts each.

    Side effects via enclosing-scope globals: db_, elsup, dwn_music,
    dwn_lrc, SongNameMap, Order_[0] and Find_Song_Switch_[0].
    All errors are logged and swallowed (best-effort crawler).
    """
    try:
        if tag != "a":
            return
        for k, v in attrs:
            if not (k and k == "href" and v and v.find("/song/") != -1):
                continue
            # The song id sits between '/song/' and the next '/'.
            href_ = v[v.find("/song/") + len("/song/"):]
            if href_.find("/") != -1:
                href_ = href_[:href_.find("/")]
            raw_content = common.http_read(SongLink_URL_Template_ % href_)
            if raw_content is None:
                continue
            raw_object = json.loads(raw_content)
            songList = raw_object["data"]["songList"]
            if len(songList) > 0:
                song_ = songList[0]
                songId = song_["songId"]
                songName = song_["songName"]
                lrclink = PRE_URL_ + song_["lrcLink"]
                songlink = song_["songLink"]
                rate = song_["rate"]
                size = song_["size"]
                artist_id = Artist_Id_
                if songName not in SongNameMap:  # skip duplicate titles
                    SongNameMap[songName] = None
                    # NOTE(review): 'order' is not defined in this function;
                    # presumably a module global -- confirm.  A NameError
                    # here is caught and logged by the except clause below.
                    if order > Order_[0] and songlink and songlink != "":
                        db_.add_song(songId, songName, lrclink, songlink,
                                     rate, size, artist_id, Order_[0])
                        obj = {
                            "songId": songId,
                            "songName": songName,
                            "rate": rate,
                            "size": size,
                            "order": Order_[0],
                            "artistId": artist_id,
                        }
                        elsup[0].transfer(json.dumps(obj), songId)
                        # Audio download: up to 3 attempts, record a
                        # permanent failure only after the last one.
                        for i in range(0, 3):
                            if i > 0:
                                common.log("try download music %s again, time: %d" % (songId, i))
                            if dwn_music[0].transfer(songlink, songId, "audio/mpeg"):
                                break
                            elif i == 2:
                                db_.add_failed(songlink, songId, "audio/mpeg", 1)
                        # Lyric download, same retry scheme.
                        if lrclink.endswith(".lrc"):
                            for i in range(0, 3):
                                if i > 0:
                                    common.log("try download lrc %s again, time: %d" % (songId, i))
                                if dwn_lrc[0].transfer(lrclink, songId, "text/plain"):
                                    break
                                elif i == 2:
                                    db_.add_failed(lrclink, songId, "text/plain", 2)
                        Order_[0] = Order_[0] + 1
                        print("song %d has been saved." % songId)
                # An anchor was handled -> tell the caller to fetch the
                # next page of this artist's song list.
                Find_Song_Switch_[0] = True
    except Exception as e:
        common.log("Find_Song_Link: " + str(e))
Find_Song_Switch_[0] = True except Exception, e: common.log("Find_Song_Link: " + str(e)) parser = HTMLParser() parser.handle_starttag = Find_Song_Link for k_ in artist_list: print "start process artist %s ..." % k_ Order_[0] = 0 SongNameMap = {} s_ = 0 Find_Song_Switch_[0] = True while Find_Song_Switch_[0]: Find_Song_Switch_[0] = False raw_content = common.http_read(GetSongs_URL_Template_ % (s_, k_)) s_ = s_ + 25 if raw_content is None: continue try: raw_object = json.loads(raw_content) except Exception, e: common.log("json.loads: " + str(e)) try: raw_content = raw_object["data"]["html"] except Exception, e: common.log("extract html from json object: " + str(e)) try: raw_content = raw_content.decode("unicode_escape") except Exception, e: common.log("str.decode: " + str(e))
artid_ = v[v.find('/artist/') + len('/artist/'):] if(k and k == 'title'): title_ = v if(artid_ != ''): if artid_.isdigit(): #Artist_List_[artid_] = title_ db_.add_artist(artid_, title_) elif Category_List_Switch_: Category_List_.add(PRE_URL_ + href_) except Exception, e: common.log('Find_Artist_Link: ' + str(e)) parser = HTMLParser() parser.handle_starttag = Find_Artist_Link raw_content = common.http_read(URL_) try: parser.feed(raw_content) except Exception, e: common.log('HTMLParser.feed: ' + str(e)) print '"' + URL_ + '" has been processed.' Category_List_Switch_ = False for l_ in Category_List_: raw_content = common.http_read(l_) try: parser.feed(raw_content) except Exception, e: common.log('HTMLParser.feed: ' + str(e))