예제 #1
0
def Start(db_, list_):
    """Refresh the stored hot number of every artist in ``list_``.

    For each artist id the artist page is downloaded with
    ``common.http_read``, fed to a ``HotNumParser`` and the resulting
    ``parser.hotnum`` is written with ``db_.set_artist_hot``.  When
    ``db_.get_artist`` returns a truthy record, that record (minus its
    ``_id`` key) is serialised to JSON and handed to ``elsup[0].transfer``
    under the record's ``artist_id``.

    Args:
        db_: storage object; must provide ``set_artist_hot(id, hotnum)`` and
            ``get_artist(id)``.
        list_: iterable of artist ids; each one is substituted into the
            artist URL template.

    Returns:
        None.  Any exception raised while handling one artist is logged via
        ``common.log`` and the loop moves on to the next artist.
    """
    ARITIST_URL_TEMPLATE_ = 'http://music.baidu.com/artist/%s'

    # NOTE(review): a single parser instance is shared by all artists and is
    # never reset; if a page yields no hot number, ``parser.hotnum`` may still
    # hold the previous artist's value -- confirm against HotNumParser.
    parser = HotNumParser()

    for l_ in list_:
        raw_content = common.http_read(ARITIST_URL_TEMPLATE_ % l_)
        try:
            parser.feed(raw_content)
            db_.set_artist_hot(l_, parser.hotnum)
            obj = db_.get_artist(l_)
            if obj:
                # Drop the storage key before serialising; tolerate records
                # that do not carry one instead of aborting the transfer.
                obj.pop('_id', None)
                elsup[0].transfer(json.dumps(obj), obj['artist_id'])
            # Parenthesised single-argument form prints the same text on
            # Python 2 and Python 3.
            print('artist %s hot num is %s' % (l_, parser.hotnum))
        except Exception as e:
            # The log prefix is kept as-is although the failure may also come
            # from the database or transfer calls above.
            common.log('HotNumParser.feed: ' + str(e))
예제 #2
0
def Start(db_, list_):
    """Refresh the stored hot number of every artist in ``list_``.

    For each artist id the artist page is downloaded with
    ``common.http_read``, fed to a ``HotNumParser`` and the resulting
    ``parser.hotnum`` is written with ``db_.set_artist_hot``.  When
    ``db_.get_artist`` returns a truthy record, that record (minus its
    ``_id`` key) is serialised to JSON and handed to ``elsup[0].transfer``
    under the record's ``artist_id``.

    Args:
        db_: storage object; must provide ``set_artist_hot(id, hotnum)`` and
            ``get_artist(id)``.
        list_: iterable of artist ids; each one is substituted into the
            artist URL template.

    Returns:
        None.  Any exception raised while handling one artist is logged via
        ``common.log`` and the loop moves on to the next artist.
    """
    ARITIST_URL_TEMPLATE_ = 'http://music.baidu.com/artist/%s'

    # NOTE(review): a single parser instance is shared by all artists and is
    # never reset; if a page yields no hot number, ``parser.hotnum`` may still
    # hold the previous artist's value -- confirm against HotNumParser.
    parser = HotNumParser()

    for l_ in list_:
        raw_content = common.http_read(ARITIST_URL_TEMPLATE_ % l_)
        try:
            parser.feed(raw_content)
            db_.set_artist_hot(l_, parser.hotnum)
            obj = db_.get_artist(l_)
            if obj:
                # Drop the storage key before serialising; tolerate records
                # that do not carry one instead of aborting the transfer.
                obj.pop('_id', None)
                elsup[0].transfer(json.dumps(obj), obj['artist_id'])
            # Parenthesised single-argument form prints the same text on
            # Python 2 and Python 3.
            print('artist %s hot num is %s' % (l_, parser.hotnum))
        except Exception as e:
            # The log prefix is kept as-is although the failure may also come
            # from the database or transfer calls above.
            common.log('HotNumParser.feed: ' + str(e))
예제 #3
0
                        artid_ = v[v.find('/artist/') + len('/artist/'):]
                    if (k and k == 'title'):
                        title_ = v
                if (artid_ != ''):
                    if artid_.isdigit():
                        #Artist_List_[artid_] = title_
                        db_.add_artist(artid_, title_)
                    elif Category_List_Switch_:
                        Category_List_.add(PRE_URL_ + href_)
        except Exception, e:
            common.log('Find_Artist_Link: ' + str(e))

    # Create a plain HTMLParser and install Find_Artist_Link as its start-tag
    # handler, so feed() passes every opening tag to that callback.
    parser = HTMLParser()
    parser.handle_starttag = Find_Artist_Link

    # Download the entry page addressed by URL_.
    raw_content = common.http_read(URL_)

    try:
        parser.feed(raw_content)
    except Exception, e:
        # Best effort: a failure while parsing is logged, not propagated.
        common.log('HTMLParser.feed: ' + str(e))
    print '"' + URL_ + '" has been processed.'

    # Switch off category collection before walking Category_List_: the
    # handler only adds to Category_List_ while this flag is truthy, so
    # (presumably) the collection is not extended while it is iterated below.
    Category_List_Switch_ = False

    # Run every collected category page through the same parser instance.
    # NOTE(review): the parser is reused without reset() between documents --
    # confirm that input buffered from one page cannot leak into the next.
    for l_ in Category_List_:
        raw_content = common.http_read(l_)
        try:
            parser.feed(raw_content)
        except Exception, e:
            # Log and move on to the next category page.
            common.log('HTMLParser.feed: ' + str(e))
예제 #4
0
 def Find_Song_Link(tag, attrs):
     """Handle one start tag: look up, store and download the song it links to.

     Takes the same ``(tag, attrs)`` arguments as
     ``HTMLParser.handle_starttag``.  For every ``href`` attribute of an
     ``<a>`` tag that contains ``/song/``, the path segment after ``/song/``
     is substituted into ``SongLink_URL_Template_`` and the JSON answer is
     read.  The first entry of ``data.songList`` is then, if its name has not
     been seen in ``SongNameMap`` and ``order > Order_[0]`` and it has a song
     link, stored with ``db_.add_song``, indexed through ``elsup[0]`` and
     downloaded (audio, plus lyrics when the lyrics link ends in ``.lrc``).

     Args:
         tag: tag name passed by the parser.
         attrs: iterable of ``(name, value)`` attribute pairs.

     Returns:
         None.  ``Find_Song_Switch_[0]`` is set to True whenever a song link
         was processed; any exception is logged via ``common.log`` and
         swallowed, which also ends the handling of this tag.
     """

     def _transfer_with_retry(downloader, link, song_id, mime, fail_code,
                              retry_msg):
         # Try the transfer up to three times, logging every retry; when all
         # attempts fail, record the link through db_.add_failed.
         for attempt in range(3):
             if attempt > 0:
                 common.log(retry_msg % (song_id, attempt))
             if downloader.transfer(link, song_id, mime):
                 return
         db_.add_failed(link, song_id, mime, fail_code)

     try:
         if tag != 'a':
             return
         for k, v in attrs:
             if not (k == 'href' and v and '/song/' in v):
                 continue
             # The song id is the path segment right after the first '/song/'.
             href_ = v.partition('/song/')[2].partition('/')[0]
             raw_content = common.http_read(SongLink_URL_Template_ % href_)
             if raw_content is None:
                 continue
             raw_object = json.loads(raw_content)
             songList = raw_object['data']['songList']
             if len(songList) > 0:
                 # Only the first entry of the answer is used.
                 song_ = songList[0]
                 songId = song_['songId']
                 songName = song_['songName']
                 lrclink = PRE_URL_ + song_['lrcLink']
                 songlink = song_['songLink']
                 rate = song_['rate']
                 size = song_['size']
                 artist_id = Artist_Id_
                 if songName not in SongNameMap:
                     # Remember the name so a later song with the same name
                     # is skipped.
                     SongNameMap[songName] = None
                     if order > Order_[0] and songlink:  # important
                         db_.add_song(songId, songName, lrclink, songlink,
                                      rate, size, artist_id, Order_[0])
                         obj = {
                             "songId": songId,
                             "songName": songName,
                             "rate": rate,
                             "size": size,
                             "order": Order_[0],
                             "artistId": artist_id
                         }
                         elsup[0].transfer(json.dumps(obj), songId)
                         _transfer_with_retry(
                             dwn_music[0], songlink, songId, 'audio/mpeg', 1,
                             'try download music %s again, time: %d')
                         if lrclink.endswith('.lrc'):
                             _transfer_with_retry(
                                 dwn_lrc[0], lrclink, songId, 'text/plain', 2,
                                 'try download lrc %s again, time: %d')
                         # Only songs that were actually stored advance the
                         # running order.
                         Order_[0] = Order_[0] + 1
                 # NOTE(review): printed for every fetched song, including
                 # ones skipped above -- confirm that is intended.
                 print('song %d has been saved.' % songId)
             Find_Song_Switch_[0] = True
     except Exception as e:
         common.log('Find_Song_Link: ' + str(e))
예제 #5
0
                        Find_Song_Switch_[0] = True
        except Exception, e:
            common.log('Find_Song_Link: ' + str(e))

    # Create a plain HTMLParser and install Find_Song_Link as its start-tag
    # handler.
    parser = HTMLParser()
    parser.handle_starttag = Find_Song_Link

    for k_ in artist_list:
        print 'start process artist %s ...' % k_
        # Per-artist state: running song order, names already seen, and the
        # paging offset used in the song-list request.
        Order_[0] = 0
        SongNameMap = {}
        s_ = 0
        Find_Song_Switch_[0] = True
        # Keep requesting pages while the previous page set the switch again
        # (Find_Song_Link sets it to True when it handles a song link).
        while (Find_Song_Switch_[0]):
            Find_Song_Switch_[0] = False
            raw_content = common.http_read(GetSongs_URL_Template_ % (s_, k_))
            # Advance the offset by 25 for the next request.
            s_ = s_ + 25
            if raw_content is None:
                # The switch is already False here, so this ends the paging
                # for the current artist.
                continue
            # NOTE(review): each step below only logs on failure and then
            # falls through, so a later step may run with raw_object unbound
            # or left over from the previous page, or with raw_content still
            # holding the undecoded response -- confirm this is acceptable.
            try:
                raw_object = json.loads(raw_content)
            except Exception, e:
                common.log('json.loads: ' + str(e))
            try:
                # Pull the HTML fragment out of the JSON answer.
                raw_content = raw_object['data']['html']
            except Exception, e:
                common.log('extract html from json object: ' + str(e))
            try:
                # Resolve backslash escape sequences in the extracted text.
                raw_content = raw_content.decode('unicode_escape')
            except Exception, e:
                common.log('str.decode: ' + str(e))
예제 #6
0
 def Find_Song_Link(tag, attrs):
     """Handle one start tag: look up, store and download the song it links to.

     Takes the same ``(tag, attrs)`` arguments as
     ``HTMLParser.handle_starttag``.  For every ``href`` attribute of an
     ``<a>`` tag that contains ``/song/``, the path segment after ``/song/``
     is substituted into ``SongLink_URL_Template_`` and the JSON answer is
     read.  The first entry of ``data.songList`` is then, if its name has not
     been seen in ``SongNameMap`` and ``order > Order_[0]`` and it has a song
     link, stored with ``db_.add_song``, indexed through ``elsup[0]`` and
     downloaded (audio, plus lyrics when the lyrics link ends in ``.lrc``).

     Args:
         tag: tag name passed by the parser.
         attrs: iterable of ``(name, value)`` attribute pairs.

     Returns:
         None.  ``Find_Song_Switch_[0]`` is set to True whenever a song link
         was processed; any exception is logged via ``common.log`` and
         swallowed, which also ends the handling of this tag.
     """

     def _transfer_with_retry(downloader, link, song_id, mime, fail_code, retry_msg):
         # Try the transfer up to three times, logging every retry; when all
         # attempts fail, record the link through db_.add_failed.
         for attempt in range(3):
             if attempt > 0:
                 common.log(retry_msg % (song_id, attempt))
             if downloader.transfer(link, song_id, mime):
                 return
         db_.add_failed(link, song_id, mime, fail_code)

     try:
         if tag != "a":
             return
         for k, v in attrs:
             if not (k == "href" and v and "/song/" in v):
                 continue
             # The song id is the path segment right after the first "/song/".
             href_ = v.partition("/song/")[2].partition("/")[0]
             raw_content = common.http_read(SongLink_URL_Template_ % href_)
             if raw_content is None:
                 continue
             raw_object = json.loads(raw_content)
             songList = raw_object["data"]["songList"]
             if len(songList) > 0:
                 # Only the first entry of the answer is used.
                 song_ = songList[0]
                 songId = song_["songId"]
                 songName = song_["songName"]
                 lrclink = PRE_URL_ + song_["lrcLink"]
                 songlink = song_["songLink"]
                 rate = song_["rate"]
                 size = song_["size"]
                 artist_id = Artist_Id_
                 if songName not in SongNameMap:
                     # Remember the name so a later song with the same name
                     # is skipped.
                     SongNameMap[songName] = None
                     if order > Order_[0] and songlink:  # important
                         db_.add_song(songId, songName, lrclink, songlink, rate, size, artist_id, Order_[0])
                         obj = {
                             "songId": songId,
                             "songName": songName,
                             "rate": rate,
                             "size": size,
                             "order": Order_[0],
                             "artistId": artist_id,
                         }
                         elsup[0].transfer(json.dumps(obj), songId)
                         _transfer_with_retry(
                             dwn_music[0], songlink, songId, "audio/mpeg", 1,
                             "try download music %s again, time: %d",
                         )
                         if lrclink.endswith(".lrc"):
                             _transfer_with_retry(
                                 dwn_lrc[0], lrclink, songId, "text/plain", 2,
                                 "try download lrc %s again, time: %d",
                             )
                         # Only songs that were actually stored advance the
                         # running order.
                         Order_[0] = Order_[0] + 1
                 # NOTE(review): printed for every fetched song, including
                 # ones skipped above -- confirm that is intended.
                 print("song %d has been saved." % songId)
             Find_Song_Switch_[0] = True
     except Exception as e:
         common.log("Find_Song_Link: " + str(e))
예제 #7
0
                        Find_Song_Switch_[0] = True
        except Exception, e:
            common.log("Find_Song_Link: " + str(e))

    # Create a plain HTMLParser and install Find_Song_Link as its start-tag
    # handler.
    parser = HTMLParser()
    parser.handle_starttag = Find_Song_Link

    for k_ in artist_list:
        print "start process artist %s ..." % k_
        # Per-artist state: running song order, names already seen, and the
        # paging offset used in the song-list request.
        Order_[0] = 0
        SongNameMap = {}
        s_ = 0
        Find_Song_Switch_[0] = True
        # Keep requesting pages while the previous page set the switch again
        # (Find_Song_Link sets it to True when it handles a song link).
        while Find_Song_Switch_[0]:
            Find_Song_Switch_[0] = False
            raw_content = common.http_read(GetSongs_URL_Template_ % (s_, k_))
            # Advance the offset by 25 for the next request.
            s_ = s_ + 25
            if raw_content is None:
                # The switch is already False here, so this ends the paging
                # for the current artist.
                continue
            # NOTE(review): each step below only logs on failure and then
            # falls through, so a later step may run with raw_object unbound
            # or left over from the previous page, or with raw_content still
            # holding the undecoded response -- confirm this is acceptable.
            try:
                raw_object = json.loads(raw_content)
            except Exception, e:
                common.log("json.loads: " + str(e))
            try:
                # Pull the HTML fragment out of the JSON answer.
                raw_content = raw_object["data"]["html"]
            except Exception, e:
                common.log("extract html from json object: " + str(e))
            try:
                # Resolve backslash escape sequences in the extracted text.
                raw_content = raw_content.decode("unicode_escape")
            except Exception, e:
                common.log("str.decode: " + str(e))
예제 #8
0
                        artid_ = v[v.find('/artist/') + len('/artist/'):]
                    if(k and k == 'title'):
                        title_ = v
                if(artid_ != ''):
                    if artid_.isdigit():
                        #Artist_List_[artid_] = title_
                        db_.add_artist(artid_, title_)
                    elif Category_List_Switch_:
                        Category_List_.add(PRE_URL_ + href_)
        except Exception, e:
            common.log('Find_Artist_Link: ' + str(e))  
    
    # Create a plain HTMLParser and install Find_Artist_Link as its start-tag
    # handler, so feed() passes every opening tag to that callback.
    parser = HTMLParser()
    parser.handle_starttag = Find_Artist_Link

    # Download the entry page addressed by URL_.
    raw_content = common.http_read(URL_)
    
    try:
        parser.feed(raw_content)
    except Exception, e:
        # Best effort: a failure while parsing is logged, not propagated.
        common.log('HTMLParser.feed: ' + str(e))
    print '"' + URL_ + '" has been processed.'
    
    # Switch off category collection before walking Category_List_: the
    # handler only adds to Category_List_ while this flag is truthy, so
    # (presumably) the collection is not extended while it is iterated below.
    Category_List_Switch_ = False
    
    # Run every collected category page through the same parser instance.
    # NOTE(review): the parser is reused without reset() between documents --
    # confirm that input buffered from one page cannot leak into the next.
    for l_ in Category_List_:
        raw_content = common.http_read(l_)
        try:
            parser.feed(raw_content)
        except Exception, e:
            # Log and move on to the next category page.
            common.log('HTMLParser.feed: ' + str(e))