Example #1
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

# Name constants used by this module. Note the numeric suffixes differ
# ('artist8' vs. 'song9' / 'artist_log9').
# NOTE(review): presumably MongoDB collection names (pymongo is imported
# below) -- confirm against their use sites.
artist_ = 'artist8'
song_ = 'song9'
artist_log_ = 'artist_log9'
failed_ = 'failed'

import threading

import common
import pymongo

# Threshold used by db.mode2(), which keeps artists whose mapped value is
# strictly greater than it. Read via common.get_argv('-hot', 10000).
# NOTE(review): presumably a command-line option with 10000 as the
# fallback -- confirm in common.get_argv.
hot = int(common.get_argv('-hot', 10000))

class db(object):
    """Artist-selection queries over two per-instance maps.

    Both methods read the name-mangled attributes ``__artist_map``
    (iterated as key/value pairs) and ``__artist_log_map`` (used only for
    membership tests). Neither attribute is assigned in the visible part of
    the class.
    """

    def mode(self):
        """Return the keys of ``__artist_map`` whose value equals -1.

        Falsy keys and keys already present in ``__artist_log_map`` are
        left out.
        """
        return [
            name
            for name, value in self.__artist_map.items()
            if name
            and name not in self.__artist_log_map
            and value
            and value == -1
        ]

    def mode2(self):
        """Return the keys of ``__artist_map`` whose value exceeds ``hot``.

        Falsy keys, falsy values and keys already present in
        ``__artist_log_map`` are left out. ``hot`` is the module-level
        threshold.
        """
        return [
            name
            for name, value in self.__artist_map.items()
            if name
            and name not in self.__artist_log_map
            and value
            and value > hot
        ]
Example #2
0
# Python 2-only idiom: reload(sys) re-exposes sys.setdefaultencoding (which
# the interpreter removes at startup) so the process-wide default string
# encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

import os, time, math, pdb, threading

import common, db, job_artist, job_song, job_hotnum

# One-time setup ("just once"): open the database handle and invoke
# job_artist.Start a single time before any work is split across threads.
db_ = db.db('mongodb://192.168.20.66:27017/', 'local')

rs = job_artist.Start(db_)

# The artists returned by db_.mode() are the work items to distribute.
artist_list = db_.mode()
count = len(artist_list)

# Number of worker threads, read via common.get_argv('-t', 1).
t = int(common.get_argv('-t', 1))
# A value below 1 would divide by zero below (0) or schedule no workers at
# all (negative), so fall back to a single thread.
if t < 1:
    t = 1

print('Thread : ' + str(t))

# Chunk size per thread, rounded UP so that t chunks cover every artist.
# The division has to happen in floating point: under Python 2, `count / t`
# on two ints already floors, which turned math.ceil into a no-op and left
# the chunk size too small (0 when count < t).
b = int(math.ceil(float(count) / t))

threads = []

if '--debug' in sys.argv:
    job_hotnum.Start(db_, artist_list)
else:
    print 't is %d, b is %d' % (t, b)
    for i in range(0, t):
        begin = b * i
        end = b * (i + 1)
        if end >= count:
Example #3
0
def Start(db_, artist_list):
    """Build the song-harvesting state and helpers for a crawl run.

    Only the portion of the function visible here is documented: it prepares
    URL templates, command-line driven settings, the two downloaders and the
    uploader, and the ``Find_Song_Link`` tag callback.

    Args:
        db_: Storage object; the visible code calls ``db_.add_song(...)`` and
            ``db_.add_failed(...)`` on it.
        artist_list: Not referenced in the visible portion.
            NOTE(review): presumably consumed by code further down -- confirm.
    """

    # NOTE(review): GetSongs_URL_Template_ is not used in the visible portion.
    GetSongs_URL_Template_ = 'http://music.baidu.com/data/user/getsongs?start=%s&ting_uid=%s&order=hot'
    SongLink_URL_Template_ = 'http://play.baidu.com/data/music/songlink?songIds=%s'
    # Prefix prepended to the 'lrcLink' value returned by the songlink lookup.
    PRE_URL_ = 'http://play.baidu.com'

    # State shared with the nested callback. The flag and the counter live in
    # one-element lists because Find_Song_Link updates them by assigning to
    # index 0 instead of rebinding the names.
    Find_Song_Switch_ = [False]
    # Copied into every stored song as its artist id; empty in the visible
    # code. NOTE(review): presumably rebound per artist further down -- confirm.
    Artist_Id_ = ''
    # Number of songs stored so far; also passed along as each song's order.
    Order_ = [0]
    # Song names already seen (only the keys are used), for de-duplication.
    SongNameMap = {}

    # Host:port used for both downloaders; `-h <value>` on the command line
    # overrides the default.
    h = '127.0.0.1:8098'
    if '-h' in sys.argv:
        h_index = sys.argv.index('-h')
        # Accept the option only when it is not argv[0] and a value follows it.
        if h_index and h_index > 0 and len(sys.argv) > h_index + 1:
            h = sys.argv[h_index + 1]

    # Songs are stored only while Order_[0] is below this limit.
    order = int(common.get_argv('-order', 25))

    RIAK_HOSTNAME = h
    RIAK_URL_TEMPLATE = '/buckets/music/keys/%s'
    RIAK_LRC_URL_TEMPLATE = '/buckets/lrc/keys/%s'

    # Third constructor argument of the downloaders and the uploader.
    # NOTE(review): its meaning is not visible here -- confirm in common.
    dwnn = int(common.get_argv('-dwnn', 25))

    ELS_HOSTNAME = str(common.get_argv('-esh', 'localhost:9200'))
    ELS_URL_TEMPLATE = '/local/music/%s'

    # Each helper is held in a one-element list so its *_destruct callback can
    # swap in a replacement while the code below keeps using `name[0]`.
    dwn_music = [common.Downloader(RIAK_HOSTNAME, RIAK_URL_TEMPLATE, dwnn)]
    dwn_lrc = [common.Downloader(RIAK_HOSTNAME, RIAK_LRC_URL_TEMPLATE, dwnn)]
    elsup = [common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)]

    def elsup_destruct(this_):
        """Close the current uploader and replace it with a fresh one.

        ``this_`` is not used. NOTE(review): presumably the expiring uploader
        passed in by whoever fires ``evtExpire`` -- confirm in common.
        """
        elsup[0].close()
        elsup[0] = common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)
        # Re-register so the replacement can itself be replaced later.
        elsup[0].evtExpire = elsup_destruct

    elsup[0].evtExpire = elsup_destruct

    def dwn_music_destruct(this_):
        """Close the current music downloader and replace it with a fresh one.

        ``this_`` is not used.
        """
        dwn_music[0].close()
        dwn_music[0] = common.Downloader(RIAK_HOSTNAME, RIAK_URL_TEMPLATE,
                                         dwnn)
        dwn_music[0].evtExpire = dwn_music_destruct

    dwn_music[0].evtExpire = dwn_music_destruct

    def dwn_lrc_destruct(this_):
        """Close the current lyrics downloader and replace it with a fresh one.

        ``this_`` is not used.
        """
        dwn_lrc[0].close()
        dwn_lrc[0] = common.Downloader(RIAK_HOSTNAME, RIAK_LRC_URL_TEMPLATE,
                                       dwnn)
        dwn_lrc[0].evtExpire = dwn_lrc_destruct

    dwn_lrc[0].evtExpire = dwn_lrc_destruct

    def Find_Song_Link(tag, attrs):
        """Handle one start tag: store and download the song behind a '/song/' link.

        For every ``href`` attribute of an ``a`` tag that contains '/song/',
        the path segment following '/song/' is used as the id for a songlink
        lookup. The first entry of the returned ``data.songList`` is processed
        only if its name has not been seen before, the ``order`` limit has not
        been reached and it has a non-empty ``songLink``. In that case it is:

          * stored via ``db_.add_song``,
          * uploaded as JSON through ``elsup[0]``,
          * downloaded (audio, plus lyrics when the link ends in '.lrc') with
            up to three attempts each; a failed third attempt is recorded
            through ``db_.add_failed``.

        Any exception is logged through ``common.log`` and swallowed.

        Args:
            tag: Tag name.
            attrs: Iterable of (name, value) attribute pairs.
        """
        try:
            if tag == 'a':
                for k, v in attrs:
                    if (k and k == 'href' and v and v.find('/song/') != -1):
                        # Keep what follows '/song/' up to the next '/'.
                        href_ = v[v.find('/song/') + len('/song/'):]
                        if href_.find('/') != -1:
                            href_ = href_[:href_.find('/')]
                        #Song_List_.add(href_)
                        raw_content = common.http_read(SongLink_URL_Template_ %
                                                       href_)
                        if raw_content is None:
                            # Lookup returned nothing: move on to the next
                            # attribute without setting Find_Song_Switch_.
                            continue
                        raw_object = json.loads(raw_content)
                        songList = raw_object['data']['songList']
                        if len(songList) > 0:
                            # Only the first returned entry is considered.
                            song_ = songList[0]
                            songId = song_['songId']
                            songName = song_['songName']
                            lrclink = PRE_URL_ + song_['lrcLink']
                            songlink = song_['songLink']
                            rate = song_['rate']
                            size = song_['size']
                            artist_id = Artist_Id_
                            if songName not in SongNameMap:
                                # Mark the name as seen even if the song ends
                                # up not being stored below.
                                SongNameMap[songName] = None
                                if (order > Order_[0] and songlink
                                        and songlink != ''):  #important
                                    db_.add_song(songId, songName, lrclink,
                                                 songlink, rate, size,
                                                 artist_id, Order_[0])
                                    obj = {
                                        "songId": songId,
                                        "songName": songName,
                                        "rate": rate,
                                        "size": size,
                                        "order": Order_[0],
                                        "artistId": artist_id
                                    }
                                    elsup[0].transfer(json.dumps(obj), songId)
                                    # Earlier hand-built JSON payload, kept
                                    # for reference:
                                    #elsup[0].transfer('{'\
                                    #    '"songId": %d,'\
                                    #    '"songName": "%s",'\
                                    #    '"rate": %d,'\
                                    #    '"size": %d,'\
                                    #    '"order": %d,'\
                                    #    '"artistId": "%s"}' % (songId, songName, rate, size, Order_[0], artist_id), songId)
                                    # Up to three attempts at the audio file;
                                    # a failed third attempt is recorded with
                                    # type code 1.
                                    for i in range(0, 3):
                                        if i > 0:
                                            common.log(
                                                'try download music %s again, time: %d'
                                                % (songId, i))
                                        if dwn_music[0].transfer(
                                                songlink, songId,
                                                'audio/mpeg'):
                                            break
                                        elif i == 2:
                                            db_.add_failed(
                                                songlink, songId, 'audio/mpeg',
                                                1)
                                    # Lyrics are fetched only for '.lrc'
                                    # links, with the same retry scheme and
                                    # type code 2 on final failure.
                                    if lrclink.endswith('.lrc'):
                                        for i in range(0, 3):
                                            if i > 0:
                                                common.log(
                                                    'try download lrc %s again, time: %d'
                                                    % (songId, i))
                                            if dwn_lrc[0].transfer(
                                                    lrclink, songId,
                                                    'text/plain'):
                                                break
                                            elif i == 2:
                                                db_.add_failed(
                                                    lrclink, songId,
                                                    'text/plain', 2)
                                    # Count only songs that were stored.
                                    Order_[0] = Order_[0] + 1
                            #Order_[0] = Order_[0] + 1
                            # NOTE(review): printed for every non-empty
                            # songList, including songs skipped by the
                            # duplicate-name / order-limit checks above.
                            print 'song %d has been saved.' % songId
                        # Set once a '/song/' link has been looked up
                        # successfully.
                        Find_Song_Switch_[0] = True
        except Exception, e:
            common.log('Find_Song_Link: ' + str(e))
Example #4
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys
# Python 2-only idiom: reload(sys) re-exposes sys.setdefaultencoding (which
# the interpreter removes at startup) so the process-wide default string
# encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

from HTMLParser import HTMLParser
import urllib, json

import common, db

# Third constructor argument of the uploader, read via
# common.get_argv('-dwnn', 25).
# NOTE(review): its meaning is not visible here -- confirm in common.
dwnn = int(common.get_argv('-dwnn', 25))

# Host:port ('-esh', fallback 'localhost:9200') and URL path template handed
# to common.ElsUploader.
ELS_HOSTNAME = str(common.get_argv('-esh', 'localhost:9200'))
ELS_URL_TEMPLATE = '/local/artist/%s'

# Held in a one-element list so elsup_destruct can swap in a new uploader in
# place while other code keeps using `elsup[0]`.
elsup = [common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)]


def elsup_destruct(this_):
    """Swap the module-level uploader for a freshly constructed one.

    The uploader currently stored in ``elsup[0]`` is closed, a new
    ``common.ElsUploader`` with the same settings takes its slot, and this
    function is registered as the new uploader's ``evtExpire`` callback.

    Args:
        this_: Unused. NOTE(review): presumably the expiring uploader passed
            in by whoever fires ``evtExpire`` -- confirm in common.
    """
    expired = elsup[0]
    expired.close()
    fresh = common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)
    elsup[0] = fresh
    # Re-arm the callback so the replacement can itself be replaced later.
    fresh.evtExpire = elsup_destruct


# Register the replacement callback on the initial uploader.
# NOTE(review): presumably common.ElsUploader calls evtExpire when the
# uploader should be recycled -- confirm in common.
elsup[0].evtExpire = elsup_destruct


class HotNumParser(HTMLParser):
    def __init__(self):
Example #5
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys
# Python 2-only idiom: reload(sys) re-exposes sys.setdefaultencoding (which
# the interpreter removes at startup) so the process-wide default string
# encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

from HTMLParser import HTMLParser
import urllib, json

import common, db

# Third constructor argument of the uploader, read via
# common.get_argv('-dwnn', 25).
# NOTE(review): its meaning is not visible here -- confirm in common.
dwnn = int(common.get_argv('-dwnn', 25))

# Host:port ('-esh', fallback 'localhost:9200') and URL path template handed
# to common.ElsUploader.
ELS_HOSTNAME = str(common.get_argv('-esh', 'localhost:9200'))
ELS_URL_TEMPLATE = '/local/artist/%s'

# Held in a one-element list so elsup_destruct can swap in a new uploader in
# place while other code keeps using `elsup[0]`.
elsup = [common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)]

def elsup_destruct(this_):
    """Swap the module-level uploader for a freshly constructed one.

    The uploader currently stored in ``elsup[0]`` is closed, a new
    ``common.ElsUploader`` with the same settings takes its slot, and this
    function is registered as the new uploader's ``evtExpire`` callback.

    Args:
        this_: Unused. NOTE(review): presumably the expiring uploader passed
            in by whoever fires ``evtExpire`` -- confirm in common.
    """
    expired = elsup[0]
    expired.close()
    fresh = common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)
    elsup[0] = fresh
    # Re-arm the callback so the replacement can itself be replaced later.
    fresh.evtExpire = elsup_destruct

# Register the replacement callback on the initial uploader.
# NOTE(review): presumably common.ElsUploader calls evtExpire when the
# uploader should be recycled -- confirm in common.
elsup[0].evtExpire = elsup_destruct

class HotNumParser(HTMLParser):
    """HTML parser carrying a ``processing`` marker and a ``hotnum`` value.

    ``processing`` starts as ``None`` and ``hotnum`` as ``0``.
    NOTE(review): how the two fields are updated while parsing is not
    visible here -- see the handler methods.
    """

    def __init__(self):
        # The fields are set before the base initialiser runs, in this order.
        self.processing, self.hotnum = None, 0
        # Called explicitly on the base class: Python 2's HTMLParser is an
        # old-style class, so super() is not an option.
        HTMLParser.__init__(self)
Example #6
0
def Start(db_, artist_list):
    """Build the song-harvesting state and helpers for a crawl run.

    Only the portion of the function visible here is documented: it prepares
    URL templates, command-line driven settings, the two downloaders and the
    uploader, and the ``Find_Song_Link`` tag callback.

    Args:
        db_: Storage object; the visible code calls ``db_.add_song(...)`` and
            ``db_.add_failed(...)`` on it.
        artist_list: Not referenced in the visible portion.
            NOTE(review): presumably consumed by code further down -- confirm.
    """

    # NOTE(review): GetSongs_URL_Template_ is not used in the visible portion.
    GetSongs_URL_Template_ = "http://music.baidu.com/data/user/getsongs?start=%s&ting_uid=%s&order=hot"
    SongLink_URL_Template_ = "http://play.baidu.com/data/music/songlink?songIds=%s"
    # Prefix prepended to the "lrcLink" value returned by the songlink lookup.
    PRE_URL_ = "http://play.baidu.com"

    # State shared with the nested callback. The flag and the counter live in
    # one-element lists because Find_Song_Link updates them by assigning to
    # index 0 instead of rebinding the names.
    Find_Song_Switch_ = [False]
    # Copied into every stored song as its artist id; empty in the visible
    # code. NOTE(review): presumably rebound per artist further down -- confirm.
    Artist_Id_ = ""
    # Number of songs stored so far; also passed along as each song's order.
    Order_ = [0]
    # Song names already seen (only the keys are used), for de-duplication.
    SongNameMap = {}

    # Host:port used for both downloaders; `-h <value>` on the command line
    # overrides the default.
    h = "127.0.0.1:8098"
    if "-h" in sys.argv:
        h_index = sys.argv.index("-h")
        # Accept the option only when it is not argv[0] and a value follows it.
        if h_index and h_index > 0 and len(sys.argv) > h_index + 1:
            h = sys.argv[h_index + 1]

    # Songs are stored only while Order_[0] is below this limit.
    order = int(common.get_argv("-order", 25))

    RIAK_HOSTNAME = h
    RIAK_URL_TEMPLATE = "/buckets/music/keys/%s"
    RIAK_LRC_URL_TEMPLATE = "/buckets/lrc/keys/%s"

    # Third constructor argument of the downloaders and the uploader.
    # NOTE(review): its meaning is not visible here -- confirm in common.
    dwnn = int(common.get_argv("-dwnn", 25))

    ELS_HOSTNAME = str(common.get_argv("-esh", "localhost:9200"))
    ELS_URL_TEMPLATE = "/local/music/%s"

    # Each helper is held in a one-element list so its *_destruct callback can
    # swap in a replacement while the code below keeps using `name[0]`.
    dwn_music = [common.Downloader(RIAK_HOSTNAME, RIAK_URL_TEMPLATE, dwnn)]
    dwn_lrc = [common.Downloader(RIAK_HOSTNAME, RIAK_LRC_URL_TEMPLATE, dwnn)]
    elsup = [common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)]

    def elsup_destruct(this_):
        """Close the current uploader and replace it with a fresh one.

        ``this_`` is not used. NOTE(review): presumably the expiring uploader
        passed in by whoever fires ``evtExpire`` -- confirm in common.
        """
        elsup[0].close()
        elsup[0] = common.ElsUploader(ELS_HOSTNAME, ELS_URL_TEMPLATE, dwnn)
        # Re-register so the replacement can itself be replaced later.
        elsup[0].evtExpire = elsup_destruct

    elsup[0].evtExpire = elsup_destruct

    def dwn_music_destruct(this_):
        """Close the current music downloader and replace it with a fresh one.

        ``this_`` is not used.
        """
        dwn_music[0].close()
        dwn_music[0] = common.Downloader(RIAK_HOSTNAME, RIAK_URL_TEMPLATE, dwnn)
        dwn_music[0].evtExpire = dwn_music_destruct

    dwn_music[0].evtExpire = dwn_music_destruct

    def dwn_lrc_destruct(this_):
        """Close the current lyrics downloader and replace it with a fresh one.

        ``this_`` is not used.
        """
        dwn_lrc[0].close()
        dwn_lrc[0] = common.Downloader(RIAK_HOSTNAME, RIAK_LRC_URL_TEMPLATE, dwnn)
        dwn_lrc[0].evtExpire = dwn_lrc_destruct

    dwn_lrc[0].evtExpire = dwn_lrc_destruct

    def Find_Song_Link(tag, attrs):
        """Handle one start tag: store and download the song behind a "/song/" link.

        For every ``href`` attribute of an ``a`` tag that contains "/song/",
        the path segment following "/song/" is used as the id for a songlink
        lookup. The first entry of the returned ``data.songList`` is processed
        only if its name has not been seen before, the ``order`` limit has not
        been reached and it has a non-empty ``songLink``. In that case it is:

          * stored via ``db_.add_song``,
          * uploaded as JSON through ``elsup[0]``,
          * downloaded (audio, plus lyrics when the link ends in ".lrc") with
            up to three attempts each; a failed third attempt is recorded
            through ``db_.add_failed``.

        Any exception is logged through ``common.log`` and swallowed.

        Args:
            tag: Tag name.
            attrs: Iterable of (name, value) attribute pairs.
        """
        try:
            if tag == "a":
                for k, v in attrs:
                    if k and k == "href" and v and v.find("/song/") != -1:
                        # Keep what follows "/song/" up to the next "/".
                        href_ = v[v.find("/song/") + len("/song/") :]
                        if href_.find("/") != -1:
                            href_ = href_[: href_.find("/")]
                        # Song_List_.add(href_)
                        raw_content = common.http_read(SongLink_URL_Template_ % href_)
                        if raw_content is None:
                            # Lookup returned nothing: move on to the next
                            # attribute without setting Find_Song_Switch_.
                            continue
                        raw_object = json.loads(raw_content)
                        songList = raw_object["data"]["songList"]
                        if len(songList) > 0:
                            # Only the first returned entry is considered.
                            song_ = songList[0]
                            songId = song_["songId"]
                            songName = song_["songName"]
                            lrclink = PRE_URL_ + song_["lrcLink"]
                            songlink = song_["songLink"]
                            rate = song_["rate"]
                            size = song_["size"]
                            artist_id = Artist_Id_
                            if songName not in SongNameMap:
                                # Mark the name as seen even if the song ends
                                # up not being stored below.
                                SongNameMap[songName] = None
                                if order > Order_[0] and songlink and songlink != "":  # important
                                    db_.add_song(songId, songName, lrclink, songlink, rate, size, artist_id, Order_[0])
                                    obj = {
                                        "songId": songId,
                                        "songName": songName,
                                        "rate": rate,
                                        "size": size,
                                        "order": Order_[0],
                                        "artistId": artist_id,
                                    }
                                    elsup[0].transfer(json.dumps(obj), songId)
                                    # Earlier hand-built JSON payload, kept
                                    # for reference:
                                    # elsup[0].transfer('{'\
                                    #    '"songId": %d,'\
                                    #    '"songName": "%s",'\
                                    #    '"rate": %d,'\
                                    #    '"size": %d,'\
                                    #    '"order": %d,'\
                                    #    '"artistId": "%s"}' % (songId, songName, rate, size, Order_[0], artist_id), songId)
                                    # Up to three attempts at the audio file;
                                    # a failed third attempt is recorded with
                                    # type code 1.
                                    for i in range(0, 3):
                                        if i > 0:
                                            common.log("try download music %s again, time: %d" % (songId, i))
                                        if dwn_music[0].transfer(songlink, songId, "audio/mpeg"):
                                            break
                                        elif i == 2:
                                            db_.add_failed(songlink, songId, "audio/mpeg", 1)
                                    # Lyrics are fetched only for ".lrc"
                                    # links, with the same retry scheme and
                                    # type code 2 on final failure.
                                    if lrclink.endswith(".lrc"):
                                        for i in range(0, 3):
                                            if i > 0:
                                                common.log("try download lrc %s again, time: %d" % (songId, i))
                                            if dwn_lrc[0].transfer(lrclink, songId, "text/plain"):
                                                break
                                            elif i == 2:
                                                db_.add_failed(lrclink, songId, "text/plain", 2)
                                    # Count only songs that were stored.
                                    Order_[0] = Order_[0] + 1
                            # Order_[0] = Order_[0] + 1
                            # NOTE(review): printed for every non-empty
                            # songList, including songs skipped by the
                            # duplicate-name / order-limit checks above.
                            print "song %d has been saved." % songId
                        # Set once a "/song/" link has been looked up
                        # successfully.
                        Find_Song_Switch_[0] = True
        except Exception, e:
            common.log("Find_Song_Link: " + str(e))
Example #7
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

# Name constants used by this module. Note the numeric suffixes differ
# ('artist8' vs. 'song9' / 'artist_log9').
# NOTE(review): presumably MongoDB collection names (pymongo is imported
# below) -- confirm against their use sites.
artist_ = 'artist8'
song_ = 'song9'
artist_log_ = 'artist_log9'
failed_ = 'failed'

import threading

import common
import pymongo

# Threshold used by db.mode2(), which keeps artists whose mapped value is
# strictly greater than it. Read via common.get_argv('-hot', 10000).
# NOTE(review): presumably a command-line option with 10000 as the
# fallback -- confirm in common.get_argv.
hot = int(common.get_argv('-hot', 10000))


class db(object):
    """Artist-selection queries over two per-instance maps.

    Both methods read the name-mangled attributes ``__artist_map``
    (iterated as key/value pairs) and ``__artist_log_map`` (used only for
    membership tests). Neither attribute is assigned in the visible part of
    the class.
    """

    def mode(self):
        """Return the keys of ``__artist_map`` whose value equals -1.

        Falsy keys and keys already present in ``__artist_log_map`` are
        left out.
        """
        return [
            name
            for name, value in self.__artist_map.items()
            if name
            and name not in self.__artist_log_map
            and value
            and value == -1
        ]

    def mode2(self):
        """Return the keys of ``__artist_map`` whose value exceeds ``hot``.

        Falsy keys, falsy values and keys already present in
        ``__artist_log_map`` are left out. ``hot`` is the module-level
        threshold.
        """
        return [
            name
            for name, value in self.__artist_map.items()
            if name
            and name not in self.__artist_log_map
            and value
            and value > hot
        ]