Esempio n. 1
0
    def __init__(self, ids=518725853, comment='song'):
        """Set up a comment collector for the resource identified by ``ids``.

        ``ids`` and ``comment`` are forwarded to the base-class constructor
        together with ``category='comment'``.  ``comment`` is also kept on
        the instance.
        """
        super(Multi_Comment, self).__init__(
            ids=ids,
            category='comment',
            comment=comment,
        )

        self.comment = comment
        # Database helper owned by this instance.
        self.mysql = Cloud_Music_MySQL()
        # Collected comment chunks are appended here.
        self.comments = []
Esempio n. 2
0
    def __init__(self, song_ids=518725853, refer=2147483647):
        """Prepare a song-info worker for ``song_ids``.

        ``song_ids`` is forwarded to the base-class constructor as ``ids``
        together with ``category='url'``.  ``refer`` is stored unchanged; its
        default, 2147483647, is 2**31 - 1 (the largest signed 32-bit int).
        """
        super(Multi_Song_Info, self).__init__(category='url', ids=song_ids)

        # Database helper owned by this instance.
        self.mysql = Cloud_Music_MySQL()
        self.refer = refer
Esempio n. 3
0
class Multi_Song_Info(Cloud_Music):
    """Fetch the basic info of one song and store it through MySQL.

    Original author's note: multiple inheritance cannot be used here because
    the get_response() method names would clash.
    """

    def __init__(self,
                 song_ids=518725853,
                 refer=2147483647):  ## 2147483647 is 2**31 - 1, the largest signed 32-bit int
        """Forward ``song_ids`` to the base class and keep ``refer``.

        ``song_ids`` is passed to the base constructor as ``ids`` together
        with ``category='url'``.
        """
        super(Multi_Song_Info, self).__init__(ids=song_ids, category='url')
        self.refer = refer
        self.mysql = Cloud_Music_MySQL()

    def get_info(self):
        """Look up the song via ``Song`` and bind ``self.song`` / ``self.singer``."""
        s = Song(ids=self.ids)
        info = s.get_song_info()

        self.song = info['song']
        self.singer = info['singer']
        logging.debug(u'id=%s song is:%s,singer is:%s' %
                      (self.ids, self.song, self.singer))

    def get_url(self):
        """Bind ``self.url`` from the first entry of the response's ``data`` list."""
        response = self.get_response().json()
        self.url = response['data'][0]['url']
        logging.debug(u'the url of song id=%s is:%s' % (self.ids, self.url))

    def insert_song(self):
        """Fetch the song info, insert it into the song table and close the connection.

        Errors raised by the insert itself are printed and swallowed.  Errors
        raised while fetching the info still propagate to the caller, but the
        MySQL connection is now closed in that case as well.

        Returns the elapsed time in seconds.
        """
        t1 = time.time()

        try:
            self.get_info()
            # get_url() is deliberately skipped: the URLs expire.
            self.url = ''
            try:
                self.mysql.insert_table_song(song=self.song,
                                             singer=self.singer,
                                             url=self.url,
                                             ids=self.ids,
                                             refer=self.refer)
            except Exception:
                # Best effort: report the failure and let the task finish.
                traceback.print_exc()
        finally:
            # Always release the connection, otherwise finished tasks keep
            # connections open and the server runs out of them.  (Original
            # note: with a connection pool the close is ignored, so it does
            # not cause "mysql server has gone away".)
            self.mysql.close_connect()
        t2 = time.time()

        logging.info(u'try to close mysql connect avoid to many connect')
        print(u'线程执行完毕!一共花费%s秒,关闭数据库连接' % (t2 - t1))
        logging.debug(u'finish insert_song threading,total cost time is:%s' %
                      (t2 - t1))
        return t2 - t1
Esempio n. 4
0
    def __init__(self):
        """Fill ``self.run_connect`` with ``self.max_connect`` Cloud_Music_MySQL objects."""
        self.max_connect = 11
        self.run_connect = Queue()

        remaining = self.max_connect
        while remaining > 0:
            self.run_connect.put(Cloud_Music_MySQL())
            remaining -= 1
Esempio n. 5
0
def put_song_comment_queue():
    """Load song ids into the song comment queue and wait until it drains.

    The ids come from ``check_table_song()``; each row's ``'ids'`` value is
    put on the queue returned by ``manager.get_song_comment_queue()``.  The
    function then polls the queue every 10 seconds and returns once it is
    empty.
    """
    # Bind the queue in this scope: the wait loop below needs it, and it was
    # previously only a local of a nested helper.
    song_comment_queue = manager.get_song_comment_queue()

    mysql = Cloud_Music_MySQL()
    try:
        data = mysql.check_table_song()
    finally:
        # Release the connection even when the query fails.
        mysql.close_connect()

    for row in data[1]:
        ids = row['ids']
        song_comment_queue.put(ids)
        logging.info(u'put %s into song_comment_queue' % ids)

    while not song_comment_queue.empty():

        print(u'[%s] current queue size:%s' % (time.asctime(),
                                               song_comment_queue.qsize()))

        logging.info(
            u'check whether queue is empty after 10 second,current qsize is:%s'
            % (song_comment_queue.qsize()))
        time.sleep(10)

    logging.info(u'Queue empty , generate new queue')
Esempio n. 6
0
def put_queue_model(query, queue, queue_name):
    """Fill ``queue`` from a Cloud_Music_MySQL query and wait until it drains.

    query      -- name of the Cloud_Music_MySQL method to call; it is invoked
                  with ``limit=10000``.
    queue      -- queue object that receives every element of ``data[1]``.
    queue_name -- label used only in log and console messages.

    After filling the queue the function polls it every 10 seconds and
    returns once it is empty.
    """
    mysql = Cloud_Music_MySQL()
    try:
        data = getattr(mysql, query)(limit=10000)
    finally:
        # Release the connection even when the query (or the attribute
        # lookup) fails.
        mysql.close_connect()

    for d in data[1]:
        queue.put(d)
        logging.info(u'put %s into %s' % (d, queue_name))

    while not queue.empty():

        print(u'[%s] current %s size:%s' % (time.asctime(), queue_name, queue.qsize()))
        logging.info(u'check whether %s is empty after 10 second,current qsize is:%s' % (queue_name, queue.qsize()))
        time.sleep(10)

    logging.info(u'Queue empty , generate new queue')
Esempio n. 7
0
def put_song_info_queue_by_model():
    """Refresh playlist status, then feed (song id, playlist id) pairs to the song info queue.

    The rows returned by ``check_table_playlist_status()`` are expanded by
    the local ``parse`` generator into ``dict(ids=..., refer=...)`` items,
    one per entry of the row's comma-separated ``'song_ids'`` string.  The
    generator, the queue and its name are handed to
    ``put_queue_model_special``.
    """
    mysql = Cloud_Music_MySQL()
    try:
        logging.info(u'更新playlist中status的值')
        mysql.auto_update_playlist_status()

        logging.info(u'获取playlist中status小于75的数据')
        data = mysql.check_table_playlist_status()
    finally:
        # Release the connection even when one of the database calls fails.
        mysql.close_connect()

    queue = manager.get_song_info_queue()
    queue_name = 'song_info_queue'

    def parse():
        # One dict per song id; 'refer' is the id of the row it came from.
        for i in data[1]:
            song_ids = i['song_ids'].split(',')
            refer = i['ids']
            for ids in song_ids:
                d = dict(ids=ids, refer=refer)
                yield d

    put_queue_model_special(parse=parse, queue=queue, queue_name=queue_name)
Esempio n. 8
0
def put_song_comment_queue_by_model():
    """Feed every song id from ``check_table_song()`` to the song comment queue.

    The ids are produced lazily by the local ``parse_data`` generator, which
    is passed to ``put_queue_model`` together with the queue.
    """
    mysql = Cloud_Music_MySQL()
    try:
        data = mysql.check_table_song()
    finally:
        # The connection used to be left open here; close it once the rows
        # have been fetched.
        mysql.close_connect()

    queue = manager.get_song_comment_queue()

    def parse_data():
        for i in data[1]:
            ids = i['ids']
            yield ids

    put_queue_model(parse_data=parse_data, queue=queue)
Esempio n. 9
0
    def __init__(self):
        """Set the request parameters and URL, fetch the soup, and create a MySQL helper."""
        super(Discover_Playlist, self).__init__()

        # Request parameters and URL are assigned before get_soup() is called.
        self.params = dict(order='hot', cat='全部', limit=35, offset=35)
        self.url = 'http://music.163.com/discover/playlist'

        self.soup = self.get_soup()
        self.mysql = Cloud_Music_MySQL()
Esempio n. 10
0
    def __init__(self):
        """Create the work queues, a MySQL helper and three 100-worker thread pools."""
        # Original author's note: when all mysql objects shared one MySQL
        # connection, insert errors occurred.
        queue_attrs = (
            'playlist_queue',
            'table_playlist_info_queue',
            'table_playlist_comment_queue',
            'song_info_queue',
            'song_comment_queue',
        )
        for attr in queue_attrs:
            setattr(self, attr, Queue())

        self.mysql = Cloud_Music_MySQL()

        workers = 100
        # Dedicated to playlist comment update tasks, because those tasks
        # include waiting on timeouts.
        self.playlist_thread_pool = ThreadPoolExecutor(workers)
        # The original comment here also said "playlist comment tasks";
        # presumably this pool is meant for song tasks -- TODO confirm.
        self.song_thread_pool = ThreadPoolExecutor(workers)
        # Pool for all remaining tasks.
        self.thread_pool = ThreadPoolExecutor(workers)
Esempio n. 11
0
def put_song_info_queue():
    """Load (song id, playlist id) pairs into the song info queue and wait until it drains.

    Rows come from ``check_table_playlist_status()`` (per the log message,
    playlists whose status is below 75).  Each row's comma-separated
    ``'song_ids'`` string is split, and one ``dict(ids=..., refer=...)`` per
    song id is put on the queue returned by
    ``manager.get_song_info_queue()``.  The function then polls the queue
    every 10 seconds and returns once it is empty.
    """
    # Bind the queue in this scope: the wait loop below needs it, and it was
    # previously only a local of a nested helper.
    song_info_queue = manager.get_song_info_queue()

    mysql = Cloud_Music_MySQL()
    try:
        logging.info(u'查询playlist中status小于75的歌单')
        data = mysql.check_table_playlist_status()
    finally:
        # Release the connection even when the query fails.
        mysql.close_connect()

    for row in data[1]:
        refer = row['ids']
        for ids in row['song_ids'].split(','):
            d = dict(ids=ids, refer=refer)
            song_info_queue.put(d)
            logging.info(u'put dict into song_info_queue %s' % d)

    while not song_info_queue.empty():

        print(u'[%s] current queue size:%s' % (time.asctime(),
                                               song_info_queue.qsize()))

        logging.info(
            u'check whether queue is empty after 10 second,current qsize is:%s'
            % (song_info_queue.qsize()))
        time.sleep(10)

    logging.info(u'Queue empty , generate new queue')
Esempio n. 12
0
def model_run(queue,
              func):  ## two arguments are required: queue is one of the queues handed out by the manager, func is the task submitted to the pool (it is called with the dequeued item)
    '''Main loop: drain ``queue`` and submit ``func(item)`` to the thread pool.

    Relies on names defined outside this block: ``pool``, ``max_pool``,
    ``g_multiple``, ``model_test``, ``test`` and ``Cloud_Music_MySQL``.

    Flow, as written below:
      * ``model_test`` supplies ``avg_time``, which sizes the throttle sleep.
      * Each iteration waits up to 100 seconds for an item and submits it.
      * After ``max_pool * multiple`` submissions the loop sleeps
        ``multiple * avg_time`` seconds.  At that point, if at least 600
        seconds have passed since the last check, it reads
        ``show_Threads_connected()`` and adjusts ``multiple`` / ``avg_time``.
      * Errors are classified by matching text in ``str(e)``; an empty
        message is treated as "queue empty" and ends the loop.

    NOTE(review): ``test()`` is called without arguments below, whereas the
    initial measurement uses ``model_test(queue=..., func=...)`` -- confirm
    that ``test`` exists and is the intended function.
    NOTE(review): the log call after ``pool.submit`` indexes ``d['ids']``,
    so it assumes queue items are dict-like -- confirm for every queue this
    is used with.
    NOTE(review): the handlers call ``queue.put(d)``; if an exception is
    raised before the first successful ``queue.get`` then ``d`` is unbound,
    and otherwise it may be an item that was already submitted.
    '''

    avg_time = model_test(queue=queue, func=func)  ## average time one thread takes
    logging.info(u'from function test get avg_time=%s' % avg_time)
    print u'from function test get avg_time=%s' % avg_time
    count = 0
    start_time = time.time()
    multiple = g_multiple

    while True:
        try:
            logging.info(u'max wait 100 second try to get element from queue')
            # d=song_info_queue.get(timeout=100)
            d = queue.get(timeout=100)

            logging.info(u'get %s from queue' % d)

            # msi=Multi_Song_Info(song_ids=d['ids'],refer=d['refer'])
            # pool.submit(msi.insert_song)

            pool.submit(func, d)

            logging.info(
                u'%s start new threading, get song ids=%s,threading name is:%s,pid is:%s'
                % (sys._getframe().f_code.co_name, d['ids'],
                   threading.current_thread().name, os.getpid()))
            count += 1

            # Throttle: after a batch of submissions, pause before submitting more.
            if count >= max_pool * multiple:
                sleeptime = multiple * avg_time
                print u'generate %s threading,so sleep %s second! current active threading num=%s' % (
                    count, sleeptime, threading.active_count())
                time.sleep(sleeptime)
                count = 0

                end_time = time.time()
                if end_time - start_time >= 600:  ## every 10 minutes run a check so the number of database connections stays, ideally, between 30 and 150, keeping the program stable

                    mysql = Cloud_Music_MySQL()
                    Threads_connected = mysql.show_Threads_connected()
                    mysql.close_connect()

                    if Threads_connected <= 30:  ## the program is under-utilised: raise multiple, or lower avg_time
                        avg_time = test()
                        multiple = multiple + 1
                        info = u'程序闲置过多,current Threads_connected=%s,重设avg_time=%s,multiple=%s' % (
                            Threads_connected, avg_time, multiple)
                        print info
                        logging.info(info)

                    elif Threads_connected >= 150:  ## the program is overloaded: lower multiple, or raise avg_time
                        avg_time = test()
                        multiple = max(multiple - 1, 2)  ## multiple is never lowered below 2
                        info = u'程序负荷过重,current Threads_connected=%s,重设avg_time=%s,multiple=%s' % (
                            Threads_connected, avg_time, multiple)
                        print info
                        logging.info(info)

                    else:
                        info = u'程序运行良好,current Threads_connected=%s,保持avg_time=%s,multiple=%s' % (
                            Threads_connected, avg_time, multiple)
                        print info
                        logging.info(info)

                    start_time = time.time()  ## reset start_time

        except Exception, e:
            # From here on e is a plain string used for substring matching.
            if str(e):
                e = str(e)
            else:  ## the exception raised by the queue has an empty str(e)
                e = 'queue empty'

            logging.warn(
                u' function %s raise  error cause by %s,traceback info is:%s '
                % (sys._getframe().f_code.co_name, e, traceback.format_exc()))
            print u'error info is:%s' % e

            if 'many connections' in e:  ## a JoinableQueue would be preferable; with the 600-second check this "too many connections" error is now rarely raised
                print u'current too many connections,sleep 3 second wait runing connections close'
                # song_info_queue.put(d)
                queue.put(d)
                print u'catch too many connections error ,so put d=%s back into queue' % d
                logging.info(
                    u'catch too many connections error ,so put d=%s back into queue'
                    % d)

                ## the failure comes from the database operation and d was already obtained, so it is put back on the queue; hence no JoinableQueue is needed

                mysql = Cloud_Music_MySQL()
                Threads_connected = mysql.show_Threads_connected()

                # Wait in 3-second steps until the connection count drops below 100.
                while Threads_connected >= 100:
                    info = u'current Threads_connected is:%s,also too much,so sleep 3 second!' % Threads_connected
                    print info
                    logging.debug(info)
                    time.sleep(3)
                    Threads_connected = mysql.show_Threads_connected()
                mysql.close_connect()
                continue

            elif 'empty' in e:
                print u'empty queue,break loop!'
                print u'wait 20 second ensure runing threading done'
                time.sleep(20)
                break

            else:
                # Any other error: report it, return the item to the queue and stop.
                info = u'unexcept error,here is traceback info:%s' % (
                    traceback.format_exc())
                print info
                logging.error(info)
                # song_info_queue.put(d)
                queue.put(d)
                print u'catch unexcept error ,so put d=%s back into queue' % d
                break
Esempio n. 13
0
 def __init__(self, ids=2190625773, mysql=Cloud_Music_MySQL()):
     """Forward ``ids`` to the base class and keep the given MySQL helper.

     NOTE(review): the default ``mysql`` object is created once, when this
     ``def`` is evaluated, and is shared by every instance that omits the
     argument.  The original comment mentions a PersistentDB pool, so the
     sharing is presumably intended -- confirm.
     """
     super(Multi_Playlist_Info, self).__init__(ids=ids)
     self.mysql = mysql
Esempio n. 14
0
class Multi_Comment(Cloud_Music):
    '''Collect paged comments for a song or playlist and store them via MySQL.

    Original author's note: makes use of process pools and thread pools
    (the pools themselves are not part of this class).
    '''

    def __init__(self, ids=518725853, comment='song',):
        '''Forward ``ids`` and ``comment`` to the base class with ``category='comment'``.'''
        super(Multi_Comment, self).__init__(ids=ids, category='comment', comment=comment)

        self.comments = []
        self.mysql = Cloud_Music_MySQL()
        self.comment = comment

    def parse_comments(self, response):
        '''Turn one comment response into newline-terminated JSON lines.

        ``response.json()`` must provide a ``'comments'`` list.  Each entry
        is reduced to content, nickname, userid, likedcount and time, and
        dumped with ``ensure_ascii=False``.  Returns the concatenation of all
        lines, or an empty string when there are no comments.
        '''
        content = response.json()
        lines = []
        for comment in content['comments']:
            d = dict(
                content=comment['content'].strip(),
                nickname=comment['user']['nickname'],
                userid=comment['user']['userId'],
                likedcount=comment['likedCount'],
                time=comment['time'],
            )
            lines.append(json.dumps(d, ensure_ascii=False) + '\n')

        # Join once instead of growing a string inside the loop.
        return ''.join(lines)

    def get_first_comment(self,):
        '''Fetch the page count and store the first page of comments.

        Binds ``self.page`` from ``get_page()``.  Per the original note that
        call also binds ``self.total``; ``self.first_comment`` is read right
        after it -- presumably set by ``get_page()`` as well, confirm in the
        base class.
        '''
        self.page = self.get_page()
        response = self.first_comment
        logging.info(u'ids 为%s的歌曲,评论总页码为:%s' % (self.ids, self.page))
        comment = self.parse_comments(response)
        self.comments.append(comment)

    def get_other_comment(self, page=2):
        '''Fetch one further page of comments and append it to ``self.comments``.'''
        response = self.get_response(page=page)
        comment = self.parse_comments(response)
        logging.info(u'正在抓取ids=%s第%s页评论' % (self.ids, page))
        self.comments.append(comment)

    def get_all_comment(self, max_page=5):
        '''Fetch pages 2..min(max_page, self.page).

        Requires ``self.page`` (set by ``get_first_comment``).  The cap keeps
        the crawl from fetching too many pages.
        '''
        self.min_page = min(max_page, self.page)
        for page in range(2, self.min_page + 1):
            self.get_other_comment(page=page)
            logging.info(u'获取ids=%s的%s的第%s页评论' % (self.ids, self.comment, page))

    def unique_comment(self,):
        '''Return all collected comment chunks joined by newlines.

        Reads ``self.min_page``, so ``get_all_comment`` must have run first.
        '''
        info = u'得到%s页评论中的%s页评论,准备合并评论' % (self.min_page, len(self.comments))
        print(info)
        logging.info(info)
        return '\n'.join(self.comments)

    def update_song_comment(self, comments):
        '''Write ``comments`` and ``self.total`` to the song table, then close the connection.'''
        try:
            self.mysql.update_table_song(comments=comments, comment_count=self.total, ids=self.ids)
        finally:
            # Release the connection even when the update fails.
            self.mysql.close_connect()

    def update_playlist_comment(self, comments):
        '''Write ``comments`` and ``self.total`` to the playlist comment table, then close the connection.'''
        try:
            self.mysql.update_table_playlist_comment(comments=comments, comment_count=self.total, ids=self.ids)
        finally:
            # Release the connection even when the update fails.
            self.mysql.close_connect()