コード例 #1
0
    def getOnePageSongList(self, page):
        """Scrape one playlist listing page and bulk-insert its playlists.

        The page URL is the configured base URL followed by ``page * 35``.
        For every playlist cover found on the page the title, link and
        play count are collected and written to ``rap_playlist163`` with a
        single ``batchInsertSQL`` call.

        Failures inside the scrape/insert step are printed with their
        traceback and logged as "ERROR 104"; they are not re-raised.

        :param page: page index; multiplied by 35 to build the URL suffix.
        """
        session = requests.session()
        play_url = self.__play_url + str(page * 35)
        try:
            # Keep the HTTP session and the parsed document in separate
            # names so neither hides the other
            soup = BeautifulSoup(
                session.get(play_url, headers=self.__headers).content, "lxml")
            lst = soup.find('ul', {'class': 'm-cvrlst f-cb'})
            if lst is None:
                # Fail with a descriptive message instead of the opaque
                # AttributeError the lookup below would otherwise raise
                raise ValueError('playlist container not found in page')

            # Insert all rows of the page with one batched statement
            sql = "insert into rap_playlist163 (`title`, `link`, `play_num`)" \
                  "values(%s,%s,%s)"

            values = []
            for play in lst.find_all('div', {'class': 'u-cover u-cover-1'}):
                anchor = play.find('a', {'class': 'msk'})
                # The values are bound to the %s placeholders by the
                # driver, which escapes them itself; escaping them here as
                # well would store literal backslashes in the table
                title = anchor['title'].encode('utf-8')
                link = anchor['href'].encode('utf-8')
                playNum = play.find('span', {'class': 'nb'}).text.encode(
                    'utf-8')

                values.append((title, link, playNum))

            self.__db.batchInsertSQL(sql, values)
            print('page:%s页 歌单入库' % page)

        except Exception:
            # Print the full stack trace, then log the URL that failed
            print(traceback.format_exc())
            c.Log('{} : {} {}'.format("ERROR 104 ", "URL", play_url))
コード例 #2
0
ファイル: db.py プロジェクト: leoyoung1991/rap
 def insertSQL(self, sql):
     """Run one SQL statement on the shared cursor and commit it.

     If executing or committing fails, the transaction is rolled back,
     the traceback is printed and the statement is logged as
     "ERROR 909"; the exception is not re-raised.

     :param sql: complete SQL statement text, passed to the cursor as-is.
     """
     try:
         self.__cursor.execute(sql)
         self.__db.commit()
     except Exception:
         # Undo the partial work first, then report the stack trace
         self.__db.rollback()
         print(traceback.format_exc())
         c.Log("ERROR 909 : SQL " + sql)
コード例 #3
0
ファイル: db.py プロジェクト: leoyoung1991/rap
    def batchInsertSQL(self, sql, values):
        """Execute ``sql`` once per parameter tuple in ``values`` and commit.

        A fresh cursor is opened for the batch and is always closed again,
        including when the initial ``SET NAMES`` statement fails.

        If ``executemany`` or the commit fails, the transaction is rolled
        back, the traceback is printed and the statement is logged as
        "ERROR 909"; that exception is not re-raised.  A failure of
        ``SET NAMES`` itself still propagates to the caller.

        :param sql: statement containing ``%s`` placeholders.
        :param values: sequence of parameter tuples, one per row.
        """
        cursor = self.__db.cursor()
        try:
            # Switch the connection character set to utf8mb4.  This runs
            # inside the outer try so the cursor is released even if the
            # statement raises.
            cursor.execute("SET NAMES utf8mb4")

            try:
                cursor.executemany(sql, values)
                self.__db.commit()
            except Exception:
                # Undo the partial batch, then report the stack trace
                self.__db.rollback()
                print(traceback.format_exc())
                c.Log("ERROR 909 : SQL " + sql)
        finally:
            cursor.close()
コード例 #4
0
ファイル: wordService.py プロジェクト: leoyoung1991/rap
    def getOneSong(self, lyric, id):
        """Tokenise one lyric and add its top words to the Redis ranking.

        Steps, all inside a single try block:
          1. move the ``rap_music163`` row from status 1 to status 2;
          2. cut the lyric with jieba (parallel mode disabled) and keep
             the tokens whose length is at least 2;
          3. take the 20 most common tokens and add each count to that
             token's score in the sorted set ``self.sortedSetKey``;
          4. move the row from status 2 to status 3.

        Exceptions are printed with their traceback and logged as
        "Error 901"; nothing is re-raised.

        :param lyric: lyric text to tokenise.
        :param id: value matched against the ``id`` column.
        """
        try:
            from collections import Counter

            # Flip the row to "in progress" first; this serves as the lock
            self.dbManager.execute(
                "update rap_music163 set status = 2 where status = 1 and id = '"
                + str(id) + "'")

            print(len(lyric))

            # Segment with jieba in non-parallel mode
            jieba.disable_parallel()
            tokens = []
            for piece in jieba.cut(lyric):
                if len(piece) >= 2:
                    tokens.append(piece)
            # Second call mirrors the original sequence (already disabled)
            jieba.disable_parallel()

            top_words = Counter(tokens).most_common(20)
            print(top_words)

            # Add every word's count to its score in the sorted set
            for word, number in top_words:
                self.r.zincrby(self.sortedSetKey, word, number)

            print(self.r.zcard(self.sortedSetKey))

            # Word counts live in Redis rather than in the database, so
            # the only remaining DB work is marking the row as finished
            self.dbManager.execute(
                "update rap_music163 set status = 3 where status = 2 and id = '"
                + str(id) + "'")

        except Exception as err:
            # Print the full stack trace, then log the error itself
            print(traceback.format_exc())
            c.Log('{} : {}'.format("Error 901", err))
コード例 #5
0
ファイル: music.py プロジェクト: leoyoung1991/rap
    def getOneSongList(self, songListlink):
        """Fetch every song (with lyric) of one playlist and store the batch.

        Flow:
          * download the playlist page through one randomly chosen proxy;
            if the request fails or the page has no ``ul.f-hide`` element,
            set the playlist's status to 9 and return;
          * move the playlist from status 0 to status 1;
          * for each entry of the list, fetch its lyric from
            ``self.lyricAPI``, strip bracketed tags and queue the row
            unless ``self.isDuplicate`` reports the link as known;
          * batch-insert the queued rows into ``rap_music163`` and move
            the playlist from status 1 to status 2.

        Errors other than the two guarded requests are printed with their
        traceback and logged as "Error 901"; nothing is re-raised.

        :param songListlink: playlist link; appended to the base URL and
            used as the ``link`` key in ``rap_playlist163``.
        """
        # NOTE(review): these module-level globals look unnecessary; they
        # are kept only because other code may read them -- confirm and drop
        global content, lyric
        s = requests.session()
        url = self.__url + str(songListlink)

        # One proxy is drawn at random and reused for all requests below
        proxy = random.sample(self.allValidIp, 1)[0]

        # Compiled once instead of on every loop iteration; removes
        # bracketed tags from the lyric (presumably LRC time stamps)
        tag_pattern = re.compile(r'\[.*\]')

        try:

            try:
                content = s.get(url, headers=self.__headers,
                                proxies=proxy).content
            except Exception:
                # Request failed: flag the playlist as failed and stop.
                # Narrowed from a bare except so KeyboardInterrupt and
                # SystemExit are no longer swallowed here.
                self.dbManager.execute(
                    "update rap_playlist163 set status = 9 where link = '" +
                    str(songListlink) + "'")

                return

            page = BeautifulSoup(content, "lxml")
            musics = page.find('ul', {'class': 'f-hide'})

            if musics is None:
                # No song list in the response: flag the playlist as failed
                self.dbManager.execute(
                    "update rap_playlist163 set status = 9 where link = '" +
                    str(songListlink) + "'")
                print('%s return 503' % songListlink)

                return

            # Flip the playlist to "in progress" first; this is the lock
            self.dbManager.execute(
                "update rap_playlist163 set status = 1 where status = 0 and link = '"
                + str(songListlink) + "'")

            # All queued rows are written with one batchInsertSQL call
            sql = "insert into rap_music163 (`song_id`, `name`, `link`, `lyric`, `status`)" \
                  "values(%s,%s,%s,%s,%s)"
            status = 1
            values = []
            for music in musics:
                # The href stays escaped because it is also handed to
                # isDuplicate, whose query construction is not visible here
                songLink = MySQLdb.escape_string(
                    music.find('a')['href'].encode('utf-8'))
                # The name is bound to a %s placeholder, so the driver
                # escapes it; escaping it here too would store literal
                # backslashes (the lyric was never pre-escaped either)
                name = music.text.encode('utf-8')
                o = re.match(r'.*id=(.*)', songLink, re.M | re.I)
                song_id = int(o.group(1))

                # Build the lyric endpoint URL from the numeric song id
                lrc_url = self.lyricAPI + str(song_id)

                try:
                    lyric = s.get(lrc_url,
                                  headers=self.__headers,
                                  proxies=proxy)
                except Exception:
                    # NOTE(review): returning here discards the rows queued
                    # so far and leaves the playlist at status 1 -- confirm
                    # this is intended
                    return

                time.sleep(0.5)  # pause half a second between lyric requests

                j = json.loads(lyric.text)
                code = j['code']
                if code == 200 and 'lrc' in j and 'lyric' in j['lrc']:
                    lrc = tag_pattern.sub("", j['lrc']['lyric']).strip()

                    if not self.isDuplicate(songLink):
                        values.append((song_id, name, songLink, lrc, status))
                        print(name)

            # Rows carry whole lyrics, so statement length is a concern here
            self.dbManager.batchInsertSQL(sql, values)
            print(values)

            # Finally mark the playlist as completely processed
            self.dbManager.execute(
                "update rap_playlist163 set status = 2 where status =1 and link = '"
                + str(songListlink) + "'")

        except Exception as err:
            # Print the full stack trace, then log the error itself
            print(traceback.format_exc())
            c.Log('{} : {}'.format("Error 901", err))