Exemple #1
0
 def getVideoList(self, seterId, setLink):
     '''
     Fetch the list of all videos that belong to one video set.

     Logs the start, hands seterId and setLink to the task object's
     getVideoList() and logs the end. The delegate's result is discarded.
     '''
     startMsg = "Task:{} getVideoList seterId {}".format(self._taskName, seterId)
     Util.info(startMsg)

     worker = self._taskObj
     worker.getVideoList(seterId, setLink)

     endMsg = "Task:{} getVideoList end".format(self._taskName)
     Util.info(endMsg)
Exemple #2
0
 def exists(self, name, setId):
     '''
     Tell whether a video with this name is already stored for the set.

     Both values go through Util.conv2 with the matching videoListFields
     entry before the lookup. Returns True when find_one() yields a
     truthy result, otherwise False.
     '''
     fields = self.videoListFields
     query = {
         "name": Util.conv2(name, fields['name']),
         'setId': Util.conv2(setId, fields['setId']),
     }
     return bool(self._db.find_one(query))
Exemple #3
0
 def exists(self, title, platform):
     '''
     Tell whether a video set with this title exists for the platform.

     Both values go through Util.conv2 with the matching videoSetFields
     entry before the lookup. Returns True when find_one() yields a
     truthy result, otherwise False.
     '''
     fields = self.videoSetFields
     query = {
         "title": Util.conv2(title, fields['title']),
         'platform': Util.conv2(platform, fields['platform']),
     }
     return bool(self._db.find_one(query))
Exemple #4
0
 def __init__(self, args):
     '''
     Log which downloader is being set up and keep args on the instance.

     Option reference kept from the original docstring:

     -O FILE, --output-filename FILE
                     Set output filename
     -o DIR, --output-dir DIR
                     Set output directory
     '''
     banner = 'Do {} downloader'.format(self._downloader)
     Util.info(banner)
     self._args = args
Exemple #5
0
    def getSetContent(self, link):
        '''
        Fetch and save one video set, then fetch its video list.

        The task object's getSetContent() does the work; unless its
        result compares equal to False, that result is passed on to
        getVideoList() together with link.
        '''
        Util.info("Task:{} getSetContent link {}".format(self._taskName, link))
        setId = self._taskObj.getSetContent(link)
        Util.info("Task:{} getSetContent end".format(self._taskName))

        # Equality (not truthiness) on purpose: a None result still
        # proceeds, while False and 0 both stop here.
        if False == setId:
            return
        self.getVideoList(setId, link)
Exemple #6
0
    def getSetContent(self, link):
        '''
        Scrape one video-set page and save it as a new set.

        Returns whatever the VideoSet model's newSet() returns for the
        scraped data, or False when the page has no title link or when a
        set with the same title already exists for this platform.
        Summary, images, area, language, category and heat are only
        filled in when the page provides them.
        '''
        r = Util.getPage(link, proxy=self._isProxy)
        html = BeautifulSoup(r.text, self.getConfig('FETCH_LIBRARY'))

        titleTag = html.find('a', {'class': 'info-intro-title'})
        if titleTag is None:
            # The title drives the duplicate check below, so a page
            # without one cannot be imported.
            Util.error('Set title not found {}'.format(link))
            return False

        movieInfo = {
            'link': link,  # original link
            # VIP marker: any image carrying the VIP icon class
            'is_vip': 1 if html.find_all('img', {'class': 'icon-viedo-mr'}) else 0,
            'title': titleTag.text,
        }

        # Several summary spans may be present; the last one is used
        # (the original note says the complete text is preferred).
        summaryList = html.find_all('span', {'class': "briefIntroTxt"})
        if summaryList:
            movieInfo['summary'] = summaryList[-1].text

        try:
            movieInfo['img'] = html.find('div', {'class': 'info-img'}).img['src']  # small image
        except (AttributeError, TypeError, KeyError):
            # Container, <img> tag or src attribute missing.
            pass
        else:
            movieInfo['img_large'] = movieInfo['img'].replace('195_260', '480_360')  # large image

        # Area and language share the same markup: <p class=...><a>text</a></p>.
        # A missing element leaves the key unset (deliberate best effort).
        for key, cssClass in (('area', 'episodeIntro-area'),
                              ('lang', 'episodeIntro-lang')):
            try:
                movieInfo[key] = html.find('p', {'class': cssClass}).a.text
            except AttributeError:
                pass

        try:
            movieInfo['category'] = [x.text for x in html.find('p', {'class': "episodeIntro-type"}).find_all('a')]
        except AttributeError:
            pass

        try:
            movieInfo['hot'] = html.find("span", {"class": "heat-info"}).text.replace('热度', '').strip()
        except AttributeError:
            pass

        # Score of the set; 0 when the page exposes no tvid to query with.
        scoreTag = html.find("span", {'class': "effect-score"})
        try:
            tvid = scoreTag['data-score-tvid']
        except (TypeError, KeyError):
            movieInfo['score'] = 0
        else:
            movieInfo['score'] = self.getSetScore(tvid, tvid)

        # Uniform single-pass import marker.
        movieInfo['episode_over'] = 2

        # A set is never added twice for the same platform.
        setModel = self.getModel('VideoSet')
        if setModel.exists(movieInfo['title'], self.platform):
            Util.info('Set exists {}'.format(movieInfo['title']))
            return False

        # Save the set.
        return self.getModel('VideoSet').newSet(movieInfo, self.platform)
Exemple #7
0
 def parseArgs(self):
     '''
     Parse sys.argv and store the requested task on the instance.

     Task selection (a later option overrides an earlier one):
     -d NAME, --download=NAME   download task
     --task=TASKNAME            task
     --background=BGNAME        background task
     Optional:
     --process=PROCESSNAME      method to call
     --params=PARAMS            parameters for that method
     -t                         test mode: fetch only, write nothing
     -h                         print usage and exit

     Sets _taskType/_taskName, _process, _args and _test according to
     the options given. Exits with status 2 when the command line
     cannot be parsed.
     '''
     try:
         # "d:" takes the task name, "t" is a plain flag; "params=" must
         # be registered or --params is rejected by getopt.
         opts, args = getopt.getopt(
             sys.argv[1:], "hd:t",
             ["download=", 'task=', 'background=', 'process=', 'params='])
     except getopt.GetoptError:
         print('Key -h see keymap.')
         sys.exit(2)

     Util.info((opts, args))
     for opt, arg in opts:
         if opt == '-h':
             print('Run.py')
             print('Add a tasker')
             print('     -d <download> --download=<download name>')
             print('     --task=<task name>')
             print('     --background=<background name>')
             print('Optional add a process')
             print('     --process=<process name>')
             print('Optional add some process params')
             print('     --params=<params>')
             sys.exit()
         elif opt in ("-d", "--download"):
             self._taskType = 'download'
             self._taskName = arg
         elif opt == "--task":
             self._taskType = 'task'
             self._taskName = arg
         elif opt == "--background":
             self._taskType = 'background'
             self._taskName = arg
         elif opt == "--process":
             self._process = arg
         elif opt == "--params":
             self._args = arg
         elif opt == '-t':
             # test mode
             self._test = True
Exemple #8
0
 def getUnDlVideo(self, setId, uid):
     '''
     Return one video of the set that has no play entry for uid.

     Looks up a single document whose setId matches and whose
     "plays.<uid>" field does not exist; the find_one() result is
     returned unchanged.
     '''
     playsField = "plays." + str(uid)
     query = {
         "setId": Util.conv2(setId, self.videoListFields['setId']),
         playsField: {'$exists': False},
     }
     return self._db.find_one(query)
Exemple #9
0
    def getCategoryList(self, fLink):
        '''
        Collect the video-set links found on one category page.

        Only the links are taken here; every other detail is read later
        from the set page itself. Returns a list of stripped href
        strings, empty when the page has no list container or no usable
        links.
        '''
        Util.info('Task:Iqiyi getCategoryList')
        Util.info("Do on Cate: {}".format(fLink))
        r = Util.getPage(fLink, proxy=self._isProxy)
        # Parse the category page.
        html = BeautifulSoup(r.text, self.getConfig('FETCH_LIBRARY'))
        mainContent = html.find('ul', {'class': 'site-piclist site-piclist-180236 site-piclist-auto'})
        if mainContent is None:
            # No list container on this page: nothing to process.
            Util.info('Category list not found {}'.format(fLink))
            return []

        cateData = []
        for li in mainContent.find_all('li'):
            try:
                cateData.append(li.a['href'].strip())
            except (KeyError, TypeError):
                # KeyError: <a> without href; TypeError: <li> without <a>.
                Util.info("未找到影片集链接跳过")

        # One page is fetched and handed back at a time.
        return cateData
Exemple #10
0
    def createRepo(self, lastRepoId):
        '''
        Ask the remote API to create a repository named after lastRepoId.

        Returns True when the API answers 422 (logged as "already
        exists") or 201; in the 201 case createPage() and setRepoCname()
        are run first. Any other status logs an error and returns False.
        Note that the shared _apiCreateRepo params are updated in place.
        '''
        api = self._apiCreateRepo
        # repo name
        api['params']['name'] = '{}'.format(lastRepoId)

        endpoint = api['url'].format(self._config.WAREHOUSE['token'])
        payload = json.dumps(api['params'])
        info = Util.postPage(endpoint, payload)

        status = info.status_code
        if status == 422:
            # already exists
            Util.info('仓库已存在')
            return True
        if status != 201:
            Util.error('本地仓库创建失败')
            return False

        self.createPage(lastRepoId)
        self.setRepoCname(lastRepoId)
        return True
Exemple #11
0
    def setRepoCname(self, repoName):
        '''
        Send the configured cname update request for one repository.

        Returns True when the response status is 400 and False for any
        other status.
        '''
        api = self._apiUpdateCname
        warehouse = self._config.WAREHOUSE
        endpoint = api['url'].format(warehouse['repoName'], repoName,
                                     warehouse['token'])
        previewHeaders = {
            'Accept': 'application/vnd.github.mister-fantastic-preview+json'
        }
        cnameRes = Util.putPage(endpoint, json.dumps(api['params']),
                                headers=previewHeaders)

        # NOTE(review): status 400 is what counts as success here, which
        # looks unusual for an HTTP API - confirm this is intended before
        # relying on the return value.
        if 400 != cnameRes.status_code:
            Util.info('仓库创建失败 {}'.format(cnameRes))
            return False

        Util.info('仓库创建成功')
        return True
Exemple #12
0
 def getDledVideoListCount(self, setId, uid):
     '''
     Count the videos of a set that already have a play entry for uid.

     Matches documents whose setId equals the converted setId and whose
     "plays.<uid>" field exists, and returns the cursor's count().
     '''
     playsField = "plays." + str(uid)
     query = {
         "setId": Util.conv2(setId, self.videoListFields['setId']),
         playsField: {'$exists': True},
     }
     cursor = self._db.find(query)
     return cursor.count()
Exemple #13
0
    def __init__(self):
        '''
        Parse the command line, build the requested task and run it.

        The factory for _taskType is called with the task name and the
        test flag; the method named by _process is then invoked on the
        result with _args.
        '''
        super().__init__()
        Util.info('Run init')
        self.parseArgs()

        # NOTE(review): test-only values. They are assigned after
        # parseArgs() has run, so they replace whatever these four
        # attributes held at that point - remove before real use.
        self._taskType = 'Background'
        self._taskName = 'ToWarehouse'
        self._process = 'process'
        self._args = {'file': '0125/VEYCaSRIBFn.mp4', 'id': '5c19fddde203c64bdc19299b'}
        # end of test-only values

        # Start a task from the given parameters.
        factory = self.getFactory(self._taskType)
        taskFactory = factory(self._taskName, isTest = self._test)
        # Run the requested method.
        handler = getattr(taskFactory, self._process)
        handler(self._args)
Exemple #14
0
 def __init__(self, args):
     '''
     Log which downloader is being set up and keep args on the instance.

     Option reference kept from the original docstring:

     --get-duration                   Simulate, quiet but print video length
     --get-filename                   Simulate, quiet but print output filename
     --get-format                     Simulate, quiet but print output format
     --proxy URL                      Use the specified HTTP/HTTPS/SOCKS proxy.
                                      To enable SOCKS proxy, specify a proper
                                      scheme. For example
                                      socks5://127.0.0.1:1080/. Pass in an empty
                                      string (--proxy "") for direct connection
     -f, --format FORMAT              Video format code, see the "FORMAT
                                     SELECTION" for all the info
     --all-formats                    Download all available video formats
     --prefer-free-formats            Prefer free video formats unless a specific
                                     one is requested
     -o, --output TEMPLATE            Output filename template, see the "OUTPUT
                                     TEMPLATE" for all the info
     '''
     banner = 'Do {} downloader'.format(self._downloader)
     Util.info(banner)
     self._args = args
Exemple #15
0
 def modifyEpisode(self, data, _id):
     '''
     Store a new episode value on the video set identified by _id.

     Returns False without touching the database when data['episode']
     is falsy. Otherwise _id is wrapped in ObjectId when needed, the
     converted episode value is written with update_one(), and the
     truthiness of that call's result is returned.
     '''
     if not data['episode']:
         return False

     targetId = _id if isinstance(_id, ObjectId) else ObjectId(_id)
     episode = Util.conv2(data['episode'], self.videoSetFields['episode'])
     modify = self._db.update_one({"_id": targetId},
                                  {"$set": {"episode": episode}})
     return bool(modify)
Exemple #16
0
    def getCategoryList(self, args):
        '''
        Walk every category link page by page and process each set found.

        args is an indexable pair: args[0] is the first page (default 1)
        and args[1] is the number of pages to walk from there (default
        10). Missing or falsy entries fall back to the defaults. Nothing
        is done when the task object has no category links.
        '''
        Util.info("Task:{} getCategoryList args {}".format(self._taskName, args))
        cateLinks = self._taskObj.cateLinks
        if not cateLinks:
            Util.error('Task:{} cateLinks can not empty on getCategoryList'.format(self._taskName))
            # Nothing to iterate; stop instead of failing in the loop.
            return

        args = args or ()
        startPage = args[0] if len(args) > 0 and args[0] else 1
        endPage = args[1] if len(args) > 1 and args[1] else 10

        # Every category link.
        for link in cateLinks:
            # Every page of that category.
            for page in range(startPage, startPage + endPage):
                print("Do page {}".format(page))
                # Link with the page number filled in.
                fLink = link.format(str(page))
                # Sets listed on that page.
                setList = self._taskObj.getCategoryList(fLink)
                if not setList:
                    # Nothing to process on this page.
                    Util.info('Category {} with page {} empty'.format(link, page))
                    continue

                # Process every set.
                for setInfo in setList:
                    self.getSetContent(setInfo)
Exemple #17
0
 def getSetScore(self, tvid, qipu_ids):
     '''
     Fetch the score of a video set.

     Requests the score URL built from qipu_ids and tvid and returns the
     text found between '"sns_score":' and the closing '}]});' of the
     response. Returns 0 when the request step raises KeyError or when
     either marker is missing from the response.
     '''
     startTxt = '"sns_score":'
     endTxt = "}]});"
     try:
         res = Util.getPage(self._scoreUrl.format(qipu_ids, tvid), proxy=self._isProxy)
         resText = res.text
     except KeyError:
         return 0

     # str.find() reports a missing marker as -1; slicing with that
     # value would silently return unrelated text.
     resStart = resText.find(startTxt)
     if resStart < 0:
         return 0
     resStart += len(startTxt)
     resEnd = resText.find(endTxt, resStart)
     if resEnd < 0:
         return 0
     return resText[resStart:resEnd]
Exemple #18
0
    def commitFiles(self, file, repoName, fileSize):
        '''
        Put one file into a local repository checkout and push it.

            file      file to commit, joined onto TASK['fileDir']
            repoName  repository directory name, joined onto TASK['repoDir']
            fileSize  size of the file in MB

        A file larger than self._maxSize is cut into segments with the
        _doSegment command; otherwise it is copied into the repository
        directory. Returns the list of URLs built for the committed
        file(s).
        '''

        # NOTE(review): the working directory is changed for the whole
        # process and is not restored afterwards.
        os.chdir(os.path.join(self._config.TASK['repoDir'], repoName))
        os.system('git pull origin gh-pages')
        fileList = []
        # File exceeds the single-file size limit.
        if fileSize > self._maxSize:
            fileDuration = subprocess.check_output(
                self._getDuration.format(
                    os.path.join(self._config.TASK['fileDir'], file)))
            fileDuration = fileDuration.decode('UTF-8')
            # Keep the text between 'duration=' (9 characters) and the
            # following 'size='.
            fileDuration = fileDuration[
                fileDuration.find('duration=', 0) + 9:fileDuration.
                find('size=', fileDuration.find('duration=', 0))]
            # Segment length: total duration scaled by maxSize / fileSize.
            segmentTime = float(fileDuration) / float(fileSize) * self._maxSize
            Util.info('Files duration')
            Util.info(segmentTime, float(fileDuration), float(fileSize),
                      self._maxSize)
            # New base name for the segment files: newFileName_000.mp4
            newFileName = Util.genRandName(11)
            os.system(
                self._doSegment.format(
                    os.path.join(self._config.TASK['fileDir'], file),
                    segmentTime, newFileName))
            # Upper bound for the number of segment files.
            fileNum = math.ceil(float(fileDuration) / segmentTime)
            # Collect the segment files that actually exist; the relative
            # names are checked against the current working directory.
            print('filenummmmm', fileNum)
            for num in range(fileNum):
                if True == os.path.isfile("%s_%03d.mp4" % (newFileName, num)):
                    fileList.append("https://%s/%s/%s_%03d.mp4" %
                                    (self._config.WAREHOUSE['host'], repoName,
                                     newFileName, num))
        else:
            shutil.copyfile(
                os.path.join(self._config.TASK['fileDir'], file),
                os.path.join(self._config.TASK['repoDir'], repoName,
                             os.path.basename(file)))
            fileList.append("https://{}/{}/{}".format(
                self._config.WAREHOUSE['host'], repoName,
                os.path.basename(file)))
        os.system(
            'git add *.mp4 && git add *.jpg && git add *.png && git add *.jpeg'
        )
        # NOTE(review): the commit message is inserted unquoted into the
        # shell command - presumably file names never contain spaces or
        # shell characters; verify.
        os.system('git commit -m {}'.format(os.path.basename(file)))
        os.system('git push origin gh-pages')
        # The exit status of the git commands is not checked, so this
        # message is logged unconditionally.
        Util.info("文件添加至仓库成功")
        return fileList
Exemple #19
0
    def dlFile(self, link, rdlPath, rfileName, dlfileName=None):
        '''
        Download link into rdlPath with the configured youGet executable.

        link        page to download from
        rdlPath     output directory (passed as -o)
        rfileName   used as the output name when dlfileName is omitted
        dlfileName  output file name (passed as -O)

        Returns the output file name, or False when the executable exits
        with a non-zero status.
        '''
        if dlfileName is None:
            # Three-argument call (link, directory, file name).
            dlfileName = rfileName

        Util.info("Dl without proxy")
        command = [
            self._args['params']['youGet'], link, '-o', rdlPath, '-O',
            dlfileName
        ]
        try:
            subprocess.check_call(command)
        except subprocess.CalledProcessError as err:
            Util.error(err)  # record the error
            Util.info('Youget:影片未成功下载')
            return False

        return dlfileName
Exemple #20
0
    def newList(self, data):
        '''
        Insert a batch of video-list entries.

        data must be a list; Util.checkRequire has to return True for the
        required fields, otherwise an error is logged and False is
        returned. The entries are passed through Util.removeUnsafeFields
        before insert_many(), whose result is returned.
        '''
        if not isinstance(data, list):
            Util.error('Data must be a list')
            return False

        requireFields = ['setId', 'name', 'summary', 'link', 'img']
        missing = Util.checkRequire(data, requireFields)
        if missing != True:
            message = '{} Require field {} not found'.format(
                'saveVideoList', missing)
            Util.error(message)
            return False

        fieldSpec = self.videoListFields
        cleaned = Util.removeUnsafeFields(data, fieldSpec.keys(), fieldSpec)
        return self._db.insert_many(cleaned)
Exemple #21
0
    def dlFile(self, link, rdlPath, dlfileName):
        '''
        Download link with the configured youtubeDl executable.

        The executable is first asked for the resulting file name
        (--get-filename), then run again to download into
        rdlPath/dlfileName. Returns that reported file name (decoded and
        stripped), or False when either run exits with a non-zero status.
        '''
        Util.info("Dl without proxy")

        executable = self._args['params']['youtubeDl']
        target = '{}/{}'.format(rdlPath, dlfileName)

        # Regular platform download.
        try:
            queryName = [
                executable, link, '--get-filename',
                '-o', '{}'.format(dlfileName)
            ]
            filename = subprocess.check_output(queryName)
            download = [
                executable, link, '-o', target,
                '-f', 'bestvideo+bestaudio/best'
            ]
            subprocess.check_call(download)
        except subprocess.CalledProcessError as err:
            Util.error(err)  # record the error
            Util.info('Youtubedl:影片未成功下载')
            return False

        reported = filename.decode('UTF-8')
        return reported.strip()
Exemple #22
0
    def newSet(self, data, platform):
        '''
        Insert one video set for the given platform.

        data must be a dict; Util.checkRequire has to return True for the
        required fields, otherwise an error is logged and False is
        returned. The data is passed through Util.removeUnsafeFields,
        tagged with int(platform) and inserted; the insert result is
        returned.
        '''
        if not isinstance(data, dict):
            Util.error('Data must be a dict')
            return False

        # 'link' is listed twice, exactly as in the original list.
        requireFields = [
            'title', 'link', 'summary', 'link', 'img', 'episode_over',
            'is_vip', 'area', 'lang'
        ]
        missing = Util.checkRequire(data, requireFields)
        if missing != True:
            message = '{} Require field {} not found'.format(
                'saveVideoSet', missing)
            Util.error(message)
            return False

        fieldSpec = self.videoSetFields
        cleaned = Util.removeUnsafeFields(data, fieldSpec.keys(), fieldSpec)
        # Which platform the set belongs to.
        cleaned['platform'] = int(platform)
        return self._db.insert(cleaned)
Exemple #23
0
    def process(self, args):
        '''
        Commit one downloaded file to the warehouse repository.

        args must contain 'file' (joined onto TASK['fileDir']) and 'id'
        (the video id). The repository name is the stored lastRepoId
        plus one, or "1" when none is stored; it is created first when
        its local directory does not exist. Returns False on a missing
        argument or a failed repository creation, True otherwise.
        '''
        required = (('file', 'File can not found'),
                    ('id', "Video id can not found"))
        for key, message in required:
            if key not in args:
                Util.error(message)
                return False

        sourcePath = os.path.join(self._config.TASK['fileDir'], args['file'])
        # Original note: a single warehouse file must not exceed 45MB.
        # The size is only computed here and passed on to commitFiles().
        sizeMb = os.path.getsize(sourcePath) / 1024 / 1024

        warehouseUid = self._config.WAREHOUSE['uid']
        settingInfo = self.getModel('Setting').getSetting(warehouseUid)
        if 'lastRepoId' in settingInfo:
            lastRepoId = str(settingInfo['lastRepoId'] + 1)
        else:
            lastRepoId = "1"

        # Local directory missing: the repository has to be created.
        repoPath = os.path.join(self._config.TASK['repoDir'], lastRepoId)
        if not os.path.exists(repoPath) and False == self.createRepo(lastRepoId):
            # Repository creation failed.
            return False

        # Commit the file.
        fileList = self.commitFiles(args['file'], lastRepoId, sizeMb)
        Util.info('更新远程地址 {}'.format(fileList))
        # Record the remote addresses under the warehouse uid.
        self.getModel('VideoList').newPlay(args['id'], warehouseUid, fileList)
        return True
Exemple #24
0
 def setFreeSpace(self, uid, space):
     '''
     Store the free space value on the setting document of uid.

     space is converted with Util.conv2 using the freeSpace entry of
     settingFields; the update_one() result is returned.
     '''
     selector = {"uid": str(uid)}
     converted = Util.conv2(space, self.settingFields['freeSpace'])
     return self._db.update_one(selector, {"$set": {'freeSpace': converted}})
Exemple #25
0
    def getVideoList(self, seterId, setLink):
        '''
        Fetch the episode list of a video set and store the new episodes.

        The album id is cut out of the set page between "albumId:" and
        "tvId:", then the episode list is requested page by page until a
        page reports pn == 0 or cannot be read. Episodes already stored
        for seterId are skipped. When no new episode was collected the
        set itself is removed.

        Returns False when the album id cannot be located, True otherwise.
        '''
        r = Util.getPage(setLink, proxy=self._isProxy)
        albumHtml = r.text
        try:
            albumId = albumHtml[albumHtml.index("albumId:") + 8 : albumHtml.index("tvId:")]
        except ValueError:
            return False
        albumId = albumId.replace(',', '').replace("\"", "").strip()

        # All collected episodes.
        videoList = []
        page = 0
        while True:
            page = page + 1
            url = self._moviceListUrl.format(str(albumId), str(page))
            Util.info("Set video list : {}".format(url))
            r = Util.getPage(url, proxy=self._isProxy)
            # The payload is JSON behind a JavaScript assignment.
            try:
                r = json.loads(r.text.replace('var tvInfoJs=', ''))
            except ValueError:
                r = None

            if not r or 'data' not in r:
                Util.info('Video is empty {}'.format(url))
                # Stop here: moving on to the next page would never end
                # when the endpoint keeps answering without data.
                break

            pageData = r['data']
            if 0 == pageData['pn']:
                break
            # Update the total episode count from the first page.
            if page == 1:
                self.getModel('VideoSet').modifyEpisode({'episode': pageData['allNum'] or 1}, seterId)

            # Fields used per episode: vn (name), desc, vurl, vpic,
            # timeLength (seconds) and publishTime.
            for data in pageData['vlist']:
                if 'vn' not in data:
                    continue
                # Skip episodes that are already stored for this set.
                if self.getModel("VideoList").exists(data['vn'], seterId):
                    Util.info('单集 {} 已存在于 setId {}'.format(data['vn'], seterId))
                    continue
                if 'vurl' not in data:
                    continue

                videoList.append({
                    'setId': seterId,
                    'name': data['vn'],
                    'summary': data['desc'],
                    'link': data['vurl'],
                    'img': data['vpic'],
                    'duration': '%02d:%02d' % divmod(data['timeLength'], 60),
                    'created_at': int(data['publishTime'])
                })

        # Store all collected episodes.
        if len(videoList) > 0:
            self.getModel("VideoList").newList(videoList)
            Util.info('成功保存单集 {} 部'.format(len(videoList)))
        else:
            Util.info('setId: {} 没有找到任何影片,删除影片集'.format(seterId))
            # No episode collected: remove the set again.
            self.getModel("VideoSet").remove(seterId)

        return True
Exemple #26
0
    def dlFile(self, args={}):
        '''
        Download one video, convert it and record it as playable.

        videoId    id of the video to download; without it the VideoSet
                   model is asked for a resource not yet downloaded
        dlMachine  download method (youget / youtubedl) mentioned in the
                   original notes; it is not read by this method

        Returns False when there is nothing to download, the result of
        getNewMatchine() when no downloaded file is found, and None after
        a successful download.
        '''
        uid = self.configList['uid']
        if 'videoId' in args:
            # A specific video was requested.
            videoInfo = self.getModel('VideoList').getVideo(args['videoId'])
        else:
            # Pick a video that has not been downloaded yet.
            # @todo revise the platform / selection rule
            videoInfo = self.getModel('VideoSet').getUnDlRes(uid, 1)

        if not videoInfo:
            Util.info('该设备 {} 没有需要下载的资源'.format(self.configList['uid']))
            return False

        Util.info("Download:{} dlFile".format(self._taskName))
        Util.info("正在下载影片 {}, videoId: {} setId: {}".format(
            videoInfo['name'], videoInfo['_id'], videoInfo['setId']))

        # Folder named after month and day.
        dlPath = time.strftime("%m%d", time.localtime())
        # Absolute path of that folder.
        rdlPath = os.path.join(self.configList['params']['dir'], dlPath)
        if not os.path.exists(rdlPath):
            os.mkdir(rdlPath)

        # Fresh random file name (original note: 10-character names
        # belong to version 17, 11-character names to version 18).
        fileName = Util.genRandName(11)

        # Platforms listed in proxyIds are downloaded through the proxy.
        useProxy = int(videoInfo['platform']) in self.configList['proxyIds']
        doDl = 'dlFileWithProxy' if useProxy else 'dlFile'

        Util.info("Download to {}".format(os.path.join(rdlPath, fileName)))
        # Run the download.
        downloader = getattr(self._taskObj, doDl)
        dlFileName = downloader(videoInfo['link'], rdlPath, fileName)

        # Confirm that a file with one of the known extensions exists.
        for ext in self._videoExt:
            candidate = "{}.{}".format(dlFileName, ext)
            if os.path.exists(os.path.join(rdlPath, candidate)):
                dlFileName = candidate
                break
        else:
            # Nothing was downloaded: retry with another download method.
            # NOTE(review): this removes the entry from self._dlMatchines
            # itself, not from a copy - confirm that is intended.
            remaining = self._dlMatchines
            remaining.remove(self._taskName.lower())
            return self.getNewMatchine(remaining[0].capitalize())

        # Convert into a format usable on the web.
        webVideo = self.getService('Background.Convert').toMp4({
            'dlPath': dlPath,
            'inputFile': dlFileName
        })
        # Record the finished download.
        self.getModel('VideoList').newPlay(videoInfo['_id'],
                                           self.configList['uid'],
                                           webVideo)
        # Increase the set's count of playable videos.
        self.getModel("VideoSet").setCanPlayNum(videoInfo['setId'],
                                                self.configList['uid'])

        # Placeholder for the warehouse upload; nothing happens yet.
        if self.configList['uid'] == self.configList['warehouse']['uid']:
            pass

        Util.info("Download:{} dlFile end".format(self._taskName))
        self.getFreeDisk()