Code Example #1
File: api.py Project: pengjinfu/webvideo-downloader
def getPartUrl(partUrl, partCid, basePlayInfoUrl, sessCookie):
    def sortBandWidth(item):
        return item['id'] * (10**10) + item['bandwidth']

    headers = getHeaders(partUrl)
    headers['Cookie'] = "CURRENT_FNVAL=16"
    content = tools.getText(partUrl, headers)

    match = re.search(r'<script>window\.__playinfo__=(.+?)</script>', content)

    if match: 
        data = match.group(1)
        data = json.loads(data)['data']
    else: 
        playInfoUrl = basePlayInfoUrl + '&cid=' + str(partCid)
        headers = { 'Cookie': sessCookie }
        data = json.loads(tools.getText(playInfoUrl, headers))
        data = data.get('data', None) or data.get('result', None)

    if 'dash' in data:
        # audio and video are delivered as separate DASH streams; pick the
        # highest-bandwidth track of each and join them with '|'
        data = data['dash']
        data['audio'].sort(key=sortBandWidth, reverse=True)
        data['video'].sort(key=sortBandWidth, reverse=True)
        combineVideoUrl = data['audio'][0]['baseUrl'] + '|' + data['video'][0]['baseUrl']
    elif 'durl' in data:
        # the video is split into sequential segments; join their URLs with '|'
        data = data['durl']
        urls = list(map(lambda each: each['url'], data))
        combineVideoUrl = '|'.join(urls)
    else:
        combineVideoUrl = None  # neither format present in the play info

    return combineVideoUrl
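
A minimal caller sketch (hypothetical, reusing the parameter names above as placeholders): since getPartUrl joins stream URLs with '|', the caller splits them back apart before downloading.

# hypothetical caller: split the combined URL returned by getPartUrl
combined = getPartUrl(partUrl, partCid, basePlayInfoUrl, sessCookie)
urls = combined.split('|') if combined else []
# DASH case: urls == [audioUrl, videoUrl]
# segmented (durl) case: urls is the ordered list of segment URLs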
Code Example #2
File: api.py Project: pengjinfu/webvideo-downloader
def parseIqiyiUrl(url, headers={}):
    data = json.loads(tools.getText(url, headers))
    program = data['data']['program']
    if isinstance(program, list):
        print('Server returned an error; likely cause: the iQIYI Taiwan site requires a proxy for downloading (set http_proxy/https_proxy)')
        exit()

    subtitles = []
    filterVideos = list(filter(lambda each: each.get('m3u8'), program['video']))

    if len(filterVideos):
        content = filterVideos[0]['m3u8']

        if content.startswith('#EXTM3U'):
            videoType = 'hls'
            audioUrls, videoUrls = [], tools.filterHlsUrls(content)
        else:
            videoType = 'dash'
            audioUrls, videoUrls = parseIqiyiMpd(content, headers)
    else:
        filterVideos = list(filter(lambda each: each.get('fs'), program['video']))
        fsList = filterVideos[0]['fs']
        basePath = data['data']['dd']
        infoUrls = list(map(lambda each: basePath + each['l'], fsList))
        videoType = 'partial'
        audioUrls, videoUrls = [], parseIqiyiInfoUrls(infoUrls, headers)

    if 'stl' in program:
        defaultSrts = list(filter(lambda x: x.get('_selected'), program['stl']))
        srts = defaultSrts + list(filter(lambda x: not x.get('_selected'), program['stl']))
        basePath = data['data']['dstl']
        subtitles = [ (srt.get('_name', 'default'), basePath + srt['srt']) for srt in srts ]
    return videoType, audioUrls, videoUrls, subtitles
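
A sketch of how the returned tuple might be consumed; the download* helpers below are hypothetical placeholders, while the 'hls' / 'dash' / 'partial' values come from the function above.

videoType, audioUrls, videoUrls, subtitles = parseIqiyiUrl(url, headers)
if videoType == 'hls':
    downloadHls(videoUrls)              # hypothetical helper
elif videoType == 'dash':
    downloadDash(audioUrls, videoUrls)  # hypothetical helper
else:
    downloadSegments(videoUrls)         # 'partial': hypothetical helper
for name, srtUrl in subtitles:
    print('subtitle:', name, srtUrl)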
Code Example #3
def parseIqiyiUrl(url, headers={}):
    data = json.loads(tools.getText(url, headers))
    program = data['data']['program']
    if isinstance(program, list):
        print('Server returned an error; likely cause: the iQIYI Taiwan site requires a proxy for downloading (set http_proxy/https_proxy)')
        exit()

    videos = program['video']
    filterVideos = list(filter(lambda each: each.get('m3u8'), videos))

    if len(filterVideos):
        content = filterVideos[0]['m3u8']

        if content.startswith('#EXTM3U'):
            videoType = 'hls'
            audioUrls, videoUrls = [], tools.filterHlsUrls(content)
        else:
            videoType = 'dash'
            audioUrls, videoUrls = parseIqiyiMpd(content, headers)
    else:
        filterVideos = list(filter(lambda each: each.get('fs'), videos))
        fsList = filterVideos[0]['fs']
        basePath = data['data']['dd']
        infoUrls = list(map(lambda each: basePath + each['l'], fsList))
        videoType = 'partial'
        audioUrls, videoUrls = [], parseIqiyiInfoUrls(infoUrls, headers)
    return videoType, audioUrls, videoUrls
Code Example #4
File: api.py Project: pengjinfu/webvideo-downloader
def parseIqiyiInfoUrls(urls, headers={}):
    print('%d video segments in total; fetching the real URL of each segment' % len(urls))

    videoUrls = []
    for url in urls:
        data = json.loads(tools.getText(url, headers, timeout=10))
        videoUrls.append(data['l'])
    return videoUrls
Code Example #5
def parseIqiyiUrl(url, headers={}):
    data = json.loads(tools.getText(url, headers))
    videos = data['data']['program']['video']
    videos = list(filter(lambda each: each.get('m3u8'), videos))
    content = videos[0]['m3u8']

    if content.startswith('#EXTM3U'):
        videoType = 'hls'
        audioUrls, videoUrls = [], tools.filterHlsUrls(content)
    else:
        videoType = 'dash'
        audioUrls, videoUrls = parseIqiyiMpd(content, headers)
    return videoType, audioUrls, videoUrls
Code Example #6
File: getStats.py Project: aboSamoor/lydia
def parsePostNER(fin):
    statistics = {}
    text = tools.getText(fin)
    for line in text.splitlines():
        if line != '':
            cols = line.split('\t')
            if len(cols) != 5:
                print(line)
                print("not well formatted line")
                continue
            if cols[1] == 'NNP':
                if cols[0] not in statistics:
                    statistics[cols[0]] = {}
                if cols[4] not in statistics[cols[0]]:
                    statistics[cols[0]][cols[4]] = 0
                statistics[cols[0]][cols[4]] += 1
    return statistics
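
A rough input/output sketch inferred from the code above: each well-formed line has five tab-separated columns, with the token in column 0, the POS tag in column 1, and the NE tag in column 4, and the function counts NE tags per NNP token. The file name and values below are illustrative only.

# hypothetical input line (five columns joined by tabs):
# 'Washington\tNNP\t-\t-\tLOCATION'
stats = parsePostNER('post_ner.tsv')  # placeholder path
# stats could look like: {'Washington': {'LOCATION': 3, 'PERSON': 1}}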
Code Example #7
File: api.py Project: pengjinfu/webvideo-downloader
def parseIqiyiMpd(content, headers={}):
    mediaUrls = {
        'audio': [],
        'video': [],
    }
    root = XMLUtils.parse(content)
    items = XMLUtils.findall(root, 'Period/AdaptationSet/Representation')

    for item in items:
        mType = item.attrib['mimeType'].split('/')[0]
        segName = XMLUtils.findtext(item, 'BaseURL')
        clipItems = XMLUtils.findall(root, "clip_list/clip[BaseURL='%s']" % segName)

        for clip in clipItems:
            infoUrl = XMLUtils.findtext(clip, 'remote_path').replace('&amp;', '&')
            mediaInfo = json.loads(tools.getText(infoUrl, headers))
            mediaUrls[mType].append(mediaInfo['l'])

    return mediaUrls['audio'], mediaUrls['video']
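
Reconstructed from the XPath queries above, the MPD variant this function expects looks roughly like the following; this is an assumption drawn from the code, not an official schema.

# Sketch of the expected MPD shape (reconstructed, illustrative only):
#
# <MPD>
#   <Period>
#     <AdaptationSet>
#       <Representation mimeType="video/mp4">
#         <BaseURL>segment_name</BaseURL>
#       </Representation>
#     </AdaptationSet>
#   </Period>
#   <clip_list>
#     <clip>
#       <BaseURL>segment_name</BaseURL>
#       <remote_path>info URL whose JSON response holds the link in 'l'</remote_path>
#     </clip>
#   </clip_list>
# </MPD>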
Code Example #8
File: compile.py Project: joeyoun9/UUAdip
def compile(object, stream=False):
    """
    Compile all the native data formats from this project into native-format
    data files in the specified cache directory. `object` is the project's
    DataFetch object.

    Uses the compile method, if present, within each identified stream,
    or it can run on a single specified stream only.
    """
    print("Compiling your project to the cache directory")
    if object.proj.c_version < 3:
        print("You need to be using version 3+ of the XML config to use the cache tag.")
        return False
    count = 0
    cache = object.proj.cache
    streams = object.proj.findsource('', find='all')
    # now we have an XML object of all the streams;
    # loop through them, import each, check for a compile method, and run it if present
    for s in streams:
        name = getText(s.getElementsByTagName('name')[0].childNodes)
        if stream and stream != name:
            # a specific stream was requested and this is not it
            continue
        # import the module and check for a compile method
        pkg = object.proj.findsource(name, find='package')
        __import__(pkg)
        c = sys.modules[pkg]
        object.nowPkg = name  # needed for the find_files method to work

        if 'compile' in dir(c):
            print("Compiling:", name)
            count += 1
            object.nowPkg = name
            pkg_id = name.lower().replace(' ', '')
            c.compile(object, pkg_id)
        else:
            print(name, "is not a compilable datatype currently. (", pkg, ")")
    print("Project Compiled: streams successfully compiled:", count)
Code Example #9
File: api.py Project: pengjinfu/webvideo-downloader
def getAllPartInfo(url):
    content = tools.getText(url, getHeaders(url))

    # extract each part's name and cid
    match = re.search(r'<script>window\.__INITIAL_STATE__=(.+?});.+?</script>', content)
    data = json.loads(match.group(1))
    isOpera = 'epList' in data
    pages = data['epList'] if isOpera else data['videoData']['pages']

    allPartInfo = []
    for page in pages:
        if isOpera:
            name, partUrl = page['longTitle'], re.sub(r'\d+$', str(page['id']), url)
        else:
            name, partUrl = page['part'], url + '?p=' + str(page['page'])
        allPartInfo.append({
            'cid': page['cid'],
            'name': name,
            'url': partUrl,
        })

    return allPartInfo
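
A hypothetical sketch combining this with getPartUrl from example #1 (these helpers appear to target bilibili-style pages, given the window.__playinfo__ and CURRENT_FNVAL markers); basePlayInfoUrl and sessCookie are placeholders the caller must supply.

for part in getAllPartInfo(url):  # url: the video page URL
    print('resolving part:', part['name'])
    combined = getPartUrl(part['url'], part['cid'], basePlayInfoUrl, sessCookie)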
Code Example #10
File: api.py Project: pengjinfu/webvideo-downloader
def parseHls(url, headers={}):
    content = tools.getText(url, headers)
    return tools.filterHlsUrls(content, url)