Example #1
0
def getUrl(url):
    """Fetch url via http_utils.getUrl and print how long the request took.

    The body is also printed when the returned code equals 0.
    NOTE(review): code 0 presumably marks a failed request whose body holds
    the error text - confirm against http_utils.

    Returns a (code, body) tuple; the response headers are discarded.
    """
    began = time.time()
    code, _, body = http_utils.getUrl(url)
    elapsed = time.time() - began
    print('Info: get %s took %s' % (url, elapsed))
    if code == 0:
        print(body)
    return (code, body)
Example #2
0
	def retrieveUrl(self, url):
		"""Fetch url and log timing, size and checksum of the returned data.

		When the returned code equals 0 the data itself is written to the
		output first. The summary line is always written afterwards.

		Returns a (code, data) tuple; the response headers are discarded.
		"""
		began = time.time()
		code, _, data = http_utils.getUrl(url)
		if code == 0:
			self.writeOutput(data)
		# the elapsed time is taken before the size / checksum are computed
		elapsed = time.time() - began
		summary = (url, elapsed, len(data), self.md5sum(data))
		self.writeOutput('Info: get %s took %s, size %s cksum %s' % summary)
		return code, data
def getHdsManifestUrls(baseUrl, urlContent, headers):
	"""Return the urls referenced by an HDS manifest.

	The result holds the absolute url of every bootstrapInfo element that
	has a 'url' attribute, followed by one fragment url per segment index
	of every media element whose bootstrap could be resolved.

	baseUrl    - base used when building absolute urls
	urlContent - the manifest xml text
	headers    - passed to http_utils.getUrl when fetching a bootstrap
	"""
	result = []

	# parse the xml
	parsed = parseString(urlContent)

	# get the bootstraps
	segmentIndexes = {}
	for node in parsed.getElementsByTagName('bootstrapInfo'):
		atts = getAttributesDict(node)
		# 'in' instead of has_key(): has_key was removed in python 3
		if 'url' in atts:
			curUrl = getAbsoluteUrl(atts['url'], baseUrl)
			result.append(curUrl)

			# get the bootstrap info
			code, _, bootstrapInfo = http_utils.getUrl(curUrl, headers)
			if code != 200 or len(bootstrapInfo) == 0:
				# the bootstrap url stays in the result, but no fragments
				# can be derived from it
				continue
		else:
			# no url - the bootstrap is embedded as base64 text
			bootstrapInfo = base64.b64decode(node.firstChild.nodeValue)
		bootstrapId = atts['id']
		segmentIndexes[bootstrapId] = getHdsSegmentIndexes(bootstrapInfo)

	# add the media urls
	for node in parsed.getElementsByTagName('media'):
		atts = getAttributesDict(node)
		bootstrapId = atts['bootstrapInfoId']
		if bootstrapId not in segmentIndexes:
			continue

		# drop any query string, otherwise it ends up in the middle of the
		# fragment url (before the 'Seg1-Frag' suffix)
		url = atts['url'].split('?')[0]
		for curSeg in segmentIndexes[bootstrapId]:
			result.append(getAbsoluteUrl('%s/%sSeg1-Frag%s' % (baseUrl, url, curSeg)))

	return result
Example #4
0
	def getUrl(self, url):
		"""Fetch url via http_utils.getUrl and log how long the request took.

		When the returned code equals 0 the body is written to the output
		before the timing line.

		Returns a (code, body) tuple; the response headers are discarded.
		"""
		began = time.time()
		code, _, body = http_utils.getUrl(url)
		if code == 0:
			self.writeOutput(body)
		elapsed = time.time() - began
		self.writeOutput('Info: get %s took %s' % (url, elapsed))
		return (code, body)
def getUrl(url):
	"""Fetch url via http_utils.getUrl and print how long the request took.

	The body is also printed when the returned code equals 0.

	Returns a (code, body) tuple; the response headers are discarded.
	"""
	startTime = time.time()
	code, _, body = http_utils.getUrl(url)
	print('Info: get %s took %s' % (url, time.time() - startTime))
	if code == 0:
		# call form with a single argument prints the same text under
		# python 2 and python 3 (the statement form is python 2 only)
		print(body)
	return (code, body)
	def getURL(self, hostHeader, url):
		"""Fetch url with EXTRA_HEADERS plus an explicit Host header.

		When the returned code equals 0 the body is written to the output.

		Returns the (code, headers, body) of the response.
		"""
		# request headers: the shared extras, then the Host override
		requestHeaders = {}
		requestHeaders.update(EXTRA_HEADERS)
		requestHeaders['Host'] = hostHeader

		code, responseHeaders, body = http_utils.getUrl(url, requestHeaders)
		if code == 0:
			self.writeOutput(body)
		return code, responseHeaders, body
Example #7
0
 def getUrl(self, url):
     """Fetch url via http_utils.getUrl and log how long the request took.

     When the returned code equals 0 the body is written to the output
     before the timing line.

     Returns a (code, body) tuple; the response headers are discarded.
     """
     began = time.time()
     code, _, body = http_utils.getUrl(url)
     if code == 0:
         self.writeOutput(body)
     elapsed = time.time() - began
     self.writeOutput('Info: get %s took %s' % (url, elapsed))
     return (code, body)
	def getURL(self, hostHeader, url):
		"""Issue a request for url, sending EXTRA_HEADERS and the given Host.

		The body is written to the output when the returned code equals 0.

		Returns a (code, headers, body) tuple taken from the response.
		"""
		outgoing = {}
		outgoing.update(EXTRA_HEADERS)
		# set after the update so hostHeader wins over any 'Host' in the extras
		outgoing['Host'] = hostHeader

		code, incoming, body = http_utils.getUrl(url, outgoing)
		if code == 0:
			self.writeOutput(body)
		return code, incoming, body
Example #9
0
 def retrieveUrl(self, url):
     """Fetch url and log timing, size and checksum of the returned data.

     When the returned code equals 0 the data itself is written to the
     output first. The summary line is always written afterwards.

     Returns a (code, data) tuple; the response headers are discarded.
     """
     began = time.time()
     code, _, data = http_utils.getUrl(url)
     if code == 0:
         self.writeOutput(data)
     # the elapsed time is taken before the size / checksum are computed
     elapsed = time.time() - began
     summary = (url, elapsed, len(data), self.md5sum(data))
     self.writeOutput('Info: get %s took %s, size %s cksum %s' % summary)
     return code, data
 def getURL(self, hostHeader, url, range=None):
     """Fetch url with EXTRA_HEADERS, an explicit Host and gzip encoding.

     hostHeader - value sent as the Host header
     url        - the url to request
     range      - optional; when given it is sent verbatim after 'bytes='
                  in a Range header

     The body is written to the output when the returned code equals 0.

     Returns the (code, headers, body) of the response.
     """
     requestHeaders = {}
     requestHeaders.update(EXTRA_HEADERS)
     requestHeaders['Host'] = hostHeader
     requestHeaders['Accept-encoding'] = 'gzip'
     # identity test: only a missing range skips the header
     if range is not None:
         requestHeaders['Range'] = 'bytes=%s' % range

     # keep the response headers apart from the request headers
     code, responseHeaders, body = http_utils.getUrl(url, requestHeaders)
     if code == 0:
         self.writeOutput(body)
     return code, responseHeaders, body
Example #11
0
	def getURL(self, hostHeader, url, range = None):
		"""Fetch url with EXTRA_HEADERS, an explicit Host and gzip encoding.

		hostHeader - value sent as the Host header
		url        - the url to request
		range      - optional; when given it is sent verbatim after 'bytes='
		             in a Range header

		The body is written to the output when the returned code equals 0.

		Returns the (code, headers, body) of the response.
		"""
		requestHeaders = {}
		requestHeaders.update(EXTRA_HEADERS)
		requestHeaders['Host'] = hostHeader
		requestHeaders['Accept-encoding'] = 'gzip'
		# identity test: only a missing range skips the header
		if range is not None:
			requestHeaders['Range'] = 'bytes=%s' % range

		# keep the response headers apart from the request headers
		code, responseHeaders, body = http_utils.getUrl(url, requestHeaders)
		if code == 0:
			self.writeOutput(body)
		return code, responseHeaders, body
def getHdsManifestUrls(baseUrl, urlContent, headers):
    """Return the urls referenced by an HDS manifest.

    The result holds the absolute url of every bootstrapInfo element that
    has a 'url' attribute, followed, for every media element whose bootstrap
    could be resolved, by whatever filterChunkList returns for that media's
    fragment urls.

    baseUrl    - base used when building absolute urls
    urlContent - the manifest xml text
    headers    - passed to http_utils.getUrl when fetching a bootstrap
    """
    result = []

    # parse the xml
    parsed = parseString(urlContent)

    # get the bootstraps
    segmentIndexes = {}
    for node in parsed.getElementsByTagName('bootstrapInfo'):
        atts = getAttributesDict(node)
        # 'in' instead of has_key(): has_key was removed in python 3
        if 'url' in atts:
            curUrl = getAbsoluteUrl(atts['url'], baseUrl)
            result.append(curUrl)

            # get the bootstrap info
            code, _, bootstrapInfo = http_utils.getUrl(curUrl, headers)
            if code != 200 or len(bootstrapInfo) == 0:
                # the bootstrap url stays in the result, but no fragments
                # can be derived from it
                continue
        else:
            # no url - the bootstrap is embedded as base64 text
            bootstrapInfo = base64.b64decode(node.firstChild.nodeValue)
        bootstrapId = atts['id']
        segmentIndexes[bootstrapId] = getHdsSegmentIndexes(bootstrapInfo)

    # add the media urls
    for node in parsed.getElementsByTagName('media'):
        atts = getAttributesDict(node)
        bootstrapId = atts['bootstrapInfoId']
        if bootstrapId not in segmentIndexes:
            continue

        # drop any query string so it does not end up in the middle of the
        # fragment url (before the 'Seg1-Frag' suffix)
        url = atts['url'].split('?')[0]
        fragments = [
            getAbsoluteUrl('%s/%sSeg1-Frag%s' % (baseUrl, url, curSeg))
            for curSeg in segmentIndexes[bootstrapId]]

        result += filterChunkList(fragments)

    return result
def getHlsMasterPlaylistUrls(baseUrl, urlContent, headers):
    """Return the urls referenced by an HLS master playlist.

    Every non-empty line that is not a tag, and every tag line that carries
    a URI="..." attribute, contributes one absolute url. Each such url is
    then fetched, and when the fetch returns code 200 with a non-empty body
    the urls reported by getHlsMediaPlaylistUrls for it are added as well.

    baseUrl    - base used to make the playlist entries absolute
    urlContent - the master playlist text
    headers    - passed to http_utils.getUrl when fetching each entry
    """
    urls = []
    for rawLine in urlContent.split('\n'):
        line = rawLine.strip()
        if not line:
            continue

        # pick the url out of the line
        if line.startswith('#'):
            # tag lines only matter when they embed a URI="..." attribute
            if 'URI="' not in line:
                continue
            target = line.split('URI="')[1].split('"')[0]
        else:
            target = line
        target = getAbsoluteUrl(target, baseUrl)
        urls.append(target)

        # expand the referenced playlist into its own urls
        code, _, mediaContent = http_utils.getUrl(target, headers)
        if code == 200 and len(mediaContent) != 0:
            # entries of the fetched playlist are resolved against its folder
            mediaBaseUrl = target.rsplit('/', 1)[0] + '/'
            urls += getHlsMediaPlaylistUrls(mediaBaseUrl, mediaContent)
    return urls
def getHlsMasterPlaylistUrls(baseUrl, urlContent, headers):
	"""Return the urls referenced by an HLS master playlist.

	Every non-empty line that is not a tag, and every tag line that carries
	a URI="..." attribute, contributes one absolute url. Each such url is
	then fetched, and when the fetch returns code 200 with a non-empty body
	the urls reported by getHlsMediaPlaylistUrls for it are added as well.

	baseUrl    - base used to make the playlist entries absolute
	urlContent - the master playlist text
	headers    - passed to http_utils.getUrl when fetching each entry
	"""
	urls = []
	for rawLine in urlContent.split('\n'):
		line = rawLine.strip()
		if not line:
			continue

		# pick the url out of the line
		if line.startswith('#'):
			# tag lines only matter when they embed a URI="..." attribute
			if 'URI="' not in line:
				continue
			target = line.split('URI="')[1].split('"')[0]
		else:
			target = line
		target = getAbsoluteUrl(target, baseUrl)
		urls.append(target)

		# expand the referenced playlist into its own urls
		code, _, mediaContent = http_utils.getUrl(target, headers)
		if code == 200 and len(mediaContent) != 0:
			# the base passed on is the url up to (excluding) its last '/'
			mediaBaseUrl = target.rsplit('/', 1)[0]
			urls += getHlsMediaPlaylistUrls(mediaBaseUrl, mediaContent)
	return urls
Example #15
0
import manifest_utils
import http_utils
import sys
import os

# Download a manifest and every url it references into <output path>.

# exactly two arguments are required; checking for '!= 3' (rather than '< 3')
# keeps the unpacking below from failing when extra arguments are passed
if len(sys.argv) != 3:
    print('Usage:\n\tpython %s <manifest url> <output path>' % os.path.basename(
        __file__))
    sys.exit(1)

_, manifestUrl, outputPath = sys.argv

# fetch the manifest and expand it to the list of referenced urls
# NOTE(review): the returned code is not checked before the headers are used
code, headers, body = http_utils.getUrl(manifestUrl, {})
mimeType = headers['content-type'][0]
urls = manifest_utils.getManifestUrls(manifestUrl, body, mimeType, {})

for curUrl in [manifestUrl] + urls:
    # local name = last path component of the url, without its query string
    fileName = os.path.join(outputPath, os.path.split(curUrl)[1].split('?')[0])
    if os.path.exists(fileName):
        # never overwrite - stop at the first name collision
        print('Error: %s already exists' % fileName)
        break
    http_utils.downloadUrl(curUrl, fileName)
import manifest_utils
import http_utils
import sys
import os

# Download a manifest and every url it references into <output path>.

# exactly two arguments are required; checking for '!= 3' (rather than '< 3')
# keeps the unpacking below from failing when extra arguments are passed
if len(sys.argv) != 3:
	print('Usage:\n\tpython %s <manifest url> <output path>' % os.path.basename(__file__))
	sys.exit(1)

_, manifestUrl, outputPath = sys.argv

# fetch the manifest and expand it to the list of referenced urls
# NOTE(review): the returned code is not checked before the headers are used
code, headers, body = http_utils.getUrl(manifestUrl, {})
mimeType = headers['content-type'][0]
urls = manifest_utils.getManifestUrls(manifestUrl, body, mimeType, {})

for curUrl in [manifestUrl] + urls:
	# local name = last path component of the url; the query string is
	# dropped so it does not become part of the file name
	fileName = os.path.join(outputPath, os.path.split(curUrl)[1].split('?')[0])
	if os.path.exists(fileName):
		# never overwrite - stop at the first name collision
		print('Error: %s already exists' % fileName)
		break
	http_utils.downloadUrl(curUrl, fileName)