def getUrl(url):
    """Fetch *url* via http_utils and print how long the request took.

    An 'Info' line with the elapsed time is always printed; when the
    returned code is 0 the body is printed as well.

    Returns:
        A (code, body) tuple. The response headers are discarded.
    """
    requestStart = time.time()
    code, _headers, body = http_utils.getUrl(url)
    elapsed = time.time() - requestStart
    print('Info: get %s took %s' % (url, elapsed))
    # NOTE(review): code 0 presumably marks a failed request whose body
    # carries the error text - confirm against http_utils.
    if code == 0:
        print(body)
    return (code, body)
def retrieveUrl(self, url):
    """Download *url* and log timing, size and checksum of the response.

    When the returned code is 0 the response data is written to the output
    first. An 'Info' line is then always written, containing the elapsed
    time, the data length and the value of self.md5sum(data).

    Returns:
        A (code, data) tuple. The response headers are discarded.
    """
    began = time.time()
    code, _headers, data = http_utils.getUrl(url)
    # NOTE(review): code 0 presumably means the request failed and data
    # holds the error description - confirm against http_utils.
    if code == 0:
        self.writeOutput(data)
    elapsed = time.time() - began
    size = len(data)
    checksum = self.md5sum(data)
    self.writeOutput(
        'Info: get %s took %s, size %s cksum %s' % (url, elapsed, size, checksum))
    return code, data
def getHdsManifestUrls(baseUrl, urlContent, headers):
    """Collect the URLs referenced by an HDS manifest.

    Args:
        baseUrl: base used to resolve the URLs found in the manifest.
        urlContent: the manifest XML text.
        headers: request headers passed to http_utils.getUrl when an
            external bootstrap has to be downloaded.

    Returns:
        A list holding every external bootstrap URL (added even when its
        download fails), followed by one fragment URL per segment index of
        each 'media' element whose bootstrap info was loaded.

    Raises:
        KeyError: if a 'bootstrapInfo' element has no 'id' attribute, or a
            'media' element lacks 'bootstrapInfoId' or 'url'.
    """
    result = []

    # parse the xml
    parsed = parseString(urlContent)

    # get the bootstraps
    segmentIndexes = {}
    for node in parsed.getElementsByTagName('bootstrapInfo'):
        atts = getAttributesDict(node)
        # membership test instead of dict.has_key(), which was removed in
        # Python 3; behaves the same on Python 2
        if 'url' in atts:
            curUrl = getAbsoluteUrl(atts['url'], baseUrl)
            result.append(curUrl)

            # get the bootstrap info
            code, _, bootstrapInfo = http_utils.getUrl(curUrl, headers)
            if code != 200 or len(bootstrapInfo) == 0:
                continue
        else:
            # no url attribute: the element text is decoded as base64
            bootstrapInfo = base64.b64decode(node.firstChild.nodeValue)
        bootstrapId = atts['id']
        segmentIndexes[bootstrapId] = getHdsSegmentIndexes(bootstrapInfo)

    # add the media urls
    for node in parsed.getElementsByTagName('media'):
        atts = getAttributesDict(node)
        bootstrapId = atts['bootstrapInfoId']
        # skip media whose bootstrap could not be loaded above
        if bootstrapId not in segmentIndexes:
            continue
        url = atts['url']
        for curSeg in segmentIndexes[bootstrapId]:
            result.append(
                getAbsoluteUrl('%s/%sSeg1-Frag%s' % (baseUrl, url, curSeg)))
    return result
def getUrl(self, url):
    """Fetch *url* via http_utils and log the elapsed time.

    When the returned code is 0 the body is written to the output before
    the 'Info' timing line, which is always written.

    Returns:
        A (code, body) tuple. The response headers are discarded.
    """
    t0 = time.time()
    code, _headers, body = http_utils.getUrl(url)
    # NOTE(review): code 0 presumably signals a failed request with the
    # error text in body - confirm against http_utils.
    if code == 0:
        self.writeOutput(body)
    timingLine = 'Info: get %s took %s' % (url, time.time() - t0)
    self.writeOutput(timingLine)
    return (code, body)
def getUrl(url):
    """Fetch *url* via http_utils and print how long the request took.

    An 'Info' line with the elapsed time is always printed; when the
    returned code is 0 the body is printed too.

    Returns:
        A (code, body) tuple. The response headers are discarded.
    """
    startTime = time.time()
    code, _headers, body = http_utils.getUrl(url)
    print('Info: get %s took %s' % (url, time.time() - startTime))
    # NOTE(review): code 0 presumably marks a failed request whose body
    # carries the error text - confirm against http_utils.
    if code == 0:
        # call form with a single argument prints the same text on
        # Python 2 and parses on Python 3 (the bare `print body`
        # statement is a syntax error there)
        print(body)
    return (code, body)
def getURL(self, hostHeader, url):
    """Request *url* with the module's extra headers and a custom Host.

    The request headers are a copy of EXTRA_HEADERS with 'Host' set to
    *hostHeader*. When the returned code is 0 the body is written to the
    output.

    Returns:
        The (code, headers, body) tuple produced by http_utils.getUrl.
    """
    requestHeaders = dict(EXTRA_HEADERS)
    requestHeaders['Host'] = hostHeader
    response = http_utils.getUrl(url, requestHeaders)
    code, headers, body = response
    # NOTE(review): code 0 presumably means the request itself failed -
    # confirm against http_utils.
    if code == 0:
        self.writeOutput(body)
    return code, headers, body
def retrieveUrl(self, url):
    """Fetch *url*, then write a summary line describing the response.

    If the returned code is 0, the data is written to the output before
    the summary. The summary reports elapsed time, len(data) and
    self.md5sum(data).

    Returns:
        A (code, data) tuple. The response headers are discarded.
    """
    fetchStarted = time.time()
    result = http_utils.getUrl(url)
    code, _headers, data = result
    # NOTE(review): code 0 presumably indicates a failed request whose data
    # is the error message - confirm against http_utils.
    if code == 0:
        self.writeOutput(data)
    summaryArgs = (
        url,
        time.time() - fetchStarted,
        len(data),
        self.md5sum(data),
    )
    self.writeOutput('Info: get %s took %s, size %s cksum %s' % summaryArgs)
    return code, data
def getURL(self, hostHeader, url, range=None):
    """Request *url* with gzip encoding, a custom Host and optional Range.

    Args:
        hostHeader: value sent as the 'Host' request header.
        url: the URL to fetch.
        range: optional byte range; when given it is sent as
            'Range: bytes=<range>'. The name shadows the builtin but is
            kept because callers may pass it by keyword.

    Returns:
        The (code, headers, body) tuple produced by http_utils.getUrl,
        where headers are the response headers.
    """
    # separate name so the request headers are not confused with the
    # response headers unpacked below
    requestHeaders = {}
    requestHeaders.update(EXTRA_HEADERS)
    requestHeaders['Host'] = hostHeader
    requestHeaders['Accept-encoding'] = 'gzip'
    # identity test: the recommended way to compare against None
    if range is not None:
        requestHeaders['Range'] = 'bytes=%s' % range
    code, headers, body = http_utils.getUrl(url, requestHeaders)
    # NOTE(review): code 0 presumably means the request itself failed and
    # body holds the error text - confirm against http_utils.
    if code == 0:
        self.writeOutput(body)
    return code, headers, body
def getURL(self, hostHeader, url, range=None):
    """Fetch *url* asking for gzip, with a given Host and optional Range.

    Args:
        hostHeader: value placed in the 'Host' request header.
        url: the URL to fetch.
        range: optional byte range, sent as 'Range: bytes=<range>' when
            not None. Shadows the builtin; the name is kept for
            compatibility with keyword callers.

    Returns:
        (code, headers, body) as returned by http_utils.getUrl; headers
        are the response headers.
    """
    # build the request headers under their own name instead of reusing
    # `headers`, which is rebound to the response headers below
    requestHeaders = {}
    requestHeaders.update(EXTRA_HEADERS)
    requestHeaders['Host'] = hostHeader
    requestHeaders['Accept-encoding'] = 'gzip'
    # compare to None by identity rather than with `!=`
    if range is not None:
        requestHeaders['Range'] = 'bytes=%s' % range
    code, headers, body = http_utils.getUrl(url, requestHeaders)
    # NOTE(review): code 0 presumably flags a failed request - confirm
    # against http_utils.
    if code == 0:
        self.writeOutput(body)
    return code, headers, body
def getHdsManifestUrls(baseUrl, urlContent, headers):
    """Collect the URLs referenced by an HDS manifest.

    Args:
        baseUrl: base used to resolve the URLs found in the manifest.
        urlContent: the manifest XML text.
        headers: request headers passed to http_utils.getUrl when an
            external bootstrap has to be downloaded.

    Returns:
        A list holding every external bootstrap URL (added even when its
        download fails), followed, for each 'media' element whose
        bootstrap info was loaded, by the fragment URLs that
        filterChunkList keeps.

    Raises:
        KeyError: if a 'bootstrapInfo' element has no 'id' attribute, or a
            'media' element lacks 'bootstrapInfoId' or 'url'.
    """
    result = []

    # parse the xml
    parsed = parseString(urlContent)

    # get the bootstraps
    segmentIndexes = {}
    for node in parsed.getElementsByTagName('bootstrapInfo'):
        atts = getAttributesDict(node)
        # membership test instead of dict.has_key(), which was removed in
        # Python 3; behaves the same on Python 2
        if 'url' in atts:
            curUrl = getAbsoluteUrl(atts['url'], baseUrl)
            result.append(curUrl)

            # get the bootstrap info
            code, _, bootstrapInfo = http_utils.getUrl(curUrl, headers)
            if code != 200 or len(bootstrapInfo) == 0:
                continue
        else:
            # no url attribute: the element text is decoded as base64
            bootstrapInfo = base64.b64decode(node.firstChild.nodeValue)
        bootstrapId = atts['id']
        segmentIndexes[bootstrapId] = getHdsSegmentIndexes(bootstrapInfo)

    # add the media urls
    for node in parsed.getElementsByTagName('media'):
        atts = getAttributesDict(node)
        bootstrapId = atts['bootstrapInfoId']
        # skip media whose bootstrap could not be loaded above
        if bootstrapId not in segmentIndexes:
            continue
        # drop any query string from the media url before building
        # fragment names
        url = atts['url'].split('?')[0]
        fragments = [
            getAbsoluteUrl('%s/%sSeg1-Frag%s' % (baseUrl, url, curSeg))
            for curSeg in segmentIndexes[bootstrapId]
        ]
        result += filterChunkList(fragments)
    return result
def getHlsMasterPlaylistUrls(baseUrl, urlContent, headers):
    """Collect the URLs referenced by an HLS master playlist.

    Each non-empty line contributes a URL: plain lines are used as-is,
    lines starting with '#' contribute the value of their URI="..."
    attribute (and are skipped when they have none). Every URL is resolved
    against *baseUrl*, added to the result, then downloaded with *headers*;
    when that download returns code 200 and a non-empty body, the URLs
    reported by getHlsMediaPlaylistUrls for it are added as well.

    Returns:
        The list of collected URLs.
    """
    result = []
    for rawLine in urlContent.split('\n'):
        line = rawLine.strip()
        if not line:
            continue

        # work out which url this line refers to
        if line[0] != '#':
            curUrl = line
        elif 'URI="' in line:
            curUrl = line.split('URI="')[1].split('"')[0]
        else:
            continue
        curUrl = getAbsoluteUrl(curUrl, baseUrl)
        result.append(curUrl)

        # fetch the referenced playlist and add its segments
        code, _, mediaContent = http_utils.getUrl(curUrl, headers)
        if code == 200 and len(mediaContent) != 0:
            # the media playlist base keeps the trailing slash
            curBaseUrl = curUrl.rsplit('/', 1)[0] + '/'
            result.extend(getHlsMediaPlaylistUrls(curBaseUrl, mediaContent))
    return result
def getHlsMasterPlaylistUrls(baseUrl, urlContent, headers):
    """Gather the URLs an HLS master playlist points at.

    Blank lines are ignored. A line starting with '#' is only considered
    when it carries a URI="..." attribute, whose value becomes the URL;
    any other line is itself the URL. Each URL is made absolute against
    *baseUrl* and recorded, then fetched with *headers*. For responses
    with code 200 and a non-empty body, the URLs returned by
    getHlsMediaPlaylistUrls are appended too.

    Returns:
        The list of collected URLs.
    """
    collected = []
    lines = (candidate.strip() for candidate in urlContent.split('\n'))
    for curLine in lines:
        if len(curLine) == 0:
            continue

        # get the current url
        isTag = curLine[0] == '#'
        if isTag and 'URI="' not in curLine:
            continue
        if isTag:
            afterUri = curLine.split('URI="')[1]
            curUrl = afterUri.split('"')[0]
        else:
            curUrl = curLine
        curUrl = getAbsoluteUrl(curUrl, baseUrl)
        collected.append(curUrl)

        # get the segments of the current url
        code, _, mediaContent = http_utils.getUrl(curUrl, headers)
        if code != 200 or len(mediaContent) == 0:
            continue
        # the media playlist base is the url up to (not including) the
        # last '/'
        parentUrl = curUrl.rsplit('/', 1)[0]
        collected += getHlsMediaPlaylistUrls(parentUrl, mediaContent)
    return collected
import manifest_utils
import http_utils
import sys
import os

# Download a manifest, and every URL that manifest_utils.getManifestUrls
# reports for it, into an output directory.
#
# Usage: python <script> <manifest url> <output path>

# Require exactly two arguments: the unpacking below would otherwise raise
# ValueError when extra arguments are supplied.
if len(sys.argv) != 3:
    # call form with a single argument prints the same text on Python 2
    # and also parses on Python 3
    print('Usage:\n\tpython %s <manifest url> <output path>' %
          os.path.basename(__file__))
    sys.exit(1)

_, manifestUrl, outputPath = sys.argv

# NOTE(review): the returned code is not checked, and a response without a
# 'content-type' header raises KeyError here - confirm whether
# http_utils.getUrl always supplies it.
code, headers, body = http_utils.getUrl(manifestUrl, {})
mimeType = headers['content-type'][0]
urls = manifest_utils.getManifestUrls(manifestUrl, body, mimeType, {})

for curUrl in [manifestUrl] + urls:
    # file name = last path component of the url, without the query string
    fileName = os.path.join(
        outputPath, os.path.split(curUrl)[1].split('?')[0])
    # never overwrite: stop at the first file that is already present
    if os.path.exists(fileName):
        print('Error: %s already exists' % fileName)
        break
    http_utils.downloadUrl(curUrl, fileName)
import manifest_utils
import http_utils
import sys
import os

# Fetch a manifest and save it, together with every URL returned by
# manifest_utils.getManifestUrls for it, under an output directory.
#
# Usage: python <script> <manifest url> <output path>

# Exactly two arguments are required; with more, the tuple unpacking
# below would raise ValueError instead of showing the usage text.
if len(sys.argv) != 3:
    # single-argument call form: same output on Python 2, valid syntax on
    # Python 3
    print('Usage:\n\tpython %s <manifest url> <output path>' %
          os.path.basename(__file__))
    sys.exit(1)

_, manifestUrl, outputPath = sys.argv

# NOTE(review): the returned code is not checked, and a missing
# 'content-type' header raises KeyError here - confirm what
# http_utils.getUrl returns on failure.
code, headers, body = http_utils.getUrl(manifestUrl, {})
mimeType = headers['content-type'][0]
urls = manifest_utils.getManifestUrls(manifestUrl, body, mimeType, {})

for curUrl in [manifestUrl] + urls:
    # Use the last path component as the file name and drop any query
    # string, so '?' and the parameters never end up in the file name.
    baseName = os.path.split(curUrl)[1].split('?')[0]
    fileName = os.path.join(outputPath, baseName)
    # never overwrite: stop at the first file that is already present
    if os.path.exists(fileName):
        print('Error: %s already exists' % fileName)
        break
    http_utils.downloadUrl(curUrl, fileName)