def getVideoDownloadAddress(url):
    """Fetch a video page and return metadata for its highest-resolution source.

    :param url: page URL containing a <video class="video-js vjs-default-skin"> tag
    :return: dict with keys "title" (page <title> text), "url" (src of the
             highest-'res' <source>), and "type" (file extension including the dot)
    """
    content = httputil.fetchContent(url)
    soup = BeautifulSoup(content, "html.parser")
    title = soup.find_all("title")[0].text
    video_tag = soup.find_all("video", class_="video-js vjs-default-skin")[0]
    highest = 0
    targetUrl = ""
    for source in video_tag.find_all("source"):
        # 'res' is a numeric resolution attribute; keep the largest source seen
        level = int(source['res'])
        if level > highest:
            highest = level
            targetUrl = source['src']
    # Extension like ".mp4"; when no <source> matched, rfind() is -1 and the
    # slice degrades gracefully instead of raising.
    extension = targetUrl[targetUrl.rfind("."):]
    # `extension` was named `type` before, shadowing the builtin; the dict key
    # stays "type" so callers are unaffected. Debug prints of the full page
    # HTML and per-source attributes were removed.
    return {"title": title, "url": targetUrl, "type": extension}
def fetchTargetPage(self, url):
    """Fetch the page at *url* and extract the video links it contains.

    :param url: page URL to scrape
    :return: whatever httputil.fecthActualPageMessage parses out of the raw HTML
    """
    rawHtml = httputil.fetchContent(url)
    return httputil.fecthActualPageMessage(rawHtml)
def fetchTargetPage(self, page=1):
    """Scrape one index page of girl-atlas and return metadata for each album.

    :param page: 1-based index page number
    :return: list of dicts with keys albumId/title/picNumber/author/date/
             watchTimes/albumURL, one per album found on the page
    """
    url = 'https://www.girl-atlas.com/index1?p=' + str(page)
    rawHtml = httputil.fetchContent(url)
    soup = BeautifulSoup(rawHtml, "html.parser")
    results = []
    for target in soup.find_all("div", class_="album-item row"):
        print("-------------")
        link = target.find("h2").find("a")
        title = link.text
        # Album id is the trailing path segment of the href ("/album/<id>").
        # Renamed from `id`, which shadowed the builtin.
        albumId = link['href'].replace("/album/", "")
        # Description text looks like "由<author>在<date>创建 ... 含<n>张 ... 了<m>次"
        # — presumably; the delimiters below are what reRemove slices between.
        desp = target.find("p", class_="desp").text
        picNumber = int(self.reRemove("含", "张", desp))
        author = self.reRemove("由", "在", desp)
        date = self.reRemove("在", "创", desp)
        watchTimes = self.reRemove("了", "次", desp)
        albumURL = "https://www.girl-atlas.com/album/" + albumId + "?display=2"
        print("标题:" + title + " 相册共" + str(picNumber) + "张 " + "地址:" + albumURL)
        results.append({
            "albumId": albumId,
            "title": title,
            "picNumber": picNumber,
            "author": author,
            "date": date,
            "watchTimes": watchTimes,
            "albumURL": albumURL,
        })
        print("-------------")
    return results
def handleXVideoContent(url):
    """Fetch a video page and return the <script> that configures the HTML5 player.

    :param url: video page URL
    :return: the matching script tag rendered as a string (the last one, if the
             marker appears in several scripts), or None when none matches
    """
    pageHtml = httputil.fetchContent(url)
    soup = BeautifulSoup(pageHtml, "html.parser")
    playerScript = None
    for script in soup.find_all("script"):
        text = str(script)
        if 'html5player.setVideoUrl' in text:
            print("find it")
            playerScript = text
    return playerScript
def fetchAlbum(self, url):
    """Collect the link of every <li> entry in the album's grid view.

    :param url: album page URL
    :return: list of href strings; empty when the grid <ul> is missing
    """
    rawHtml = httputil.fetchContent(url)
    soup = BeautifulSoup(rawHtml, "html.parser")
    grids = soup.find_all("ul", class_="gridview")
    if not grids:
        return []
    return [item.find("a")['href'] for item in grids[0].find_all("li")]
def fetchMaxPageNumber(self, url='https://www.girl-atlas.com/index1'):
    """Return the largest page number shown in the pagination bar.

    :param url: index page whose pagination <ul> is inspected
    :return: maximum page number as int; 0 when the page has no pagination
    """
    logging.debug("fetchMaxPageNumber")
    rawHtml = httputil.fetchContent(url)
    soup = BeautifulSoup(rawHtml, "html.parser")
    paginations = soup.find_all("ul", class_="pagination")
    if len(paginations) == 0:
        return 0
    numbers = []
    for item in paginations[0].find_all("li"):
        value = item.find("a").text
        if ">" in value:
            # "next" arrow entry carries no page number
            print("----------->")
            continue
        if "(current)" in value:
            value = value.replace("(current)", "")
        if "..." in value:
            # ellipsis placeholder between page groups
            continue
        numbers.append(int(value.strip()))
    # The original kept an `isShowMaxPage` flag that was written but never
    # read (dead code, removed) and re-implemented max() by hand while
    # shadowing the builtin.
    return max(numbers) if numbers else 0
def fetchMaxPageNumber(self, url):
    """Return the maximum page number advertised by the page at *url*.

    If the pagination still shows a "next" arrow, the real maximum is at least
    one page beyond the largest visible number, so 1 is added.

    :param url: listing page URL
    :return: best-known maximum page number as int
    """
    # The original declared `number`, `hasNextNavi` and `content` as globals,
    # leaking per-call scratch state to module level; plain locals suffice.
    content = httputil.fetchContent(url)
    hasNextNavi = httputil.isPageNaviHasNext(content)
    number = httputil.fetchMaxPageNumber(content)
    if hasNextNavi:
        number += 1
    return number
def fetch(self, url):
    """Fetch the parsed video message for *url*, retrying while rate-limited.

    Re-fetches in a loop, sleeping IP_LIMIT_TIME seconds between attempts,
    until httputil.fetchActualMessage returns a non-None result.

    :param url: video page URL; re-run through httputil.convertURL each attempt
                (the converted value feeds the next conversion, as before)
    :return: the non-None result of httputil.fetchActualMessage
    """
    while True:
        url = httputil.convertURL(url)
        content = httputil.fetchContent(url)
        result = httputil.fetchActualMessage(content)
        # was `result != None` — identity comparison is the correct idiom
        if result is not None:
            return result
        # None presumably signals throttling; back off before the next try
        print("sleep ")
        time.sleep(IP_LIMIT_TIME)
def handleVideoContent(url):
    """Fetch a video page and extract its title and HLS stream URL.

    :param url: video page URL
    :return: dict {"title": ..., "hlsViedoUrl": ...}; a value is None when the
             corresponding element is missing from the page
    """
    disk.mkdir(basePath)
    content = httputil.fetchContent(url)
    soup = BeautifulSoup(content, "html.parser")
    # The original used `global result/title/hlsViedoUrl`: a page missing the
    # title span either raised NameError or silently reused a *previous*
    # call's values. Locals with explicit None defaults fix both.
    title = None
    hlsViedoUrl = None
    spans = soup.find_all("span", class_="title")
    if len(spans) > 0:
        # sanitize the title so it can later serve as a file name
        title = httputil.__escape_file_name_str(str(spans[0].text))
    divs = soup.find_all("div", class_="dz")
    if len(divs) > 0:
        # the stream URL lives in the first <p> of the "dz" div
        hlsViedoUrl = str(divs[0].find("p").text)
    return {"title": title, "hlsViedoUrl": hlsViedoUrl}
def getVideoDownloadAddress(url):
    """Fetch the page at *url* and dump its raw HTML to stdout (debug stub)."""
    page = httputil.fetchContent(url)
    print(page)
# NOTE(review): removed ~20 lines of commented-out experiments (m3u8 URL
# rewriting, streamed download via requests/shutil, extension extraction) and
# a dead `url = "https://www.avbyebye.com"` assignment that was immediately
# overwritten. Recover them from version control if ever needed again.
url = "http://www.allitebooks.com/powershell-for-sql-server-essentials/"
content = httputil.fetchContent(url)
print(content)
def main():
    """Download pending videos one by one until space or size limits are hit.

    Pulls records from the database, resolves the real download URL, downloads
    the file, and updates each record's download status:
    1 = downloaded, 0 = download failed, 2 = source video is missing.
    Stops when there is no work left, the disk reports no space, or the
    download folder exceeds MAX_DOWNLOAD_SIZE GB.
    """
    pron = Pron91()
    db = Databasemanager()
    running = True
    targetPron = db.getPronToDownload()
    lastPron = db.getLastPron()
    # Previously `lastPronId` was only bound when lastPron existed, so a None
    # lastPron with pending work crashed with NameError below; None now just
    # skips the progress report.
    lastPronId = lastPron['_id'] if lastPron is not None else None
    while targetPron is not None and running:
        _id = targetPron['_id']
        url = targetPron['targetURL']
        viewkey = targetPron['viewkey']
        if lastPronId is not None:
            progress = int(_id) / int(lastPronId)
            print("progress is " + str(progress))
        urll = httputil.convertURL(url)
        contentCheck = httputil.fetchContent(urll)
        isMiss = httputil.fetchIsVideoMiss(contentCheck)
        if not isMiss:
            result = pron.fetch(url)
            downloadURL = result['downloadURL']
            title = result['title']
            fileType = result['type']  # was `type`: shadowed the builtin
            pronData = {
                "viewkey": viewkey,
                "originalURL": url,
                "title": title,
                "type": fileType,
                "actDownloadURL": downloadURL,
                "downloadStatus": "0"
            }
            db.insertOrUpdatePron(pronData)
            print(result)
            file = title + "." + fileType
            try:
                isHaveSpace = httputil.downloadVideo(downloadURL, file)
                print("isHaveSpace" + str(isHaveSpace))
                if isHaveSpace:
                    db.updatePronDownloadStatus(viewkey, 1)
                    targetPron = db.getPronToDownload()
                else:
                    # disk reports no space: finish after this record
                    running = False
            except Exception:
                # was a bare `except:` (also caught KeyboardInterrupt/SystemExit);
                # mark the record failed and continue
                db.updatePronDownloadStatus(viewkey, 0)
            # stop once the download folder exceeds the configured size cap
            foldersize = convertToGb(get_size(httputil.BaseDownloadPath))
            if foldersize > MAX_DOWNLOAD_SIZE:
                running = False
        else:
            # source video has been removed upstream
            print('文件miss')
            db.updatePronDownloadStatus(viewkey, 2)
            targetPron = db.getPronToDownload()
        time.sleep(SLEEP_per_Video)
    print("End")