def download_idm(url, referer_url, file_type, title=''):
    """Hand ``url`` to IDM and return the path the file is saved under.

    The file name is ``<title>_<epoch milliseconds>.<file_type>`` and the
    target folder is the module-level ``before_video_dir``.

    Args:
        url: Address of the resource to download.
        referer_url: Value forwarded to IDM as the ``referrer``.
        file_type: Extension (without the dot) used for the saved file.
        title: Optional prefix for the generated file name.

    Returns:
        ``before_video_dir`` joined with the generated file name.
    """
    print("下载:", url)

    # The millisecond timestamp is what distinguishes files sharing a title.
    stamp_ms = int(round(time.time() * 1000))
    target_name = '{}_{}.{}'.format(title, stamp_ms, file_type)

    idm = IDMan()
    idm.download(
        url,
        path_to_save=before_video_dir,
        output=target_name,
        referrer=referer_url,
    )

    print("下载完成:", url)
    return os.path.join(before_video_dir, target_name)
def __init__(self, anime_data, options=None):
    """Set up crawler state, start the browser and log the starting lists.

    Args:
        anime_data: Indexable with three entries: ``[0]`` the titles that
            may be downloaded, ``[1]`` titles already downloaded and ``[2]``
            the episode numbers matching ``[1]`` position by position.
        options: Crawler settings. A fresh ``Options()`` is created when
            omitted, so instances never share one default object.
    """
    # Build the default per call; a default written as ``Options()`` in the
    # signature is evaluated only once, when the function is defined.
    self.options = Options() if options is None else options
    self.valid_animes = anime_data[0]
    self.already_downloaded_animes = anime_data[1]
    self.already_downloaded_eps = anime_data[2]

    self.chrome_options = None
    self.driver = None
    self.initialize_driver()
    self.downloader = IDMan()

    # anime_data = [{"name", "ep_no", "link"}]
    # complete_anime_data = [{"name", "ep_no", "page_link", "download_link", "file_name", "status"}]
    self.downloading = []  # in anime_data_list format
    self.not_downloading = []  # in anime_data_list format
    self.not_valid = []  # in anime_data_list format
    self.complete_data = []  # in complete_anime_data format
    self.unable_to_find = []  # in complete_anime_data format
    self.errors = []  # string array
    self.logs = []  # string array

    def log(statement):
        # Keep a timestamped copy; echo to stdout only when logging is on.
        self.logs.append("[" + str(datetime.now().time()) + "]:" + statement)
        if self.options.logs:
            print(statement)

    log("Valid Animes: ")
    for anime in self.valid_animes:
        log("\t\t" + anime)

    log("Already Downloaded Animes: ")
    for anime, episode in zip(self.already_downloaded_animes,
                              self.already_downloaded_eps):
        log("\t\t" + anime + ", EP: " + episode)
def download(cls, url, download_path=os.getcwd(), download_name=None, confirm=False):
    """Download ``url`` with the backend that fits the current platform.

    On Windows IDM is tried first and wget is the fallback; on Linux the
    work is delegated to ``cls.download_linux``; any other platform only
    prints a "not supported" message.

    Args:
        url: Address of the resource to download.
        download_path: Target directory. NOTE: the default is the working
            directory at the time this function was defined, not at call
            time.
        download_name: Optional file name for the saved file.
        confirm: Forwarded to ``IDMan.download``.

    Returns:
        Whatever ``cls.download_linux`` returns on Linux, otherwise ``None``.
    """
    if sys.platform == 'win32':
        try:
            from idm import IDMan
            dm = IDMan()
            dm.download(url, download_path, download_name, confirm=confirm)
        except Exception:
            # IDM is missing or failed: fall back to wget.  Catching
            # ``Exception`` rather than everything lets Ctrl-C and
            # SystemExit stop the program instead of starting a second
            # download.
            from pywget import wget
            if download_name:
                print(make_colors("Download Name:", 'lw', 'bl') + " " + make_colors(download_name, 'lw', 'm'))
                download_path = os.path.join(download_path, download_name)
            wget.download(url, download_path)
    elif 'linux' in sys.platform:
        return cls.download_linux(url, download_path, download_name)
    else:
        print(make_colors("Your system not supported !", 'lw', 'lr', ['blink']))
def download(self, url, download_path=os.getcwd(), confirm=False, use_wget=False):
    """Download ``url`` into ``download_path`` using IDM, or wget as fallback.

    Args:
        url: Address of the resource. URLs containing ``'sourceforge'`` are
            delegated to ``self.sourceforge``.
        download_path: Target directory. NOTE: the default is the working
            directory at the time this function was defined, not at call
            time.
        confirm: Forwarded to ``IDMan.download``.
        use_wget: Skip IDM and download with wget directly.

    Returns:
        The result of ``self.sourceforge`` for sourceforge URLs, otherwise
        ``None``.
    """
    print(make_colors("DOWNLOAD PATH:", 'bl') + " " + make_colors(download_path, 'y'))

    if 'sourceforge' in url:
        return self.sourceforge(url, download_path)

    if use_wget:
        wget.download(url, download_path)
        return

    try:
        idm = IDMan()
        idm.download(url, download_path, confirm=confirm)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C / SystemExit
        # are not turned into a wget download.
        if sys.platform == 'win32':
            # Show why IDM failed; ``format_exc()`` only builds the text and
            # its result was being thrown away.
            traceback.print_exc()
        wget.download(url, download_path)
def startDownload():
    """Wait until the clock hour is 4, then download the selected links.

    The function polls the local time every 10 minutes, and once the hour
    is 4 it buys and switches on the connection via ``internet``, hands
    every link from ``selectLinks()`` to IDM, sleeps for the estimated
    transfer time of each one and finally switches the connection off.
    """
    while True:
        # Take one snapshot so hour and minute always belong to the same
        # instant.
        now = datetime.datetime.now()
        if now.hour == 4:
            break
        print(" > Waiting Until 4 To Start Downloading.(Checked At " + str(now.hour) + ":" + str(now.minute) + ")")
        time.sleep(600)

    downloader = IDMan()
    links = selectLinks()
    # Used as size / downloadSpeed = seconds to wait.
    # NOTE(review): presumably bytes per second - confirm.
    downloadSpeed = 8000000

    internet.buy()
    time.sleep(15)
    internet.turnOn()
    try:
        print(" > Downloading Started.")
        for link in links:
            # NOTE(review): ``urllib.urlopen`` exists only on Python 2; on
            # Python 3 this needs ``urllib.request.urlopen`` - confirm the
            # target interpreter.
            response = urllib.urlopen(link)
            try:
                size = int(response.info()["Content-Length"])
            finally:
                response.close()
            t = int(size / downloadSpeed)
            downloader.download(link, sys.path[0] + "\\Downloads", output=None, referrer=None,
                                cookie=None, postData=None, user=None, password=None,
                                confirm=False, lflag=None, clip=False)
            print(" > Waiting for " + str(t) + " Seconds To Download From " + link + ".")
            time.sleep(t)
        print(" > Downloading Finished.")
    finally:
        # Switch the connection off even when a download step fails.
        internet.turnOff()
# Download up to 25 episodes of ``series_name`` into their own folder,
# skipping every file that is already present there.
path = r''
save_path = os.path.join(path, series_name)

try:
    # creates directory if none exists
    os.mkdir(save_path)
    print(f'Creating {series_name} Folder...')
except FileExistsError:
    print(f'{series_name} Folder already exists')

# List the folder only now that it is known to exist; listing it before the
# mkdir raised FileNotFoundError on the first run for a new series.
ep_list = os.listdir(save_path)

for episode in soup.find_all('span', class_='notranslate')[:25]:
    try:
        # retrieves download link
        ep_link = episode.a['href']
        ep_name = ep_link.split('/')[-1]
    except (TypeError, KeyError):
        # TypeError: the span has no <a> child (``episode.a`` is None).
        # KeyError: the <a> tag has no ``href`` attribute.
        print('Download link unavailable...')
        continue
    else:
        # checks if file already exists,
        # ignores it if it exists and downloads it if it doesn't.
        if ep_name in ep_list:
            print(f"{ep_name} already exists")
        else:
            # The file is downloaded with IDM
            downloader = IDMan()
            downloader.download(ep_link, save_path)
            print(f'{ep_name} downloading...')
# generate(): turn `link` into a generated download link via self.get_req()
# and then, depending on the flags, copy it to the clipboard, download it or
# push it to pCloud.
#
# NOTE(review): this is Python 2 code (print statements, raw_input) and the
# line structure of the definition has been lost, so the code below is kept
# exactly as found instead of being re-indented by guesswork.
#
# What the visible code does:
#   * `name` and `pcloud_rename` each default to the other when only one of
#     them is given.
#   * When `cliped` is truthy the link is read from the clipboard.
#   * `proxy` falls back to self.proxy; `support` prints self.support().
#   * When `link` and self.link are both None the result is (False, None).
#   * Links containing 'youtu' are handed to self.youtube(). Other links go
#     through self.siotong() / self.zonawibu() when self.get_netloc() reports
#     one of the listed hosts, then through self.get_req(), which is unpacked
#     as (a, out_name, error).
#   * A result `a` of five characters or fewer prompts for a retry: 'y' calls
#     generate() again, 'n' exits, any other answer exits with 'SAVE EXIT'.
#     The same prompt is used when a / out_name / error equals the
#     'Never Give Up ! ...' marker, unless `bypass_regenerate` is set, in
#     which case (a, out_name) is returned.
#   * out_name == 'Generate Failed!' exits with 'FAILED!'.
#   * `pcloud` uploads through self.pcloud(), `clip` copies to the clipboard,
#     and `direct_download` without `pcloud` downloads with IDMan, falling
#     back to wget.download() if that raises.
#   * The normal result is the tuple (a, name).
#
# NOTE(review) - points to confirm before relying on this function:
#   * The parameter `wget` shadows the `wget` module inside this function, so
#     the fallback `wget.download(...)` is called on the flag value.
#   * The retry calls pass arguments only up to `fast`; bypass_regenerate,
#     cliped, name and wget fall back to their defaults on a retry.
#   * `youtube_list` and `q` are not defined anywhere in this function.
#   * `choices` is assigned but never read here.
#   * When `link` is None but self.link is set, `'youtu' in link` is still
#     evaluated against None.
def generate(self, link, clip=None, quality=None, verbosity=None, support= False, direct_download=None, download_path=".", pcloud = False, pcloud_username = None, pcloud_password = None, pcloud_folderid = '0', pcloud_rename = None, pcloud_foldername = None, proxy = None, fast = False, bypass_regenerate = False, cliped = False, name = None, wget = False): if pcloud_rename and not name: name = pcloud_rename if name and not pcloud_rename: pcloud_rename = name debug.debug(link0 = link) if cliped: link = clipboard.paste() debug.debug(link1 = link) choices = ['red', 'yellow', 'cyan', 'green', 'white', 'blue', 'magenta'] if not proxy: proxy = self.proxy if support: print "\n" print self.support() print "\n" if link == None: if self.link == None: return False, None else: pass if 'youtu' in link: self.youtube(link, direct_download, download_path, True, pcloud, pcloud_username, pcloud_password, pcloud_folderid, pcloud_rename, pcloud_foldername) else: debug.debug(link2 = link) if self.get_netloc(link) == 'siotong' or self.get_netloc(link) == 'coeg' or self.get_netloc(link) == 'telondasmu' or self.get_netloc(link) == 'siherp' or self.get_netloc(link) == 'greget' or self.get_netloc(link) == 'tetew' or self.get_netloc(link) == 'anjay': link = self.siotong(link) debug.debug(link = link) if self.get_netloc(link) == 'zonawibu': link = self.zonawibu(link) a, out_name, error = self.get_req(link) if not len(a) > 5: print make_colors(name, 'white', 'red', ['blink']) qr = raw_input(make_colors('Re-Generate again', 'white', 'blue') + " " + make_colors('[Y/N]', 'white', 'red') + ': ') if str(qr).lower() == 'n': sys.exit(0) elif str(qr).lower() == 'y': return self.generate(link, clip, quality, verbosity, support, direct_download, download_path, pcloud, pcloud_username, pcloud_password, pcloud_folderid, pcloud_rename, pcloud_foldername, proxy, fast) else: sys.exit('SAVE EXIT') if out_name == 'Never Give Up ! Generate again and try it up to 10x Generate Link !' or a == 'Never Give Up ! 
Generate again and try it up to 10x Generate Link !' or error == 'Never Give Up ! Generate again and try it up to 10x Generate Link !': if bypass_regenerate: return a, out_name else: qr = raw_input(make_colors('Re-Generate again', 'white', 'blue') + " " + make_colors('[Y/N]', 'white', 'red') + ': ') if str(qr).lower() == 'n': sys.exit(0) elif str(qr).lower() == 'y': return self.generate(link, clip, quality, verbosity, support, direct_download, download_path, pcloud, pcloud_username, pcloud_password, pcloud_folderid, pcloud_rename, pcloud_foldername, proxy, fast) else: sys.exit('SAVE EXIT') if a: if not name: name = '' name = os.path.basename(name) print termcolor.colored('GENERATED : ', 'white', 'on_yellow') + termcolor.colored(a, 'white', 'on_red') print termcolor.colored('NAME : ', 'white', 'on_yellow') + termcolor.colored(out_name, 'white', 'on_blue') print termcolor.colored('DOWNLOAD NAME : ', 'white', 'on_yellow') + termcolor.colored(name, 'white', 'on_blue') if out_name == 'Generate Failed!': sys.exit('FAILED!') if not name: name = out_name if name: name = os.path.basename(name) if pcloud and not direct_download: if os.path.isfile(download_path): download_path = os.path.dirname(download_path) name = os.path.basename(download_path) print make_colors('Upload to PCloud ...', 'white', 'magenta') self.pcloud(a, pcloud_username, pcloud_password, name, pcloud_folderid, pcloud_foldername, False, download_path) if pcloud and direct_download: if os.path.isfile(download_path): download_path = os.path.dirname(download_path) name = os.path.basename(download_path) print make_colors('Upload to PCloud and download it...', 'white', 'magenta') self.pcloud(a, pcloud_username, pcloud_password, name, pcloud_folderid, pcloud_foldername, direct_download, download_path) if clip: if name: clipboard.copy(name) if a: clipboard.copy(a) if direct_download and not pcloud: if os.path.isfile(download_path): download_path = os.path.dirname(download_path) name = 
os.path.basename(download_path) print make_colors('Download it...', 'white', 'blue') #filename = wget.download(a.get('href'), download_path) if 'youtu' in link: name = self.download(str(youtube_list.get(int(q))), download_path, wget = wget) try: idm = IDMan() idm.download(a, download_path, name) except: wget.download(a, out= os.path.join(download_path, name)) return a, name return a, name
class Crawler:
    """Find new episodes on animekisa.tv and hand them to IDM.

    The work is split into two phases: ``start_part_1`` scans the listing
    pages and sorts the episodes found into "download", "already
    downloaded" and "not valid"; ``start_part_2`` opens each episode page
    and resolves the actual download link. ``download_animes`` then queues
    every resolved link in IDM.

    NOTE(review): the Selenium calls used here (``chrome_options=`` and the
    ``find_element(s)_by_*`` helpers) are not available in Selenium 4.3+;
    confirm the pinned Selenium version before upgrading.
    """

    # Caption of the download button for each configurable resolution.
    _QUALITY_LABELS = {
        "360p": 'DOWNLOAD (360P - MP4)',
        "480p": 'DOWNLOAD (480P - MP4)',
        "720p": 'DOWNLOAD (720P - MP4)',
        "1080p": 'DOWNLOAD (1080P - MP4)',
        "source": 'DOWNLOAD (HDP - MP4)',
    }
    # Caption used as the fallback when ``alternate_quality`` is enabled.
    _FALLBACK_LABEL = 'DOWNLOAD (HDP - MP4)'

    def __init__(self, anime_data, options=None):
        """Set up crawler state, start the browser and log the input lists.

        Args:
            anime_data: Indexable with three entries: ``[0]`` the titles
                that may be downloaded, ``[1]`` titles already downloaded
                and ``[2]`` the episode numbers matching ``[1]`` position
                by position. Titles are compared against the lower-cased
                scraped name, so they are expected in lower case.
            options: Crawler settings. A fresh ``Options()`` is created when
                omitted, so crawlers never share one default object.
        """
        # Build the default per call; a default written as ``Options()`` in
        # the signature is evaluated only once, at class-definition time.
        self.options = Options() if options is None else options
        self.valid_animes = anime_data[0]
        self.already_downloaded_animes = anime_data[1]
        self.already_downloaded_eps = anime_data[2]

        self.chrome_options = None
        self.driver = None
        self.initialize_driver()
        self.downloader = IDMan()

        # anime_data = [{"name", "ep_no", "link"}]
        # complete_anime_data = [{"name", "ep_no", "page_link", "download_link", "file_name", "status"}]
        self.downloading = []  # in anime_data_list format
        self.not_downloading = []  # in anime_data_list format
        self.not_valid = []  # in anime_data_list format
        self.complete_data = []  # in complete_anime_data format
        self.unable_to_find = []  # in complete_anime_data format
        self.errors = []  # string array
        self.logs = []  # string array

        self._log("Valid Animes: ")
        for anime in self.valid_animes:
            self._log("\t\t" + anime)

        self._log("Already Downloaded Animes: ")
        for anime, episode in zip(self.already_downloaded_animes,
                                  self.already_downloaded_eps):
            self._log("\t\t" + anime + ", EP: " + episode)

    def _log(self, statement):
        """Store ``statement`` with a timestamp; print it if logging is on."""
        self.logs.append("[" + str(datetime.now().time()) + "]:" + statement)
        if self.options.logs:
            print(statement)

    def _log_error(self, statement):
        """Record ``statement`` as an error and as a regular log entry."""
        self.errors.append(statement)
        self._log(statement)

    def _log_anime_list(self, heading, animes):
        """Log ``heading`` followed by one indented line per episode."""
        self._log(heading)
        for anime in animes:
            self._log("\t\t" + anime["name"] + ", EP: " + anime["ep_no"])

    def initialize_driver(self):
        """Create the Chrome driver according to the current options."""
        self.chrome_options = webdriver.ChromeOptions()
        if not self.options.logs:
            self.chrome_options.add_argument('log-level=3')
        if self.options.silent:
            self.chrome_options.add_argument('--headless')
        self.driver = webdriver.Chrome(chrome_options=self.chrome_options)
        self.driver.set_page_load_timeout(30)

    def get_initial_data(self):
        """Return the episodes listed on the home page and the latest pages.

        Pages ``/latest/1`` up to ``/latest/<pages_to_scan - 1>`` are read
        in addition to the home page.
        """
        anime_data = self.get_initial_data_from_url("https://animekisa.tv/")
        for page in range(1, self.options.pages_to_scan):
            anime_data = anime_data + self.get_initial_data_from_url(
                "https://animekisa.tv/latest/" + str(page))
        return anime_data

    def get_initial_data_from_url(self, url):
        """Return ``{"name", "link", "ep_no"}`` for every episode box on ``url``."""
        self.driver.get(url)
        anime_data = []
        for box in self.driver.find_elements_by_class_name('episode-box'):
            name = box.find_element_by_class_name("title-box-2").text
            link = box.find_element_by_tag_name('a').get_attribute("href")
            # The episode number is whatever follows the last "-" in the URL.
            ep_no = link.rsplit("-", 1)[-1]
            anime_data.append({"name": name, "link": link, "ep_no": ep_no})
        return anime_data

    def filter_anime_data(self, anime_data):
        """Sort scraped episodes into three lists.

        Returns:
            ``(to_download, already_downloaded, invalid)``. An episode is
            invalid when its lower-cased name is not in ``valid_animes``,
            and already downloaded when its (name, episode) pair is stored
            in the already-downloaded lists.
        """
        anime_to_download = []
        found_but_not_downloading = []
        invalid_animes = []

        # Compare against every stored (title, episode) pair.  Looking the
        # title up with ``.index`` only ever saw the first stored episode of
        # a title, so later episodes of the same title were fetched again.
        downloaded_pairs = set(zip(self.already_downloaded_animes,
                                   self.already_downloaded_eps))

        self._log("Filtering the following animes: ")
        for anime in anime_data:
            self._log("\t\t" + anime["name"] + ", EP: " + anime["ep_no"])
            key = anime["name"].lower()
            if key not in self.valid_animes:
                invalid_animes.append(anime)
            elif (key, anime["ep_no"]) in downloaded_pairs:
                found_but_not_downloading.append(anime)
            else:
                anime_to_download.append(anime)
        return anime_to_download, found_but_not_downloading, invalid_animes

    def get_download_link_per_anime(self, link):
        """Open an episode page and pick its download link.

        Returns:
            ``(file_name, download_link, status)`` where ``status`` is the
            string produced by ``choose_from_quality``.
        """
        try:
            self.driver.get(link)
        except common.exceptions.InvalidSessionIdException:
            # The browser session is gone: start a new one and retry once.
            self.initialize_driver()
            self.driver.get(link)

        self.driver.find_element_by_class_name('server_button_l').click()
        # The click opens the download page in a second window; drop the
        # episode page and continue in the new one.
        handles = self.driver.window_handles
        self.driver.close()
        self.driver.switch_to.window(handles[1])

        name = self.driver.find_element_by_id('title').text
        link_containers = self.driver.find_element_by_class_name(
            "mirror_link").find_elements_by_class_name('dowload')
        download_links = {}
        for container in link_containers:
            download_links[container.text] = container.find_element_by_tag_name(
                'a').get_attribute('href')

        link, status = self.choose_from_quality(download_links)
        name = name.replace(":", " -") + ".mp4"
        return name, link, status

    def choose_from_quality(self, qualities):
        """Pick one link from ``qualities`` according to ``options.quality``.

        Args:
            qualities: Mapping of button caption to URL, in page order.

        Returns:
            ``(url, "200")`` when a link was chosen, ``("none", "404")``
            otherwise. A tuple is always returned so callers can unpack it.
        """
        quality_type = self.options.quality["type"]
        quality_value = self.options.quality["value"]

        if quality_type == "force_res":
            label = self._QUALITY_LABELS.get(quality_value)
            if label is None:
                return "none", "404"
            if label in qualities:
                return qualities[label], "200"
            if self.options.alternate_quality and self._FALLBACK_LABEL in qualities:
                return qualities[self._FALLBACK_LABEL], "200"
            return "none", "404"

        if quality_type == "comparative" and qualities:
            # dict views cannot be indexed on Python 3, and the last entry
            # is at index -1, not len(qualities).
            urls = list(qualities.values())
            if quality_value == "best":
                return urls[0], "200"
            if quality_value == "worst":
                return urls[-1], "200"

        return "none", "404"

    def start_part_1(self):
        """Scan the listing pages, classify the episodes and log the result."""
        initial_data = self.get_initial_data()
        self._log("no of all animes: " + str(len(initial_data)))

        self.downloading, self.not_downloading, self.not_valid = self.filter_anime_data(
            initial_data)
        self._log("downloading :" + str(len(self.downloading)) + " - already downloading :" +
                  str(len(self.not_downloading)) + " - not valid :" + str(len(self.not_valid)))

        self._log_anime_list("Anime to download :", self.downloading)
        self._log_anime_list("Anime that have already been downloaded :", self.not_downloading)
        self._log_anime_list("Anime that are not valid :", self.not_valid)

    def get_mid_term_data(self):
        """Return ``(downloading, not_downloading, not_valid, errors)``."""
        return self.downloading, self.not_downloading, self.not_valid, self.errors

    def start_part_2(self):
        """Resolve the download link of every episode queued in phase one.

        Episodes whose page could not be processed within
        ``options.max_refresh_page`` attempts are stored in
        ``unable_to_find`` with status ``"504"``.
        """
        for anime_data in self.downloading:
            self._log("Attempting to find the download link for the anime: " + anime_data["name"] +
                      ", url: " + anime_data["link"])
            if self._resolve_download_link(anime_data):
                continue

            self.unable_to_find.append({
                "name": anime_data["name"],
                "ep_no": anime_data["ep_no"],
                "page_link": anime_data["link"],
                "file_name": "404",
                "download_link": "404",
                "status": "504"
            })
            self._log_error("Max page refresh failed for the anime: " + anime_data["name"] +
                            ", url: " + anime_data["link"] + ". It cannot be downloaded")

    def _resolve_download_link(self, anime_data):
        """Try to resolve one episode; return True once its page was processed.

        With ``options.auto_exception_control`` disabled the first error is
        re-raised after the logs have been saved.
        """
        max_attempts = self.options.max_refresh_page
        for attempt in range(1, max_attempts + 1):
            try:
                file_name, download_link, status = self.get_download_link_per_anime(
                    anime_data["link"])
                record = {
                    "name": anime_data["name"],
                    "ep_no": anime_data["ep_no"],
                    "page_link": anime_data["link"],
                    "file_name": file_name,
                    "download_link": download_link,
                    "status": status
                }
                if status == "200":
                    self.complete_data.append(record)
                    self._log("The download link for the anime: " + anime_data["name"] +
                              ", url: " + anime_data["link"] +
                              " was found successfully, the link is: " + download_link)
                elif status == "404":
                    self.unable_to_find.append(record)
                    self._log_error("Unable to find anime: " + anime_data["name"] + ", url: " +
                                    anime_data["link"] + " in the specified quality")
                return True
            except Exception as ex:
                if not self.options.auto_exception_control:
                    self.save_logs()
                    raise
                # Exceptions raised without arguments have empty ``args``.
                detail = ex.args[0] if ex.args else ex
                self._log_error("Error occurred loading page of anime: " + anime_data["name"] +
                                ", url: " + anime_data["link"] + ", error: " + str(detail) +
                                ", trying again attempt no :" + str(attempt) +
                                " out of " + str(max_attempts))
        return False

    def get_end_term_data(self):
        """Return ``(complete_data, unable_to_find, errors)``."""
        return self.complete_data, self.unable_to_find, self.errors

    def get_save_data(self):
        """Return ``[names, episode_numbers]`` of everything now handled.

        Covers the episodes resolved in this run followed by those that had
        already been downloaded.
        """
        entries = self.complete_data + self.not_downloading
        return [[entry["name"] for entry in entries],
                [entry["ep_no"] for entry in entries]]

    def download_animes(self):
        """Queue every resolved episode in IDM."""
        for anime in self.complete_data:
            self.downloader.download(anime["download_link"], self.options.download_path,
                                     output=anime["file_name"],
                                     confirm=self.options.confirmation)

    def save_logs(self):
        """Write the collected log lines to ``core.log`` when enabled."""
        if not self.options.generate_log_file:
            return
        # ``with`` closes the file even when a write fails.
        with open("core.log", "w", encoding="UTF-8") as log_file:
            for log in self.logs:
                # NOTE(review): both patterns are raw strings, so this
                # doubles the backslash of a literal backslash-n sequence;
                # real newline characters are left alone - confirm intent.
                log = log.replace(r"\n", r"\\n")
                log_file.write(log + "\n")
            log_file.write("[" + str(datetime.now().time()) + "]:" +
                           "Execution Completed Successfully")

    def close_crawler(self):
        """Save the logs and shut the browser down."""
        try:
            self.save_logs()
        finally:
            # Always release the browser, even if the log file cannot be
            # written.
            self.driver.quit()