def check(url, base_url, author):
    """Validate a download link and open a streaming request for it.

    The current attempt counter read from ``cfg`` is used as the resolution
    handed to ``url_format``.

    Args:
        url: Link to format and request.
        base_url: Link handed to ``test_link`` when formatting fails; ``url``
            is used instead when this is falsy.
        author: Passed through to ``test_link``.

    Returns:
        ``(filename, request, resolution)`` on success; ``False`` when the
        link is unsupported or the request could not be opened; otherwise
        whatever ``test_link`` returns when ``url_format`` raises
        ``IndexError``.
    """
    resolution = cfg.read_attempts()
    try:
        url = url_format(url, resolution)
    except IndexError as error:
        return test_link(base_url if base_url else url,
                         author=author,
                         resolution=resolution,
                         error=error)

    # Default failure reason; replaced below if the request itself fails.
    error = "This link does not go to a supported site!"
    try:
        filename = media_files.rename(
            url.split("?name=")[1].split("&token=ip=")[0] + ".crdownload")
    except IndexError:
        # The expected "?name=" marker is missing from the link.
        filename = False

    request = None
    if filename:
        # Only hit the network once the link is known to be usable, so no
        # streaming response is opened (and left dangling) for a link that is
        # rejected anyway.
        try:
            request = req.get(url,
                              headers=headers,
                              stream=True,
                              timeout=(cfg.timeout / 2, cfg.timeout))
        except (req.exceptions.ConnectionError,
                req.exceptions.InvalidURL,
                req.exceptions.ReadTimeout):
            print("DEBUG: error")
            error = f"Connection error with {media.format_title(filename)}."
            filename = False

    if not filename:
        print(error)
        log(error)
        return False
    return filename, request, resolution
def search(self, url, media_type=0):
    """Open a search page and collect its results with per-title metadata.

    ``media_type`` selects the result category: 0 is Movie (HD), 1 is
    Movie (CAM), and 2 or more is TV Show. When a category has no results the
    search is retried with the next category; a TV Show search with no
    results raises ``NoResults``.

    Returns:
        ``(results, metadata)`` where ``metadata`` maps each title (colons
        removed) to a dict of its ``data-*`` attributes plus an ``"img"``
        poster URL.
    """
    # Choose the element/description classes for the requested category.
    if media_type >= 2:  # TV Show
        element_class, description_class = "item_series", "_skQummZWZxE"
    elif media_type == 1:  # Movie (CAM)
        element_class, description_class = "item_cam", "_smQamBQsETb"
    elif media_type == 0:  # Movie (HD)
        element_class, description_class = "item_hd", "_smQamBQsETb"

    self.open_link(url)
    # NOTE: "decription_class" is the keyword spelling the callee is invoked
    # with throughout this block; it is kept as-is.
    results, descriptions = self.get_results_from_search(
        element_class=element_class, decription_class=description_class)

    if not results:
        if media_type >= 2:  # TV Show was the last category to try
            raise NoResults
        return self.search(url, media_type=media_type + 1)

    if media_type == 1:
        log("**INFO:** Film is in CAM quality.", silent=False)

    if not descriptions:
        # Results exist but no descriptions were found: retry with the movie
        # description class.
        results, descriptions = self.get_results_from_search(
            element_class=element_class, decription_class="_smQamBQsETb")

    metadata = {}
    media_type_index = 1
    # The index passed to the poster lookup starts at 2 and advances for
    # every description, including the ones skipped below.
    for result_index, description in enumerate(descriptions, start=2):
        if description.get_attribute("data-filmname") != description.text:
            continue
        poster_url = self.get_movie_poster_url(result_index, media_type_index)
        entry = {
            "data-filmname":
            description.get_attribute("data-filmname").replace(":", ""),
            "data-year": description.get_attribute("data-year"),
            "data-imdb":
            description.get_attribute("data-imdb").split(": ")[1],
            "data-duration": description.get_attribute("data-duration"),
            "data-country": description.get_attribute("data-country"),
            "data-genre": description.get_attribute("data-genre"),
            "data-descript": description.get_attribute("data-descript"),
            "img": poster_url,
        }
        metadata[description.text.replace(":", "")] = entry
    return results, metadata
def file_size(filename, count, start_time=None, target_size=None):
    """Return the current size of *filename*, periodically logging progress.

    A progress line is logged on the 4th call (``count == 3``) and on every
    25th chunk afterwards, but only when both ``start_time`` and
    ``target_size`` are provided.

    Args:
        filename: File being downloaded; its size is read via ``media.size``.
        count: Zero-based index of the chunk just written.
        start_time: ``time.time()`` timestamp of when the download started.
        target_size: Expected final size, in the same unit ``media.size``
            returns (presumably bytes, given the MB conversion below).

    Returns:
        The size reported by ``media.size`` for *filename*.
    """
    size = media.size(filename)
    size_MB = round(size / 1024 / 1024, 2)
    is_report_tick = (count + 1) % 25 == 0 or count == 3
    if is_report_tick and start_time and target_size:
        filename = media.format_title(filename)
        elapsed = time.time() - start_time
        # Nothing may have reached the disk yet (buffered writes) and the
        # clock may not have advanced; avoid dividing by zero in either case.
        speed = size / elapsed if elapsed > 0 else 0
        speed_MB = round(speed * 8 / (1024 * 1024), 2)
        if speed > 0:
            # Clamp so an oversized file never yields a negative duration.
            remaining_size = max(target_size - size, 0)
            ETA = time.strftime("%Hh %Mm %Ss",
                                time.gmtime(remaining_size / speed))
        else:
            ETA = "unknown"
        size_MB, target_size = int(size_MB), int(target_size / 1024 / 1024)
        msg = f"Downloading {filename} at ~{speed_MB} Mbps, ETA: {ETA} ({size_MB}/{target_size} MB)."
        log(msg, silent=False)
    return size
def check_for_captcha_solve(timeout=100):
    """Wait for a captcha solution to appear in ``captcha.txt``.

    When run as a script, the solution is first requested on stdin and
    written to the file. The file is then polled every half second; once it
    exists, its first entry is returned and the file is removed.

    Args:
        timeout: Maximum time to wait, in seconds (ints and floats accepted).

    Returns:
        The first element returned by ``media.read_file`` for the file, or
        ``False`` if no solution showed up in time.
    """
    if __name__ == "__main__":
        media.write_file("captcha.txt", input("Solve the captcha:\n> "))
    filename = "captcha.txt"

    # Two polls per second; int() keeps range-style counting valid for floats.
    polls_left = int(timeout * 2)
    while True:
        # Check before sleeping so an already-present solution is picked up
        # without a needless half-second delay.
        if os.path.isfile(filename):
            solved_captcha = media.read_file(filename)[0]
            media.remove_file(filename)
            return solved_captcha
        if polls_left <= 0:
            break
        polls_left -= 1
        time.sleep(0.5)

    log(f"Captcha was not solved within {timeout} seconds.\nAborting download.",
        silent=False)
    return False
def run(self, resolution_override=None):
    """Download the media behind ``self.url`` into the library folders.

    Returns True when the download is complete and False if it permanently
    failed (no usable link was found by ``best_quality``).

    Args:
        resolution_override: When truthy, passed to ``Stream`` instead of the
            resolution chosen by ``best_quality``. The credit and log lines
            still report the resolution chosen by ``best_quality``.
    """
    self.url, request, resolution = self.best_quality(self.url)
    if self.url is False:
        return False

    filmname = self.metadata["data-filmname"]
    year = self.metadata["data-year"]

    if "Season" in filmname and "Episode" in filmname:
        print("Media is detected as TV Show.")
        show_title = filmname.split(" - ")[0]
        season = filmname.split(" - Season ")[1].split(
            " Episode")[0].split(" [")[0]
        # Zero-pad single-character season numbers ("1" -> "01").
        season = season if len(season) >= 2 else "0" + season
        episode = filmname.split(" Episode ")[1].split(": ")[0]
        # The episode title is deliberately not part of the filename, so it
        # is no longer parsed (the old parse result was never used).
        filename = f"{show_title} - s{season}ep{episode}"
        absolute_path = os.path.abspath(
            f"../TV SHOWS/{show_title}/Season {season}/{filename}.crdownload")
    else:
        print("Media is detected as Movie/Film.")
        # endswith() also copes with an empty title, where indexing [-1]
        # would raise IndexError.
        filename = (filmname
                    if filmname.endswith(")") else f"{filmname} ({year})")
        absolute_path = os.path.abspath(
            f"../MOVIES/{filename}/{filename}.crdownload")

    stream = Stream(
        request,
        absolute_path,
        (resolution_override if resolution_override else resolution),
    )
    stream.stream()

    # The ".crdownload" suffix is only appended to absolute_path above, so
    # this normally leaves filename unchanged; kept for parity.
    filename = filename.replace(".crdownload", ".mp4")
    file_size = round(
        int(request.headers.get("content-length", 0)) / 1024 / 1024, 2)
    media.credit(self.author,
                 filename=filename,
                 resolution=resolution,
                 file_size=file_size)
    log(f"Finished download of {filename} in {resolution}p ({file_size} MB).",
        silent=False)
    return True
def search(self, query):
    """Search the site for *query*, open the first result and scrape it.

    Args:
        query: Free-text title to search for. Whitespace runs are collapsed
            and the text is percent-encoded before being put in the URL.

    Returns:
        The result of ``self.run()`` for the first hit, or ``False`` (after
        quitting the driver) when the search produced no results.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # Percent-encode the whole query (including "/", "?" and "#") so special
    # characters cannot change the structure of the search URL. Spaces still
    # become "%20".
    search_arg = quote(" ".join(query.split()), safe="")
    self.driver.get(f"https://gomovies-online.cam/search/{search_arg}")
    try:
        self.click(
            "/html/body/main/div/div/section/div[1]/div/movies[1]/div/div/div/div/a"
        )
    except NoSuchElementException:  # no results
        error = f"Search for {query} yielded no results."
        print(error)
        log(error)
        self.driver.quit()
        return False

    # The page reached by the click is turned into its player page by
    # appending this suffix.
    url = self.driver.current_url + "-online-for-free.html"
    self.driver.get(url)
    return self.run()
def check_captcha(self):
    """Look for the player captcha and return its page elements.

    Returns:
        ``(captcha_image, captcha_input, captcha_submit)``. All three are
        ``None`` when a ``TimeoutException`` is raised while locating them.
    """
    image_xpath = "//*[@id=\"checkcapchamodelyii-captcha-image\"]"
    input_xpath = "//*[@id=\"checkcapchamodelyii-captcha\"]"
    submit_xpath = "//*[@id=\"player-captcha\"]/div[3]/div/div"

    try:
        captcha_image = self.wait_until_element(By.XPATH,
                                                image_xpath,
                                                timeout=1.5)
        captcha_input = self.driver.find_element(By.XPATH, input_xpath)
        captcha_submit = self.driver.find_element(By.XPATH, submit_xpath)
    except TimeoutException:
        return None, None, None

    elements = (captcha_image, captcha_input, captcha_submit)
    if not captcha_image:
        return elements

    # A captcha is on screen: tell the user how to submit the solution.
    print("DEBUG: Captcha!")
    log("Captcha! Solve using the command:\n```beta solve <captcha_solution>```"
        )
    return elements
def run(self, xpath="//*[@id=\"_skqeqEJBSrS\"]/div[2]/video", attr="src"):
    """Scrape an attribute from the element at *xpath*, refreshing until set.

    After handling any captcha, waits up to 30 seconds for the element to
    become visible and reads *attr* from it. Values that are missing or
    shorter than 100 characters are treated as "not loaded yet": the page is
    refreshed and the whole sequence is repeated.

    Args:
        xpath: XPath of the element to read.
        attr: Name of the attribute to return.

    Returns:
        The attribute value (the driver is quit first), or ``False`` when the
        element never became visible.
    """
    # Retrying is done in a loop rather than by recursion. A nested call
    # could return False (making len() fail in the caller) and would quit
    # the driver while the outer call still needed it.
    while True:
        print("WEB SCRAPING")
        log("Waiting on web scraper (up to 35 seconds).")
        self.check_captcha()
        print("DEBUG: Finished check_captcha function")
        try:
            # until() returns the located element, so no second lookup is
            # needed.
            element = WebDriverWait(self.driver, 30).until(
                EC.visibility_of_element_located((By.XPATH, xpath)))
            data = element.get_attribute(attr)
        except TimeoutException:
            print("DEBUG: Link invalid, scraping failed.")
            return False

        if data and len(data) >= 100:
            self.driver.quit()
            return data

        print("DEBUG: No data!")
        self.driver.refresh()
def test_link(url, author, start_time=0, resolution=0, filename=False, error=False):
    """Handle a failed download by retrying or giving up.

    A failure counts as a bad link when it happened within 10 seconds of
    ``start_time`` or when ``error`` is truthy. In that case the attempt
    counter is incremented and the download is retried, unless ``resolution``
    is already the last index of ``quality``, in which case the attempts are
    reset and the link is reported as invalid. Any other failure is logged
    and the download is simply restarted.

    Args:
        url: Link to retry.
        author: Passed through to ``download``.
        start_time: ``time()`` timestamp of when the failed download began.
        resolution: Index into ``quality`` used for the failed attempt.
        filename: Optional file name, formatted for the log message.
        error: Truthy when the caller already knows the link failed.

    Returns:
        Always ``False``.
    """
    if filename:
        filename = media.format_title(filename)
    label = filename if filename else url

    if ((time() - start_time) < 10) or error:
        if int(resolution) >= len(quality) - 1:
            error = f"Failed download of {label}, link is invalid."
            print(error)
            log(error)
            cfg.reset_attempts()
            return False
        cfg.increment_attempts()
    else:
        error = f"Failed download of {label}.\nRestarting download..."
        print(error)
        log(error)

    # download() takes (url, base_url, author); the link being retried is
    # also passed as the base link so the call matches that signature.
    download(url, base_url=url, author=author)
    return False
def check_captcha(self, xpath="//*[@id=\"checkcapchamodelyii-captcha-image\"]", attr="src"):
    """Detect a captcha image and, if one is present, get it solved.

    Waits up to 5 seconds for the element at *xpath*. When found, a
    screenshot is taken and the solution is obtained either from stdin (when
    run as a script) or by polling ``solved_captcha.txt`` once per second
    until 60 seconds have passed since ``self.start_time``. A non-empty
    solution is entered and submitted.

    Returns:
        The captcha element's *attr* value, or ``False`` when no captcha
        appeared.
    """
    try:
        captcha_element = WebDriverWait(self.driver, 5).until(
            EC.visibility_of_element_located((By.XPATH, xpath)))
        captcha_element = self.driver.find_element_by_xpath(xpath)
        captcha = captcha_element.get_attribute(attr)
        filename = self.screenshot_captcha(captcha_element)
        print(f"DEBUG: Captcha, {captcha}")
    except TimeoutException:
        print("DEBUG: No captcha")
        captcha = False

    if not captcha:
        return captcha

    if __name__ == "__main__":
        solved_captcha = input("Enter the solved captcha:\n> ")
    else:
        log("Captcha! Please solve using the command: ```!solve <captcha_solution>```\nREMIND IAN TO FIX THIS --> please don't mess up or the download will fail."
            )
        log(f"--file={filename}")
        # From here on the solution file is watched instead of the screenshot.
        answer_file = "solved_captcha.txt"
        solved_captcha = False
        while not solved_captcha and (time() - self.start_time) < 60:
            sleep(1)
            if os.path.isfile(answer_file):
                solved_captcha = media.read_file(answer_file)[0]
                media.remove_file(answer_file)
                print(
                    f"DEBUG: Solved captcha, {solved_captcha}, {not solved_captcha}/{(time() - self.start_time) < 60}"
                )

    if solved_captcha:
        self.solve_captcha(solved_captcha)
        self.submit_captcha()
    return captcha
def write(self):
    """Stream ``self.request`` into ``self.filename``, retrying on errors.

    The target path is verified, the file is (re)created in binary mode and
    each chunk from ``self.request.iter_content`` is written to it, with
    ``progress.file_size`` called after every chunk. If anything raises while
    streaming, the error is logged and the whole write is started again.
    """
    # Local import keeps this block self-contained.
    import os

    self.verify_path()
    # File name without directory or extension. os.path handles dots inside
    # the title or a parent directory, and platform path separators.
    title = os.path.splitext(os.path.basename(self.filename))[0]
    size_MB = round(self.target_size / 1024 / 1024, 2)

    failure = None
    with open(self.filename, "wb") as file:
        start_time = time.time()
        msg = f"Downloading {title} in {self.resolution}p ({size_MB} MB)..."
        log(msg, silent=False)
        try:
            for count, chunk in enumerate(
                    self.request.iter_content(chunk_size=self.chunk_size)):
                file.write(chunk)
                progress.file_size(self.filename,
                                   count,
                                   start_time,
                                   target_size=self.target_size)
        except Exception as e:  # deliberate catch-all: any failure retries
            failure = e

    if failure is not None:
        log(f"ERROR with {title}: Connection Reset!\nRetrying download..."
            )
        log(str(failure))
        # Retry only after the "with" block has closed the file, so the same
        # path is never open twice while it is being truncated and rewritten.
        # NOTE(review): the retry reuses self.request; confirm its content
        # can be iterated again after a failure.
        self.write()
def best_quality(self, url):
    """Find the first resolution in ``resolution_list`` the link serves.

    Args:
        url: The media link, or an element whose ``src`` attribute holds it.

    Returns:
        ``(url, request, resolution)`` for the first resolution whose status
        code is 200, with ``url`` formatted for that resolution. Returns
        ``(False, None, None)`` when there is no URL, when a 403 is seen
        (expired link), or when no resolution answered with 200.
    """
    failure = (False, None, None)

    if not url:
        log("ERROR: No URL! Maybe there were no search results?", silent=False)
        return failure

    if not isinstance(url, str):
        url = url.get_attribute("src")

    status_codes = []
    # TODO: The proccess of checking every resolution's status code takes too long (fix me)
    for target_res in resolution_list:
        status, request = validate_url(url, target_res)
        status_codes.append(status)
        if status == 200:
            url = url_format(url, target_res)
            break
        if status == 403:
            filmname = self.metadata["data-filmname"]
            log(f"ERROR: Link expired while scraping \"{filmname}\".")
            return failure
    else:
        # The loop ended without a 200: report the last status seen.
        log(f"ERROR: Status code {status_codes[-1]}.")
        return failure

    return url, request, target_res
filename=filename, resolution=resolution, file_size=file_size) log(f"Finished download of {filename} in {resolution}p ({file_size} MB).", silent=False) return True if __name__ == "__main__": def run_download(url, metadata, author): download_function = Download(url, metadata, author) threaded_download = Thread(target=download_function.run) threaded_download.start() scraper = Scraper(minimize=False) search = input("Enter a Title to search for:\n> ") while search: download_queue = scraper.download_first_from_search(search) if download_queue: for data in download_queue: if None in data: log("No results!", silent=False) run_download(data[0], data[1][list(data[1])[0]], data[2]) search = input("Enter a Title to search for:\n> ") else: log("No results!", silent=False)
def download(url, base_url, author):
    """Download the file behind *url*, verify it, and finalize it as ``.mp4``.

    The link is validated with ``check``, streamed to
    ``{media_files.path}/{filename}`` and then sanity-checked: an empty file
    or a "403 Forbidden" text body is handed to ``test_link``, and a size
    mismatch restarts the download. On success the author is credited and
    the file is renamed from ``.crdownload`` to ``.mp4``.

    Args:
        url: Link to download.
        base_url: Link to use for retries; ``url`` is used when this is falsy.
        author: Credited on success and passed on to retries.

    Returns:
        The final status message on completion (also when the rename failed
        because the target exists), ``False`` when scraping failed or the
        download was restarted, or the result of ``test_link``.
    """
    global start_time
    retry_url = base_url if base_url else url

    data = check(url, base_url, author=author) if url else url
    print(data)
    if not data:
        error = "Scraping failed. Link is invalid or captcha was not solved."
        print(error)
        log(error)
        return False

    filename, request, resolution = data
    target_size = request.headers.get("content-length", 0)
    rounded_target_size = round(int(target_size) / 1024 / 1024, 2)
    absolute_path = f"{media_files.path}/{filename}"
    make_directory()
    start_time = time()

    try:
        stream.download_file(request,
                             absolute_path,
                             resolution,
                             start_time=start_time)
    except (ConnectionError, ConnectionResetError, ChunkedEncodingError,
            SSLError):
        log(f"Connection error while downloading {media.format_title(filename)}.\nRestarting download..."
            )
        # base_url is a required parameter, so it has to be passed along.
        download(retry_url, base_url, author=author)
        return False
    except req.exceptions.HTTPError as error:
        return test_link(retry_url, author=author, error=error)

    file_size = round(size(absolute_path) / 1024 / 1024, 2)
    if file_size == 0:
        # Keyword arguments keep start_time/resolution out of the "author"
        # and "start_time" slots of test_link's signature.
        return test_link(retry_url,
                         author=author,
                         start_time=start_time,
                         resolution=resolution)

    # Some failures arrive as a small text error page instead of media; look
    # for it in the first lines. Real media raises UnicodeDecodeError, which
    # simply ends the scan.
    with open(absolute_path, "r") as file:
        try:
            for count, line in enumerate(file):
                if count > 20:
                    break
                if "403 Forbidden" in line:
                    return test_link(retry_url,
                                     author=author,
                                     start_time=start_time,
                                     resolution=resolution)
        except UnicodeDecodeError:
            pass

    cfg.reset_attempts()
    filename = media.format_title(filename)
    resolution = quality[int(resolution)]

    if file_size != rounded_target_size:
        msg = f"{file_size}/{rounded_target_size} MB"
        msg = f"Error while downloading {filename}, incomplete file ({msg}).\nRestarting download..."
        print(msg)
        log(msg)
        download(retry_url, base_url, author=author)
        return False

    final_msg = f"Finished download of {filename} in {resolution}p ({file_size} MB)."
    media.credit(author,
                 filename=filename,
                 resolution=resolution,
                 file_size=file_size)
    complete = media.rename(absolute_path,
                            absolute_path.replace(".crdownload", ".mp4"))
    absolute_path = absolute_path.replace(".crdownload", ".mp4")
    if not complete:
        final_msg = f"Error while finishing {filename}, that file already exists.\nCould not complete."
    print(final_msg)
    log(final_msg)
    return final_msg