def download(self, url: str, connections: int = 2**3, cal_hash: bool = False, quiet: bool = False) -> dict:
    self.url = url
    self.file_size = self.__get_file_size(url)
    self.__create_empty_file()
    if not quiet:
        p(f"[MFD] Downloading {url} with {connections} connections", end="")
    # queue every piece index, then retry until all pieces are written or retries run out
    for i in range(0, self.file_size // self.piece_size + 1):
        self.failed_parts.append(i)
    self.retry_download(connections)
    retry_ct = 0
    while len(self.failed_parts) > 0:
        if retry_ct >= self.retry:
            raise Exception(f"failed to download {self.url} after {self.retry} retries")
        self.retry_download(connections)
        retry_ct += 1
    _f = join_path(self.save_dir, self.filename)
    if not quiet:
        p(f"\r[MFD] Downloaded {url} => " + _f)
    if cal_hash:
        with open(_f, "rb") as fd:
            # return {"md5": md5hd(fd), "crc32": crc32hd(fd), "sha1": sha1hd(fd), "file_path": _f}
            return {"sha1": sha1hd(fd), "file_path": _f}
    else:
        return {"file_path": _f}
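# Usage sketch for the downloader above. Assumption: the class is exposed as mfd.MFD,
# matching the call site in the archive.org download() helper further below; the URL,
# save directory and piece size here are placeholders, not values from this codebase.
def _example_mfd_download():
    import mfd
    dl = mfd.MFD("downloads", piece_size=1024 * 1024 * 16)
    result = dl.download("https://example.com/file.bin", connections=8, cal_hash=True)
    dl.stop()  # shut down the background workers once the transfer finishes
    return result["sha1"], result["file_path"]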
def __download(self, i: int) -> None:
    header = self.__header.copy()
    end = (i + 1) * self.piece_size - 1
    # last piece: clamp the (inclusive) Range end to the final byte of the file
    if i == self.file_size // self.piece_size:
        end = self.file_size - 1
    header["Range"] = header["Range"].format(i * self.piece_size, end)
    try:
        content = requests.get(self.url, headers=header).content
        self.parts.put((i, content))
    except:
        p(f"failed to download {self.url} range " + header["Range"])
def delete(self, identifier: str, path: str):
    headers = {
        # "authorization": f"LOW {self.access}:{self.secret}",
        # "User-Agent": USER_AGENT(self.access),
        "x-archive-queue-derive": "0",
        "x-archive-cascade-delete": "1"
    }
    p(f"[Deleting] {identifier} {path}", end="")
    r = self.__session.delete(f"https://s3.us.archive.org/{identifier}/{path}", headers=headers)
    if r.status_code not in [200, 204]:
        raise Exception(f"failed to delete {identifier} {path}", r.status_code, r.content)
    p(f"\r[Deleted] {identifier} {path}")
def AFBFFF(item: str, db: str, big_item_split_parts: int = -1, split: bool = False,
           split_size: int = 1024 * 1024 * 4000, host: str = "AnonFiles", mirror: bool = False,
           _7z_exe: str = r"C:\Program Files\7-Zip\7z.exe", temp_dir: str = None, _depth: int = 0) -> None:
    if not temp_dir:
        temp_dir = os.environ["TEMP"]
    if not os.path.isabs(db):
        db = join_path(abs_main_dir(2), db)
    if not os.path.isabs(item):
        item = join_path(abs_main_dir(2), item)
    p(f"[Started] {item}")
    try:
        if os.path.isfile(item) and not split:
            files = [item]
        else:
            basename = os.path.basename(item) + ".zip"
            temp = randstr(2**3) + "_" + str(int(time.time()))
            dest = join_path(temp_dir, temp, basename)
            fs = file_size(item)
            if big_item_split_parts > 1:
                if fs >= (big_item_split_parts - 1)**2 + 1:
                    import math
                    split_size = math.ceil(fs / big_item_split_parts)
                else:
                    raise Exception(f"{item} is too small ({fs}B) to split into {big_item_split_parts} parts")
            # 7-Zip: store-only (-mx=0) zip archive, split into volumes of split_size bytes
            cmd = [_7z_exe, "a", "-tzip", f"-v{split_size}b", "-mx=0", dest, item]
            if os.path.isdir(item):
                cmd.append("-r")
            p(f"[Zipping] {item}", cmd)
            process = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
            process.communicate()
            files = [join_path(temp_dir, temp, file) for file in os.listdir(join_path(temp_dir, temp))]
            p(f"[Zipped] {item} has {len(files)} parts", files)
        for file in files:
            # AFBFFF(file, db=db, host=host, mirror=mirror, _depth=_depth+1)
            if not mirror:
                # look up the uploader class by name and upload to that single host
                globals()[host](db).upload(filename=file)
            else:
                AnonFiles(db).upload(filename=file)
                BayFiles(db).upload(filename=file)
                ForumFiles(db).upload(filename=file)
    except Exception as e:
        raise Exception(f"{item} failed to upload", e)
    p(f"[Ended] {item}")
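# Usage sketch for AFBFFF above. Assumption: the item paths and the "files.db" database
# name are placeholders; only host classes referenced above (AnonFiles, BayFiles,
# ForumFiles) are known to exist.
def _example_afbfff():
    # upload a single file as-is, recording it in the local db
    AFBFFF(r"D:\data\dataset.7z", db="files.db")
    # zip a folder, split it into 4 roughly equal parts, and push each part to all mirrors
    AFBFFF(r"D:\data\big_folder", db="files.db", big_item_split_parts=4, mirror=True)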
def download(self, save_dir: str, identifier: str, path: str, piece_size: int = 1024*1024*(2**4),
             connections: int = 2**3, cal_hash: bool = False) -> dict:
    try:
        os.makedirs(save_dir)
    except:
        pass
    url = f"https://archive.org/download/{identifier}/{path}"
    p(f"[Downloading] <{identifier}> {path} => {save_dir}", end="")
    _mfd = mfd.MFD(save_dir, piece_size=piece_size)
    _f = _mfd.download(url, connections=connections, cal_hash=cal_hash, quiet=True)
    _mfd.stop()
    _f["file_path"] = self.uncloak_file_ext(_f["file_path"])
    p(f"\r[Downloaded] {identifier} {path} => " + _f["file_path"])
    return _f
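# Usage sketch for the archive.org download() method above. Assumption: the enclosing
# client class is already constructed as `ia`; the identifier and remote path are
# placeholders.
def _example_ia_download(ia):
    info = ia.download("downloads", "some-identifier", "folder/file.bin",
                       connections=8, cal_hash=True)
    return info["file_path"]  # local path after the cloaked extension has been restored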
def rename(self, identifier: str, old_item: str, new_item: str):
    p(f"[Renaming] {identifier} {old_item} => {new_item}", end="")
    headers = {
        # "authorization": f"LOW {self.access}:{self.secret}",
        "x-amz-copy-source": "/" + urllib.parse.quote(identifier + "/" + old_item),
        "x-amz-metadata-directive": "COPY",
        "x-archive-keep-old-version": "0",
        "x-archive-queue-derive": "0",
        # "User-Agent": USER_AGENT(self.access)
    }
    url = "https://s3.us.archive.org"
    url += "/" + urllib.parse.quote(identifier + "/" + new_item)
    r = self.__session.put(url, headers=headers)
    if r.status_code != 200:
        raise Exception(f"failed to copy {identifier} {old_item} => {new_item}", r.status_code, r.content)
    p(f"\r[Renamed] {identifier} {old_item} => {new_item}")
    self.delete(identifier, old_item)
def __init__(self, onedrive_links: list, save_dir: str, chromedriver_location: str = None, throttle_fallback: bool = False):
    self.static = 5
    self.timeout = 5
    self.save_dir = save_dir
    self.throttle_fallback = throttle_fallback
    driver = None
    try:
        if chromedriver_location is not None:
            # stock chromedriver builds embed the "$cdc" marker that sites use for bot detection
            with open(chromedriver_location, "rb") as f:
                if f.read().find(b"$cdc") > 0:
                    raise Exception("{} is detected as a bot".format(chromedriver_location))
        chrome_options = ChromeOptions()
        chrome_prefs = {
            "download.default_directory": save_dir,
            "download.prompt_for_download": False,
            "profile.default_content_setting_values.automatic_downloads": 1
        }
        chrome_options.add_experimental_option("prefs", chrome_prefs)
        chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
        chrome_options.add_experimental_option("useAutomationExtension", False)
        driver = webdriver.Chrome(executable_path=chromedriver_location, options=chrome_options)
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"})
        driver.maximize_window()
        for onedrive_link in onedrive_links:
            p(onedrive_link)
            driver.get(onedrive_link)
            self.loop_folder(driver, root=True)
    except:
        traceback.print_exc()
    finally:
        if driver is not None:
            driver.quit()
def loop_folder(self, d, location=None, root=False):
    if location is None:
        location = []
    time.sleep(self.static)
    try:
        self.scroll_to_bottom_4(d)
    except:
        return self.return_parent(d, root, False)
    try:
        current_root_folder = self.get_current_folder(d)
    except:
        return self.return_parent(d, root, False)
    if not current_root_folder:
        d.quit()
        raise Exception("empty current_root_folder")
    try:
        folder_items = self.get_folder_items(d)
    except:
        return self.return_parent(d, root, False)
    location.append(current_root_folder)
    for i in range(0, len(folder_items)):
        try:
            folder_items = self.get_folder_items(d)
        except:
            return self.return_parent(d, root, False)
        current_folder = "\\".join(location)
        current_location = " > ".join(location + [folder_items[i][0]])
        if not folder_items[i][4]:
            p(current_location)
            if os.path.isfile(self.save_dir + current_folder + "\\" + folder_items[i][0]):
                continue
            self._download(d, folder_items[i], current_folder)
        else:
            folder_items[i][1].click()
            time.sleep(self.static)
            if not self.loop_folder(d, location.copy()):
                if not self.throttle_fallback:
                    d.quit()
                    raise Exception("onedrive throttle")
                folder_items = self.get_folder_items(d)
                p(current_location)
                self._download(d, folder_items[i], current_folder)
    return self.return_parent(d, root, True)
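# Usage sketch for the OneDrive scraper above. Assumption: the class name is not shown
# in this excerpt, so it is passed in here as a parameter; the share link, save
# directory and chromedriver path are placeholders.
def _example_onedrive(OneDriveScraper):
    OneDriveScraper(
        ["https://1drv.ms/f/s!placeholder"],
        save_dir=r"D:\onedrive_dump",
        chromedriver_location=r"C:\tools\chromedriver.exe",
        throttle_fallback=True,  # fall back to downloading a throttled folder as a single item
    )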
def new_identifier(self, identifier: str, title: str = None, description: str = None):
    p(f"[Identifier] Creating new {identifier}", end="")
    thumbnail_path = text2png.TextToPng(64).create(title or identifier)
    remote_filename = os.path.basename(thumbnail_path)
    org_title = title
    title = urllib.parse.quote(title) if title else identifier
    description = urllib.parse.quote(description) if description else ""
    headers = {
        # "authorization": f"LOW {self.access}:{self.secret}",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "multipart/form-data; charset=UTF-8",
        # "Referer": f"https://archive.org/upload/",
        # "User-Agent": USER_AGENT(self.access),
        "x-amz-acl": "bucket-owner-full-control",
        "x-amz-auto-make-bucket": "1",
        # "x-archive-interactive-priority": "1",
        "x-archive-queue-derive": "0",
        "x-archive-meta-mediatype": "uri(data)",
        "x-archive-meta01-collection": "uri(opensource_media)",
        "x-archive-meta01-description": f"uri(video%20software%20data%20{description})",
        "x-archive-meta01-noindex": "uri(true)",
        "x-archive-meta01-private": "uri(true)",
        "x-archive-meta01-scanner": "uri(Internet%20Archive%20HTML5%20Uploader%201.6.4)",
        # "x-archive-meta01-subject": f"uri({title})",
        "x-archive-meta01-subject": "uri(video%3Bsoftware%3Bdata)",
        "x-archive-meta01-title": f"uri({title})",
        "x-archive-size-hint": str(file_size(thumbnail_path)),
        "X-File-Size": str(file_size(thumbnail_path)),
        "Content-Length": str(file_size(thumbnail_path)),
        "X-File-Name": f"uri({remote_filename})",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site"
    }
    url = "https://s3.us.archive.org/"
    url_path = identifier + "/" + remote_filename
    url_path = url_path.replace("//", "/")
    uri = url + urllib.parse.quote(url_path, safe="")
    with open(thumbnail_path, "rb") as fo:
        r = self.__session.put(uri, data=fo, headers=headers)
    if r.status_code != 200:
        raise Exception(f"failed to create {identifier}", r.status_code, r.content)
    p(f"\r[Identifier] Created {org_title} => https://archive.org/download/{identifier}")
def worker(self) -> None:
    while not self.terminate:
        file_path, mode, content = self.fileq.get()
        if not os.path.isfile(file_path):
            # create the parent directory and an empty file before the first write
            try:
                os.makedirs(os.path.dirname(file_path))
            except:
                pass
            try:
                open(file_path, "ab").close()
            except:
                pass
        try:
            with open(file_path, mode) as f:
                f.write(content)
        except Exception as e:
            what = content
            p(f"cannot write {what} to file {file_path} using mode {mode} due to {e}")
            traceback.print_exc()
        self.fileq.task_done()
def upload(self, identifier: str, root: str, path: str, check_overwrite, check_skip_same_size):
    path_prefix = identifier.split("/")[1:]
    identifier = identifier.split("/")[0]
    file = join_path(root, path)
    file = self.cloak_file_ext(file)
    remote_filename = os.path.basename(file)
    _path = "/".join(path_prefix + file.replace(root, "")[1:].split(os.path.sep))
    if not check_overwrite(_path) and check_skip_same_size(_path):
        p("[Upload] [Warning] File {} is skipped due to existing remote file".format(join_path(root, path)))
        self.uncloak_file_ext(file)
        return
    fs = str(file_size(file))
    headers = {
        # "authorization": f"LOW {self.access}:{self.secret}",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "multipart/form-data; charset=UTF-8",
        "Referer": f"https://archive.org/upload/?identifier={identifier}",
        # "User-Agent": USER_AGENT(self.access),
        "x-amz-acl": "bucket-owner-full-control",
        "x-amz-auto-make-bucket": "1",
        # "x-archive-interactive-priority": "1",
        "x-archive-queue-derive": "0",
        "x-archive-size-hint": fs,
        "X-File-Size": fs,
        "Content-Length": fs,
        "X-File-Name": f"uri({remote_filename})",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "X-Requested-With": "XMLHttpRequest"
    }
    url = "https://s3.us.archive.org/"
    # url_path = identifier+"/"+path.replace("\\", "/")+"/"+remote_filename
    url_path = identifier + "/" + _path
    url_path = url_path.replace("//", "/")
    uri = url + urllib.parse.quote(url_path, safe="")
    p(f"[Uploading] {file} => {uri}", end="")
    fo = open(file, "rb")
    while True:
        try:
            fo.seek(0)
            r = self.__session.put(uri, data=fo, headers=headers)
            break
        except requests.exceptions.RequestException as ex:
            import time
            print(ex)
            for i in range(0, 10):
                time.sleep(1)
                print("\rretry in", i, end="", flush=True)
            print(flush=True)
        except KeyboardInterrupt as e:
            fo.close()
            self.uncloak_file_ext(file)
            raise e
    fo.close()
    self.uncloak_file_ext(file)
    if r.status_code != 200:
        raise Exception(f"failed to upload {file} => {uri}", r.status_code, r.request.headers, r.content)
    p(f"\r[Uploaded] {file} => https://archive.org/download/{url_path}")
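# Usage sketch for upload() above. Assumption: the client instance is `ia`; the two
# callbacks decide, per remote path, whether overwriting is allowed and whether an
# existing remote file of the same size means the upload can be skipped.
def _example_ia_upload(ia):
    ia.upload(
        "some-identifier/sub/dir",          # "identifier/prefix" places the file under a remote prefix
        root=r"D:\data",
        path=r"folder\file.bin",
        check_overwrite=lambda remote_path: True,        # always allow overwriting
        check_skip_same_size=lambda remote_path: False,  # never skip on matching size
    )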
def metadata(self, identifier: str, op: str, k: str, v: str):
    url = f"https://archive.org/metadata/{identifier}"
    data = {
        "-patch": [
            {
                "op": op,
                "path": "/" + k
            }
        ],
        "-target": "metadata",
        "priority": -5,
        "access": self.__session.headers["authorization"][4:].split(":")[0],
        "secret": self.__session.headers["authorization"][4:].split(":")[1]
    }
    if op != "remove":
        data["-patch"][0]["value"] = v
    p(f"[Metadata] Pending {identifier} {op} {k}: {v}", end="")
    data["-patch"] = jd(data["-patch"])
    r = self.__session.post(url, data=data)
    if r.status_code != 200:
        raise Exception(f"failed metadata {identifier} {op} {k}: {v}", r.status_code, r.content)
    p(f"\r[Metadata] Done {identifier} {op} {k}: {v}")
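# Usage sketch for metadata() above. Assumption: the client instance is `ia` and the
# patch accepts JSON-Patch-style operation names such as "add", "replace" and "remove";
# only "remove" (which needs no value) is referenced explicitly in the code above.
def _example_ia_metadata(ia):
    ia.metadata("some-identifier", "replace", "title", "A nicer title")
    ia.metadata("some-identifier", "remove", "noindex", "")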