def download(self, media, api):
    """Download a single media item, retrying up to 11 times.

    Picks the first session whose link set contains the media link,
    HEAD-checks the link for a usable content-length, skips files that
    already exist with the same size (dupe check), then streams the
    file to disk and normalises its mtime to the media creation date.

    Returns True on success, False if the file was a dupe or the last
    attempt failed; returns None (falsy) on the early-exit paths
    (already downloaded, or no matching session) -- NOTE(review):
    callers that distinguish True/False/None should confirm the early
    `return` (not `return return_bool`) is intentional.
    """
    return_bool = True
    if not overwrite_files and media.downloaded:
        return
    count = 0
    # Only a session that already knows this link can serve it.
    sessions = [x for x in api.sessions if media.link in x.links]
    if not sessions:
        return
    session = sessions[0]
    while count < 11:
        links = [media.link]

        def choose_link(session, links):
            # Return [link, content_length] for the first link whose HEAD
            # response carries a content-length header; None otherwise.
            for link in links:
                r = api.json_request(
                    link, session, "HEAD", stream=True, json_format=False)
                if not isinstance(r, requests.Response):
                    continue
                header = r.headers
                content_length = header.get('content-length')
                if not content_length:
                    continue
                content_length = int(content_length)
                return [link, content_length]

        result = choose_link(session, links)
        if not result:
            count += 1
            continue
        link = result[0]
        content_length = result[1]
        media.size = content_length
        date_object = media.created_at
        download_path = os.path.join(media.directory, media.filename)
        timestamp = date_object.timestamp()
        if not overwrite_files:
            # Same-size file already on disk: just fix its mtime and skip.
            if main_helper.check_for_dupe_file(download_path, content_length):
                main_helper.format_image(download_path, timestamp)
                return_bool = False
                media.downloaded = True
                break
        r = api.json_request(link, session, stream=True, json_format=False)
        if not isinstance(r, requests.Response):
            return_bool = False
            count += 1
            continue
        downloader = main_helper.downloader(r, download_path, count)
        if not downloader:
            count += 1
            continue
        main_helper.format_image(download_path, timestamp)
        link_string = f"Link: {link}"
        # BUGFIX: this label read "Link:" for the path; now matches the
        # "Path:" label used by the sibling download() implementations.
        path_string = f"Path: {download_path}"
        print(link_string)
        print(path_string)
        media.downloaded = True
        break
    return return_bool
def profile_scraper(api, directory, username):
    """Download a subscription's avatar and header images.

    Saves each image under <directory>/<username>/Profile/<Avatars|Headers>/
    using the second-to-last URL path segment as the filename (.jpg).
    Existing files are skipped unless overwrite_files is set.
    """
    y = api.get_subscription(username)
    q = []
    avatar = y.avatar
    header = y.header
    if avatar:
        q.append(["Avatars", avatar])
    if header:
        q.append(["Headers", header])
    for media_type, media_link in q:
        directory2 = os.path.join(directory, username, "Profile", media_type)
        os.makedirs(directory2, exist_ok=True)
        download_path = os.path.join(
            directory2, media_link.split("/")[-2] + ".jpg")
        if not overwrite_files:
            if os.path.isfile(download_path):
                continue
        session = api.sessions[0]
        r = api.json_request(media_link, session, stream=True,
                             json_format=False, sleep=False)
        if not isinstance(r, requests.Response):
            continue
        # BUGFIX: the original `while True: ... continue` looped forever
        # when downloader() failed -- and the streamed response body is
        # consumed by the first attempt, so a retry on the same response
        # can never succeed. Attempt once and move to the next image.
        main_helper.downloader(r, download_path)
def download(medias):
    # Download every media dict in `medias`, retrying each up to 11 times.
    # Each entry appears to provide "session", "links", "postedAt"
    # (in "%d-%m-%Y %H:%M:%S" form), "directory" and "filename" keys --
    # assumed from the lookups below; TODO confirm against the caller.
    # Returns False if any item was skipped as a duplicate or had a
    # failed request on its last attempt, True otherwise.
    return_bool = True
    for media in medias:
        count = 0
        session = media["session"]
        while count < 11:
            links = media["links"]

            def choose_link(session, links):
                # First link whose HEAD response reports a content-length
                # wins; returns [link, content_length] or None.
                for link in links:
                    r = main_helper.json_request(session, link, "HEAD", stream=True, json_format=False)
                    if not isinstance(r, requests.Response):
                        continue
                    header = r.headers
                    content_length = header.get('content-length')
                    if not content_length:
                        continue
                    content_length = int(content_length)
                    return [link, content_length]

            result = choose_link(session, links)
            if not result:
                # No usable link this round; count it as a failed attempt.
                count += 1
                continue
            link = result[0]
            content_length = result[1]
            date_object = datetime.strptime(media["postedAt"], "%d-%m-%Y %H:%M:%S")
            download_path = os.path.join(media["directory"], media["filename"])
            timestamp = date_object.timestamp()
            if not overwrite_files:
                # Same-size file already exists: stamp its mtime and skip.
                if main_helper.check_for_dupe_file(download_path, content_length):
                    main_helper.format_image(download_path, timestamp)
                    return_bool = False
                    break
            r = main_helper.json_request(session, link, stream=True, json_format=False)
            if not isinstance(r, requests.Response):
                return_bool = False
                count += 1
                continue
            downloader = main_helper.downloader(r, download_path, count)
            if not downloader:
                count += 1
                continue
            # Set the file's mtime to the post date, then log success.
            main_helper.format_image(download_path, timestamp)
            log_download.info("Link: {}".format(link))
            log_download.info("Path: {}".format(download_path))
            break
    return return_bool
def download(self, post: format_content.post_item, api):
    """Download every media item attached to `post`.

    Each media gets up to 11 attempts. Files already on disk with a
    matching size are skipped (their mtime is normalised instead and
    they are marked downloaded). Returns True unless a dupe was skipped
    or a request failed on the final state.
    """
    return_bool = True
    max_attempts = 11
    for media in post.medias:
        if not overwrite_files and media.downloaded:
            continue
        session = media.session
        if not session:
            continue
        attempt = 0
        while attempt < max_attempts:
            candidates = media.links

            def choose_link(session, links):
                # First candidate whose HEAD reply reports a size wins;
                # returns [link, content_length] or None.
                for candidate in links:
                    head = api.json_request(
                        candidate, session, "HEAD", stream=True,
                        json_format=False)
                    if not isinstance(head, requests.Response):
                        continue
                    size = head.headers.get('content-length')
                    if not size:
                        continue
                    return [candidate, int(size)]

            chosen = choose_link(session, candidates)
            if not chosen:
                attempt += 1
                continue
            link, content_length = chosen
            media.size = content_length
            posted = datetime.strptime(post.postedAt, "%d-%m-%Y %H:%M:%S")
            download_path = os.path.join(media.directory, media.filename)
            timestamp = posted.timestamp()
            if not overwrite_files and main_helper.check_for_dupe_file(
                    download_path, content_length):
                # Same-size file already on disk: fix its mtime and stop.
                main_helper.format_image(download_path, timestamp)
                return_bool = False
                media.downloaded = True
                break
            response = api.json_request(
                link, session, stream=True, json_format=False)
            if not isinstance(response, requests.Response):
                return_bool = False
                attempt += 1
                continue
            if not main_helper.downloader(response, download_path, attempt):
                attempt += 1
                continue
            main_helper.format_image(download_path, timestamp)
            log_download.info("Link: {}".format(link))
            log_download.info("Path: {}".format(download_path))
            media.downloaded = True
            break
    return return_bool
def profile_scraper(api: start, site_name, api_type, username, text_length, base_directory):
    """Download a subscription's avatar and header images.

    Builds the destination directory via prepare_reformat using the
    json_settings format templates, then saves each image under
    <formatted_dir>/<Avatars|Headers>/ named after the second-to-last
    URL path segment (.jpg). Existing files are skipped unless
    overwrite_files is set.
    """
    reformats = {}
    reformats["metadata_directory_format"] = json_settings[
        "metadata_directory_format"]
    reformats["file_directory_format"] = json_settings["file_directory_format"]
    reformats["file_directory_format"] = reformats[
        "file_directory_format"].replace("{value}", "")
    reformats["filename_format"] = json_settings["filename_format"]
    option = {}
    option["site_name"] = site_name
    option["api_type"] = api_type
    option["username"] = username
    option["date_format"] = date_format
    option["maximum_length"] = text_length
    option["directory"] = base_directory
    # Only the file-directory result (b) is used below.
    a, b, c = prepare_reformat(option, keep_vars=True).reformat(reformats)
    # BUGFIX: removed a stray bare `print` expression here -- a debug
    # leftover that referenced the builtin without calling it (a no-op).
    y = api.get_subscription(identifier=username)
    override_media_types = []
    avatar = y.avatar
    header = y.header
    if avatar:
        override_media_types.append(["Avatars", avatar])
    if header:
        override_media_types.append(["Headers", header])
    for media_type, media_link in override_media_types:
        directory2 = os.path.join(b, media_type)
        os.makedirs(directory2, exist_ok=True)
        download_path = os.path.join(
            directory2, media_link.split("/")[-2] + ".jpg")
        if not overwrite_files:
            if os.path.isfile(download_path):
                continue
        session = api.sessions[0]
        r = api.json_request(media_link, session, stream=True,
                             json_format=False, sleep=False)
        if not isinstance(r, requests.Response):
            continue
        # BUGFIX: the original `while True: ... continue` spun forever when
        # downloader() failed -- and the streamed response is consumed by
        # the first attempt, so retrying the same response cannot succeed.
        main_helper.downloader(r, download_path)