import json
import os
from itertools import chain, groupby, product

import jsonpickle

up = os.path.dirname  # "up one directory" shorthand used by the start() helpers below

# Project-level helpers (main_helper, import_archive, export_archive, prepare_metadata,
# compare_metadata, media_types, media_scraper, metadata_fixer, fix_metadata,
# format_media_types, create_subscription, download_media, profile_scraper,
# process_mass_message, ofrenamer, multiprocessing) and settings (j_directory,
# json_settings, webhook, export_metadata) are assumed to be imported from the
# scraper's own modules.


def legacy_metadata_fixer(formatted_directories: dict, api: object) -> media_types:
    legacy_metadatas = formatted_directories["legacy_metadatas"]
    new_metadata_directory = formatted_directories["metadata_directory"]
    old_metadata_directory = os.path.dirname(
        legacy_metadatas["legacy_metadata"])
    metadata_name = os.path.basename(f"{old_metadata_directory}.json")
    q = []
    for key, legacy_directory in legacy_metadatas.items():
        if legacy_directory == new_metadata_directory:
            continue
        if not os.path.exists(legacy_directory):
            continue
        folders = os.listdir(legacy_directory)
        # Keep only the per-media-type metadata files (Images.json, Videos.json, ...).
        metadata_names = [f"{k}.json" for k, v in media_types()]
        type_one_files = main_helper.remove_mandatory_files(
            folders, keep=metadata_names)
        new_format = []
        for type_one_file in type_one_files:
            legacy_metadata_path = os.path.join(legacy_directory, type_one_file)
            legacy_metadata = import_archive(legacy_metadata_path)
            if "type" not in legacy_metadata:
                # Derive the media type from the filename (removesuffix needs Python 3.9+).
                legacy_metadata["type"] = type_one_file.removesuffix(".json")
            for media_key, status in legacy_metadata.items():
                if media_key == "type":
                    continue
                # Bucket entries that share a post_id into one group.
                status.sort(key=lambda x: x["post_id"])
                legacy_metadata[media_key] = [
                    list(g)
                    for k, g in groupby(status, key=lambda x: x["post_id"])
                ]
            new_format.append(legacy_metadata)
        old_metadata_object = prepare_metadata(new_format, api=api).metadata
        if legacy_directory != new_metadata_directory:
            import_path = os.path.join(legacy_directory, metadata_name)
            new_metadata_set = import_archive(import_path)
            if new_metadata_set:
                new_metadata_object2 = prepare_metadata(
                    new_metadata_set, api=api).metadata
                old_metadata_object = compare_metadata(
                    new_metadata_object2, old_metadata_object)
        q.append(old_metadata_object)
    # Fold every recovered legacy metadata object into a single result.
    results = media_types()
    for merge_into in q:
        results = compare_metadata(results, merge_into)
    return results
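
# A runnable illustration (hypothetical POSIX-style sample path; the helper
# name _metadata_name_example is ours, not part of the project) of how the
# fixer above derives metadata_name from the "legacy_metadata" entry.
def _metadata_name_example():
    legacy_metadata = "/models/some_user/Posts/Metadata"
    old_metadata_directory = os.path.dirname(legacy_metadata)   # .../Posts
    return os.path.basename(f"{old_metadata_directory}.json")   # 'Posts.json'
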
def paid_content_scraper(api):
    paid_contents = api.get_paid_content(refresh=False)
    results = []
    for paid_content in paid_contents:
        # Prefer "fromUser" when present, falling back to "author".
        author = paid_content.get("author")
        author = paid_content.get("fromUser", author)
        subscription = create_subscription(author)
        subscription.sessions = api.sessions
        subscription.download_info["directory"] = j_directory
        username = subscription.username
        model_directory = os.path.join(j_directory, username)
        metadata_folder = os.path.join(model_directory, "Metadata")
        api_type = paid_content["responseType"].capitalize() + "s"
        metadata_path = os.path.join(metadata_folder, api_type + ".json")
        site_name = "OnlyFans"
        media_type = format_media_types()
        formatted_directories = main_helper.format_directories(
            j_directory, site_name, username, media_type, api_type)
        new_item = media_scraper(
            [paid_content], api, formatted_directories, username, api_type)
        for directory in new_item["directories"]:
            os.makedirs(directory, exist_ok=True)
        download_metadata = prepare_metadata(new_item).metadata
        subscription.set_scraped(api_type, download_metadata)
        metadata = prepare_metadata(new_item, export=True).metadata
        # Round-trip through jsonpickle to flatten the metadata objects to plain dicts.
        metadata = jsonpickle.encode(metadata, unpicklable=False)
        new_metadata = jsonpickle.decode(metadata)
        old_metadata = import_archive(metadata_path)
        if old_metadata:
            old_metadata = metadata_fixer(
                directory=metadata_path.replace(".json", ""),
                metadata_types=old_metadata)
            unrefined = compare_metadata(
                new_metadata, old_metadata, new_chain=True)
            unrefined = prepare_metadata(unrefined, export=True).metadata
            new_metadata = jsonpickle.encode(unrefined, unpicklable=False)
            new_metadata = jsonpickle.decode(new_metadata)
        results.append(new_metadata)
        os.makedirs(model_directory, exist_ok=True)
        export_archive(new_metadata, metadata_path, json_settings)
        download_media(api, subscription)
    return results
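
# Runnable sketch (hypothetical sample value; helper name is ours) of how
# paid_content_scraper maps an item's responseType onto its metadata filename.
def _api_type_example():
    paid_content = {"responseType": "post"}
    api_type = paid_content["responseType"].capitalize() + "s"   # 'Posts'
    return os.path.join("Metadata", api_type + ".json")          # 'Metadata/Posts.json' on POSIX
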
# "start" in the annotation below is the project's authed-session class.
def process_metadata(api: start, new_metadata, formatted_directories,
                     subscription, api_type, api_path, archive_path, site_name):
    # Fold legacy metadata, the freshly scraped set, and the archived set together.
    legacy_metadata_object = legacy_metadata_fixer(formatted_directories, api)
    new_metadata_object = prepare_metadata(new_metadata, api=api).metadata
    new_metadata_object = compare_metadata(
        new_metadata_object, legacy_metadata_object)
    old_metadata_set = import_archive(archive_path)
    old_metadata_object = prepare_metadata(old_metadata_set, api=api).metadata
    new_metadata_object = compare_metadata(
        new_metadata_object, old_metadata_object)
    if not subscription.download_info:
        subscription.download_info["metadata_locations"] = {}
    subscription.download_info["directory"] = j_directory
    subscription.download_info["webhook"] = webhook
    subscription.download_info["metadata_locations"][api_type] = archive_path
    subscription.set_scraped(api_type, new_metadata_object)
    new_metadata_object = ofrenamer.start(
        subscription, api_type, api_path, site_name, json_settings)
    subscription.set_scraped(api_type, new_metadata_object)
    return new_metadata_object
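
# Minimal sketch of the download_info bookkeeping above, with a plain dict
# standing in for the subscription object (values are hypothetical; helper
# name is ours). Note the guard assumes download_info starts as an empty
# dict rather than None.
def _download_info_example():
    download_info = {}
    if not download_info:
        download_info["metadata_locations"] = {}
    download_info["directory"] = "/downloads"
    download_info["webhook"] = False
    download_info["metadata_locations"]["Posts"] = (
        "/downloads/some_user/Metadata/Posts.json")
    return download_info
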
# Earlier, path-based variant of legacy_metadata_fixer.
def legacy_metadata_fixer(legacy_directory, new_metadata):
    if os.path.exists(legacy_directory):
        folders = os.listdir(legacy_directory)
        new_format = []
        # Skip Windows "desktop.ini" entries; the original membership test
        # checked the whole listing ("not in folders") instead of each name.
        for folder in (x for x in folders if "desktop.ini" not in x):
            legacy_metadata_path = os.path.join(legacy_directory, folder)
            metadata_type = import_archive(legacy_metadata_path)
            valid = metadata_type["valid"]
            valid.sort(key=lambda x: x["post_id"])
            metadata_type["valid"] = [
                list(g) for k, g in groupby(valid, key=lambda x: x["post_id"])
            ]
            new_format.append(metadata_type)
        old_metadata = metadata_fixer(metadata_types=new_format, export=False)
        old_metadata = prepare_metadata(old_metadata).metadata
        old_metadata = jsonpickle.encode(old_metadata, unpicklable=False)
        old_metadata = jsonpickle.decode(old_metadata)
        new_metadata = compare_metadata(
            new_metadata, old_metadata, new_chain=True)
        new_metadata = prepare_metadata(new_metadata).metadata
        new_metadata = jsonpickle.encode(new_metadata, unpicklable=False)
        new_metadata = jsonpickle.decode(new_metadata)
    return new_metadata
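
# Self-contained illustration (sample data is made up; helper name is ours)
# of the sort-then-groupby pattern both fixers use to bucket entries that
# share a post_id.
def _groupby_post_id_example():
    posts = [{"post_id": 2}, {"post_id": 1}, {"post_id": 1}]
    posts.sort(key=lambda x: x["post_id"])
    return [list(g) for _, g in groupby(posts, key=lambda x: x["post_id"])]
    # -> [[{'post_id': 1}, {'post_id': 1}], [{'post_id': 2}]]
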
def start(metadata_filepath, json_settings):
    with open(metadata_filepath) as f:
        metadatas = json.load(f)
    metadatas2 = prepare_metadata(metadatas).items
    username = os.path.basename(up(up(metadata_filepath)))
    site_name = os.path.basename(up(up(up(metadata_filepath))))
    for metadata in metadatas2:
        metadata.valid = fix_metadata(
            metadata.valid, json_settings, username, site_name)
        metadata.invalid = fix_metadata(
            metadata.invalid, json_settings, username, site_name)
    # Serialize back to plain dicts so the result compares against the input.
    metadatas2 = json.loads(
        json.dumps(metadatas2, default=lambda o: o.__dict__))
    if metadatas != metadatas2:
        main_helper.update_metadata(metadata_filepath, metadatas2)
    return metadatas2
# Revision of start() that guards against empty files and scopes fixes to a category.
def start(metadata_filepath, json_settings):
    if os.path.getsize(metadata_filepath) > 0:
        with open(metadata_filepath, encoding="utf-8") as f:
            metadatas = json.load(f)
        metadatas2 = prepare_metadata(metadatas).items
        model_path = up(up(metadata_filepath))
        username = os.path.basename(model_path)
        site_name = os.path.basename(up(up(up(metadata_filepath))))
        metadata_filename = os.path.basename(metadata_filepath)
        name = metadata_filename.split(".")[0]
        for metadata in metadatas2:
            category = os.path.join(name, metadata.type)
            metadata.valid = fix_metadata(
                metadata.valid, json_settings, username, site_name, category)
            metadata.invalid = fix_metadata(
                metadata.invalid, json_settings, username, site_name, category)
        metadatas2 = json.loads(json.dumps(
            metadatas2, default=lambda o: o.__dict__))
        if metadatas != metadatas2:
            main_helper.update_metadata(metadata_filepath, metadatas2)
        return metadatas2
    # Empty files fall through and return None; an unconditional return of
    # metadatas2 here would raise NameError.
# Later revision of start() that reads through import_archive and walks the
# metadata dict keyed by media type, skipping "Texts".
def start(metadata_filepath, json_settings):
    if os.path.getsize(metadata_filepath) > 0:
        metadatas = main_helper.import_archive(metadata_filepath)
        metadatas2 = prepare_metadata(metadatas).metadata
        model_path = up(up(metadata_filepath))
        username = os.path.basename(model_path)
        site_name = os.path.basename(up(up(up(metadata_filepath))))
        metadata_filename = os.path.basename(metadata_filepath)
        name = metadata_filename.split(".")[0]
        for key, metadata in metadatas2.items():
            if key == "Texts":
                continue
            category = os.path.join(name, key)
            metadata.valid = fix_metadata(
                metadata.valid, json_settings, username, site_name, category)
            metadata.invalid = fix_metadata(
                metadata.invalid, json_settings, username, site_name, category)
        metadatas2 = json.loads(
            json.dumps(metadatas2, default=lambda o: o.__dict__))
        if metadatas != metadatas2:
            main_helper.update_metadata(metadata_filepath, metadatas2)
        return metadatas2
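
# Runnable illustration (hypothetical POSIX-style path; helper name is ours)
# of how the start() variants recover username, site_name, and the category
# name from a metadata file path.
def _path_parts_example():
    metadata_filepath = "/sites/OnlyFans/some_user/Metadata/Posts.json"
    username = os.path.basename(up(up(metadata_filepath)))        # 'some_user'
    site_name = os.path.basename(up(up(up(metadata_filepath))))   # 'OnlyFans'
    name = os.path.basename(metadata_filepath).split(".")[0]      # 'Posts'
    return username, site_name, name
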
def prepare_scraper(api, site_name, item):
    authed = api.auth
    sessions = api.sessions
    api_type = item["api_type"]
    api_array = item["api_array"]
    link = api_array["api_link"]
    subscription = api_array["subscription"]
    locations = api_array["media_types"]
    username = api_array["username"]
    directory = api_array["directory"]
    api_count = api_array["post_count"]
    master_set = []
    media_set = []
    metadata_set = []
    pool = multiprocessing()  # the project's multiprocessing helper returns a pool
    formatted_directories = main_helper.format_directories(
        j_directory, site_name, username, locations, api_type)
    model_directory = formatted_directories["model_directory"]
    api_directory = formatted_directories["api_directory"]
    metadata_directory = formatted_directories["metadata_directory"]
    archive_directory = os.path.join(metadata_directory, api_type)
    archive_path = archive_directory + ".json"
    imported = import_archive(archive_path)
    legacy_metadata_directory = os.path.join(api_directory, "Metadata")
    if api_type == "Profile":
        profile_scraper(api, directory, username)
        return
    if api_type == "Stories":
        master_set = subscription.get_stories()
        highlights = subscription.get_highlights()
        valid_highlights = []
        for highlight in highlights:
            # "hightlight_id" matches the API method's own (misspelled) keyword.
            highlight = subscription.get_highlights(
                hightlight_id=highlight["id"])
            valid_highlights.append(highlight)
        master_set.extend(valid_highlights)
    if api_type == "Posts":
        master_set = subscription.get_posts()
    if api_type == "Archived":
        master_set = subscription.get_archived(api)
    if api_type == "Messages":
        unrefined_set = subscription.get_messages()
        if "list" in unrefined_set:
            unrefined_set = unrefined_set["list"]
        if subscription.is_me:
            mass_messages = authed["mass_messages"]
            unrefined_set2 = process_mass_message(
                api, subscription, metadata_directory, mass_messages)
            unrefined_set += unrefined_set2
        master_set = [unrefined_set]
    master_set2 = master_set
    parent_type = ""
    if api_type == "Archived":
        # Archived sets are nested per parent type; scrape each batch in the pool.
        unrefined_set = []
        for master_set3 in master_set2:
            parent_type = master_set3["type"]
            results = master_set3["results"]
            unrefined_result = pool.starmap(media_scraper, product(
                results, [api], [formatted_directories],
                [username], [api_type], [parent_type]))
            unrefined_set.append(unrefined_result)
        unrefined_set = list(chain(*unrefined_set))
    else:
        unrefined_set = pool.starmap(media_scraper, product(
            master_set2, [api], [formatted_directories],
            [username], [api_type], [parent_type]))
        unrefined_set = list(unrefined_set)
    metadata_set = main_helper.format_media_set(unrefined_set)
    if not metadata_set:
        print("No " + api_type + " Found.")
        delattr(subscription.scraped, api_type)
    if metadata_set:
        if export_metadata:
            os.makedirs(metadata_directory, exist_ok=True)
            old_metadata = metadata_fixer(archive_directory)
            old_metadata_set = prepare_metadata(old_metadata).metadata
            old_metadata_set2 = jsonpickle.encode(
                old_metadata_set, unpicklable=False)
            old_metadata_set2 = jsonpickle.decode(old_metadata_set2)
            metadata_set = compare_metadata(metadata_set, old_metadata_set2)
            metadata_set = prepare_metadata(metadata_set).metadata
            metadata_set2 = jsonpickle.encode(metadata_set, unpicklable=False)
            metadata_set2 = jsonpickle.decode(metadata_set2)
            metadata_set2 = main_helper.filter_metadata(metadata_set2)
            metadata_set2 = legacy_metadata_fixer(
                legacy_metadata_directory, metadata_set2)
            main_helper.export_archive(
                metadata_set2, archive_directory, json_settings,
                legacy_directory=legacy_metadata_directory)
        else:
            metadata_set = prepare_metadata(metadata_set).metadata
        subscription = api.get_subscription(username)
        subscription.set_scraped(api_type, metadata_set)
    return [subscription.scraped]
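
# A hedged sketch of how prepare_scraper might be driven for one api_type.
# The item dict's shape is inferred from the reads at the top of the function;
# the values are hypothetical, and the call is left commented out because it
# needs a live, authenticated api object.
#
#   item = {
#       "api_type": "Posts",
#       "api_array": {
#           "api_link": "...",
#           "subscription": subscription,
#           "media_types": format_media_types(),
#           "username": "some_user",
#           "directory": j_directory,
#           "post_count": 0,
#       },
#   }
#   prepare_scraper(api, "OnlyFans", item)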