def legacy_metadata_fixer(formatted_directories: dict, api: object) -> media_types:
    """Collect legacy per-type metadata files and merge them into one media_types object.

    Walks every legacy metadata directory listed in
    ``formatted_directories["legacy_metadatas"]``, converts each per-type JSON
    file ("type one" files) into the prepare_metadata format, reconciles it with
    any already-migrated metadata file found alongside it, and finally folds all
    per-directory results into a single ``media_types`` result via
    ``compare_metadata``.

    :param formatted_directories: mapping that must contain "legacy_metadatas"
        (name -> directory path) and "metadata_directory" (the current target).
    :param api: API/auth object forwarded to ``prepare_metadata``.
    :return: merged ``media_types`` object.
    """
    legacy_metadatas = formatted_directories["legacy_metadatas"]
    new_metadata_directory = formatted_directories["metadata_directory"]
    old_metadata_directory = os.path.dirname(legacy_metadatas["legacy_metadata"])
    metadata_name = os.path.basename(f"{old_metadata_directory}.json")
    pending_merges = []
    for _, legacy_directory in legacy_metadatas.items():
        # The current metadata directory is not "legacy"; skip it.
        if legacy_directory == new_metadata_directory:
            continue
        if not os.path.exists(legacy_directory):
            continue
        folders = os.listdir(legacy_directory)
        # Only files named "<MediaType>.json" are legacy type-one files.
        metadata_names = [f"{k}.json" for k, v in media_types()]
        type_one_files = main_helper.remove_mandatory_files(
            folders, keep=metadata_names)
        new_format = []
        for type_one_file in type_one_files:
            legacy_metadata_path = os.path.join(legacy_directory, type_one_file)
            legacy_metadata = import_archive(legacy_metadata_path)
            if "type" not in legacy_metadata:
                # Infer the media type from the filename, e.g. "Images.json".
                legacy_metadata["type"] = type_one_file.removesuffix(".json")
            for status_key, status in legacy_metadata.items():
                if status_key == "type":
                    continue
                # groupby requires its input sorted by the grouping key;
                # group all entries that share a post_id into one sublist.
                status.sort(key=lambda x: x["post_id"], reverse=False)
                legacy_metadata[status_key] = [
                    list(g)
                    for k, g in groupby(status, key=lambda x: x["post_id"])
                ]
            new_format.append(legacy_metadata)
        old_metadata_object = prepare_metadata(new_format, api=api).metadata
        if legacy_directory != new_metadata_directory:
            # Reconcile with a previously migrated metadata file, if present.
            import_path = os.path.join(legacy_directory, metadata_name)
            new_metadata_set = import_archive(import_path)
            if new_metadata_set:
                new_metadata_object2 = prepare_metadata(
                    new_metadata_set, api=api).metadata
                old_metadata_object = compare_metadata(
                    new_metadata_object2, old_metadata_object)
        pending_merges.append(old_metadata_object)
    # Fold every per-directory result into a single media_types object.
    results = media_types()
    for merge_into in pending_merges:
        results = compare_metadata(results, merge_into)
    return results
def fix_metadata(self, metadata, standard_format=False, api_type: str = "") -> dict:
    """Normalize any historical metadata layout to {"version": 1, "content": {...}}.

    Handled shapes:
      * list of metadata dicts (treated as v0.3): each element is fixed
        recursively and its "content" merged in;
      * flat legacy dicts (v0.1 without a "type" key, v0.2 with one): posts are
        nested under their media type, flattening grouped post lists;
      * "standard format" dicts already keyed by media type: grouped post lists
        are flattened in place;
      * metadata whose version equals ``global_version`` passes through as-is.

    :param metadata: metadata in any of the shapes above.
    :param standard_format: force the standard-format branch.
    :param api_type: media type to assume for v0.1 metadata lacking "type".
    :return: dict with "version" and "content" keys (or the original metadata
        when it is already at ``global_version``).
    """
    new_format = {}
    new_format["version"] = 1
    new_format["content"] = {}
    if isinstance(metadata, list):
        version = 0.3
        for m in metadata:
            # Recursively normalize each element and merge its content
            # (dict union keeps the later element on key collision).
            new_format["content"] |= self.fix_metadata(m)["content"]
        metadata = new_format
    else:
        version = metadata.get("version", None)
        # Top-level keys matching media type names mean "standard format".
        if any(x for x in metadata if x in media_types().__dict__.keys()):
            standard_format = True
    if not version and not standard_format and metadata:
        legacy_metadata = metadata
        media_type = legacy_metadata.get("type", None)
        if not media_type:
            version = 0.1
            media_type = api_type if api_type else media_type
        else:
            version = 0.2
        if version == 0.2:
            # "type" is metadata about the dict, not a status bucket.
            legacy_metadata.pop("type")
        new_format["content"][media_type] = {}
        for key, posts in legacy_metadata.items():
            if all(isinstance(x, list) for x in posts):
                # Posts grouped by post_id -> flatten back to a single list.
                posts = list(chain(*posts))
            new_format["content"][media_type][key] = posts
    elif standard_format:
        if any(x for x in metadata if x in media_types().__dict__.keys()):
            metadata.pop("directories", None)
        for key, status in metadata.items():
            for key2, posts in status.items():
                if all(x and isinstance(x, list) for x in posts):
                    posts = list(chain(*posts))
                metadata[key][key2] = posts
        new_format["content"] = metadata
    else:
        if global_version == version:
            # Already current; return unchanged.
            new_format = metadata
    return new_format
def __init__(self, authed=None, version=None, old_content: dict = None, export=False, reformat=False, args=None):
    """Build typed content from versioned raw metadata.

    Converts ``old_content`` (media-type -> status -> posts/medias) into
    ``post_item``/``media_item`` objects grouped per media type, storing the
    result on ``self.content``.

    :param authed: unused here; kept for interface compatibility.
    :param version: metadata version; ``global_version`` and ``1`` are supported.
    :param old_content: raw metadata dict. NOTE: this dict IS mutated
        ("directories" is popped) — existing callers rely on passing their own
        dict, which keeps behaving the same.
    :param export: unused here; kept for interface compatibility.
    :param reformat: unused here; kept for interface compatibility.
    :param args: unused here; kept for interface compatibility.
    """
    class assign_state(object):
        """Per-media-type bucket of valid/invalid post lists."""

        def __init__(self) -> None:
            self.valid = []
            self.invalid = []

        def __iter__(self):
            for attr, value in self.__dict__.items():
                yield attr, value

    # BUG FIX: the original defaults were mutable ({} shared across every
    # call) and old_content is mutated below via .pop(); use None sentinels.
    old_content = {} if old_content is None else old_content
    args = {} if args is None else args
    old_content.pop("directories", None)
    new_content = media_types(assign_states=assign_state)
    for key, new_item in new_content:
        old_item = old_content.get(key)
        if not old_item:
            continue
        for old_key, old_item2 in old_item.items():
            new_posts = []
            if global_version == version:
                # Current format: each entry is already a post with medias.
                for old_post in old_item2:
                    post = self.post_item(old_post)
                    new_medias = []
                    for media in old_post and post.medias:
                        media["media_type"] = key
                        new_medias.append(self.media_item(media))
                    post.medias = new_medias
                    new_posts.append(post)
            elif version == 1:
                # v1 format: flat media rows; group rows that share a post_id
                # into one post (groupby needs the pre-sort on the same key).
                old_item2.sort(key=lambda x: x["post_id"])
                media_list = [
                    list(g)
                    for k, g in groupby(old_item2, key=lambda x: x["post_id"])
                ]
                for media_list2 in media_list:
                    post = self.post_item(media_list2[0])
                    for item in media_list2:
                        item["media_type"] = key
                        post.medias.append(self.media_item(item))
                    new_posts.append(post)
            else:
                # NOTE(review): blocks on stdin as a crude "breakpoint" for
                # unknown versions; kept to preserve behavior.
                input("METADATA VERSION: INVALID")
            setattr(new_item, old_key, new_posts)
    self.content = new_content
def legacy_metadata_fixer(formatted_directories: dict, api: object) -> tuple[create_metadata, list]:
    """Convert legacy per-type metadata files and merge them into one create_metadata.

    For each legacy metadata directory, every "type one" file
    ("<MediaType>.json") is imported, converted via ``create_metadata(...).convert()``,
    additively merged, and reconciled with any already-migrated metadata file in
    the same directory. All per-directory results are folded together with
    ``compare_metadata``.

    :param formatted_directories: mapping that must contain "legacy_metadatas"
        (name -> directory path) and "metadata_directory" (the current target).
    :param api: API/auth object forwarded to ``create_metadata``.
    :return: (merged create_metadata, list of legacy file paths that were
        imported and can now be deleted).
    """
    delete_legacy_metadatas = []
    legacy_metadatas = formatted_directories["legacy_metadatas"]
    new_metadata_directory = formatted_directories["metadata_directory"]
    old_metadata_directory = os.path.dirname(legacy_metadatas["legacy_metadata"])
    metadata_name = os.path.basename(f"{old_metadata_directory}.json")
    pending_merges = []
    for _, legacy_directory in legacy_metadatas.items():
        # The current metadata directory is not "legacy"; skip it.
        if legacy_directory == new_metadata_directory:
            continue
        if not os.path.exists(legacy_directory):
            continue
        folders = os.listdir(legacy_directory)
        # Only files named "<MediaType>.json" are legacy type-one files.
        metadata_names = [f"{k}.json" for k, v in media_types()]
        type_one_files = main_helper.remove_mandatory_files(
            folders, keep=metadata_names)
        new_format = []
        for type_one_file in type_one_files:
            api_type = type_one_file.removesuffix(".json")
            legacy_metadata_path = os.path.join(legacy_directory, type_one_file)
            legacy_metadata = import_archive(legacy_metadata_path)
            if legacy_metadata:
                # Successfully imported -> safe for the caller to delete later.
                delete_legacy_metadatas.append(legacy_metadata_path)
                legacy_metadata = create_metadata(
                    api, legacy_metadata, api_type=api_type).convert()
                new_format.append(legacy_metadata)
        # Additively deep-merge every converted per-type dict into one.
        new_format = dict(merge({}, *new_format, strategy=Strategy.ADDITIVE))
        old_metadata_object = create_metadata(api, new_format)
        if legacy_directory != new_metadata_directory:
            # Reconcile with a previously migrated metadata file, if present.
            import_path = os.path.join(legacy_directory, metadata_name)
            new_metadata_set = import_archive(import_path)
            if new_metadata_set:
                new_metadata_object2 = create_metadata(api, new_metadata_set)
                old_metadata_object = compare_metadata(
                    new_metadata_object2, old_metadata_object)
        pending_merges.append(old_metadata_object)
    # Fold every per-directory result into a single create_metadata object.
    results = create_metadata()
    for merge_into in pending_merges:
        results = compare_metadata(results, merge_into)
    return results, delete_legacy_metadatas
def __init__(self, api=None, subscription=None) -> None:
    """Download every media item recorded for a subscription's databases.

    Reads each metadata database listed in the subscription's
    ``download_info["metadata_locations"]``, buckets its media rows by media
    type, and downloads each bucket (except "Texts") through a worker pool.
    Sets ``self.downloaded`` accordingly.
    """
    if not api:
        return
    username = subscription.username
    download_info = subscription.download_info
    if not download_info:
        self.downloaded = False
        return
    self.downloaded = True
    metadata_locations = download_info["metadata_locations"]
    directory = download_info["directory"]
    for parent_type, value in metadata_locations.items():
        for api_type, metadata_path in value.items():
            Session, engine = db_helper.create_database_session(metadata_path)
            database_session = Session()
            db_collection = db_helper.database_collection()
            database = db_collection.chooser(api_type.lower())
            api_table = database.api_table
            media_table = database.media_table
            # Bucket every media row by its media_type attribute.
            media_type_list = media_types()
            for row in database_session.query(media_table).all():
                getattr(media_type_list, row.media_type).append(row)
            for location, media_set in media_type_list.__dict__.items():
                if location == "Texts":
                    continue
                header = (
                    "Download Processing\n"
                    f"Name: {username} | Type: {api_type} | "
                    f"Count: {len(media_set)} {location} | "
                    f"Directory: {directory}\n"
                )
                print(header)
                # NOTE(review): `multiprocessing` here is presumably a
                # project-level pool factory, not the stdlib module — verify.
                pool = multiprocessing()
                pool.starmap(self.download, product(media_set, [api]))
            database_session.commit()