def get_resources_in_firesbase(db) -> Dict[Resource, str]:
    """Map every parseable resource document in the collection to its document id.

    Args:
        db: Client object exposing ``collection(name).stream()``
            (presumably a Firestore client -- confirm against the caller).

    Returns:
        A dict whose keys are the ``Resource`` objects built from each
        streamed document and whose values are the ids of those documents.
        Documents for which ``Resource.from_dict`` raises ``KeyError`` are
        logged and left out of the result.
    """
    firestore_resources = dict()
    for doc in db.collection(FIREBASE_COLLECTION).stream():
        try:
            resource = Resource.from_dict(doc.to_dict())
        except KeyError:
            log(f"Document with id {doc.id} in Firestore has incorrect or missing fields")
            # Skip this document: without the `continue`, the assignment below
            # would either hit an unbound `resource` (first document) or pair
            # the previous document's resource with this document's id.
            continue
        firestore_resources[resource] = doc.id
    return firestore_resources
def remove_duplicates(docs):
    """Delete every document whose resource equals one seen earlier in ``docs``.

    Each document is parsed with ``Resource.from_dict``. The first document
    for a given resource is kept; any later document that parses to an equal
    resource is deleted through ``doc.reference.delete()`` and its title is
    logged. A summary line is logged at the end.

    Args:
        docs: Iterable of document snapshots exposing ``to_dict()``,
            ``reference`` and ``id``.
    """
    seen = set()
    total = removed = 0
    log("Delete duplicates:")
    for doc in docs:
        try:
            resource = Resource.from_dict(doc.to_dict())
        except KeyError:
            # A malformed document must not abort the pass half-way through
            # (earlier duplicates have already been deleted); log and skip it.
            log(f"Document with id {doc.id} has incorrect or missing fields")
            continue
        if resource in seen:
            doc.reference.delete()
            log(f"\tDocument with title \"{resource.title}\"")
            removed += 1
        else:
            seen.add(resource)
        # Only successfully parsed documents are counted.
        total += 1
    log(f"Found and deleted {removed} duplicates from {total} documents.")
def remove_duplicates(docs):
    """Delete documents that repeat a resource already encountered in ``docs``.

    The first document for each resource is remembered; a later document that
    parses to an equal resource is deleted and its title is logged. If a
    ``KeyError`` is raised while handling a document, the document id is
    logged and the document is not counted. A summary is logged at the end.

    Args:
        docs: Iterable of document snapshots exposing ``to_dict()``,
            ``reference`` and ``id``.
    """
    log("Delete duplicates:")
    unique_resources = set()
    deleted_count = 0
    checked_count = 0
    for snapshot in docs:
        try:
            candidate = Resource.from_dict(snapshot.to_dict())
            if candidate not in unique_resources:
                unique_resources.add(candidate)
            else:
                snapshot.reference.delete()
                log(f"\tDocument with title \"{candidate.title}\"")
                deleted_count += 1
            checked_count += 1
        except KeyError:
            log(f"Document with id {snapshot.id} has incorrect or missing fields")
    log(f"Found and deleted {deleted_count} duplicates from {checked_count} documents.")
def get_tags_by_category(docs):
    """Collect the distinct tags used by the resources of each category.

    Every document is parsed with ``Resource.from_dict``. If a ``KeyError``
    is raised while handling a document, its id is logged and it contributes
    nothing to the result.

    Args:
        docs: Iterable of document snapshots exposing ``to_dict()`` and ``id``.

    Returns:
        A dict mapping each category to a list of its distinct tags. The
        lists come from sets, so their order is not meaningful.
    """
    log("Gathering tags by category...")
    tag_sets = {}
    for snapshot in docs:
        try:
            parsed = Resource.from_dict(snapshot.to_dict())
            category = parsed.category
            incoming = set(parsed.tags)
            collected = tag_sets.get(category)
            if collected is None:
                tag_sets[category] = incoming
            else:
                collected |= incoming
        except KeyError:
            log(f"Document with id {snapshot.id} has incorrect or missing fields")

    # Convert the accumulated sets to lists for the caller and count the tags.
    category_tags = {category: list(tags) for category, tags in tag_sets.items()}
    num_tags = sum(len(tags) for tags in category_tags.values())
    log(f"Found {len(category_tags)} categories and {num_tags} tags in total")
    return category_tags
def remove_duplicates(category_docs):
    """De-duplicate each category's list fields and delete duplicate resources.

    For every category document, ``resource_list`` and ``tag_list`` are
    written back without repeated entries. Then each document in the
    category's ``resources`` subcollection is parsed with
    ``Resource.from_dict``; a document whose resource was already seen is
    deleted and its title is logged. ``seen`` is shared across all
    categories, so a resource equal to one found in an earlier category is
    also treated as a duplicate. If a ``KeyError`` is raised while handling a
    resource document, it is logged and the document is not counted.

    Args:
        category_docs: Iterable of category document snapshots exposing
            ``get(field)``, ``reference`` and ``id``.
    """
    seen = set()
    total = removed = 0
    log("Delete duplicates:")
    for category_doc in category_docs:
        # dict.fromkeys drops repeats while keeping first-occurrence order and
        # yields a plain list for the write. Writing a set instead would leave
        # the stored order up to arbitrary set iteration order. Both fields go
        # out in a single update call.
        category_doc.reference.update({
            "resource_list": list(dict.fromkeys(category_doc.get("resource_list"))),
            "tag_list": list(dict.fromkeys(category_doc.get("tag_list"))),
        })
        for doc in category_doc.reference.collection("resources").stream():
            try:
                resource = Resource.from_dict(doc.to_dict())
                if resource in seen:
                    doc.reference.delete()
                    log(f"\tDocument with title \"{resource.title}\"")
                    removed += 1
                else:
                    seen.add(resource)
                total += 1
            except KeyError:
                log(f"Document with id {doc.id} in {category_doc.id} has incorrect or missing fields")
    log(f"Found and deleted {removed} duplicates from {total} documents.")