def get_resources_in_firesbase(db) -> Dict[Resource, str]:
    """Map every well-formed resource in the Firestore collection to its document id.

    Streams all documents of ``FIREBASE_COLLECTION`` from ``db`` and parses each
    one with ``Resource.from_dict``. Documents whose payload raises ``KeyError``
    during parsing are logged and skipped.

    Args:
        db: Object exposing ``collection(name).stream()``
            (presumably a Firestore client -- confirm against caller).

    Returns:
        A dict keyed by the parsed ``Resource`` with the originating document
        id as value. If two documents parse to equal resources, the id of the
        later one wins.
    """
    firestore_resources = {}
    for doc in db.collection(FIREBASE_COLLECTION).stream():
        try:
            resource = Resource.from_dict(doc.to_dict())
        except KeyError:
            log(f"Document with id {doc.id} in Firestore has incorrect or missing fields")
            # Skip the malformed document: without this, `resource` would be
            # unbound (first iteration) or still hold the previous document's
            # resource, which would then be re-mapped to the wrong id.
            continue
        firestore_resources[resource] = doc.id
    return firestore_resources
def remove_duplicates(docs):
    """Delete every document whose parsed resource was already seen earlier in ``docs``.

    The first document for a given resource is kept; each later document that
    parses to an equal resource is deleted through ``doc.reference.delete()``.
    Documents that cannot be parsed (``Resource.from_dict`` raises ``KeyError``)
    are logged and left untouched, so one malformed document no longer aborts
    the pass halfway through.

    Args:
        docs: Iterable of objects exposing ``to_dict()``, ``id`` and
            ``reference.delete()`` (presumably Firestore document snapshots
            -- confirm against caller).
    """
    seen = set()
    total = removed = 0
    log("Delete duplicates:")
    for doc in docs:
        try:
            resource = Resource.from_dict(doc.to_dict())
        except KeyError:
            log(f"Document with id {doc.id} has incorrect or missing fields")
            # Malformed documents are neither counted nor deleted.
            continue
        if resource in seen:
            doc.reference.delete()
            log(f"\tDocument with title \"{resource.title}\"")
            removed += 1
        else:
            seen.add(resource)
        total += 1
    log(f"Found and deleted {removed} duplicates from {total} documents.")
def remove_duplicates(docs):
    """Remove documents that repeat a resource already encountered in ``docs``.

    Each document is parsed with ``Resource.from_dict``. The first occurrence
    of a resource is remembered; any later document parsing to an equal
    resource is deleted via ``reference.delete()``. A ``KeyError`` raised while
    handling a document is logged, and that document is not counted in the
    total.

    Args:
        docs: Iterable of objects exposing ``to_dict()``, ``id`` and
            ``reference.delete()`` (presumably Firestore document snapshots
            -- confirm against caller).
    """
    unique_resources = set()
    examined = 0
    deleted = 0
    log("Delete duplicates:")
    for snapshot in docs:
        try:
            candidate = Resource.from_dict(snapshot.to_dict())
            if candidate not in unique_resources:
                # First time this resource shows up: keep it.
                unique_resources.add(candidate)
            else:
                snapshot.reference.delete()
                log(f"\tDocument with title \"{candidate.title}\"")
                deleted += 1
            examined += 1
        except KeyError:
            log(f"Document with id {snapshot.id} has incorrect or missing fields")
    log(f"Found and deleted {deleted} duplicates from {examined} documents.")
# Example #4
# 0
def get_tags_by_category(docs):
    """Collect the distinct tags used within each resource category.

    Every document is parsed with ``Resource.from_dict``; its ``tags`` are
    merged into the set kept for its ``category``. A ``KeyError`` raised while
    handling a document is logged and the document is skipped.

    Args:
        docs: Iterable of objects exposing ``to_dict()`` and ``id``
            (presumably Firestore document snapshots -- confirm against caller).

    Returns:
        A dict mapping each category to a list of its unique tags. The list
        order comes from iterating a set, so it is not guaranteed.
    """
    log("Gathering tags by category...")
    tag_sets = {}
    for snapshot in docs:
        try:
            parsed = Resource.from_dict(snapshot.to_dict())
            bucket = tag_sets.get(parsed.category)
            if bucket is None:
                tag_sets[parsed.category] = set(parsed.tags)
            else:
                # In-place union keeps the set already stored in tag_sets.
                bucket |= set(parsed.tags)
        except KeyError:
            log(f"Document with id {snapshot.id} has incorrect or missing fields")

    # Convert the per-category sets into lists for the caller.
    category_tags = {category: list(tags) for category, tags in tag_sets.items()}
    num_tags = sum(len(tags) for tags in category_tags.values())

    log(f"Found {len(category_tags)} categories and {num_tags} tags in total")
    return category_tags
def remove_duplicates(category_docs):
    """Deduplicate each category's list fields and its ``resources`` subcollection.

    For every category document:

    * ``resource_list`` and ``tag_list`` are rewritten without repeated
      entries, keeping the first occurrence of each entry in its original
      position. Both fields are sent in one ``update`` call.
    * every document streamed from the ``resources`` subcollection is parsed
      with ``Resource.from_dict``; a document whose resource was already seen
      (in this or an earlier category -- ``seen`` is shared across the whole
      run) is deleted.

    A ``KeyError`` raised while handling a resource document is logged and the
    document is not counted in the total.

    Args:
        category_docs: Iterable of objects exposing ``get(field)``, ``id`` and
            ``reference`` with ``update(...)`` / ``collection(...).stream()``
            (presumably Firestore document snapshots -- confirm against caller).
    """
    seen = set()
    total = removed = 0
    log("Delete duplicates:")
    for category_doc in category_docs:
        # dict.fromkeys drops repeats while preserving first-occurrence order;
        # a plain set() would scramble the stored arrays. Writing lists in a
        # single update also avoids a second round trip per category.
        category_doc.reference.update({
            "resource_list": list(dict.fromkeys(category_doc.get("resource_list"))),
            "tag_list": list(dict.fromkeys(category_doc.get("tag_list"))),
        })
        for doc in category_doc.reference.collection("resources").stream():
            try:
                resource = Resource.from_dict(doc.to_dict())
                if resource in seen:
                    doc.reference.delete()
                    log(f"\tDocument with title \"{resource.title}\"")
                    removed += 1
                else:
                    seen.add(resource)
                total += 1
            except KeyError:
                log(f"Document with id {doc.id}  in {category_doc.id} has incorrect or missing fields")
    log(f"Found and deleted {removed} duplicates from {total} documents.")