Example 1
def sync_document_terms(db_session: SessionLocal, project: Project):
    """Performs term extraction from known documents."""
    p = plugin_service.get_active_instance(
        db_session=db_session, plugin_type="storage", project_id=project.id
    )

    if not p:
        log.debug("Tried to sync document terms but couldn't find any active storage plugins.")
        return

    terms = term_service.get_all(db_session=db_session, project_id=project.id).all()
    log.debug(f"Fetched {len(terms)} terms from database.")

    term_strings = [t.text.lower() for t in terms if t.discoverable]
    phrases = build_term_vocab(term_strings)
    matcher = build_phrase_matcher("dispatch-term", phrases)

    documents = get_all(db_session=db_session)
    for doc in documents:
        log.debug(f"Processing document. Name: {doc.name}")

        try:
            if "sheet" in doc.resource_type:
                mime_type = "text/csv"
            else:
                mime_type = "text/plain"

            doc_text = p.instance.get(doc.resource_id, mime_type)
            extracted_terms = list(set(extract_terms_from_text(doc_text, matcher)))

            matched_terms = (
                db_session.query(Term)
                .filter(func.upper(Term.text).in_([func.upper(t) for t in extracted_terms]))
                .all()
            )

            log.debug(f"Extracted the following terms from {doc.weblink}. Terms: {extracted_terms}")

            if matched_terms:
                doc.terms = matched_terms
                db_session.commit()

        except Exception as e:
            # even if one document fails, we don't want the rest to fail
            log.exception(e)
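
A minimal sketch of how this function might be invoked. The hard-coded project id and the try/finally session handling are assumptions for illustration, not part of the example above; SessionLocal and Project are the same objects the snippet already relies on.

# Hypothetical caller; the project id of 1 is a placeholder.
db_session = SessionLocal()
try:
    project = db_session.query(Project).get(1)
    sync_document_terms(db_session=db_session, project=project)
finally:
    db_session.close()
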
Example 2
def auto_tagger(db_session: SessionLocal, project: Project):
    """Attempts to take existing tags and associate them with incidents."""
    tags = tag_service.get_all(db_session=db_session, project_id=project.id).all()
    log.debug(f"Fetched {len(tags)} tags from database.")

    tag_strings = [t.name.lower() for t in tags if t.discoverable]
    phrases = build_term_vocab(tag_strings)
    matcher = build_phrase_matcher("dispatch-tag", phrases)

    for incident in get_all(db_session=db_session, project_id=project.id).all():
        plugin = plugin_service.get_active_instance(
            db_session=db_session, project_id=incident.project.id, plugin_type="storage"
        )

        log.debug(f"Processing incident. Name: {incident.name}")

        doc = incident.incident_document

        if doc:
            try:
                mime_type = "text/plain"
                text = plugin.instance.get(doc.resource_id, mime_type)
            except Exception as e:
                log.debug(f"Failed to get document. Reason: {e}")
                log.exception(e)
                continue

            extracted_tags = list(set(extract_terms_from_text(text, matcher)))

            matched_tags = (
                db_session.query(Tag)
                .filter(func.upper(Tag.name).in_([func.upper(t) for t in extracted_tags]))
                .all()
            )

            incident.tags.extend(matched_tags)
            db_session.commit()

            log.debug(
                f"Associating tags with incident. Incident: {incident.name}, Tags: {extracted_tags}"
            )
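
The tag matching above uses the same build_term_vocab / build_phrase_matcher / extract_terms_from_text helpers as Example 1. A self-contained sketch of that matching step follows; the tags and sample text are made up, and only the call pattern mirrors the example.

# Hypothetical inputs; only the helper calls follow the example above.
tag_strings = ["phishing", "data loss", "ddos"]
phrases = build_term_vocab(tag_strings)
matcher = build_phrase_matcher("dispatch-tag", phrases)

sample_text = "We saw a phishing campaign followed by a short ddos."
extracted_tags = list(set(extract_terms_from_text(sample_text, matcher)))
# extracted_tags would then drive the Tag query shown above
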
Example 3
def list_tasks(client: Any, file_id: str):
    """Returns all tasks in file."""
    doc = get_file(client, file_id)

    document_meta = {"document": {"id": file_id, "name": doc["name"]}}

    all_comments = list_comments(client, file_id)
    task_comments = filter_comments(all_comments)

    tasks = []
    for t in task_comments:
        status = get_task_status(t)
        assignees = [
            {"individual": {"email": x}} for x in get_assignees(t["content"])
        ]
        description = t.get("quotedFileContent", {}).get("value", "")
        tickets = get_tickets(t["replies"])

        task_meta = {
            "task": {
                "resource_id": t["id"],
                "description": description,
                "created_at": t["createdTime"],
                "assignees": assignees,
                "tickets": tickets,
                "weblink": f'https://docs.google.com/a/{GOOGLE_DOMAIN}/document/d/{file_id}/edit?disco={t["id"]}',
            }
        }

        # this is a dirty hack because google doesn't return emailAddresses for comments
        # complete with conflicting docs
        # https://developers.google.com/drive/api/v2/reference/comments#resource
        from dispatch.database.core import SessionLocal
        from dispatch.individual.models import IndividualContact

        db_session = SessionLocal()
        owner = (
            db_session.query(IndividualContact)
            .filter(IndividualContact.name == t["author"]["displayName"])
            .first()
        )

        if owner:
            task_meta["task"].update({"owner": {"individual": {"email": owner.email}}})

        db_session.close()

        task_meta["task"].update(status)

        tasks.append({**document_meta, **task_meta})

    return tasks
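
A hedged usage sketch: drive_client is assumed to be an already-authenticated Google Drive API client of whatever type get_file and list_comments expect, and the file id is a placeholder.

# Hypothetical caller; the client object and file id are assumptions.
tasks = list_tasks(drive_client, file_id="<google-doc-file-id>")
for task in tasks:
    meta = task["task"]
    print(meta["weblink"], meta["description"], meta.get("owner"))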