def get_terms(db_session, text: str) -> List[str]:
    """Extract known, discoverable terms from free-form text.

    Args:
        db_session: Active SQLAlchemy session.
        text: The text to scan for occurrences of known terms.

    Returns:
        The term strings found in ``text`` (may contain duplicates).
    """
    # Only discoverable terms participate in matching. ``is_(True)`` is the
    # SQLAlchemy-idiomatic boolean filter and avoids the ``== True`` lint
    # suppression the original needed.
    all_terms = db_session.query(Term).filter(Term.discoverable.is_(True)).all()
    phrases = build_term_vocab([t.text for t in all_terms])
    matcher = build_phrase_matcher("dispatch-terms", phrases)
    return extract_terms_from_text(text, matcher)
def message_tagging(
    config: SlackConversationConfiguration,
    user_id: str,
    user_email: str,
    channel_id: str,
    incident_id: int,
    event: EventEnvelope = None,
    db_session=None,
    slack_client=None,
):
    """Looks for incident tags in incident messages and attaches any matches.

    Args:
        config: Slack conversation configuration (unused here, kept for the
            handler signature).
        user_id / user_email / channel_id: Slack event context (unused here).
        incident_id: The incident whose channel produced the message.
        event: Slack event envelope; ``event.event.text`` is the message body.
        db_session: Active SQLAlchemy session.
        slack_client: Slack client (unused here).
    """
    text = event.event.text
    incident = incident_service.get(db_session=db_session, incident_id=incident_id)

    # Build a phrase matcher over the project's discoverable tags only.
    tags = tag_service.get_all(db_session=db_session, project_id=incident.project.id).all()
    tag_strings = [t.name.lower() for t in tags if t.discoverable]
    phrases = build_term_vocab(tag_strings)
    matcher = build_phrase_matcher("dispatch-tag", phrases)

    # Deduplicate matches before hitting the database.
    extracted_tags = list(set(extract_terms_from_text(text, matcher)))
    if not extracted_tags:
        # Nothing matched; avoid an empty IN () query and a no-op commit.
        return

    # Scope the lookup to the incident's project so identically named tags
    # from other projects are not attached — the vocabulary above was already
    # project-scoped, but the original lookup was not.
    matched_tags = (
        db_session.query(Tag)
        .filter(Tag.project_id == incident.project.id)
        .filter(func.upper(Tag.name).in_([func.upper(t) for t in extracted_tags]))
        .all()
    )

    incident.tags.extend(matched_tags)
    db_session.commit()
def sync_document_terms(db_session: SessionLocal, project: Project):
    """Performs term extraction from known documents and links matched terms.

    Args:
        db_session: Active SQLAlchemy session.
        project: The project whose documents and terms are synced.
    """
    p = plugin_service.get_active_instance(
        db_session=db_session, plugin_type="storage", project_id=project.id
    )
    if not p:
        log.debug("Tried to sync document terms but couldn't find any active storage plugins.")
        return

    terms = term_service.get_all(db_session=db_session, project_id=project.id).all()
    log.debug(f"Fetched {len(terms)} terms from database.")

    # Only discoverable terms participate in matching.
    term_strings = [t.text.lower() for t in terms if t.discoverable]
    phrases = build_term_vocab(term_strings)
    matcher = build_phrase_matcher("dispatch-term", phrases)

    documents = get_all(db_session=db_session)
    for doc in documents:
        log.debug(f"Processing document. Name: {doc.name}")
        try:
            # Sheets are fetched as CSV so their cell text is extractable.
            if "sheet" in doc.resource_type:
                mime_type = "text/csv"
            else:
                mime_type = "text/plain"
            doc_text = p.instance.get(doc.resource_id, mime_type)
            extracted_terms = list(set(extract_terms_from_text(doc_text, matcher)))
            # Scope the lookup to this project — the vocabulary above was
            # project-scoped, but the original lookup queried all terms and
            # could link identically named terms from other projects.
            matched_terms = (
                db_session.query(Term)
                .filter(Term.project_id == project.id)
                .filter(func.upper(Term.text).in_([func.upper(t) for t in extracted_terms]))
                .all()
            )
            log.debug(f"Extracted the following terms from {doc.weblink}. Terms: {extracted_terms}")
            if matched_terms:
                doc.terms = matched_terms
                db_session.commit()
        except Exception as e:
            # even if one document fails we don't want them to all fail
            log.exception(e)
def auto_tagger(db_session):
    """Attempts to take existing tags and associate them with incidents.

    For each project, builds a phrase matcher from the project's discoverable
    tags and scans every incident's document for matches.

    Args:
        db_session: Active SQLAlchemy session.
    """
    for project in project_service.get_all(db_session=db_session):
        tags = tag_service.get_all(db_session=db_session, project_id=project.id).all()
        log.debug(f"Fetched {len(tags)} tags from database.")

        tag_strings = [t.name.lower() for t in tags if t.discoverable]
        phrases = build_term_vocab(tag_strings)
        matcher = build_phrase_matcher("dispatch-tag", phrases)

        # The storage plugin is per-project, so look it up once per project
        # instead of once per incident (the original queried it inside the
        # incident loop with incident.project.id, which equals project.id here).
        plugin = plugin_service.get_active_instance(
            db_session=db_session, project_id=project.id, plugin_type="storage"
        )
        if not plugin:
            # Without storage we cannot fetch any document in this project;
            # the original would have raised (and swallowed) an
            # AttributeError per incident instead.
            log.debug("Tried to fetch incident documents but couldn't find any active storage plugins.")
            continue

        for incident in get_all(db_session=db_session, project_id=project.id).all():
            log.debug(f"Processing incident. Name: {incident.name}")

            doc = incident.incident_document
            if not doc:
                # No document to scan; nothing to tag for this incident.
                continue
            try:
                mime_type = "text/plain"
                text = plugin.instance.get(doc.resource_id, mime_type)
            except Exception as e:
                log.debug(f"Failed to get document. Reason: {e}")
                log.exception(e)
                continue

            extracted_tags = list(set(extract_terms_from_text(text, matcher)))
            # Scope the lookup to this project for consistency with the
            # project-scoped vocabulary built above.
            matched_tags = (
                db_session.query(Tag)
                .filter(Tag.project_id == project.id)
                .filter(func.upper(Tag.name).in_([func.upper(t) for t in extracted_tags]))
                .all()
            )
            incident.tags.extend(matched_tags)
            db_session.commit()
            log.debug(
                f"Associating tags with incident. Incident: {incident.name}, Tags: {extracted_tags}"
            )
def auto_tagger(db_session):
    """Attempts to take existing tags and associate them with locations.

    Builds a phrase matcher from all discoverable tags and scans every
    location's document for matches.

    Args:
        db_session: Active SQLAlchemy session.
    """
    tags = tag_service.get_all(db_session=db_session).all()
    log.debug(f"Fetched {len(tags)} tags from database.")

    tag_strings = [t.name.lower() for t in tags if t.discoverable]
    phrases = build_term_vocab(tag_strings)
    matcher = build_phrase_matcher("dispatch-tag", phrases)

    p = plugins.get(
        INCIDENT_PLUGIN_STORAGE_SLUG
    )  # this may need to be refactored if we support multiple document types

    for location in get_all(db_session=db_session).all():
        log.debug(f"Processing location. Name: {location.name}")

        doc = location.location_document
        if not doc:
            # A location without a document is an expected condition — skip
            # it rather than tripping an AttributeError inside the try block
            # and reporting it to Sentry as a storage failure.
            continue
        try:
            mime_type = "text/plain"
            text = p.get(doc.resource_id, mime_type)
        except Exception as e:
            log.debug(f"Failed to get document. Reason: {e}")
            sentry_sdk.capture_exception(e)
            continue

        extracted_tags = list(set(extract_terms_from_text(text, matcher)))
        matched_tags = (
            db_session.query(Tag)
            .filter(func.upper(Tag.name).in_([func.upper(t) for t in extracted_tags]))
            .all()
        )
        location.tags.extend(matched_tags)
        db_session.commit()
        log.debug(
            f"Associating tags with location. Location: {location.name}, Tags: {extracted_tags}"
        )