Example #1
0
from .permissions import *
from .serializers import *
from .utils import get_medcat, add_annotations, remove_annotations, train_medcat, create_annotation

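# For local testing, load the environment variables with the snippet below
# (it is kept inside a string literal, so it does not run as-is)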
"""
from environs import Env
env = Env()
env.read_env("/home/ubuntu/projects/MedAnno/MedAnno/env_umls", recurse=False)
print(os.environ)
"""

from medcat.utils.helpers import prepare_name
from medcat.utils.loggers import basic_logger
log = basic_logger("api.views")

# Maps between IDs and objects; all three start out empty at import time.
# NOTE(review): presumably caches of loaded CDB / Vocab / CAT instances keyed
# by some model or database id - confirm against the code that fills them.
CDB_MAP = {}
VOCAB_MAP = {}
CAT_MAP = {}

# Placeholder for the basic version of MedCAT; no instance is created at
# import time. NOTE(review): presumably assigned later on - confirm.
cat = None


def index(request):
    """Render the 'index.html' template for the incoming request.

    Args:
        request: The request object, passed straight through to `render`.

    Returns:
        Whatever `render` returns for the 'index.html' template.
    """
    template_name = 'index.html'
    return render(request, template_name)


class UserViewSet(viewsets.ModelViewSet):
Example #2
0
from spacy.tokens import Span
import numpy as np
import operator
from medcat.utils.loggers import basic_logger
from medcat.utils.matutils import unitvec
import os
log = basic_logger("spacycat")

# Pick the CatAnn implementation from the TYPE environment variable (compared
# case-insensitively, default 'other'): 'umls' selects medcat.cat_ann, which
# includes specific rules that are only good for the full UMLS version; any
# other value selects medcat.basic_cat_ann.
if os.getenv('TYPE', 'other').lower() == 'umls':
    log.info("Using cat_ann for annotations")
    from medcat.cat_ann import CatAnn
else:
    log.info("Using basic_cat_ann for annotations")
    from medcat.basic_cat_ann import CatAnn



class SpacyCat(object):
    """ A Spacy pipe module, can be easily added into a spacey pipline

    cdb:  the cdb object of class cat.cdb representing the concepts
    vocab:  vocab object of class cat.utils.vocab with vector representations
    train:  should the training be performed or not, if training is False
            the disambiguation using vectors will be performed. While training is True
            it will not be performed
    """
    DEBUG = os.getenv('DEBUG', "false").lower() == 'true'
    CNTX_SPAN = int(os.getenv('CNTX_SPAN', 7))
    CNTX_SPAN_SHORT = int(os.getenv('CNTX_SPAN_SHORT', 2))
    MIN_CUI_COUNT = int(os.getenv('MIN_CUI_COUNT', 10000))
Example #3
0
import json
import os

from django.db.models.signals import post_save
from django.dispatch import receiver

from .models import Entity, AnnotatedEntity, Concept, ICDCode, OPCSCode, ProjectAnnotateEntities
from medcat.cdb import CDB
from medcat.utils.vocab import Vocab
from medcat.cat import CAT
from medcat.utils.loggers import basic_logger
log = basic_logger("api.utils")


def remove_annotations(document, project, partial=False):
    """Delete the AnnotatedEntity rows of one document within a project.

    Args:
        document: Document whose annotations are removed; its `id` is
            written to the debug log.
        project: Project the annotations have to belong to.
        partial: When truthy only rows with `validated=False` are deleted,
            otherwise every row matching project and document is deleted.

    Any exception raised along the way is caught and written to the debug
    log, so nothing propagates to the caller.
    """
    try:
        only_unvalidated = bool(partial)

        # Both modes share the project/document criteria; the partial mode
        # additionally narrows the selection to the not validated rows.
        criteria = {'project': project, 'document': document}
        if only_unvalidated:
            criteria['validated'] = False

        AnnotatedEntity.objects.filter(**criteria).delete()

        if only_unvalidated:
            log.debug(f"Unvalidated Annotations removed for:{document.id}")
        else:
            log.debug(f"All Annotations removed for:{document.id}")
    except Exception as e:
        log.debug(f"Something went wrong: {e}")


def add_annotations(spacy_doc, user, project, document, cdb, existing_annotations, tuis=[], cuis=[]):
Example #4
0
from functools import partial
from multiprocessing import Process, Manager, Queue, Pool, Array
from medcat.cdb import CDB
from medcat.spacy_cat import SpacyCat
from medcat.preprocessing.tokenizers import spacy_split_all
from medcat.utils.spelling import CustomSpellChecker
from medcat.utils.spacy_pipe import SpacyPipe
from medcat.preprocessing.cleaners import spacy_tag_punct
from medcat.utils.helpers import get_all_from_name, tkn_inds_from_doc
from medcat.utils.loggers import basic_logger
from medcat.utils.data_utils import make_mc_train_test
import time
import sys, traceback
from tqdm.autonotebook import tqdm

log = basic_logger("CAT")


class CAT(object):
    r'''
    The main MedCAT class used to annotate documents, it is built on top of spaCy
    and works as a spaCy pipline. Creates an instance of a spaCy pipline that can
    be used as a spacy nlp model.

    Args:
        cdb (medcat.cdb.CDB):
            The concept database that will be used for NER+L
        vocab (medcat.utils.vocab.Vocab, optional):
            Vocabulary used for vector embeddings and spelling. Default: None
        skip_stopwords (bool):
            If True the stopwords will be ignored and not detected in the pipeline.
Example #5
0
from spacy.tokens import Span
import numpy as np
import operator
from medcat.utils.loggers import basic_logger
from medcat.utils.matutils import unitvec
import os
log = basic_logger("cat_spacycat")

# Pick the CatAnn implementation from the TYPE environment variable (compared
# case-insensitively, default 'other'): 'umls' selects medcat.cat_ann, which
# includes specific rules that are only good for the full UMLS version; any
# other value selects medcat.basic_cat_ann.
if os.getenv('TYPE', 'other').lower() == 'umls':
    log.info("Using cat_ann for annotations")
    from medcat.cat_ann import CatAnn
else:
    log.info("Using basic_cat_ann for annotations")
    from medcat.basic_cat_ann import CatAnn


class SpacyCat(object):
    """ A Spacy pipe module, can be easily added into a spacey pipline

    cdb:  the cdb object of class cat.cdb representing the concepts
    vocab:  vocab object of class cat.utils.vocab with vector representations
    train:  should the training be performed or not, if training is False
            the disambiguation using vectors will be performed. While training is True
            it will not be performed
    """
    DEBUG = os.getenv('DEBUG', "false").lower() == 'true'
    NORM_EMB = os.getenv(
        'NORM_EMB', "false").lower() == 'true'  # Should we normalize the w2v
    PREFER_FREQUENT = os.getenv('PREFER_FREQUENT', "false").lower() == 'true'
    PREFER_CONCEPTS_WITH = os.getenv('PREFER_CONCEPTS_WITH', None)
Example #6
0
from sklearn.model_selection import train_test_split
import numpy as np
from medcat.utils.models import LSTM as MODEL
from sklearn.metrics import classification_report, f1_score, confusion_matrix, precision_score, recall_score
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

from medcat.utils.loggers import basic_logger
log = basic_logger("utils")


def get_batch(ind, batch_size, x, y, cpos, device):
    """Cut batch number `ind` out of x, y and cpos and move it to `device`.

    Args:
        ind: Zero-based index of the batch.
        batch_size: Number of leading-dimension entries per batch.
        x, y, cpos: Sliceable objects whose slices offer `.to(device)`
            (presumably torch tensors - confirm against the caller).
        device: Target passed to `.to()` for every slice.

    Returns:
        Tuple `(x_batch, y_batch, c_batch)` holding the entries
        `[ind * batch_size, (ind + 1) * batch_size)` of each input.
    """
    # One slice object describes the window shared by all three inputs
    window = slice(ind * batch_size, (ind + 1) * batch_size)

    # Slice everything first, then move the pieces to the right device
    pieces = [source[window] for source in (x, y, cpos)]
    return tuple(piece.to(device) for piece in pieces)


def train_network(net, data, lr=0.01, test_size=0.1, max_seq_len=41, pad_id=30000, batch_size=100,
                  nepochs=20, device='cpu', save_dir='./meta_cat/', class_weights=None, ignore_cpos=False,
                  auto_save_model=True, score_average='weighted'):
    # Split data
Example #7
0
""" Representation class for CDB data
"""
import pickle
import numpy as np
from scipy.sparse import dok_matrix
#from gensim.matutils import unitvec
from medcat.utils.matutils import unitvec, sigmoid
from medcat.utils.attr_dict import AttrDict
from medcat.utils.loggers import basic_logger
import os
import pandas as pd

log = basic_logger("cdb")


class CDB(object):
    """ Holds all the CDB data required for annotation
    """
    MAX_COO_DICT_SIZE = int(os.getenv('MAX_COO_DICT_SIZE', 10000000))
    MIN_COO_COUNT = int(os.getenv('MIN_COO_COUNT', 100))

    def __init__(self):
        self.index2cui = []  # A list containing all CUIs
        self.cui2index = {}  # Map from cui to index in the index2cui list
        self.name2cui = {}  # Converts a normalized concept name to a cui
        self.name2cnt = {}  # Converts a normalized concept name to a count
        self.name_isunique = {}  # Should this name be skipped
        self.name2original_name = {}  # Holds the two versions of a name
        self.name2ntkns = {}  # Number of tokens for this name
        self.name_isupper = {}  # Checks was this name all upper case in cdb
        self.cui2desc = {}  # Map between a CUI and its cdb description