Example #1
0
def create_processor(conf):
    """Build a TorchScript sentence-boundary ``SentenceProcessor``.

    Any of ``conf.embeddings``, ``conf.chars_file``, ``conf.hparams_file``,
    and ``conf.model_file`` that is ``None`` is filled in from the loaded
    configuration; ``conf`` is mutated in place.

    Returns the constructed :class:`SentenceProcessor`.
    """
    from types import SimpleNamespace

    config = load_config()
    # Fall back to configured default paths for anything the caller
    # did not supply explicitly.
    if conf.embeddings is None:
        conf.embeddings = Path(config['sentences.wordEmbeddings'])
    if conf.chars_file is None:
        conf.chars_file = Path(config['sentences.charsFile'])
    if conf.hparams_file is None:
        conf.hparams_file = Path(config['sentences.hparamsFile'])
    if conf.model_file is None:
        conf.model_file = Path(config['sentences.modelFile'])

    # Lazy %-style args avoid formatting when INFO is disabled.
    logger.info('Loading hparams from: %s', conf.hparams_file)
    with conf.hparams_file.open('r') as f:
        # SimpleNamespace replaces the ad-hoc empty Hparams class: the
        # model only reads attributes (e.g. hparams.word_length).
        hparams = SimpleNamespace(**yaml.load(f, Loader))
    logger.info('Loading word embeddings from: "%s"', conf.embeddings)
    words, vectors = load_vectors(conf.embeddings)
    vectors = np.array(vectors)
    logger.info('Loading characters from: %s', conf.chars_file)
    char_mapping = load_char_mapping(conf.chars_file)
    input_mapping = InputMapping(char_mapping, words, hparams.word_length)
    model = BiLSTM(hparams, n_chars(char_mapping), vectors)
    # Script before loading weights; a ScriptModule accepts load_state_dict.
    model = torch.jit.script(model)
    model.eval()
    logger.info('Loading model weights from: %s', conf.model_file)
    with conf.model_file.open('rb') as f:
        state_dict = torch.load(f)
        model.load_state_dict(state_dict)
    proc = SentenceProcessor(input_mapping, model)
    return proc
def fixture_bi_lstm_model():
    """Yield a sentence processor built from default config paths.

    Ensures the data distribution is present, constructs the processor,
    and closes it once the consuming test finishes.
    """
    check_data()
    settings = load_config()
    # Map Namespace attribute names to their configuration keys.
    path_keys = {
        'embeddings': 'sentences.wordEmbeddings',
        'chars_file': 'sentences.charsFile',
        'hparams_file': 'sentences.hparamsFile',
        'model_file': 'sentences.modelFile',
    }
    options = Namespace(
        **{attr: Path(settings[key]) for attr, key in path_keys.items()}
    )
    processor = create_processor(options)
    yield processor
    processor.close()
Example #3
0
def create_processor(conf):
    """Build a sentence-boundary ``SentenceProcessor`` for multiprocessing use.

    Configures torch threading, selects a device, loads hparams, embeddings,
    character mapping, and model weights. Any ``None`` path attribute on
    ``conf`` is filled in from the loaded configuration; ``conf`` is
    mutated in place.

    Returns the constructed :class:`SentenceProcessor`.
    """
    from types import SimpleNamespace

    # Single-threaded torch: parallelism comes from worker processes.
    torch.set_num_threads(1)
    torch.set_num_interop_threads(1)
    logging.basicConfig(level=logging.INFO)
    check_data(conf.download_data)
    config = load_config()
    if conf.embeddings is None:
        conf.embeddings = Path(config['sentences.wordEmbeddings'])
    if conf.chars_file is None:
        conf.chars_file = Path(config['sentences.charsFile'])
    if conf.hparams_file is None:
        conf.hparams_file = Path(config['sentences.hparamsFile'])
    if conf.model_file is None:
        conf.model_file = Path(config['sentences.modelFile'])
    # Explicit device override wins; otherwise prefer CUDA unless forced off.
    if conf.torch_device is not None:
        device = conf.torch_device
    elif conf.force_cpu or not torch.cuda.is_available():
        device = "cpu"
    else:
        device = "cuda"
    device = torch.device(device)
    logger.info('Using torch device: "%s"', repr(device))
    logger.info('Loading hparams from: %s', conf.hparams_file)
    with conf.hparams_file.open('r') as f:
        # SimpleNamespace replaces the ad-hoc empty Hparams class: the
        # model only reads attributes (e.g. hparams.word_length).
        hparams = SimpleNamespace(**yaml.load(f, Loader))
    logger.info('Loading word embeddings from: "%s"', conf.embeddings)
    words, vectors = load_vectors(conf.embeddings)
    vectors = np.array(vectors)
    logger.info('Loading characters from: %s', conf.chars_file)
    char_mapping = load_char_mapping(conf.chars_file)
    input_mapping = InputMapping(char_mapping, words, hparams.word_length)
    model = BiLSTM(hparams, n_chars(char_mapping), vectors)
    model.eval()
    model.to(device=device)
    model.share_memory()
    logger.info('Loading model weights from: %s', conf.model_file)
    with conf.model_file.open('rb') as f:
        # map_location keeps loading working on CPU-only machines when the
        # checkpoint was saved from a CUDA device.
        state_dict = torch.load(f, map_location=device)
        model.load_state_dict(state_dict)
    # NOTE(review): 'fork' is unavailable on Windows and raises if a start
    # method was already set — confirm this only runs once on Unix hosts.
    torch.multiprocessing.set_start_method('fork')
    processor = SentenceProcessor(input_mapping, model, device)
    return processor
Example #4
0
def check_data(download=False):
    """Ensure the BioMedICUS data distribution is installed and current.

    Resolves the data directory from ``BIOMEDICUS_DATA`` (defaulting to
    ``~/.biomedicus/data`` and exporting that default). If the data is
    missing or outdated, downloads it when ``download`` is True or the
    user consents interactively; otherwise exits the process.
    """
    try:
        data = Path(os.environ['BIOMEDICUS_DATA'])
    except KeyError:
        data = Path.home() / '.biomedicus' / 'data'
        os.environ['BIOMEDICUS_DATA'] = str(data)

    config = load_config()
    download_url = config['data.data_url']
    data_version = config['data.version']
    version_file = data / 'VERSION.txt'
    # Check the version file's existence before reading it: the previous
    # unconditional read_text() raised FileNotFoundError when the data
    # directory existed without a VERSION.txt. strip() tolerates a
    # trailing newline in the file.
    up_to_date = (data.exists()
                  and version_file.exists()
                  and version_file.read_text().strip() == data_version)
    if not up_to_date:
        print(
            'It looks like you do not have the set of models distributed for BioMedICUS.\n'
            'The models are available from our website (https://nlpie.umn.edu/downloads)\n'
            'and can be installed by specifying the environment variable BIOMEDICUS_DATA\n'
            'or by placing the extracted models in ~/.biomedicus/data')
        prompt = 'Would you like to download the model files to ~/.biomedicus/data (Y/N)? '
        if download or input(prompt) in ['Y', 'y', 'Yes', 'yes']:
            download_data_to(download_url, data)
        else:
            exit()
Example #5
0
def check_data(download=False):
    """Ensure the BioMedICUS data distribution is installed and current.

    Resolves the data directory from ``BIOMEDICUS_DATA`` (defaulting to
    ``~/.biomedicus/data`` and exporting that default). Returns early when
    the installed data matches the configured version; otherwise downloads
    it when ``download`` is True or the user consents interactively, and
    exits the process when neither holds.
    """
    data_env = os.environ.get('BIOMEDICUS_DATA')
    if data_env is not None:
        data = Path(data_env)
    else:
        data = Path.home() / '.biomedicus' / 'data'
        os.environ['BIOMEDICUS_DATA'] = str(data)

    config = load_config()
    download_url = config['data.data_url']
    data_version = config['data.version']
    version_file = data / 'VERSION.txt'
    # Report the most specific reason the data needs (re)installing.
    if not data.exists():
        print('No existing data folder.')
    elif not version_file.exists():
        print('No existing version file.')
    else:
        installed = version_file.read_text().strip()
        if installed == data_version:
            logger.info(
                'Data folder is up to date version {}'.format(data_version))
            return
        print('Data folder ({}) is not most recent version ({})'.format(
            installed, data_version))
    if not download:
        print(
            'It looks like you do not have the set of models distributed for BioMedICUS.\n'
            'The models are available from our website (https://nlpie.umn.edu/downloads)\n'
            'and can be installed by specifying the environment variable BIOMEDICUS_DATA\n'
            'or by placing the extracted models in ~/.biomedicus/data')
        answer = input(
            'Would you like to download the model files to {} (Y/N)? '.format(
                str(data)))
        download = answer in ('Y', 'y', 'Yes', 'yes')
    if not download:
        exit()
    download_data_to(download_url, data)
Example #6
0
def test_load_default_config():
    """The default BioMedICUS configuration should load as a dict."""
    from biomedicus import config
    loaded = config.load_config()
    assert isinstance(loaded, dict)