def __init__(self, options_file: str, weight_file: str, requires_grad: bool = False) -> None:
    """Set up the encoder from an options file and a weight file.

    Args:
        options_file: Location of the JSON options file; it is resolved
            through ``cached_path`` before being opened.
        weight_file: Location of the weights; stored as
            ``self._weight_file`` (presumably read by ``_load_weights`` --
            confirm there).
        requires_grad: Stored unchanged as ``self.requires_grad``.
    """
    super(_ElmoCharacterEncoder, self).__init__()

    options_path = cached_path(options_file)
    with open(options_path, 'r') as options_handle:
        self._options = json.load(options_handle)

    self._weight_file = weight_file
    self.output_dim = self._options['lstm']['projection_dim']
    self.requires_grad = requires_grad

    # Weights are loaded before the shared character mapper is configured;
    # keep this order unless _load_weights is known not to depend on it.
    self._load_weights()

    char_cnn_options = self._options['char_cnn']
    ELMoCharacterMapper.set_max_word_length(char_cnn_options['max_characters_per_token'])
    # NOTE(review): number_of_characters is called like a setter here --
    # confirm against the ELMoCharacterMapper in use.
    ELMoCharacterMapper.number_of_characters(char_cnn_options['n_characters'])

    # Cache the sentence-boundary character arrays for use in forward.
    # No "+ 1" masking offset is applied any more (velmo takes care of that).
    bos_characters = numpy.array(ELMoCharacterMapper.beginning_of_sentence_characters)
    eos_characters = numpy.array(ELMoCharacterMapper.end_of_sentence_characters)
    self._beginning_of_sentence_characters = torch.from_numpy(bos_characters)
    self._end_of_sentence_characters = torch.from_numpy(eos_characters)
def get_batch(self, batch, max_word_length=50):
    """Convert a batch of tokenized sentences into a tensor of character ids.

    Each word is mapped with ``ELMoCharacterMapper.convert_word_to_char_ids``
    and written into one row of the output.

    Args:
        batch: Sequence of sentences, each a sequence of words. Sentences
            may have different lengths and need not be sorted by length.
        max_word_length: Number of character ids stored per word. It must
            equal the length of what ``convert_word_to_char_ids`` returns
            (assumed to be 50 by default -- confirm against the mapper
            configuration in use).

    Returns:
        A ``torch.LongTensor`` of shape
        ``(len(batch), longest sentence length, max_word_length)``.
        Positions past the end of a shorter sentence are left at 0. An
        empty batch yields a tensor of shape ``(0, 0, max_word_length)``.
    """
    # Size the sentence axis from the longest sentence instead of trusting
    # that batch[0] is the longest; default=0 covers the empty batch.
    longest = max((len(sentence) for sentence in batch), default=0)

    # Allocate int64 directly so the ids never pass through float64.
    embed = np.zeros((len(batch), longest, max_word_length), dtype=np.int64)
    for i, sentence in enumerate(batch):
        for j, word in enumerate(sentence):
            embed[i, j, :] = ELMoCharacterMapper.convert_word_to_char_ids(word)

    # An int64 array converts to a LongTensor.
    return torch.from_numpy(embed)
def elmo_sent_mapper(sentence, max_length, pad_token="~"):
    """Map a sentence to exactly ``max_length`` character-id entries.

    Words at index ``max_length`` and beyond are dropped; positions past
    the end of ``sentence`` are filled with the character ids of
    ``pad_token``.

    Args:
        sentence: Indexable sequence of words.
        max_length: Number of entries in the result; a value of zero or
            less gives an empty list.
        pad_token: Word substituted for missing positions.

    Returns:
        A list with one ``ELMoCharacterMapper.convert_word_to_char_ids``
        result per position.
    """

    def word_at(position):
        # Real word while inside the sentence, padding afterwards.
        if position < len(sentence):
            return sentence[position]
        return pad_token

    return [
        ELMoCharacterMapper.convert_word_to_char_ids(word_at(position))
        for position in range(max_length)
    ]
import numpy as np
import torch

from allennlp.data.token_indexers.elmo_indexer import ELMoCharacterMapper
from allennlp.modules.elmo import Elmo

# Build the input: the character ids of the single word "hello", reshaped to
# [1, 1, 50] (presumably batch x sentence length x characters per word --
# confirm against the Elmo module in use).
hello_char_ids = ELMoCharacterMapper.convert_word_to_char_ids("hello")
hello_batch = np.reshape(hello_char_ids, [1, 1, 50])
a = torch.autograd.Variable(torch.LongTensor(hello_batch))

# Locations of the model's options and weights.
options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"

# NOTE(review): the third positional argument (1) is passed straight to Elmo;
# check the Elmo constructor for its meaning.
elmo_instance = Elmo(options_file, weight_file, 1)

# Run the module on the one-word input and print whatever it returns.
print(elmo_instance(a))