def get_elmo(options_file, weight_file, gpu, dropout):
    global elmo
    # Create the ELMo class. This example computes two output representation
    # layers, each with separate layer weights.
    # We recommend adding dropout (50% is a good default) either here or
    # elsewhere where ELMo is used (e.g. in the next-layer bi-LSTM).
    elmo = Elmo(options_file, weight_file, num_output_representations=2,
                do_layer_norm=False, dropout=dropout)
    if gpu:
        elmo.cuda()
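# A minimal usage sketch for get_elmo. The file names below are assumptions
# (local copies of the AllenNLP options/weights; the S3 URLs used elsewhere in
# this file work as well). batch_to_ids converts tokenized sentences to
# character ids, and the Elmo module returns one tensor per requested output
# representation plus a padding mask.
import torch
from allennlp.modules.elmo import Elmo, batch_to_ids

options_file = "elmo_options.json"  # assumed local path
weight_file = "elmo_weights.hdf5"   # assumed local path
get_elmo(options_file, weight_file, gpu=torch.cuda.is_available(), dropout=0.5)

sentences = [["First", "sentence", "."], ["Another", "one"]]
character_ids = batch_to_ids(sentences)  # (batch, max_len, 50) character ids
if torch.cuda.is_available():
    character_ids = character_ids.cuda()
out = elmo(character_ids)  # `elmo` is the global set by get_elmo
# Two tensors of shape (batch, max_len, dim), one per output representation;
# dim is 1024 for the standard large model.
layer0, layer1 = out['elmo_representations']
mask = out['mask']  # (batch, max_len) padding mask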
class ElmoEmbedding:
    def __init__(self, dim):
        if dim == 2048:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        elif dim == 512:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5"
        self.dim = dim
        # Two output representations are concatenated in convert_impl, so the
        # final embedding size is 2 * 1024 (dim=2048) or 2 * 256 (dim=512).
        self.elmo = Elmo(options_file, weight_file, 2, dropout=0)
        if func.gpu_available():
            self.elmo = self.elmo.cuda()
        self.elmo.eval()
        self.load()

    def save(self):
        pass

    def load(self):
        self.cache = DiskDict(f'./generate/elmo.{self.dim}.cache')

    def convert(self, sentences):
        # Collect the sentences whose embeddings are not cached yet.
        not_hit = set()
        for sent in sentences:
            key = self.make_key(sent)
            if key not in self.cache:
                not_hit.add(key)
        not_hit = list(not_hit)
        if not_hit:
            embeddings, masks = self.convert_impl(
                [self.make_sentence(key) for key in not_hit])
            for key, embedding, mask in zip(not_hit, torch.unbind(embeddings),
                                            torch.unbind(masks)):
                # Strip padding before caching.
                embedding = embedding[:mask.sum()]
                self.cache[key] = embedding.tolist()
        embeddings = [func.tensor(self.cache[self.make_key(sent)])
                      for sent in sentences]
        mlen = max([e.shape[0] for e in embeddings])
        embeddings = [func.pad_zeros(e, mlen, 0) for e in embeddings]
        embeddings = torch.stack(embeddings)
        assert embeddings.requires_grad == False
        return embeddings

    def make_key(self, sent):
        return '$$'.join(sent)

    def make_sentence(self, key):
        return key.split('$$')

    def convert_impl(self, sentences):
        character_ids = func.tensor(batch_to_ids(sentences))
        m = self.elmo(character_ids)
        embeddings = m['elmo_representations']
        embeddings = torch.cat(embeddings, -1)
        mask = m['mask']
        return embeddings, mask
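# ElmoEmbedding relies on two project utilities, `func` and `DiskDict`, that
# are not shown in this file. The stand-ins below are assumptions about their
# behavior, inferred from how they are used above; they are a sketch, not the
# project's actual helpers.
import os
import pickle
import torch

class DiskDict(dict):
    # Assumed behavior: a dict persisted to disk at the given path.
    def __init__(self, path):
        super().__init__()
        self.path = path
        if os.path.exists(path):
            with open(path, 'rb') as f:
                self.update(pickle.load(f))

    def flush(self):
        # Assumed explicit persistence point; the real class may save on write.
        with open(self.path, 'wb') as f:
            pickle.dump(dict(self), f)

class func:
    # Module-like stand-in for the assumed `func` utility module.
    @staticmethod
    def gpu_available():
        return torch.cuda.is_available()

    @staticmethod
    def tensor(data):
        t = torch.as_tensor(data)
        return t.cuda() if torch.cuda.is_available() else t

    @staticmethod
    def pad_zeros(t, length, dim):
        # Pad tensor t with zeros along `dim` up to `length`.
        pad_shape = list(t.shape)
        pad_shape[dim] = length - t.shape[dim]
        return torch.cat([t, t.new_zeros(pad_shape)], dim=dim)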
def load_elmo(opt):
    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    # Request three output representations, each a separately weighted mix of
    # the three internal ELMo layers (char-CNN plus the two bi-LSTM layers).
    elmo = Elmo(options_file, weight_file, 3, dropout=0, requires_grad=False)
    if opt.gpuid != -1:
        elmo = elmo.cuda()
    return elmo
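# Hypothetical caller for load_elmo: `opt` only needs a `gpuid` attribute
# here. Each entry of 'elmo_representations' is a separately weighted mix of
# ELMo's three internal layers, with shape (batch, max_len, 1024), so the
# three can be concatenated into a 3072-dim feature.
import torch
from types import SimpleNamespace
from allennlp.modules.elmo import batch_to_ids

opt = SimpleNamespace(gpuid=-1)  # -1 = CPU, matching the check in load_elmo
elmo = load_elmo(opt)
ids = batch_to_ids([["A", "short", "example"]])
reps = elmo(ids)['elmo_representations']  # list of 3 tensors
concatenated = torch.cat(reps, dim=-1)    # (1, 3, 3072)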
def main(args):
    # Set random seeds for reproducibility.
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    with open(args.train_path, "rb") as file:
        train_summaries = pickle.load(file, encoding='utf-8')
    # with open(args.valid_path, "rb") as file:
    #     valid_summaries = pickle.load(file, encoding='utf-8')

    for summary in train_summaries:
        convert_document(summary)
    # for summary in valid_summaries:
    #     convert_document(summary)

    elmo_instance = Elmo(options_url, weights_url, 1)
    if use_cuda:
        elmo_instance.cuda()

    begin = timer()
    total_answers = 0
    for summary in train_summaries[:]:
        # The inner `* 2` doubles each answer's tokens and the outer `* 2`
        # doubles the batch, presumably to inflate the benchmark workload.
        answers = [[elmo_tokenize(word) for word in answer] * 2
                   for answer in summary.answers]
        answers = pad_elmo(answers)
        answers = answers * 2
        batch = variable(torch.LongTensor(answers))
        a = elmo_instance(batch)
        total_answers += len(answers)
    end = timer()

    print("Total time elapsed: {}".format(end - begin))
    print("Time per thousand answers: {}".format(
        (end - begin) * 1000 / total_answers))
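# main() uses two helpers that are not defined in this file. The versions
# below are assumed, behavior-compatible sketches inferred from how main()
# uses them, not the original implementations.
import torch
from allennlp.modules.elmo import batch_to_ids

def elmo_tokenize(word):
    # Character ids for a single word (a list of 50 ints), obtained by running
    # batch_to_ids on a one-word sentence.
    return batch_to_ids([[word]])[0][0].tolist()

def pad_elmo(answers):
    # Pad every answer (a list of per-word character-id lists) to the length
    # of the longest answer, using all-zero rows as ELMo's character padding.
    max_len = max(len(a) for a in answers)
    pad_row = [0] * 50
    return [a + [pad_row] * (max_len - len(a)) for a in answers]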
class ELMoEmbeddingInputModule(OnlineInputModule[MCAnnotation]):
    def setup(self):
        print("Setting up Elmo Embedding")
        self.vocab = self.shared_resources
        self.config = self.shared_resources.config
        self.embeddings = self.shared_resources.embeddings
        if self.embeddings is not None:
            self.__default_vec = np.zeros([self.embeddings.shape[-1]])
        self.elmo = Elmo(options_file, weight_file, 1, dropout=0)
        if torch.cuda.is_available():
            self.elmo.cuda()

    def setup_from_data(self, data: Iterable[Tuple[QASetting, List[Answer]]]):
        vocab = self.shared_resources.vocab
        if not vocab.frozen:
            preprocessing.fill_vocab(
                (q for q, _ in data), vocab,
                lowercase=self.shared_resources.config.get('lowercase', True))
            vocab.freeze()
        if not hasattr(self.shared_resources, 'answer_vocab') \
                or not self.shared_resources.answer_vocab.frozen:
            self.shared_resources.answer_vocab = util.create_answer_vocab(
                qa_settings=(q for q, _ in data),
                answers=(a for _, ass in data for a in ass))
            self.shared_resources.answer_vocab.freeze()
        self.shared_resources.char_vocab = preprocessing.char_vocab_from_vocab(
            self.shared_resources.vocab)
        # Preprocess dependency info
        # if self.shared_resources.config.get("use_dep_sa", False):
        #     print("Process dependency information...", file=sys.stderr)
        #     nlp = stanfordnlp.Pipeline()
        #     type2id = nlp.processors['depparse'].trainer.vocab['deprel']
        #     for i in tqdm(range(len(data))):
        #         setting, _ = data[i]
        #         question = setting.question
        #         support = setting.support[0]
        #         doc = nlp(question + support)
        #         setting.q_tokenized = [w.text for w in doc.sentences[0].words]
        #         setting.s_tokenized = [w.text for w in doc.sentences[1].words]
        #         setting.q_dep_i = [None] * (len(setting.q_tokenized) - 1)
        #         setting.q_dep_j = [None] * (len(setting.q_tokenized) - 1)
        #         setting.q_dep_type = [None] * (len(setting.q_tokenized) - 1)
        #         for idx, d in enumerate(doc.sentences[0].dependencies):
        #             if d[1] == 'root':
        #                 continue
        #             setting.q_dep_i[idx] = int(d[0].index) - 1
        #             setting.q_dep_j[idx] = int(d[2].index) - 1
        #             setting.q_dep_type[idx] = type2id.unit2id(d[1])
        #         setting.s_dep_i = [None] * (len(setting.s_tokenized) - 1)
        #         setting.s_dep_j = [None] * (len(setting.s_tokenized) - 1)
        #         setting.s_dep_type = [None] * (len(setting.s_tokenized) - 1)
        #         for idx, d in enumerate(doc.sentences[1].dependencies):
        #             if d[1] == 'root':
        #                 continue
        #             setting.s_dep_i[idx] = int(d[0].index) - 1
        #             setting.s_dep_j[idx] = int(d[2].index) - 1
        #             setting.s_dep_type[idx] = type2id.unit2id(d[1])
        #     if torch.cuda.is_available():
        #         torch.cuda.empty_cache()

    @property
    def training_ports(self) -> List[TensorPort]:
        return [Ports.Target.target_index]

    @property
    def output_ports(self) -> List[TensorPort]:
        if self.shared_resources.embeddings is not None:
            if self.shared_resources.config.get("use_dep_sa", False):
                return [
                    Ports.Input.emb_support, Ports.Input.support_length,
                    Ports.Input.support_dep_i, Ports.Input.support_dep_j,
                    Ports.Input.support_dep_type, Ports.Input.emb_question,
                    Ports.Input.question_length, Ports.Input.question_dep_i,
                    Ports.Input.question_dep_j, Ports.Input.question_dep_type,
                    Ports.is_eval
                ]
            else:
                return [
                    Ports.Input.emb_support, Ports.Input.emb_question,
                    Ports.Input.support, Ports.Input.question,
                    Ports.Input.support_length, Ports.Input.question_length,
                    Ports.Input.sample_id, Ports.Input.word_chars,
                    Ports.Input.word_char_length,
                    Ports.Input.question_batch_words,
                    Ports.Input.support_batch_words, Ports.is_eval
                ]
        else:
            return [
                Ports.Input.support, Ports.Input.question,
                Ports.Input.support_length, Ports.Input.question_length,
                Ports.Input.sample_id, Ports.Input.word_chars,
                Ports.Input.word_char_length,
                Ports.Input.question_batch_words,
                Ports.Input.support_batch_words, Ports.is_eval
            ]

    def preprocess(self, questions: List[QASetting],
                   answers: Optional[List[List[Answer]]] = None,
                   is_eval: bool = False) -> List[MCAnnotation]:
        if answers is None:
            answers = [None] * len(questions)
        preprocessed = []
        if len(questions) > 1000:
            bar = progressbar.ProgressBar(
                max_value=len(questions),
                widgets=[' [', progressbar.Timer(), '] ', progressbar.Bar(),
                         ' (', progressbar.ETA(), ') '])
            for i, (q, a) in bar(enumerate(zip(questions, answers))):
                preprocessed.append(self.preprocess_instance(i, q, a))
        else:
            for i, (q, a) in enumerate(zip(questions, answers)):
                preprocessed.append(self.preprocess_instance(i, q, a))
        return preprocessed

    def preprocess_instance(self, idd: int, question: QASetting,
                            answers: Optional[List[Answer]] = None
                            ) -> MCAnnotation:
        has_answers = answers is not None
        if self.shared_resources.config.get("use_dep_sa", False):
            anno = MCAnnotation(
                question_tokens=question.q_tokenized,
                question_ids=None,
                question_length=len(question.q_tokenized),
                support_tokens=question.s_tokenized,
                support_ids=None,
                support_length=len(question.s_tokenized),
                answer=self.shared_resources.answer_vocab(answers[0].text)
                if has_answers else 0,
                id=idd,
                question_dep_i=question.q_dep_i,
                question_dep_j=question.q_dep_j,
                question_dep_type=question.q_dep_type,
                support_dep_i=question.s_dep_i,
                support_dep_j=question.s_dep_j,
                support_dep_type=question.s_dep_type,
            )
            return anno
        else:
            q_tokenized, q_ids, q_length, _, _ = preprocessing.nlp_preprocess(
                question.question, self.shared_resources.vocab,
                lowercase=self.shared_resources.config.get('lowercase', True))
            s_tokenized, s_ids, s_length, _, _ = preprocessing.nlp_preprocess(
                question.support[0], self.shared_resources.vocab,
                lowercase=self.shared_resources.config.get('lowercase', True))
            return MCAnnotation(
                question_tokens=q_tokenized,
                question_ids=q_ids,
                question_length=q_length,
                support_tokens=s_tokenized,
                support_ids=s_ids,
                support_length=s_length,
                answer=self.shared_resources.answer_vocab(answers[0].text)
                if has_answers else 0,
                id=idd,
                question_dep_i=None,
                question_dep_j=None,
                question_dep_type=None,
                support_dep_i=None,
                support_dep_j=None,
                support_dep_type=None,
            )

    def create_batch(self, annotations: List[MCAnnotation], is_eval: bool,
                     with_answers: bool) -> Mapping[TensorPort, np.ndarray]:
        word_chars, word_lengths, tokens, vocab, rev_vocab = \
            preprocessing.unique_words_with_chars(
                [a.question_tokens for a in annotations] +
                [a.support_tokens for a in annotations],
                self.shared_resources.char_vocab)
        question_words = tokens[:len(annotations)]
        support_words = tokens[len(annotations):]
        q_lengths = [a.question_length for a in annotations]
        s_lengths = [a.support_length for a in annotations]
        if self.shared_resources.config.get('use_dep_sa', False):
            xy_dict = {
                Ports.Input.support_length: s_lengths,
                Ports.Input.support_dep_i:
                    [a.support_dep_i for a in annotations],
                Ports.Input.support_dep_j:
                    [a.support_dep_j for a in annotations],
                Ports.Input.support_dep_type:
                    [a.support_dep_type for a in annotations],
                Ports.Input.question_length: q_lengths,
                Ports.Input.question_dep_i:
                    [a.question_dep_i for a in annotations],
                Ports.Input.question_dep_j:
                    [a.question_dep_j for a in annotations],
                Ports.Input.question_dep_type:
                    [a.question_dep_type for a in annotations],
                Ports.is_eval: is_eval
            }
        else:
            xy_dict = {
                Ports.Input.question_length: q_lengths,
                Ports.Input.support_length: s_lengths,
                Ports.Input.sample_id: [a.id for a in annotations],
                Ports.Input.word_chars: word_chars,
                Ports.Input.word_char_length: word_lengths,
                Ports.Input.question_batch_words: question_words,
                Ports.Input.support_batch_words: support_words,
                Ports.is_eval: is_eval,
                Ports.Input.support: [a.support_ids for a in annotations],
                Ports.Input.question: [a.question_ids for a in annotations]
            }
        if with_answers:
            xy_dict[Ports.Target.target_index] = \
                [a.answer for a in annotations]
        xy_dict = numpify(xy_dict)

        # ELMo embeddings
        tokens_support = [a.support_tokens for a in annotations]
        tokens_question = [a.question_tokens for a in annotations]

        # Debug statistics on token counts.
        tokens_support_len = 0
        tokens_question_len = 0
        tokens_support_maxlen = 0
        tokens_question_maxlen = 0
        for a in annotations:
            tokens_support_len += len(a.support_tokens)
            tokens_question_len += len(a.question_tokens)
            tokens_support_maxlen = max(tokens_support_maxlen,
                                        len(a.support_tokens))
            tokens_question_maxlen = max(tokens_question_maxlen,
                                         len(a.question_tokens))
        # print('Q len:', tokens_question_len, 'maxlen:', tokens_question_maxlen,
        #       ' S len:', tokens_support_len, 'maxlen:', tokens_support_maxlen,
        #       file=sys.stderr)

        chars_support = batch_to_ids(tokens_support)
        chars_question = batch_to_ids(tokens_question)
        if torch.cuda.is_available():
            chars_support = chars_support.cuda()
            chars_question = chars_question.cuda()
        # Run ELMo without building an autograd graph; the embeddings are
        # batch inputs for the downstream model, not trained here.
        with torch.no_grad():
            emb_support = self.elmo(
                chars_support)['elmo_representations'][0].detach()
            emb_question = self.elmo(
                chars_question)['elmo_representations'][0].detach()
        xy_dict[Ports.Input.emb_support] = emb_support
        xy_dict[Ports.Input.emb_question] = emb_question
        return xy_dict
class BiLSTM(nn.Module):
    def __init__(self, emb_dim, h_dim, n_labels, v_size, gpu=True, v_vec=None,
                 batch_first=True, emb_type=None, elmo_model_dir=None):
        super(BiLSTM, self).__init__()
        self.gpu = gpu
        self.h_dim = h_dim
        if self.h_dim is None:
            self.h_dim = emb_dim + 36
        if emb_type == 'ELMo':
            options_file = f'{elmo_model_dir}/options.json'
            weight_file = f'{elmo_model_dir}/weights.hdf5'
            self.word_embed = Elmo(options_file, weight_file,
                                   num_output_representations=1, dropout=0)
            if gpu:
                self.word_embed = self.word_embed.cuda()
        elif emb_type == 'ELMoForManyLangs':
            from elmoformanylangs import Embedder
            e = Embedder(elmo_model_dir)
            self.word_embed = e.sents2elmo
        elif emb_type == 'None':
            self.word_embed = None
        else:
            self.word_embed = nn.Embedding(v_size, emb_dim, padding_idx=0)
            if v_vec is not None:
                v_vec = torch.tensor(v_vec)
                self.word_embed.weight.data.copy_(v_vec)

        feature_embed_layers = []
        feature_embed_size = {
            "feature:0": 25,
            "feature:1": 26,
            "feature:2": 12,
            "feature:3": 6,
            "feature:4": 94,
            "feature:5": 32
        }
        for key in feature_embed_size:
            size = feature_embed_size[key]
            feature_embed = nn.Embedding(size, 5, padding_idx=0)
            feature_embed.weight.data[0] = torch.zeros(5)
            feature_embed_layers.append(feature_embed)
        self.feature_embed_layers = nn.ModuleList(feature_embed_layers)
        self.drop_target = nn.Dropout(p=0.2)
        self.lstm = nn.LSTM(input_size=emb_dim + 36, hidden_size=self.h_dim,
                            batch_first=batch_first, bidirectional=True)
        self.l1 = nn.Linear(self.h_dim * 2, n_labels)

    def init_hidden(self, b_size):
        h0 = Variable(torch.zeros(1 * 2, b_size, self.h_dim))
        c0 = Variable(torch.zeros(1 * 2, b_size, self.h_dim))
        if self.gpu:
            h0 = h0.cuda()
            c0 = c0.cuda()
        return (h0, c0)

    def forward(self, x):
        self.hidden = self.init_hidden(x[2].size(0))
        if self.word_embed:
            word_emb = self.word_embed(x[0])
            if self.word_embed.__class__.__name__ == 'Embedding':
                pass
            elif self.word_embed.__class__.__name__ == 'Elmo':
                # Prepend a zero "none" slot and embeddings for three exophora
                # candidates (これ/あなた/私) to every sequence in the batch.
                exophoras = [['これ'], ['あなた'], ['私']]
                exophora_ids = batch_to_ids(exophoras)
                if self.gpu:
                    exophora_ids = exophora_ids.cuda()
                exophora_emb = self.word_embed(exophora_ids)
                word_emb = word_emb['elmo_representations'][0]
                exophora_emb = exophora_emb['elmo_representations'][0]
                exophora_emb = exophora_emb.reshape(3, -1)
                exophora_emb = exophora_emb.repeat([word_emb.shape[0], 1, 1])
                none_emb = torch.zeros(word_emb.shape[0], 1, word_emb.shape[2])
                if self.gpu:
                    none_emb = none_emb.cuda()
                word_emb = torch.cat((none_emb, exophora_emb, word_emb), 1)
            elif self.word_embed.__func__.__name__ == 'sents2elmo':
                word_emb = [torch.tensor(emb) for emb in word_emb]
                word_emb = nn.utils.rnn.pad_sequence(word_emb,
                                                     batch_first=True,
                                                     padding_value=0)
                exophoras = [['これ'], ['あなた'], ['私']]
                exophora_emb = self.word_embed(exophoras)
                exophora_emb = torch.tensor(exophora_emb).reshape(3, -1)
                exophora_emb = exophora_emb.repeat([word_emb.shape[0], 1, 1])
                none_emb = torch.zeros(word_emb.shape[0], 1, word_emb.shape[2])
                if self.gpu:
                    word_emb = word_emb.cuda()
                    exophora_emb = exophora_emb.cuda()
                    none_emb = none_emb.cuda()
                word_emb = torch.cat((none_emb, exophora_emb, word_emb), dim=1)
        feature_emb_list = []
        for i, _x in enumerate(x[1]):
            feature_emb = self.feature_embed_layers[i](_x)
            feature_emb_list.append(feature_emb)
        x_feature = torch.tensor(x[2], dtype=torch.float, device=x[2].device)
        if self.word_embed:
            x = torch.cat((word_emb, feature_emb_list[0], feature_emb_list[1],
                           feature_emb_list[2], feature_emb_list[3],
                           feature_emb_list[4], feature_emb_list[5],
                           x_feature), dim=2)
        else:
            x = torch.cat((feature_emb_list[0], feature_emb_list[1],
                           feature_emb_list[2], feature_emb_list[3],
                           feature_emb_list[4], feature_emb_list[5],
                           x_feature), dim=2)
        x = self.drop_target(x)
        out, hidden = self.lstm(x, self.hidden)
        # out = out[:, :, :self.h_dim] + out[:, :, self.h_dim:]
        out = self.l1(out)
        return out
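# Shape-only sketch of the exophora trick in BiLSTM.forward: three
# single-token "sentences" are embedded once, then prepended (after a zero
# "none" slot) to every sequence in the batch. The sizes below are arbitrary
# example values.
import torch

batch, seq_len, dim = 4, 7, 1024
word_emb = torch.randn(batch, seq_len, dim)
exophora_emb = torch.randn(3, dim)                 # これ / あなた / 私
exophora_emb = exophora_emb.repeat([batch, 1, 1])  # (batch, 3, dim)
none_emb = torch.zeros(batch, 1, dim)              # "no antecedent" slot
word_emb = torch.cat((none_emb, exophora_emb, word_emb), dim=1)
assert word_emb.shape == (batch, seq_len + 4, dim)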
class WordRep(nn.Module):
    def __init__(self, data):
        super(WordRep, self).__init__()
        print("build word representation...")
        self.gpu = data.HP_gpu
        self.use_char = data.use_char
        self.batch_size = data.HP_batch_size
        self.char_hidden_dim = 0
        self.char_all_feature = False
        self.sentence_classification = data.sentence_classification
        self.use_features = data.use_features
        if self.use_char:
            self.char_hidden_dim = data.HP_char_hidden_dim
            self.char_embedding_dim = data.char_emb_dim
            if data.char_feature_extractor == "CNN":
                self.char_feature = CharCNN(
                    data.char_alphabet.size(), data.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "LSTM":
                self.char_feature = CharBiLSTM(
                    data.char_alphabet.size(), data.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "GRU":
                self.char_feature = CharBiGRU(
                    data.char_alphabet.size(), data.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    data.HP_dropout, self.gpu)
            elif data.char_feature_extractor == "ALL":
                self.char_all_feature = True
                self.char_feature = CharCNN(
                    data.char_alphabet.size(), data.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    data.HP_dropout, self.gpu)
                self.char_feature_extra = CharBiLSTM(
                    data.char_alphabet.size(), data.pretrain_char_embedding,
                    self.char_embedding_dim, self.char_hidden_dim,
                    data.HP_dropout, self.gpu)
            else:
                print("Error char feature selection, please check parameter "
                      "data.char_feature_extractor (CNN/LSTM/GRU/ALL).")
                exit(0)
        self.embedding_dim = data.word_emb_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.use_elmo = data.use_elmo
        self.fine_tune_emb = data.fine_tune_emb
        if not self.use_elmo:
            self.word_embedding = nn.Embedding(data.word_alphabet.size(),
                                               self.embedding_dim)
            self.word_embedding.weight.requires_grad = self.fine_tune_emb
            if data.pretrain_word_embedding is not None:
                self.word_embedding.weight.data.copy_(
                    torch.from_numpy(data.pretrain_word_embedding))
            else:
                self.word_embedding.weight.data.copy_(
                    torch.from_numpy(
                        self.random_embedding(data.word_alphabet.size(),
                                              self.embedding_dim)))
        else:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
            # Compute one representation for each token, as a linear weighted
            # combination of the 3 layers in ELMo (i.e., the char-CNN and the
            # outputs of the two bi-LSTM layers).
            if self.fine_tune_emb:
                # self.elmo = Elmo(options_file, weight_file, 1, dropout=0,
                #                  scalar_mix_parameters=[1., 1., 1.])
                self.elmo = Elmo(options_file, weight_file, 1, dropout=0,
                                 requires_grad=True)
            else:
                # Fixed, equal mixing weights; nothing in ELMo is trained.
                self.elmo = Elmo(options_file, weight_file, 1, dropout=0,
                                 scalar_mix_parameters=[0., 0., 0.])
            if self.gpu:
                self.elmo = self.elmo.cuda()
        # self.feature_num = data.feature_num
        # self.feature_embedding_dims = data.feature_emb_dims
        # self.feature_embeddings = nn.ModuleList()
        # for idx in range(self.feature_num):
        #     self.feature_embeddings.append(nn.Embedding(
        #         data.feature_alphabets[idx].size(),
        #         self.feature_embedding_dims[idx]))
        # for idx in range(self.feature_num):
        #     if data.pretrain_feature_embeddings[idx] is not None:
        #         self.feature_embeddings[idx].weight.data.copy_(
        #             torch.from_numpy(data.pretrain_feature_embeddings[idx]))
        #     else:
        #         self.feature_embeddings[idx].weight.data.copy_(
        #             torch.from_numpy(self.random_embedding(
        #                 data.feature_alphabets[idx].size(),
        #                 self.feature_embedding_dims[idx])))
        if data.use_features:
            self.feature_num = data.feature_num
            self.feature_embedding_dims = data.feature_emb_dims
            self.feature_embeddings = nn.ModuleList()
            for idx in range(self.feature_num):
                self.feature_embeddings.append(
                    nn.Embedding(data.feature_alphabets[idx].size(),
                                 self.feature_embedding_dims[idx]))
            for idx in range(self.feature_num):
                if data.pretrain_feature_embeddings[idx] is not None:
                    self.feature_embeddings[idx].weight.data.copy_(
                        torch.from_numpy(
                            data.pretrain_feature_embeddings[idx]))
                else:
                    self.feature_embeddings[idx].weight.data.copy_(
                        torch.from_numpy(self.random_embedding(
                            data.feature_alphabets[idx].size(),
                            self.feature_embedding_dims[idx])))
        if self.gpu:
            self.drop = self.drop.cuda()
            if not self.use_elmo:
                self.word_embedding = self.word_embedding.cuda()
            # for idx in range(self.feature_num):
            #     self.feature_embeddings[idx] = \
            #         self.feature_embeddings[idx].cuda()
            if data.use_features:
                for idx in range(self.feature_num):
                    self.feature_embeddings[idx] = \
                        self.feature_embeddings[idx].cuda()

    def random_embedding(self, vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover,
                word_text_input):
        """
        input:
            word_inputs: (batch_size, sent_len)
            feature_inputs: list [(batch_size, sent_len), ...]
            word_seq_lengths: list of batch_size, (batch_size, 1)
            char_inputs: (batch_size*sent_len, word_length)
            char_seq_lengths: list of whole batch_size for char,
                (batch_size*sent_len, 1)
            char_seq_recover: variable which records the char order
                information, used to recover char order
        output:
            Variable(batch_size, sent_len, hidden_dim)
        """
        batch_size = word_inputs.size(0)
        sent_len = word_inputs.size(1)
        if self.use_elmo:
            character_ids = batch_to_ids(word_text_input)
            if self.gpu:
                character_ids = character_ids.cuda()
            elmo_output = self.elmo(character_ids)["elmo_representations"][0]
            if not self.fine_tune_emb:
                elmo_output = elmo_output.detach()
            word_list = [elmo_output]
        else:
            word_embs = self.word_embedding(word_inputs)
            word_list = [word_embs]
        if not self.sentence_classification and self.use_features:
            for idx in range(self.feature_num):
                word_list.append(
                    self.feature_embeddings[idx](feature_inputs[idx]))
        if self.use_char:
            ## calculate char lstm last hidden
            # print("charinput:", char_inputs)
            # exit(0)
            char_features = self.char_feature.get_last_hiddens(
                char_inputs, char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            ## concat word and char together (via word_list below)
            word_list.append(char_features)
            if self.char_all_feature:
                char_features_extra = \
                    self.char_feature_extra.get_last_hiddens(
                        char_inputs, char_seq_lengths.cpu().numpy())
                char_features_extra = char_features_extra[char_seq_recover]
                char_features_extra = char_features_extra.view(
                    batch_size, sent_len, -1)
                word_list.append(char_features_extra)
        word_embs = torch.cat(word_list, 2)
        word_represent = self.drop(word_embs)
        return word_represent
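# The scalar_mix_parameters passed in WordRep.__init__ above fix ELMo's
# layer-mixing weights instead of learning them: the mix applies a softmax
# over the given scalars, so [0., 0., 0.] weights the char-CNN and both
# bi-LSTM layers equally. A quick check of that softmax:
import torch

weights = torch.softmax(torch.tensor([0., 0., 0.]), dim=0)
print(weights)  # tensor([0.3333, 0.3333, 0.3333])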
class PairClassifier(nn.Module):
    def __init__(self, input_size, num_epochs=10, dropout_p=0.1,
                 loss_func='crossentropy'):
        super(PairClassifier, self).__init__()
        print('elmo files:', options_file, weight_file)
        self.elmo = Elmo(options_file, weight_file, 1, dropout=0)
        if use_cuda:
            self.elmo = self.elmo.cuda()
        self.input_size = input_size
        self.epochs = num_epochs
        self.loss_func = loss_func
        self.dropout = nn.Dropout(dropout_p)
        # Use this layer when training with only the CNN model, i.e. no ensemble.
        self.fc1 = nn.Linear(self.input_size * 2, 2)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        # print('x:', str(x))
        batch_size = len(x)
        character_ids = batch_to_ids(x)
        if use_cuda:
            character_ids = character_ids.cuda()
        embeddings = self.elmo(character_ids)['elmo_representations']
        # print('elmo embeddings:', embeddings[0].size())
        X = embeddings[0].view(batch_size, -1, 1024)  # (N, W, D)
        # TODO: embed entity and time phrase
        # NOTE: fc1 expects (N, self.input_size * 2), so the (N, W, 1024)
        # ELMo output still needs the pair construction/pooling from the TODO.
        x = self.dropout(X)
        logit = self.fc1(x)  # (N, C)
        return logit

    def fit(self, ann_maps, timex_maps, id_maps):
        """
        Create and train a CNN model.
        Hybrid features supported - pass structured feats as X2.
        Does NOT support joint training yet.
        returns: the CNN model
        """
        st = time.time()
        # TODO: create the pairs
        # Params
        batch_size = 16
        learning_rate = 0.001
        if use_cuda:
            self = self.cuda()
        Yarray = Y.astype('int')
        X_len = len(X)
        print('X len:', X_len)
        print('Y numpy shape:', str(Yarray.shape))
        steps = 0
        st = time.time()
        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
        if self.loss_func == 'crossentropy':
            loss = nn.CrossEntropyLoss()
        else:
            print('ERROR: unrecognized loss function name')
        for epoch in range(self.epochs):
            print('epoch', str(epoch))
            i = 0
            numpy.random.seed(seed=1)
            perm = torch.from_numpy(numpy.random.permutation(X_len))
            permutation = perm.long()
            perm_list = perm.tolist()
            Xiter = [X[i] for i in perm_list]
            # Xiter = X[permutation]
            Yiter = Yarray[permutation]
            while i + batch_size < X_len:
                batchX = Xiter[i:i + batch_size]
                batchY = Yiter[i:i + batch_size]
                # Xtensor = torch.from_numpy(batchX).float()
                Ytensor = torch.from_numpy(batchY).long()
                if use_cuda:
                    # Xtensor = Xtensor.cuda()
                    Ytensor = Ytensor.to(tdevice)
                optimizer.zero_grad()
                logit = self(batchX)
                loss_val = loss(logit, Ytensor)
                # print('loss: ', loss_val.data.item())
                loss_val.backward()
                optimizer.step()
                steps += 1
                i = i + batch_size
            # Print epoch time.
            ct = time.time() - st
            unit = "s"
            if ct > 60:
                ct = ct / 60
                unit = "m"
            print("time so far: ", str(ct), unit)
            print('loss: ', loss_val.data.item())

    def predict(self, test_anns, test_times, testids=None):
        y_pred = []
        # TODO: create pairs
        for x in range(len(testX)):
            input_row = testX[x]
            icd = None
            if icd is None:
                icd_var = self([input_row])
                # Softmax and log softmax values.
                icd_vec = self.logsoftmax(icd_var).squeeze()
                # print('pred vector:', icd_vec.size(), icd_vec)
                # print('argmax:', torch.argmax(icd_vec))
                # icd_vec_softmax = softmax(icd_var)
                cat = torch.argmax(icd_vec).item()
                if x == 0:
                    print('cat:', cat)
                # icd_code = cat
            y_pred.append(cat)
            # Uncomment this line if the threshold is not in use:
            # print("Probabilities: " + str(probs))
        return y_pred
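# One assumed way to produce the (N, input_size * 2) vector that
# PairClassifier.fc1 expects: mean-pool each of the two phrases in a pair and
# concatenate the results. This is a sketch of the missing "create the pairs"
# step from the TODOs above, not the author's code.
import torch

def pool_pair(emb_a, emb_b):
    # emb_a, emb_b: (len_a, 1024) and (len_b, 1024) ELMo outputs for the two
    # phrases of one pair; the result is a single (2048,) feature vector.
    return torch.cat([emb_a.mean(dim=0), emb_b.mean(dim=0)])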