def __init__(self, opt, shared):
    super(ElmoEncoder, self).__init__()
    self.opt = opt
    self.shared = shared
    self.num_output = 2 if opt.use_elmo_post == 1 else 1
    # initialize from these
    options_file = None
    weight_file = None
    if opt.elmo_in_size == 1024:
        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    elif opt.elmo_in_size == 512:
        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5"
    self.elmo = Elmo(options_file, weight_file,
                     num_output_representations=self.num_output,
                     dropout=opt.elmo_dropout,
                     requires_grad=opt.fix_elmo == 0)
    # skip initialization
    for n, p in self.elmo.named_parameters():
        p.skip_init = True
def embed_corpus_with_elmo(corpus_name="ag_news", document_size=4000, language_model="elmo"):
    from allennlp.modules.elmo import Elmo, batch_to_ids
    # code from https://github.com/allenai/allennlp/issues/2245
    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    model = Elmo(options_file, weight_file, 1, dropout=0)
    model.eval()
    model = model.to(torch.device("cuda"))
    tokens = []
    embeddings = []
    corpus = get_corpus(corpus_name, document_size)
    for doc in tqdm(corpus):
        token, ids = doc.split(), batch_to_ids([doc.split()])
        ids = ids.to(torch.device("cuda"))
        with torch.no_grad():
            hidden_states = model(ids)
        embedding = hidden_states["elmo_representations"][0][0]
        embedding = embedding.detach().cpu().numpy()
        tokens.append(token)
        embeddings.append(embedding)
    with open(f"{corpus_name}.{language_model}.pk", "wb") as f:
        pickle.dump({"tokens": tokens, "embeddings": embeddings}, f, protocol=4)
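# A minimal, hedged usage sketch for the function above: the pickle written by
# embed_corpus_with_elmo (file name assumed) stores one (num_tokens, 1024)
# numpy array per document for this large ELMo model, aligned with the saved
# token lists.
import pickle

with open("ag_news.elmo.pk", "rb") as f:
    data = pickle.load(f)
for toks, emb in zip(data["tokens"], data["embeddings"]):
    assert emb.shape == (len(toks), 1024)  # one 1024-d vector per token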
def __init__(
    self,
    options_files: Dict[str, str],
    weight_files: Dict[str, str],
    do_layer_norm: bool = False,
    dropout: float = 0.5,
    requires_grad: bool = False,
    projection_dim: int = None,
    vocab_to_cache: List[str] = None,
    scalar_mix_parameters: List[float] = None,
    aligning_files: Dict[str, str] = None,
) -> None:
    super().__init__()
    if options_files.keys() != weight_files.keys():
        raise ConfigurationError("Keys for Elmo's options files and weights files don't match")
    aligning_files = aligning_files or {}
    output_dim = None
    for lang in weight_files.keys():
        name = "elmo_%s" % lang
        elmo = Elmo(
            options_files[lang],
            weight_files[lang],
            num_output_representations=1,
            do_layer_norm=do_layer_norm,
            dropout=dropout,
            requires_grad=requires_grad,
            vocab_to_cache=vocab_to_cache,
            scalar_mix_parameters=scalar_mix_parameters,
        )
        self.add_module(name, elmo)
        output_dim_tmp = elmo.get_output_dim()
        if output_dim is not None:
            # Verify that all ELMo embedders have the same output dimension.
            check_dimensions_match(
                output_dim_tmp, output_dim, "%s output dim" % name, "elmo output dim"
            )
        output_dim = output_dim_tmp
    self.output_dim = output_dim
    if projection_dim:
        self._projection = torch.nn.Linear(output_dim, projection_dim)
        self.output_dim = projection_dim
    else:
        self._projection = None
    for lang in weight_files.keys():
        name = "aligning_%s" % lang
        aligning_matrix = torch.eye(output_dim)
        if lang in aligning_files and aligning_files[lang] != "":
            aligning_path = cached_path(aligning_files[lang])
            aligning_matrix = torch.FloatTensor(torch.load(aligning_path))
        aligning = torch.nn.Linear(output_dim, output_dim, bias=False)
        aligning.weight = torch.nn.Parameter(aligning_matrix, requires_grad=False)
        self.add_module(name, aligning)
def __init__(self, posts: List[Dict[str, Any]],
             labels_map: Dict[str, Dict[str, int]],
             dictionary: Dictionary):
    self.posts = list(
        map(lambda post: parse_post(post, image_retriever="pretrained"), posts))
    self.labels_map = labels_map
    self.dictionary = dictionary
    options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    self.elmo = Elmo(options_file, weight_file, 2, dropout=0)
    self.elmo = self.elmo.to(device)
    # Preprocess posts data
    for post_id, _ in enumerate(self.posts):
        # Map str label to integer
        for label in self.posts[post_id]['label'].keys():
            self.posts[post_id]['label'][label] = self.labels_map[label][
                self.posts[post_id]['label'][label]]
        # Convert caption to list of token indices
        self.posts[post_id]['caption'] += '.'
        character_ids = batch_to_ids(
            [self.posts[post_id]['caption'].split(" ")])
        # (len(batch), max sentence length, max word length)
        character_ids = character_ids.to(device)
        x = self.elmo(character_ids)
        self.posts[post_id]['caption'] = x['elmo_representations'][0]
def __init__(self, cfg, phrase_embed_dim=1024, bidirectional=False):
    super(PhraseEmbeddingSentElmo, self).__init__()
    self.phrase_select_type = cfg.MODEL.VG.PHRASE_SELECT_TYPE
    self.bidirectional = bidirectional
    self.hidden_dim = phrase_embed_dim if not self.bidirectional else phrase_embed_dim // 2
    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    # Compute two different representations for each token. Each representation
    # is a linear weighted combination of the 3 ELMo layers (the character CNN
    # and the outputs of the two BiLSTM layers).
    self.elmo = Elmo(options_file, weight_file, 2, dropout=0, requires_grad=False)
    self.elmo.eval()
    self.seq_rnn = nn.GRU(input_size=1024,
                          hidden_size=self.hidden_dim,
                          num_layers=1,
                          bias=True,
                          batch_first=True,
                          dropout=0,
                          bidirectional=bidirectional)
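# A hedged sketch of how this module's forward pass could look; the forward is
# not part of the snippet above, and the `phrases` argument (a list of token
# lists) is an assumption.
def forward(self, phrases):
    device = next(self.seq_rnn.parameters()).device
    character_ids = batch_to_ids(phrases).to(device)
    with torch.no_grad():  # ELMo weights are frozen above
        elmo_out = self.elmo(character_ids)
    token_reps = elmo_out['elmo_representations'][0]  # (batch, seq_len, 1024)
    outputs, hidden = self.seq_rnn(token_reps)
    return outputs, hidden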
def getELMo(vocab, unidir, downstream=False, mix_parameters=[1, 1, 1]):
    options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    vocab_to_cache = sorted(vocab.keys(), key=lambda t: vocab[t])
    if downstream:
        elmo = Elmo(options_file, weight_file,
                    num_output_representations=1,
                    vocab_to_cache=vocab_to_cache)
    else:
        elmo = Elmo(options_file, weight_file,
                    num_output_representations=1,
                    scalar_mix_parameters=mix_parameters,
                    vocab_to_cache=vocab_to_cache)
    if unidir:
        # Zero out the backward-direction LSTM weights so that only the
        # forward language model contributes to the representations.
        for l in ["backward_layer_0", "backward_layer_1"]:
            layer = getattr(elmo._elmo_lstm._elmo_lstm, l)
            for s in ["input_linearity", "state_linearity", "state_projection"]:
                subject = getattr(layer, s)
                for a in ["weight", "bias"]:
                    if hasattr(subject, a) and getattr(subject, a) is not None:
                        target = getattr(subject, a)
                        target.data.fill_(0.0)
    return elmo
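# Hypothetical usage sketch; `vocab` is assumed to map token -> index.
vocab = {"the": 0, "cat": 1, "sat": 2}
elmo = getELMo(vocab, unidir=True, downstream=False)
# Because vocab_to_cache is set, word ids of shape (batch, timesteps) may be
# passed alongside the character ids to hit the cached convolutions.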
def __init__(self, config, model):
    super().__init__()
    self.config = config
    self.logger = self.config.logger
    self.model = model
    self.model_path = config.dir_model
    self.use_elmo = config.use_elmo
    self.idx_to_tag = {idx: tag for tag, idx in self.config.vocab_tags.items()}
    self.criterion = CRF(self.config.ntags)
    self.optimizer = optim.Adam(self.model.parameters())
    if self.use_elmo:
        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        self.elmo = Elmo(options_file, weight_file, 2, dropout=0)
    else:
        self.load_emb()
    if USE_GPU:
        self.use_cuda = True
        self.logger.info("GPU found.")
        self.model = model.cuda()
        self.criterion = self.criterion.cuda()
        if self.use_elmo:
            self.elmo = self.elmo.cuda()
            print("Moved elmo to cuda")
    else:
        self.model = model.cpu()
        self.use_cuda = False
        self.logger.info("No GPU found.")
def __init__(self, char_vocab_size, glove_vocab_size, word_vocab_size, embed_dim,
             dropout, elmo=False, elmo_options_file=None, elmo_weights_file=None,
             glove_cpu=False):
    super(Embedding, self).__init__()
    self.word_embedding = WordEmbedding(word_vocab_size, embed_dim)
    self.char_embedding = CharEmbedding(char_vocab_size, embed_dim)
    self.glove_embedding = WordEmbedding(glove_vocab_size, embed_dim,
                                         requires_grad=False, cpu=glove_cpu)
    self.output_size = 2 * embed_dim
    self.highway1 = Highway(self.output_size, dropout)
    self.highway2 = Highway(self.output_size, dropout)
    if elmo:
        assert elmo_options_file is not None and elmo_weights_file is not None
        from allennlp.modules.elmo import Elmo
        self.elmo = Elmo(elmo_options_file, elmo_weights_file, 1, dropout=0)
        self.output_size += self.elmo.get_output_dim()
    else:
        self.elmo = None
def __init__(self, config, model_dir, device=None):
    self.config = config
    self.model_dir = model_dir
    self.log_file = os.path.join(model_dir, 'log.csv')
    self.device = get_device(device)
    self.slu_cls = getattr(modules, config['model']['name'])
    self.slu = self.slu_cls(config['model'])
    self.use_elmo = config.get("use_elmo", False)
    if self.use_elmo:
        option_file = config["elmo"]["option_file"]
        weight_file = config["elmo"]["weight_file"]
        self.elmo = Elmo(option_file, weight_file, 1, dropout=0)
        self.slu.elmo_scalar_mixes = nn.ModuleList(self.elmo._scalar_mixes)
        if len(config["elmo"].get("checkpoint", "")) > 0:
            self.elmo._elmo_lstm = torch.load(
                config["elmo"]["checkpoint"]).elmo
        for param in self.elmo._elmo_lstm.parameters():
            param.requires_grad_(False)
        self.elmo.to(self.device)
    self.slu.to(self.device)
def __init__(
    self,
    options_file: str = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/"
    + "elmo_2x4096_512_2048cnn_2xhighway_options.json",
    weight_file: str = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/"
    + "elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5",
    do_layer_norm: bool = False,
    dropout: float = 0.5,
    requires_grad: bool = False,
    projection_dim: int = None,
    vocab_to_cache: List[str] = None,
    scalar_mix_parameters: List[float] = None,
) -> None:
    super().__init__()
    self._elmo = Elmo(
        options_file,
        weight_file,
        1,
        do_layer_norm=do_layer_norm,
        dropout=dropout,
        requires_grad=requires_grad,
        vocab_to_cache=vocab_to_cache,
        scalar_mix_parameters=scalar_mix_parameters,
    )
    if projection_dim:
        self._projection = torch.nn.Linear(self._elmo.get_output_dim(), projection_dim)
        self.output_dim = projection_dim
    else:
        self._projection = None
        self.output_dim = self._elmo.get_output_dim()
def main():
    assert config['config_target'] == 'naive_psychology'
    if args.weight_name == 'elmo':
        lm = Elmo(args.elmo_option_file, args.elmo_weight_file, 1, dropout=0)
        tokenizer = MosesTokenizer(lang='en')
    else:
        # tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            args.weight_name, cache_dir=args.cache_dir)
        # language model
        lm = AutoModel.from_pretrained(args.weight_name,
                                       cache_dir=args.cache_dir)
    if args.gpu_id != -1:
        lm = lm.cuda(args.gpu_id)

    # dataset
    corpus = NaivePsychology(config['file_path'])

    # from the original dev, extract our train split
    train_sids, dev_sids = load_splits(config['split_dir'])
    process_split(corpus.dev_generator, tokenizer, lm, 'train', train_sids)

    # from the original dev, extract our dev split
    process_split(corpus.dev_generator, tokenizer, lm, 'dev', dev_sids)

    test_sids = set(sid for sid, _ in corpus.test_generator())
    process_split(corpus.test_generator, tokenizer, lm, 'test', test_sids)
def __init__(
    self,
    options_file: str,
    weight_file: str,
    do_layer_norm: bool = False,
    dropout: float = 0.5,
    requires_grad: bool = False,
    projection_dim: int = None,
    vocab_to_cache: List[str] = None,
    scalar_mix_parameters: List[float] = None,
) -> None:
    super().__init__()
    self._elmo = Elmo(
        options_file,
        weight_file,
        1,
        do_layer_norm=do_layer_norm,
        dropout=dropout,
        requires_grad=requires_grad,
        vocab_to_cache=vocab_to_cache,
        scalar_mix_parameters=scalar_mix_parameters,
    )
    if projection_dim:
        self._projection = torch.nn.Linear(self._elmo.get_output_dim(), projection_dim)
        self.output_dim = projection_dim
    else:
        self._projection = None
        self.output_dim = self._elmo.get_output_dim()
class ELMoVectors(object):
    def __init__(self, size_elmo, device):
        self.size_elmo = size_elmo
        self.device = device
        self.model = Elmo(options_files[size_elmo], weight_files[size_elmo], 1,
                          dropout=0., requires_grad=False)
        self.model.to(device)

    def get_embedding_size(self):
        return elmo_emb_size[self.size_elmo]

    def transform(self, X):
        # Split each text into tokens for the character embedding of a sentence.
        X = self.tokenize(X)
        word_token = batch_to_ids(X).to(self.device)
        word_emb = self.model(word_token)
        # Free the no-longer-needed id tensor.
        del word_token
        return word_emb['elmo_representations'][0]

    def tokenize(self, X):
        for i in range(len(X)):
            X[i] = X[i].split(' ')
        return X
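# Hypothetical usage sketch; `options_files`, `weight_files`, and
# `elmo_emb_size` are assumed to be module-level dicts keyed by model size.
vectors = ELMoVectors(size_elmo="small", device=torch.device("cpu"))
emb = vectors.transform(["the cat sat", "hello world"])
# emb: (batch=2, max_sentence_len=3, vectors.get_embedding_size())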
class getElmo(nn.Module):
    def __init__(self, layer=2, dropout=0, out_dim=100, gpu=True):
        super(getElmo, self).__init__()
        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        self.dropout = dropout
        self.gpu = gpu
        self.Elmo = Elmo(options_file, weight_file, layer, dropout=dropout)
        self.Elmo.eval()
        self.layers2one = nn.Linear(layer, 1).cuda() if self.gpu else nn.Linear(layer, 1)
        self.optLinear = nn.Linear(1024, out_dim).cuda() if self.gpu else nn.Linear(1024, out_dim)

    def forward(self, texts):
        word_idxs = batch_to_ids(texts).cuda() if self.gpu else batch_to_ids(texts)
        elmo_embs = self.Elmo(word_idxs)
        # Stack the `layer` output representations along a trailing axis:
        # (batch, seq_len, 1024, layer).
        elmo_reps = torch.stack(elmo_embs['elmo_representations'], dim=-1)
        if self.gpu:
            elmo_reps = elmo_reps.cuda()
        # Collapse the layer axis with a learned linear combination. Use
        # squeeze(-1) rather than squeeze() so a batch of size 1 keeps its
        # batch dimension.
        elmo_decrease_layer = self.layers2one(elmo_reps).squeeze(-1)
        elmo_fit_hidden = self.optLinear(elmo_decrease_layer)
        mask = elmo_embs['mask']
        return elmo_fit_hidden, mask
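# Minimal usage sketch (CPU, pre-tokenized hypothetical inputs):
model = getElmo(layer=2, dropout=0, out_dim=100, gpu=False)
hidden, mask = model([["the", "cat", "sat"], ["hello", "world"]])
# hidden: (2, 3, 100); mask: (2, 3), zeros marking padding positions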
def elmo_encode(self, data, __id2word):
    """Recover tokens with the vocab's id2word, then encode them with ELMo.

    batch_to_ids pads only to the max sentence length in the batch, which can
    be shorter than the desired length, so we pad further to self.sent_pad_len.
    """
    data_text = [self.glove_tokenizer(x, __id2word) for x in data]
    with torch.no_grad():
        elmo = Elmo(options_file, weight_file, 2, dropout=0).cuda()
        elmo.eval()
        character_ids = batch_to_ids(data_text).cuda()
        row_num = character_ids.shape[0]
        elmo_dim = self.elmo_dim
        if torch.sum(character_ids) != 0:
            elmo_emb = elmo(character_ids)['elmo_representations']
            elmo_emb = (elmo_emb[0] + elmo_emb[1]) / 2  # average of the two mixes
        else:
            elmo_emb = torch.zeros([row_num, self.sent_pad_len, elmo_dim],
                                   dtype=torch.float)
        sent_len = elmo_emb.shape[1]
        if sent_len < self.sent_pad_len:
            fill_sent_len = self.sent_pad_len - sent_len
            # Create zeros to pad the sequence dimension up to sent_pad_len.
            filler = torch.zeros([row_num, fill_sent_len, elmo_dim],
                                 dtype=torch.float)
            elmo_emb = torch.cat((elmo_emb, filler.cuda()), dim=1)
    return elmo_emb.cuda()
class ElmoEmbedding:
    def __init__(self, dim):
        if dim == 2048:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        elif dim == 512:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5"
        self.dim = dim
        self.elmo = Elmo(options_file, weight_file, 2, dropout=0)
        if func.gpu_available():
            self.elmo = self.elmo.cuda()
        self.elmo.eval()
        self.load()

    def save(self):
        pass

    def load(self):
        self.cache = DiskDict(f'./generate/elmo.{self.dim}.cache')

    def convert(self, sentences):
        not_hit = set()
        for sent in sentences:
            key = self.make_key(sent)
            if key not in self.cache:
                not_hit.add(key)
        not_hit = list(not_hit)
        if not_hit:
            embeddings, masks = self.convert_impl(
                [self.make_sentence(key) for key in not_hit])
            for key, embedding, mask in zip(not_hit, torch.unbind(embeddings),
                                            torch.unbind(masks)):
                embedding = embedding[:mask.sum()]
                self.cache[key] = embedding.tolist()
        embeddings = [func.tensor(self.cache[self.make_key(sent)])
                      for sent in sentences]
        mlen = max(e.shape[0] for e in embeddings)
        embeddings = [func.pad_zeros(e, mlen, 0) for e in embeddings]
        embeddings = torch.stack(embeddings)
        assert embeddings.requires_grad == False
        return embeddings

    def make_key(self, sent):
        return '$$'.join(sent)

    def make_sentence(self, key):
        return key.split('$$')

    def convert_impl(self, sentences):
        character_ids = func.tensor(batch_to_ids(sentences))
        m = self.elmo(character_ids)
        embeddings = m['elmo_representations']
        embeddings = torch.cat(embeddings, -1)
        mask = m['mask']
        return embeddings, mask
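# Hypothetical usage sketch; `func` (tensor helpers) and `DiskDict` are project
# utilities assumed to exist. Keying the disk cache on '$$'-joined tokens means
# each distinct sentence is embedded only once across runs.
emb = ElmoEmbedding(dim=2048)
batch = emb.convert([["the", "cat"], ["hello", "world", "again"]])
# batch: (2, 3, 2048) -- the two mixes are concatenated along the last axis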
def __init__(self, **kwargs):
    kwargs.pop('use_cuda')
    self._embedder = Elmo(config.ELMO_OPTIONS, config.ELMO_WEIGHTS,
                          num_output_representations=1, **kwargs)
    self._embedder = self._embedder.cuda()
    self.embedding_dim = 1024
def __init__(self, size_elmo, device):
    self.size_elmo = size_elmo
    self.device = device
    self.model = Elmo(options_files[size_elmo], weight_files[size_elmo], 1,
                      dropout=0., requires_grad=False)
    self.model.to(device)
def __init__(self, args):
    super(ElmoWrapper, self).__init__()
    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    # 2 output representations
    self.elmo = Elmo(options_file, weight_file, 2, dropout=0.0).to(args.device)
    self.elmo.eval()
def __init__(self, device):
    self.device = device
    bioelmo_options_file = "/home/soumyasharma/datafiles/biomed_elmo_options.json"
    bioelmo_weight_file = "/home/soumyasharma/datafiles/biomed_elmo_weights.hdf5"
    # Compute two different representations for each token. Each representation
    # is a linear weighted combination of the 3 ELMo layers (the character CNN
    # and the outputs of the two BiLSTM layers).
    self.model = Elmo(bioelmo_options_file, bioelmo_weight_file, 2, dropout=0)
    self.model = self.model.to(self.device)
def __init__(self, emb_dim, h_dim, n_labels, v_size, gpu=True, v_vec=None,
             batch_first=True, emb_type=None, elmo_model_dir=None):
    super(BiLSTM, self).__init__()
    self.gpu = gpu
    self.h_dim = h_dim
    if self.h_dim is None:
        self.h_dim = emb_dim + 36
    if emb_type == 'ELMo':
        options_file = f'{elmo_model_dir}/options.json'
        weight_file = f'{elmo_model_dir}/weights.hdf5'
        self.word_embed = Elmo(options_file, weight_file,
                               num_output_representations=1, dropout=0)
        if gpu:
            self.word_embed = self.word_embed.cuda()
    elif emb_type == 'ELMoForManyLangs':
        from elmoformanylangs import Embedder
        e = Embedder(elmo_model_dir)
        self.word_embed = e.sents2elmo
    elif emb_type == 'None':
        self.word_embed = None
    else:
        self.word_embed = nn.Embedding(v_size, emb_dim, padding_idx=0)
        if v_vec is not None:
            v_vec = torch.tensor(v_vec)
            self.word_embed.weight.data.copy_(v_vec)

    feature_embed_layers = []
    feature_embed_size = {
        "feature:0": 25,
        "feature:1": 26,
        "feature:2": 12,
        "feature:3": 6,
        "feature:4": 94,
        "feature:5": 32,
    }
    for key in feature_embed_size:
        size = feature_embed_size[key]
        feature_embed = nn.Embedding(size, 5, padding_idx=0)
        feature_embed.weight.data[0] = torch.zeros(5)
        feature_embed_layers.append(feature_embed)
    self.feature_embed_layers = nn.ModuleList(feature_embed_layers)
    self.drop_target = nn.Dropout(p=0.2)
    self.lstm = nn.LSTM(input_size=emb_dim + 36,
                        hidden_size=self.h_dim,
                        batch_first=batch_first,
                        bidirectional=True)
    self.l1 = nn.Linear(self.h_dim * 2, n_labels)
def init_elmo(self):
    '''Initialize the ELMo model.'''
    self.elmo = Elmo(self.opt.elmo_options_file, self.opt.elmo_weight_file, 1)
    for param in self.elmo.parameters():
        param.requires_grad = False
    self.word_dim = self.opt.elmo_dim
def load_elmo(opt):
    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
    # Request 3 output representations, i.e. one learned scalar mix of the
    # 3 ELMo layers per output.
    elmo = Elmo(options_file, weight_file, 3, dropout=0, requires_grad=False)
    if opt.gpuid != -1:
        elmo = elmo.cuda()
    return elmo
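# Hedged usage sketch; assumes batch_to_ids is imported from
# allennlp.modules.elmo and `opt.gpuid` is -1 (CPU).
elmo = load_elmo(opt)
char_ids = batch_to_ids([["a", "small", "test"]])
out = elmo(char_ids)
mix0, mix1, mix2 = out['elmo_representations']  # each (1, 3, 1024)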
def main():
    # Load generated model file
    archive = load_archive(args.archive_path)
    model = archive.model
    finetuned_elmo_state_dict = model._contextualizer._elmo.state_dict()

    # Load ELMo options and weights file
    elmo = Elmo(args.options_file, args.weight_file, 1)
    original_elmo_state_dict = elmo.state_dict()

    # Get the average parameter shift in the token embedder.
    token_embedder_total_shift = 0.0
    token_embedder_num_params = 0.0
    for key, parameter in finetuned_elmo_state_dict.items():
        if "token_embedder" in key:
            token_embedder_num_params += parameter.numel()
            token_embedder_total_shift += torch.abs(
                parameter - original_elmo_state_dict[key]).sum().item()
    logger.info("Average Shift (L1 distance) in token embedder: {}".format(
        token_embedder_total_shift / token_embedder_num_params))

    # Get the average parameter shift in the first layer of the LSTM.
    layer_0_total_shift = 0.0
    layer_0_num_params = 0.0
    for key, parameter in finetuned_elmo_state_dict.items():
        if "backward_layer_0" in key or "forward_layer_0" in key:
            layer_0_num_params += parameter.numel()
            layer_0_total_shift += torch.abs(
                parameter - original_elmo_state_dict[key]).sum().item()
    logger.info("Average Shift (L1 distance) in LSTM Layer 0: {}".format(
        layer_0_total_shift / layer_0_num_params))

    # Get the average parameter shift in the second layer of the LSTM.
    layer_1_total_shift = 0.0
    layer_1_num_params = 0.0
    for key, parameter in finetuned_elmo_state_dict.items():
        if "backward_layer_1" in key or "forward_layer_1" in key:
            layer_1_num_params += parameter.numel()
            layer_1_total_shift += torch.abs(
                parameter - original_elmo_state_dict[key]).sum().item()
    logger.info("Average Shift (L1 distance) in LSTM Layer 1: {}".format(
        layer_1_total_shift / layer_1_num_params))

    # Print the scalar mix parameters of the fine-tuned model.
    normed_scalars = torch.nn.functional.softmax(torch.cat([
        parameter for key, parameter in finetuned_elmo_state_dict.items()
        if "scalar_parameters" in key
    ]), dim=0)
    normed_scalars = torch.split(normed_scalars, split_size_or_sections=1)
    normed_scalars = [normed_scalar.item() for normed_scalar in normed_scalars]
    logger.info(
        "Normalized Scalar Mix of fine-tuned model: {}".format(normed_scalars))

    # Print the gamma
    logger.info("Gamma of fine-tuned model: {}".format(
        finetuned_elmo_state_dict["scalar_mix_0.gamma"].item()))
def __init__(self, config):
    super().__init__()
    elmo_path = config['elmo']
    elmo_option_file = os.path.join(
        elmo_path, "elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json")
    elmo_weight_file = os.path.join(
        elmo_path, "elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5")
    self.elmo = Elmo(elmo_option_file, elmo_weight_file, 2)
    for p in self.elmo.parameters():
        p.requires_grad = False
def __init__(self, options_file="/beegfs/ijh216/elmo/options.json", weights_file="/beegfs/ijh216/elmo/weights.hdf5"): super(ElmoEmbedder, self).__init__() self.elmo = Elmo(options_file, weights_file, 1, requires_grad=False) if torch.cuda.is_available(): self.elmo = self.elmo.cuda()
def get_elmo(options_file, weight_file, gpu, dropout):
    global elmo
    # Create the ELMo class. This example computes two output representation
    # layers, each with separate layer weights.
    # We recommend adding dropout (50% is a good default) either here or
    # elsewhere where ELMo is used (e.g. in the next-layer bi-LSTM).
    elmo = Elmo(options_file, weight_file,
                num_output_representations=2,
                do_layer_norm=False,
                dropout=dropout)
    if gpu:
        elmo.cuda()
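# Minimal usage sketch (hypothetical file paths); get_elmo sets the
# module-level `elmo` global, which is then callable on character ids.
get_elmo("elmo_options.json", "elmo_weights.hdf5", gpu=False, dropout=0.5)
ids = batch_to_ids([["an", "example"]])
reps = elmo(ids)["elmo_representations"]  # list of 2 tensors, one per mix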
def setup(self):
    print("Setting up Elmo Embedding")
    self.vocab = self.shared_resources
    self.config = self.shared_resources.config
    self.embeddings = self.shared_resources.embeddings
    if self.embeddings is not None:
        self.__default_vec = np.zeros([self.embeddings.shape[-1]])
    self.elmo = Elmo(options_file, weight_file, 1, dropout=0)
    if torch.cuda.is_available():
        self.elmo.cuda()
def create_elmo_embed(self, opt={}, prefix='elmo'):
    # TODO
    options_file = os.path.join(opt['data_dir'],
                                opt.get('{}_options_file'.format(prefix)))
    weights_file = os.path.join(opt['data_dir'],
                                opt.get('{}_weights_file'.format(prefix)))
    self.elmo = Elmo(options_file, weights_file, 2, dropout=0)
    self.elmo_output_dim = self.elmo.get_output_dim()
    return self.elmo_output_dim
def __init__(self, params):
    super(ElmoEmbedding, self).__init__()
    self.weight_file = weight_file
    self.options_file = options_file
    self.elmo_emb_size = params['emb_elmo_size']
    self.layer_weight = nn.Parameter(torch.tensor([0.5, 0.5], device=device))
    self.gamma = nn.Parameter(torch.ones(1, device=device))
    self.mlp = nn.Sequential(nn.Linear(1024, self.elmo_emb_size), nn.ReLU())
    self.elmo = Elmo(self.options_file, self.weight_file, 2)
    if USE_CUDA:
        self.elmo.cuda()
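# A hedged sketch of a forward method consistent with the parameters defined
# above (the actual forward is not in the snippet; `character_ids` is assumed):
def forward(self, character_ids):
    reps = self.elmo(character_ids)['elmo_representations']  # two (B, T, 1024)
    w = torch.softmax(self.layer_weight, dim=0)
    mixed = self.gamma * (w[0] * reps[0] + w[1] * reps[1])
    return self.mlp(mixed)  # (B, T, elmo_emb_size)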
def __init__(self,
             options_file: str,
             weight_file: str,
             do_layer_norm: bool = False,
             dropout: float = 0.5,
             requires_grad: bool = False,
             projection_dim: int = None,
             vocab_to_cache: List[str] = None,
             scalar_mix_parameters: List[float] = None) -> None:
    super(ElmoTokenEmbedder, self).__init__()
    self._elmo = Elmo(options_file,
                      weight_file,
                      1,
                      do_layer_norm=do_layer_norm,
                      dropout=dropout,
                      requires_grad=requires_grad,
                      vocab_to_cache=vocab_to_cache,
                      scalar_mix_parameters=scalar_mix_parameters)
    if projection_dim:
        self._projection = torch.nn.Linear(self._elmo.get_output_dim(),
                                           projection_dim)
        self.output_dim = projection_dim
    else:
        self._projection = None
        self.output_dim = self._elmo.get_output_dim()
def test_elmo_bilm_can_handle_higher_dimensional_input_with_cache(self):
    sentences = [["This", "is", "a", "sentence"],
                 ["Here", "'s", "one"],
                 ["Another", "one"]]
    vocab, tensor = self.get_vocab_and_both_elmo_indexed_ids(sentences)
    words_to_cache = list(vocab.get_token_to_index_vocabulary("tokens").keys())
    elmo_bilm = Elmo(self.options_file, self.weight_file, 1,
                     vocab_to_cache=words_to_cache)
    elmo_bilm.eval()
    individual_dim = elmo_bilm(tensor["character_ids"], tensor["tokens"])

    elmo_bilm = Elmo(self.options_file, self.weight_file, 1,
                     vocab_to_cache=words_to_cache)
    elmo_bilm.eval()
    expanded_word_ids = torch.stack([tensor["tokens"] for _ in range(4)], dim=1)
    expanded_char_ids = torch.stack([tensor["character_ids"] for _ in range(4)],
                                    dim=1)
    expanded_result = elmo_bilm(expanded_char_ids, expanded_word_ids)
    split_result = [x.squeeze(1)
                    for x in torch.split(expanded_result["elmo_representations"][0],
                                         1, dim=1)]
    for expanded in split_result:
        numpy.testing.assert_array_almost_equal(
            expanded.data.cpu().numpy(),
            individual_dim["elmo_representations"][0].data.cpu().numpy())
# print(passage_mask)
# question_lstm_mask = None; passage_lstm_mask = None

"""
################### EMBEDDING LAYER #########################################
"""
print("-------------- EMBEDDING LAYER ---------------")
if use_ELMO:
    if load_ELMO_experiments_flag:
        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        print("Loading ELMO")
        text_field_embedder = Elmo(options_file, weight_file, 2, dropout=0)
        print("ELMO weights loaded")
else:
    text_field_embedder = TextFieldEmbedder()
    token_embedders = dict()
    text_field_embedder = Embedding(embedding_dim=100, trainable=False)

## Parameters needed for the next layer
embedder_out_dim = text_field_embedder.get_output_dim()
print("Embedder output dimensions: ", embedder_out_dim)

## Propagate the batch through the embedder
embeddings_batch_question = text_field_embedder(character_ids_question)["elmo_representations"][1]
embeddings_batch_passage = text_field_embedder(character_ids_passage)["elmo_representations"][1]
# print(embeddings_batch_question)
class ElmoTokenEmbedder(TokenEmbedder):
    """
    Compute a single layer of ELMo representations.

    This class serves as a convenience when you only want to use one layer of
    ELMo representations at the input of your network.  It's essentially a
    wrapper around Elmo(num_output_representations=1, ...)

    Parameters
    ----------
    options_file : ``str``, required.
        An ELMo JSON options file.
    weight_file : ``str``, required.
        An ELMo hdf5 weight file.
    do_layer_norm : ``bool``, optional.
        Should we apply layer normalization (passed to ``ScalarMix``)?
    dropout : ``float``, optional.
        The dropout value to be applied to the ELMo representations.
    requires_grad : ``bool``, optional
        If True, compute gradient of ELMo parameters for fine tuning.
    projection_dim : ``int``, optional
        If given, we will project the ELMo embedding down to this dimension.
        We recommend that you try using ELMo with a lot of dropout and no
        projection first, but we have found a few cases where projection helps
        (particularly where there is very limited training data).
    vocab_to_cache : ``List[str]``, optional.
        A list of words to pre-compute and cache character convolutions for.
        If you use this option, the ElmoTokenEmbedder expects that you pass
        word indices of shape (batch_size, timesteps) to forward, instead of
        character indices.  If you use this option and pass a word which
        wasn't pre-cached, this will break.
    """
    def __init__(self,
                 options_file: str,
                 weight_file: str,
                 do_layer_norm: bool = False,
                 dropout: float = 0.5,
                 requires_grad: bool = False,
                 projection_dim: int = None,
                 vocab_to_cache: List[str] = None) -> None:
        super(ElmoTokenEmbedder, self).__init__()
        self._elmo = Elmo(options_file,
                          weight_file,
                          1,
                          do_layer_norm=do_layer_norm,
                          dropout=dropout,
                          requires_grad=requires_grad,
                          vocab_to_cache=vocab_to_cache)
        if projection_dim:
            self._projection = torch.nn.Linear(self._elmo.get_output_dim(),
                                               projection_dim)
        else:
            self._projection = None

    def get_output_dim(self):
        return self._elmo.get_output_dim()

    def forward(self,  # pylint: disable=arguments-differ
                inputs: torch.Tensor,
                word_inputs: torch.Tensor = None) -> torch.Tensor:
        """
        Parameters
        ----------
        inputs : ``torch.Tensor``
            Shape ``(batch_size, timesteps, 50)`` of character ids representing
            the current batch.
        word_inputs : ``torch.Tensor``, optional.
            If you passed a cached vocab, you can in addition pass a tensor of
            shape ``(batch_size, timesteps)``, which represent word ids which
            have been pre-cached.

        Returns
        -------
        The ELMo representations for the input sequence, shape
        ``(batch_size, timesteps, embedding_dim)``
        """
        elmo_output = self._elmo(inputs, word_inputs)
        elmo_representations = elmo_output['elmo_representations'][0]
        if self._projection:
            projection = self._projection
            for _ in range(elmo_representations.dim() - 2):
                projection = TimeDistributed(projection)
            elmo_representations = projection(elmo_representations)
        return elmo_representations

    # Custom vocab_to_cache logic requires a from_params implementation.
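# Minimal usage sketch (hypothetical local paths for the options/weights files;
# assumes batch_to_ids is imported from allennlp.modules.elmo):
embedder = ElmoTokenEmbedder("elmo_options.json", "elmo_weights.hdf5")
char_ids = batch_to_ids([["ELMo", "loves", "context"]])  # (1, 3, 50)
reps = embedder(char_ids)  # (1, 3, embedder.get_output_dim())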
@classmethod
def from_params(cls, vocab: Vocabulary, params: Params) -> 'ElmoTokenEmbedder':  # type: ignore
    # pylint: disable=arguments-differ
    params.add_file_to_archive('options_file')
    params.add_file_to_archive('weight_file')
    options_file = params.pop('options_file')
    weight_file = params.pop('weight_file')
    requires_grad = params.pop('requires_grad', False)
    do_layer_norm = params.pop_bool('do_layer_norm', False)
    dropout = params.pop_float("dropout", 0.5)
    namespace_to_cache = params.pop("namespace_to_cache", None)
    if namespace_to_cache is not None:
        vocab_to_cache = list(
            vocab.get_token_to_index_vocabulary(namespace_to_cache).keys())
    else:
        vocab_to_cache = None
    projection_dim = params.pop_int("projection_dim", None)
    params.assert_empty(cls.__name__)
    return cls(options_file=options_file,
               weight_file=weight_file,
               do_layer_norm=do_layer_norm,
               dropout=dropout,
               requires_grad=requires_grad,
               projection_dim=projection_dim,
               vocab_to_cache=vocab_to_cache)