Example #1
    def __init__(self, opt, shared):
        super(ElmoEncoder, self).__init__()
        self.opt = opt
        self.shared = shared

        self.num_output = 2 if opt.use_elmo_post == 1 else 1

        # pretrained option/weight files, selected by ELMo embedding size
        options_file = None
        weight_file = None
        if opt.elmo_in_size == 1024:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        elif opt.elmo_in_size == 512:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5"

        self.elmo = Elmo(options_file,
                         weight_file,
                         num_output_representations=self.num_output,
                         dropout=opt.elmo_dropout,
                         requires_grad=opt.fix_elmo == 0)

        # skip initialization
        for n, p in self.elmo.named_parameters():
            p.skip_init = True
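A usage sketch, not part of the original snippet: assuming opt carries the fields referenced above (with use_elmo_post == 0, so a single output representation), the wrapped Elmo module consumes character ids produced by batch_to_ids.

from allennlp.modules.elmo import batch_to_ids

# encoder = ElmoEncoder(opt, shared)  # hypothetical construction
character_ids = batch_to_ids([["A", "short", "sentence", "."]])
# reps = encoder.elmo(character_ids)["elmo_representations"]
# reps[0] has shape (1, 4, opt.elmo_in_size)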
Example #2
def embed_corpus_with_elmo(corpus_name="ag_news",
                           document_size=4000,
                           language_model="elmo"):
    from allennlp.modules.elmo import Elmo, batch_to_ids
    # code from https://github.com/allenai/allennlp/issues/2245
    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"

    model = Elmo(options_file, weight_file, 1, dropout=0)
    model.eval()
    model = model.to(torch.device("cuda"))
    tokens = []
    embeddings = []
    corpus = get_corpus(corpus_name, document_size)
    for doc in tqdm(corpus):
        token = doc.split()
        ids = batch_to_ids([token]).to(torch.device("cuda"))
        with torch.no_grad():
            hidden_states = model(ids)
        embedding = hidden_states["elmo_representations"][0][0]
        embedding = embedding.detach().cpu().numpy()
        tokens.append(token)
        embeddings.append(embedding)
    with open(f"{corpus_name}.{language_model}.pk", "wb") as f:
        pickle.dump({
            "tokens": tokens,
            "embeddings": embeddings
        },
                    f,
                    protocol=4)
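For completeness, a small sketch (assumed, not from the original) of loading the pickle written above back into memory:

import pickle

with open("ag_news.elmo.pk", "rb") as f:
    data = pickle.load(f)
tokens, embeddings = data["tokens"], data["embeddings"]
# embeddings[i] is a (len(tokens[i]), 1024) numpy array for document i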
Example #3
    def __init__(
        self,
        options_files: Dict[str, str],
        weight_files: Dict[str, str],
        do_layer_norm: bool = False,
        dropout: float = 0.5,
        requires_grad: bool = False,
        projection_dim: int = None,
        vocab_to_cache: List[str] = None,
        scalar_mix_parameters: List[float] = None,
        aligning_files: Dict[str, str] = None,
    ) -> None:
        super().__init__()

        if options_files.keys() != weight_files.keys():
            raise ConfigurationError("Keys for Elmo's options files and weights files don't match")

        aligning_files = aligning_files or {}
        output_dim = None
        for lang in weight_files.keys():
            name = "elmo_%s" % lang
            elmo = Elmo(
                options_files[lang],
                weight_files[lang],
                num_output_representations=1,
                do_layer_norm=do_layer_norm,
                dropout=dropout,
                requires_grad=requires_grad,
                vocab_to_cache=vocab_to_cache,
                scalar_mix_parameters=scalar_mix_parameters,
            )
            self.add_module(name, elmo)

            output_dim_tmp = elmo.get_output_dim()
            if output_dim is not None:
                # Verify that all ELMo embedders have the same output dimension.
                check_dimensions_match(
                    output_dim_tmp, output_dim, "%s output dim" % name, "elmo output dim"
                )

            output_dim = output_dim_tmp

        self.output_dim = output_dim

        if projection_dim:
            self._projection = torch.nn.Linear(output_dim, projection_dim)
            self.output_dim = projection_dim
        else:
            self._projection = None

        for lang in weight_files.keys():
            name = "aligning_%s" % lang
            aligning_matrix = torch.eye(output_dim)
            if lang in aligning_files and aligning_files[lang] != "":
                aligning_path = cached_path(aligning_files[lang])
                aligning_matrix = torch.FloatTensor(torch.load(aligning_path))

            aligning = torch.nn.Linear(output_dim, output_dim, bias=False)
            aligning.weight = torch.nn.Parameter(aligning_matrix, requires_grad=False)
            self.add_module(name, aligning)
Example #4
    def __init__(self, posts: List[Dict[str, Any]],
                 labels_map: Dict[str, Dict[str,
                                            int]], dictionary: Dictionary):
        self.posts = list(
            map(lambda post: parse_post(post, image_retriever="pretrained"),
                posts))
        self.labels_map = labels_map
        self.dictionary = dictionary
        options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        self.elmo = Elmo(options_file, weight_file, 2, dropout=0)
        self.elmo = self.elmo.to(device)

        # Preprocess posts data
        for post_id, _ in enumerate(self.posts):
            # Map str label to integer
            for label in self.posts[post_id]['label'].keys():
                self.posts[post_id]['label'][label] = self.labels_map[label][
                    self.posts[post_id]['label'][label]]

            # Convert caption to list of token indices
            self.posts[post_id]['caption'] += '.'
            character_ids = batch_to_ids(
                [self.posts[post_id]['caption'].split(" ")])
            character_ids = character_ids.to(
                device)  # (len(batch), max sentence length, max word length).
            x = self.elmo(character_ids)
            self.posts[post_id]['caption'] = x['elmo_representations'][0]
Example #5
    def __init__(self, cfg, phrase_embed_dim=1024, bidirectional=False):
        super(PhraseEmbeddingSentElmo, self).__init__()

        self.phrase_select_type = cfg.MODEL.VG.PHRASE_SELECT_TYPE
        self.bidirectional = bidirectional
        self.hidden_dim = phrase_embed_dim if not self.bidirectional else phrase_embed_dim // 2

        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"

        # Compute two different representations for each token.
        # Each representation is a linear weighted combination of the
        # 3 layers in ELMo (i.e., the char CNN and the outputs of the two BiLSTMs).
        self.elmo = Elmo(options_file,
                         weight_file,
                         2,
                         dropout=0,
                         requires_grad=False)
        self.elmo.eval()
        self.seq_rnn = nn.GRU(input_size=1024,
                              hidden_size=self.hidden_dim,
                              num_layers=1,
                              bias=True,
                              batch_first=True,
                              dropout=0,
                              bidirectional=bidirectional)
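As the comment above notes, passing 2 for num_output_representations yields two independently scalar-mixed views of the same three biLM layers. A minimal sketch of pulling both out, assuming the module is used as built above:

from allennlp.modules.elmo import batch_to_ids

character_ids = batch_to_ids([["a", "small", "test"]])
# out = self.elmo(character_ids)            # inside this module's forward
# rep0, rep1 = out["elmo_representations"]  # each of shape (1, 3, 1024)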
Example #6
def getELMo(vocab, unidir, downstream=False, mix_parameters=[1, 1, 1]):
    options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"

    vocab_to_cache = sorted(vocab.keys(), key=lambda t: vocab[t])
    if downstream:
        elmo = Elmo(options_file,
                    weight_file,
                    num_output_representations=1,
                    vocab_to_cache=vocab_to_cache)
    else:
        elmo = Elmo(options_file,
                    weight_file,
                    num_output_representations=1,
                    scalar_mix_parameters=mix_parameters,
                    vocab_to_cache=vocab_to_cache)

    if unidir:
        for l in ["backward_layer_0", "backward_layer_1"]:
            layer = getattr(elmo._elmo_lstm._elmo_lstm, l)
            for s in [
                    "input_linearity", "state_linearity", "state_projection"
            ]:
                subject = getattr(layer, s)
                for a in ["weight", "bias"]:
                    if hasattr(subject, a) and getattr(subject, a) is not None:
                        target = getattr(subject, a)
                        target.data.fill_(0.0)

    return elmo
Example #7
    def __init__(self, config, model):
        super().__init__()
        self.config = config
        self.logger = self.config.logger
        self.model = model
        self.model_path = config.dir_model
        self.use_elmo = config.use_elmo

        self.idx_to_tag = {
            idx: tag
            for tag, idx in self.config.vocab_tags.items()
        }

        self.criterion = CRF(self.config.ntags)
        self.optimizer = optim.Adam(self.model.parameters())

        if self.use_elmo:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
            self.elmo = Elmo(options_file, weight_file, 2, dropout=0)
        else:
            self.load_emb()

        if USE_GPU:
            self.use_cuda = True
            self.logger.info("GPU found.")
            self.model = model.cuda()
            self.criterion = self.criterion.cuda()
            if self.use_elmo:
                self.elmo = self.elmo.cuda()
                print("Moved elmo to cuda")
        else:
            self.model = model.cpu()
            self.use_cuda = False
            self.logger.info("No GPU found.")
Example #8
 def __init__(self,
              char_vocab_size,
              glove_vocab_size,
              word_vocab_size,
              embed_dim,
              dropout,
              elmo=False,
              elmo_options_file=None,
              elmo_weights_file=None,
              glove_cpu=False):
     super(Embedding, self).__init__()
     self.word_embedding = WordEmbedding(word_vocab_size, embed_dim)
     self.char_embedding = CharEmbedding(char_vocab_size, embed_dim)
     self.glove_embedding = WordEmbedding(glove_vocab_size,
                                          embed_dim,
                                          requires_grad=False,
                                          cpu=glove_cpu)
     self.output_size = 2 * embed_dim
     self.highway1 = Highway(self.output_size, dropout)
     self.highway2 = Highway(self.output_size, dropout)
     if elmo:
         assert elmo_options_file is not None and elmo_weights_file is not None
         from allennlp.modules.elmo import Elmo
         self.elmo = Elmo(elmo_options_file,
                          elmo_weights_file,
                          1,
                          dropout=0)
         self.output_size += self.elmo.get_output_dim()
     else:
         self.elmo = None
Example #9
    def __init__(self, config, model_dir, device=None):
        self.config = config
        self.model_dir = model_dir
        self.log_file = os.path.join(model_dir, 'log.csv')

        self.device = get_device(device)

        self.slu_cls = getattr(modules, config['model']['name'])
        self.slu = self.slu_cls(config['model'])

        self.use_elmo = config.get("use_elmo", False)
        if self.use_elmo:
            option_file = config["elmo"]["option_file"]
            weight_file = config["elmo"]["weight_file"]
            self.elmo = Elmo(option_file, weight_file, 1, dropout=0)
            self.slu.elmo_scalar_mixes = nn.ModuleList(self.elmo._scalar_mixes)

            if len(config["elmo"].get("checkpoint", "")) > 0:
                self.elmo._elmo_lstm = torch.load(
                    config["elmo"]["checkpoint"]).elmo
                for param in self.elmo._elmo_lstm.parameters():
                    param.requires_grad_(False)

            self.elmo.to(self.device)

        self.slu.to(self.device)
Example #10
    def __init__(
        self,
        options_file: str = (
            "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/"
            "elmo_2x4096_512_2048cnn_2xhighway_options.json"
        ),
        weight_file: str = (
            "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/"
            "elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        ),
        do_layer_norm: bool = False,
        dropout: float = 0.5,
        requires_grad: bool = False,
        projection_dim: int = None,
        vocab_to_cache: List[str] = None,
        scalar_mix_parameters: List[float] = None,
    ) -> None:
        super().__init__()

        self._elmo = Elmo(
            options_file,
            weight_file,
            1,
            do_layer_norm=do_layer_norm,
            dropout=dropout,
            requires_grad=requires_grad,
            vocab_to_cache=vocab_to_cache,
            scalar_mix_parameters=scalar_mix_parameters,
        )
        if projection_dim:
            self._projection = torch.nn.Linear(self._elmo.get_output_dim(),
                                               projection_dim)
            self.output_dim = projection_dim
        else:
            self._projection = None
            self.output_dim = self._elmo.get_output_dim()
Example #11
def main():
    assert config['config_target'] == 'naive_psychology'

    if args.weight_name == 'elmo':
        lm = Elmo(args.elmo_option_file, args.elmo_weight_file, 1, dropout=0)
        tokenizer = MosesTokenizer(lang='en')
    else:
        # tokenizer
        tokenizer = AutoTokenizer.from_pretrained(
            args.weight_name, cache_dir=args.cache_dir
        )

        # language model
        lm = AutoModel.from_pretrained(args.weight_name,
                                       cache_dir=args.cache_dir)
    if args.gpu_id != -1:
        lm = lm.cuda(args.gpu_id)


    # dataset
    corpus = NaivePsychology(config['file_path'])
    # from the original dev, extract our train split
    train_sids, dev_sids = load_splits(config['split_dir'])
    process_split(corpus.dev_generator, tokenizer, lm, 'train', train_sids)

    # from the original dev, extract our dev split
    process_split(corpus.dev_generator, tokenizer, lm, 'dev', dev_sids)

    test_sids = set([sid for sid, _ in corpus.test_generator()])

    process_split(corpus.test_generator, tokenizer, lm, 'test', test_sids)
Example #12
    def __init__(
        self,
        options_file: str,
        weight_file: str,
        do_layer_norm: bool = False,
        dropout: float = 0.5,
        requires_grad: bool = False,
        projection_dim: int = None,
        vocab_to_cache: List[str] = None,
        scalar_mix_parameters: List[float] = None,
    ) -> None:
        super().__init__()

        self._elmo = Elmo(
            options_file,
            weight_file,
            1,
            do_layer_norm=do_layer_norm,
            dropout=dropout,
            requires_grad=requires_grad,
            vocab_to_cache=vocab_to_cache,
            scalar_mix_parameters=scalar_mix_parameters,
        )
        if projection_dim:
            self._projection = torch.nn.Linear(self._elmo.get_output_dim(),
                                               projection_dim)
            self.output_dim = projection_dim
        else:
            self._projection = None
            self.output_dim = self._elmo.get_output_dim()
Example #13
class ELMoVectors(object):
    def __init__(self, size_elmo, device):
        self.size_elmo = size_elmo
        self.device = device
        self.model = Elmo(options_files[size_elmo],
                          weight_files[size_elmo],
                          1,
                          dropout=0.,
                          requires_grad=False)
        self.model.to(device)

    def get_embedding_size(self):
        return elmo_emb_size[self.size_elmo]

    def transform(self, X):
        # split each text into tokens for per-sentence character embedding
        X = self.tokenize(X)
        word_token = batch_to_ids(X).to(self.device)
        word_emb = self.model(word_token)

        # free the character-id tensor once the embeddings are computed
        del word_token

        return word_emb['elmo_representations'][0]

    def tokenize(self, X):
        for i in range(len(X)):
            X[i] = X[i].split(' ')
        return X
Example #14
class getElmo(nn.Module):
    def __init__(self, layer=2, dropout=0, out_dim=100, gpu=True):
        super(getElmo, self).__init__()
        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        self.dropout = dropout
        self.gpu = gpu
        self.Elmo = Elmo(options_file, weight_file, layer, dropout=dropout)
        self.Elmo.eval()
        self.layers2one = nn.Linear(
            layer, 1).cuda() if self.gpu else nn.Linear(layer, 1)
        self.optLinear = nn.Linear(
            1024, out_dim).cuda() if self.gpu else nn.Linear(1024, out_dim)

    def forward(self, texts):
        word_idxs = batch_to_ids(texts).cuda() if self.gpu else batch_to_ids(
            texts)
        elmo_embs = self.Elmo.forward(word_idxs)
        elmo_reps = torch.stack(elmo_embs['elmo_representations'],
                                dim=-1).cuda() if self.gpu else torch.stack(
                                    elmo_embs['elmo_representations'], dim=-1)
        elmo_decrease_layer = self.layers2one(elmo_reps).squeeze()
        elmo_fit_hidden = self.optLinear(elmo_decrease_layer)
        mask = elmo_embs['mask']

        return elmo_fit_hidden, mask
Example #15
    def elmo_encode(self, data, __id2word):
        """
        get the id2word from vocab, then convert to id
        from allennlp.modules.elmo import Elmo, batch_to_ids
        batch_to_id fills to the max sentence length, which could be less than desired
        So further fill it to get to the max sent length
        """
        data_text = [self.glove_tokenizer(x, __id2word) for x in data]

        with torch.no_grad():
            elmo = Elmo(options_file, weight_file, 2, dropout=0).cuda()
            elmo.eval()
            character_ids = batch_to_ids(data_text).cuda()

            row_num = character_ids.shape[0]
            elmo_dim = self.elmo_dim

            if torch.sum(character_ids) != 0:
                elmo_emb = elmo(character_ids)['elmo_representations']
                elmo_emb = (elmo_emb[0] + elmo_emb[1]) / 2  # avg of two layers
            else:
                elmo_emb = torch.zeros([row_num, self.sent_pad_len, elmo_dim],
                                       dtype=torch.float)

        sent_len = elmo_emb.shape[1]

        if sent_len < self.sent_pad_len:
            fill_sent_len = self.sent_pad_len - sent_len
            # create a bunch of 0's to fill it up
            filler = torch.zeros([row_num, fill_sent_len, elmo_dim],
                                 dtype=torch.float)
            elmo_emb = torch.cat((elmo_emb, filler.cuda()), dim=1)
        return elmo_emb.cuda()
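The padding caveat in the docstring is easy to see in isolation: batch_to_ids pads only to the longest sentence in the batch, hence the manual filler above. A standalone illustration:

from allennlp.modules.elmo import batch_to_ids

ids = batch_to_ids([["one", "two", "three"], ["one"]])
print(ids.shape)  # torch.Size([2, 3, 50]) -- padded to the batch max, not sent_pad_len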
Example #16
class ElmoEmbedding:
    def __init__(self, dim):
        if dim == 2048:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        elif dim == 512:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5"
        self.dim = dim
        self.elmo = Elmo(options_file, weight_file, 2, dropout=0)
        if func.gpu_available():
            self.elmo = self.elmo.cuda()
        self.elmo.eval()
        self.load()


    def save(self):
        pass


    def load(self):
        self.cache = DiskDict(f'./generate/elmo.{self.dim}.cache')


    def convert(self, sentences):
        not_hit = set()
        for sent in sentences:
            key = self.make_key(sent)
            if key not in self.cache:
                not_hit.add(key)
        not_hit = list(not_hit)
        if not_hit:
            embeddings, masks = self.convert_impl([self.make_sentence(key) for key in not_hit])
            for key, embedding, mask in zip(not_hit, torch.unbind(embeddings), torch.unbind(masks)):
                embedding = embedding[:mask.sum()]
                self.cache[key] = embedding.tolist()
        embeddings = [func.tensor(self.cache[self.make_key(sent)]) for sent in sentences]
        mlen = max([e.shape[0] for e in embeddings])
        embeddings = [func.pad_zeros(e, mlen, 0) for e in embeddings]
        embeddings = torch.stack(embeddings)
        assert embeddings.requires_grad == False
        return embeddings


    def make_key(self, sent):
        return '$$'.join(sent)


    def make_sentence(self, key):
        return key.split('$$')


    def convert_impl(self, sentences):
        character_ids = func.tensor(batch_to_ids(sentences))
        m = self.elmo(character_ids)
        embeddings = m['elmo_representations']
        embeddings = torch.cat(embeddings, -1)
        mask = m['mask']
        return embeddings, mask
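A hedged usage sketch for the cache-backed embedder above; func and DiskDict are project-local helpers the class assumes, so treat this as illustrative rather than runnable as-is:

embedder = ElmoEmbedding(dim=2048)
batch = [["The", "first", "sentence"], ["Another", "one"]]
embeddings = embedder.convert(batch)  # (2, 3, 2048): two 1024-dim mixes concatenated, zero-padded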
Example #17
 def __init__(self, **kwargs):
     kwargs.pop('use_cuda')
     self._embedder = Elmo(config.ELMO_OPTIONS,
                           config.ELMO_WEIGHTS,
                           num_output_representations=1,
                           **kwargs)
     self._embedder = self._embedder.cuda()
     self.embedding_dim = 1024
Example #18
 def __init__(self, size_elmo, device):
     self.size_elmo = size_elmo
     self.device = device
     self.model = Elmo(options_files[size_elmo],
                       weight_files[size_elmo],
                       1,
                       dropout=0.,
                       requires_grad=False)
     self.model.to(device)
Example #19
    def __init__(self, args):

        super(ElmoWrapper, self).__init__()

        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
        self.elmo = Elmo(options_file, weight_file, 2,
                         dropout=0.0).to(args.device)  # 2 layers
        self.elmo.eval()
Example #20
 def __init__(self, device):
     self.device = device
     bioelmo_options_file = "/home/soumyasharma/datafiles/biomed_elmo_options.json"
     bioelmo_weight_file = "/home/soumyasharma/datafiles/biomed_elmo_weights.hdf5"
     # Compute two different representations for each token.
     # Each representation is a linear weighted combination of the
     # 3 layers in ELMo (i.e., the char CNN and the outputs of the two BiLSTMs).
     self.model = Elmo(bioelmo_options_file, bioelmo_weight_file, 2, dropout=0)
     self.model = self.model.to(self.device)
Example #21
    def __init__(self,
                 emb_dim,
                 h_dim,
                 n_labels,
                 v_size,
                 gpu=True,
                 v_vec=None,
                 batch_first=True,
                 emb_type=None,
                 elmo_model_dir=None):
        super(BiLSTM, self).__init__()
        self.gpu = gpu
        self.h_dim = h_dim
        if self.h_dim is None:
            self.h_dim = emb_dim + 36
        if emb_type == 'ELMo':
            options_file = f'{elmo_model_dir}/options.json'
            weight_file = f'{elmo_model_dir}/weights.hdf5'
            self.word_embed = Elmo(options_file,
                                   weight_file,
                                   num_output_representations=1,
                                   dropout=0)
            if gpu:
                self.word_embed = self.word_embed.cuda()
        elif emb_type == 'ELMoForManyLangs':
            from elmoformanylangs import Embedder
            e = Embedder(elmo_model_dir)
            self.word_embed = e.sents2elmo
        elif emb_type == 'None':
            self.word_embed = None
        else:
            self.word_embed = nn.Embedding(v_size, emb_dim, padding_idx=0)
        if v_vec is not None:
            v_vec = torch.tensor(v_vec)
            self.word_embed.weight.data.copy_(v_vec)

        feature_embed_layers = []
        feature_embed_size = {
            "feature:0": 25,
            "feature:1": 26,
            "feature:2": 12,
            "feature:3": 6,
            "feature:4": 94,
            "feature:5": 32
        }
        for key in feature_embed_size:
            size = feature_embed_size[key]
            feature_embed = nn.Embedding(size, 5, padding_idx=0)
            feature_embed.weight.data[0] = torch.zeros(5)
            feature_embed_layers.append(feature_embed)
        self.feature_embed_layers = nn.ModuleList(feature_embed_layers)
        self.drop_target = nn.Dropout(p=0.2)
        self.lstm = nn.LSTM(input_size=emb_dim + 36,
                            hidden_size=self.h_dim,
                            batch_first=batch_first,
                            bidirectional=True)
        self.l1 = nn.Linear(self.h_dim * 2, n_labels)
Example #22
 def init_elmo(self):
     '''
     initialize the ELMo model
     '''
     self.elmo = Elmo(self.opt.elmo_options_file, self.opt.elmo_weight_file,
                      1)
     for param in self.elmo.parameters():
         param.requires_grad = False
     self.word_dim = self.opt.elmo_dim
Example #23
def load_elmo(opt):
    options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
    weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"

    elmo = Elmo(options_file, weight_file, 3, dropout=0,
                requires_grad=False)  # three separately scalar-mixed output representations
    if opt.gpuid != -1:
        elmo = elmo.cuda()
    return elmo
Example #24
def main():
    # Load generated model file
    archive = load_archive(args.archive_path)
    model = archive.model
    finetuned_elmo_state_dict = model._contextualizer._elmo.state_dict()

    # Load ELMo options and weights file
    elmo = Elmo(args.options_file, args.weight_file, 1)
    original_elmo_state_dict = elmo.state_dict()

    # Get the average parameter shift in the token embedder.
    token_embedder_total_shift = 0.0
    token_embedder_num_params = 0.0
    for key, parameter in finetuned_elmo_state_dict.items():
        if "token_embedder" in key:
            token_embedder_num_params += parameter.numel()
            token_embedder_total_shift += torch.abs(
                parameter - original_elmo_state_dict[key]).sum().item()
    logger.info("Average Shift (L1 distance) in token embedder: {}".format(
        token_embedder_total_shift / token_embedder_num_params))

    # Get the average parameter shift in the first layer of the LSTM.
    layer_0_total_shift = 0.0
    layer_0_num_params = 0.0
    for key, parameter in finetuned_elmo_state_dict.items():
        if "backward_layer_0" in key or "forward_layer_0" in key:
            layer_0_num_params += parameter.numel()
            layer_0_total_shift += torch.abs(
                parameter - original_elmo_state_dict[key]).sum().item()
    logger.info("Average Shift (L1 distance) in LSTM Layer 0: {}".format(
        layer_0_total_shift / layer_0_num_params))

    # Get the average parameter shift in the second layer of the LSTM.
    layer_1_total_shift = 0.0
    layer_1_num_params = 0.0
    for key, parameter in finetuned_elmo_state_dict.items():
        if "backward_layer_1" in key or "forward_layer_1" in key:
            layer_1_num_params += parameter.numel()
            layer_1_total_shift += torch.abs(
                parameter - original_elmo_state_dict[key]).sum().item()
    logger.info("Average Shift (L1 distance) in LSTM Layer 1: {}".format(
        layer_1_total_shift / layer_1_num_params))

    # Print the scalar mix parameters of the fine-tuned model.
    normed_scalars = torch.nn.functional.softmax(torch.cat([
        parameter for key, parameter in finetuned_elmo_state_dict.items()
        if "scalar_parameters" in key
    ]),
                                                 dim=0)
    normed_scalars = torch.split(normed_scalars, split_size_or_sections=1)
    normed_scalars = [normed_scalar.item() for normed_scalar in normed_scalars]
    logger.info(
        "Normalized Scalar Mix of fine-tuned model: {}".format(normed_scalars))

    # Print the gamma
    logger.info("Gamma of fine-tuned model: {}".format(
        finetuned_elmo_state_dict["scalar_mix_0.gamma"].item()))
Example #25
 def __init__(self, config):
     super().__init__()
     elmo_path = config['elmo']
     elmo_option_file = os.path.join(
         elmo_path, "elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json")
     elmo_weight_file = os.path.join(
         elmo_path, "elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5")
     self.elmo = Elmo(elmo_option_file, elmo_weight_file, 2)
     for p in self.elmo.parameters():
         p.requires_grad = False
Example #26
    def __init__(self,
                 options_file="/beegfs/ijh216/elmo/options.json",
                 weights_file="/beegfs/ijh216/elmo/weights.hdf5"):

        super(ElmoEmbedder, self).__init__()

        self.elmo = Elmo(options_file, weights_file, 1, requires_grad=False)

        if torch.cuda.is_available():
            self.elmo = self.elmo.cuda()
Example #27
def get_elmo(options_file, weight_file, gpu, dropout):
    global elmo
    # Create the ELMo class. This example computes two output representation
    # layers, each with its own scalar mix weights.
    # We recommend adding dropout (50% is a good default) either here or
    # elsewhere where ELMo is used (e.g., in the next-layer bi-LSTM).
    elmo = Elmo(options_file, weight_file, num_output_representations=2,
                do_layer_norm=False, dropout=dropout)

    if gpu:
        elmo.cuda()
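Following the comment above, a minimal sketch of calling get_elmo and embedding one batch (the standard AllenNLP option/weight URLs are assumed):

from allennlp.modules.elmo import batch_to_ids

options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
get_elmo(options_file, weight_file, gpu=False, dropout=0.5)
out = elmo(batch_to_ids([["Hello", "world"]]))
rep0, rep1 = out["elmo_representations"]  # two separately mixed (1, 2, 1024) tensors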
Example #28
    def setup(self):
        print("Setting up Elmo Embedding")
        self.vocab = self.shared_resources
        self.config = self.shared_resources.config
        self.embeddings = self.shared_resources.embeddings
        if self.embeddings is not None:
            self.__default_vec = np.zeros([self.embeddings.shape[-1]])

        self.elmo = Elmo(options_file, weight_file, 1, dropout=0)
        if torch.cuda.is_available():
            self.elmo.cuda()
Example #29
    def create_elmo_embed(self, opt={}, prefix='elmo'):
        # TODO
        options_file = os.path.join(opt['data_dir'],
                                    opt.get('{}_options_file'.format(prefix)))
        weights_file = os.path.join(opt['data_dir'],
                                    opt.get('{}_weights_file'.format(prefix)))
        self.elmo = Elmo(options_file, weights_file, 2, dropout=0)

        self.elmo_output_dim = self.elmo.get_output_dim()

        return self.elmo_output_dim
Example #30
 def __init__(self, params):
     super(ElmoEmbedding, self).__init__()
     self.weight_file = weight_file
     self.options_file = options_file
     self.elmo_emb_size = params['emb_elmo_size']
     self.layer_weight = nn.Parameter(torch.tensor([0.5,0.5], device = device))
     self.gamma = nn.Parameter(torch.ones(1,device = device))
     self.mlp = nn.Sequential(nn.Linear(1024, self.elmo_emb_size), nn.ReLU())
     self.elmo = Elmo(self.options_file, self.weight_file, 2)
     if USE_CUDA:
         self.elmo.cuda()
Example #31
    def __init__(self,
                 options_file: str,
                 weight_file: str,
                 do_layer_norm: bool = False,
                 dropout: float = 0.5,
                 requires_grad: bool = False,
                 projection_dim: int = None,
                 vocab_to_cache: List[str] = None,
                 scalar_mix_parameters: List[float] = None) -> None:
        super(ElmoTokenEmbedder, self).__init__()

        self._elmo = Elmo(options_file,
                          weight_file,
                          1,
                          do_layer_norm=do_layer_norm,
                          dropout=dropout,
                          requires_grad=requires_grad,
                          vocab_to_cache=vocab_to_cache,
                          scalar_mix_parameters=scalar_mix_parameters)
        if projection_dim:
            self._projection = torch.nn.Linear(self._elmo.get_output_dim(), projection_dim)
            self.output_dim = projection_dim
        else:
            self._projection = None
            self.output_dim = self._elmo.get_output_dim()
Example #32
    def test_elmo_bilm_can_handle_higher_dimensional_input_with_cache(self):
        sentences = [["This", "is", "a", "sentence"],
                     ["Here", "'s", "one"],
                     ["Another", "one"]]
        vocab, tensor = self.get_vocab_and_both_elmo_indexed_ids(sentences)
        words_to_cache = list(vocab.get_token_to_index_vocabulary("tokens").keys())
        elmo_bilm = Elmo(self.options_file, self.weight_file, 1, vocab_to_cache=words_to_cache)
        elmo_bilm.eval()

        individual_dim = elmo_bilm(tensor["character_ids"], tensor["tokens"])
        elmo_bilm = Elmo(self.options_file, self.weight_file, 1, vocab_to_cache=words_to_cache)
        elmo_bilm.eval()

        expanded_word_ids = torch.stack([tensor["tokens"] for _ in range(4)], dim=1)
        expanded_char_ids = torch.stack([tensor["character_ids"] for _ in range(4)], dim=1)
        expanded_result = elmo_bilm(expanded_char_ids, expanded_word_ids)
        split_result = [x.squeeze(1) for x in torch.split(expanded_result["elmo_representations"][0], 1, dim=1)]
        for expanded in split_result:
            numpy.testing.assert_array_almost_equal(expanded.data.cpu().numpy(),
                                                    individual_dim["elmo_representations"][0].data.cpu().numpy())
Example #33
"""
################### EMBEDDING LAYER  #########################################
"""
print ("-------------- EMBEDDING LAYER ---------------")
if (use_ELMO):
    if (load_ELMO_experiments_flag):
        options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
        weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"

        print ("Loading ELMO")
        text_field_embedder = Elmo(options_file, weight_file, 2, dropout=0)
        print ("ELMO weights loaded")
else:
    text_field_embedder = Embedding(embedding_dim=100, trainable=False)

## Parameters needed for the next layer
embedder_out_dim = text_field_embedder.get_output_dim()

print ("Embedder output dimensions: ", embedder_out_dim)
## Propagate the Batch though the Embedder
embeddings_batch_question = text_field_embedder(character_ids_question)["elmo_representations"][1]
embeddings_batch_passage = text_field_embedder(character_ids_passage)["elmo_representations"][1]

#print (embeddings_batch_question)
Example #34
class ElmoTokenEmbedder(TokenEmbedder):
    """
    Compute a single layer of ELMo representations.

    This class serves as a convenience when you only want to use one layer of
    ELMo representations at the input of your network.  It's essentially a wrapper
    around ``Elmo(num_output_representations=1, ...)``.

    Parameters
    ----------
    options_file : ``str``, required.
        An ELMo JSON options file.
    weight_file : ``str``, required.
        An ELMo hdf5 weight file.
    do_layer_norm : ``bool``, optional.
        Should we apply layer normalization (passed to ``ScalarMix``)?
    dropout : ``float``, optional.
        The dropout value to be applied to the ELMo representations.
    requires_grad : ``bool``, optional
        If True, compute gradient of ELMo parameters for fine tuning.
    projection_dim : ``int``, optional
        If given, we will project the ELMo embedding down to this dimension.  We recommend that you
        try using ELMo with a lot of dropout and no projection first, but we have found a few cases
        where projection helps (particularly where there is very limited training data).
    vocab_to_cache : ``List[str]``, optional, (default = None).
        A list of words to pre-compute and cache character convolutions
        for. If you use this option, the ElmoTokenEmbedder expects that you pass word
        indices of shape (batch_size, timesteps) to forward, instead
        of character indices. If you use this option and pass a word which
        wasn't pre-cached, this will break.
    """
    def __init__(self,
                 options_file: str,
                 weight_file: str,
                 do_layer_norm: bool = False,
                 dropout: float = 0.5,
                 requires_grad: bool = False,
                 projection_dim: int = None,
                 vocab_to_cache: List[str] = None) -> None:
        super(ElmoTokenEmbedder, self).__init__()

        self._elmo = Elmo(options_file,
                          weight_file,
                          1,
                          do_layer_norm=do_layer_norm,
                          dropout=dropout,
                          requires_grad=requires_grad,
                          vocab_to_cache=vocab_to_cache)
        if projection_dim:
            self._projection = torch.nn.Linear(self._elmo.get_output_dim(), projection_dim)
        else:
            self._projection = None

    def get_output_dim(self):
        return self._elmo.get_output_dim()

    def forward(self, # pylint: disable=arguments-differ
                inputs: torch.Tensor,
                word_inputs: torch.Tensor = None) -> torch.Tensor:
        """
        Parameters
        ----------
        inputs: ``torch.Tensor``
            Shape ``(batch_size, timesteps, 50)`` of character ids representing the current batch.
        word_inputs : ``torch.Tensor``, optional.
            If you passed a cached vocab, you can in addition pass a tensor of shape
            ``(batch_size, timesteps)``, which represent word ids which have been pre-cached.

        Returns
        -------
        The ELMo representations for the input sequence, shape
        ``(batch_size, timesteps, embedding_dim)``
        """
        elmo_output = self._elmo(inputs, word_inputs)
        elmo_representations = elmo_output['elmo_representations'][0]
        if self._projection:
            projection = self._projection
            for _ in range(elmo_representations.dim() - 2):
                projection = TimeDistributed(projection)
            elmo_representations = projection(elmo_representations)
        return elmo_representations

    # Custom vocab_to_cache logic requires a from_params implementation.
    @classmethod
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'ElmoTokenEmbedder':  # type: ignore
        # pylint: disable=arguments-differ
        params.add_file_to_archive('options_file')
        params.add_file_to_archive('weight_file')
        options_file = params.pop('options_file')
        weight_file = params.pop('weight_file')
        requires_grad = params.pop('requires_grad', False)
        do_layer_norm = params.pop_bool('do_layer_norm', False)
        dropout = params.pop_float("dropout", 0.5)
        namespace_to_cache = params.pop("namespace_to_cache", None)
        if namespace_to_cache is not None:
            vocab_to_cache = list(vocab.get_token_to_index_vocabulary(namespace_to_cache).keys())
        else:
            vocab_to_cache = None
        projection_dim = params.pop_int("projection_dim", None)
        params.assert_empty(cls.__name__)
        return cls(options_file=options_file,
                   weight_file=weight_file,
                   do_layer_norm=do_layer_norm,
                   dropout=dropout,
                   requires_grad=requires_grad,
                   projection_dim=projection_dim,
                   vocab_to_cache=vocab_to_cache)
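A short usage sketch for the embedder above; the standard option/weight URLs are assumed and character ids come from batch_to_ids as in the earlier examples:

from allennlp.modules.elmo import batch_to_ids

options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
embedder = ElmoTokenEmbedder(options_file, weight_file, dropout=0.0)
character_ids = batch_to_ids([["ELMo", "loves", "context"]])
representations = embedder(character_ids)  # (1, 3, embedder.get_output_dim())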