Example #1
    def _run_test(self, requires_grad):
        options_file = os.path.join(FIXTURES, 'options.json')
        weight_file = os.path.join(FIXTURES, 'lm_weights.hdf5')
        embedder = ElmoTokenEmbedder(options_file,
                                     weight_file,
                                     requires_grad=requires_grad)
        batch_size = 3
        seq_len = 4
        char_ids = Variable(
            torch.from_numpy(
                numpy.random.randint(0, 262, (batch_size, seq_len, 50))))
        embeddings = embedder(char_ids)
        loss = embeddings.sum()
        loss.backward()

        elmo_grads = [
            param.grad for name, param in embedder.named_parameters()
            if '_elmo_lstm' in name
        ]
        if requires_grad:
            # None of the elmo grads should be None.
            assert all([grad is not None for grad in elmo_grads])
        else:
            # All of the elmo grads should be None.
            assert all([grad is None for grad in elmo_grads])
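The helper above is parameterised on `requires_grad`; a test class would normally call it once per setting, roughly as in this sketch (method names are illustrative, not from the original source):
    # Hypothetical driver methods; not part of the original test class.
    def test_elmo_token_embedder_with_grad(self):
        self._run_test(requires_grad=True)

    def test_elmo_token_embedder_without_grad(self):
        self._run_test(requires_grad=False)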
Example #2
class ElmoWordEmbedding(torch.nn.Module):
    """
    Compute a single layer of ELMo word representations.
    """
    def __init__(self,
                 options_file: str,
                 weight_file: str,
                 vocab_to_cache: List[str],
                 do_layer_norm: bool = False,
                 dropout: float = 0.5,
                 requires_grad: bool = False,
                 projection_dim: int = None) -> None:
        super(ElmoWordEmbedding, self).__init__()

        self._elmo = ElmoTokenEmbedder(options_file=options_file,
                                       weight_file=weight_file,
                                       do_layer_norm=do_layer_norm,
                                       dropout=dropout,
                                       requires_grad=requires_grad,
                                       projection_dim=projection_dim,
                                       vocab_to_cache=vocab_to_cache)

        self._projection = self._elmo._projection

    def get_output_dim(self):
        if self._projection is not None:
            return self._projection.out_features
        else:
            return self._elmo.get_output_dim()

    def forward(self, word_inputs: torch.Tensor) -> torch.Tensor:
        if len(word_inputs.shape) == 1:
            word_inputs = word_inputs.unsqueeze(dim=-1)
        return self._elmo.forward(word_inputs, word_inputs)

    @property
    def weight(self):
        embedding_weight = torch.cat(
            (self.word_embedding.weight, self.word_embedding.weight), dim=1)
        if self._projection:
            embedding_weight = self._projection(embedding_weight)
        return embedding_weight

    @property
    def num_embeddings(self):
        return self.word_embedding.num_embeddings

    @property
    def word_embedding(self):
        return self._elmo._elmo._elmo_lstm._word_embedding
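A minimal usage sketch for the wrapper above, assuming local ELMo fixture files and a toy cached vocabulary (paths and values are illustrative only):
import torch

# Illustrative paths; substitute real ELMo options/weights files.
vocab_to_cache = ['<pad>', 'hello', 'world']
embedder = ElmoWordEmbedding(options_file='fixtures/elmo/options.json',
                             weight_file='fixtures/elmo/lm_weights.hdf5',
                             vocab_to_cache=vocab_to_cache)
word_ids = torch.LongTensor([[1, 2]])   # indices into vocab_to_cache
vectors = embedder(word_ids)            # shape: (1, 2, embedder.get_output_dim())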
Example #3
def load_elmo_embeddings(large=True):
    """
    Loads pre-trained ELMo embeddings ('large' model by default).
    
    Parameters
    ----------
    large: bool
        Set to True to load the Large ELMo model; False for small ELMo model
    
    Returns
    -------
    TextFieldEmbedder
    """
    if large:  # use the Large pre-trained model
        print("Loading LARGE ELMo..")
        options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json'
        weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5'

    else:  # use the Small pre-trained model
        print("Loading SMALL ELMo..")
        options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'
        weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'

    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})
    print("Pre-trained ELMo loaded..")
    return word_embeddings
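Hedged usage sketch for the loader above; the dimensions quoted in the comment are the standard ELMo output sizes and are stated here as an assumption:
word_embeddings = load_elmo_embeddings(large=False)
# Assumed dims: 256 for the small pre-trained model, 1024 for the large 5.5B model.
print(word_embeddings.get_output_dim())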
Example #4
def get_elmo_embedder():
    options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'

    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})
    return word_embeddings
Example #5
def get_embedder_info(
    embedder_type: str
) -> Tuple[TokenEmbedder, TokenIndexer, Dict[str, Any]]:
    embedder_type = embedder_type.lower()
    text_field_embedder_kwargs: Dict[str, Any] = {}
    if embedder_type == 'ner_elmo':
        return (NERElmoTokenEmbedder(), ELMoTokenCharactersIndexer(),
                text_field_embedder_kwargs)
    elif embedder_type == 'elmo':
        return (ElmoTokenEmbedder(ELMO_OPTIONS_FILE, ELMO_WEIGHT_FILE),
                ELMoTokenCharactersIndexer(), text_field_embedder_kwargs)
    elif embedder_type == 'bert':
        bert_embedder = PretrainedBertEmbedder(
            pretrained_model="bert-base-uncased",
            top_layer_only=True,  # conserve memory
        )
        token_indexer = PretrainedBertIndexer(
            pretrained_model="bert-base-uncased",
            max_pieces=512,  # max pieces allowed for positional embeddings
            do_lowercase=True,
            use_starting_offsets=True,
        )
        text_field_embedder_kwargs['allow_unmatched_keys'] = True
        text_field_embedder_kwargs['embedder_to_indexer_map'] = {
            "tokens": ["tokens", "tokens-offsets"]
        }

        return bert_embedder, token_indexer, text_field_embedder_kwargs
    else:
        raise Exception(f'Unknown embedder type: {embedder_type}')
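A sketch of how the returned triple is typically consumed, assuming AllenNLP's BasicTextFieldEmbedder and the ELMO_OPTIONS_FILE/ELMO_WEIGHT_FILE module constants referenced above:
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder

# Assumed usage; the 'tokens' key must match the dataset reader's token_indexers.
embedder, indexer, tfe_kwargs = get_embedder_info('elmo')
token_indexers = {'tokens': indexer}
text_field_embedder = BasicTextFieldEmbedder({'tokens': embedder}, **tfe_kwargs)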
Example #6
def get_embeddings(embedder_type,
                   vocab,
                   embedding_dim=300,
                   bert_trainable=True):
    if embedder_type not in valid_embedders:
        raise Exception(f'Unknown embedder type {embedder_type}')
    vocab_size = vocab.get_vocab_size('tokens')
    token_embedders = {}
    if embedder_type == 'random':
        token_embedding = Embedding(vocab_size, embedding_dim, trainable=True)
        token_embedders['tokens'] = token_embedding
    if embedder_type in ['glove', 'elmo_and_glove']:
        weights = load_glove_weights(vocab)
        token_embedding = Embedding(vocab_size,
                                    embedding_dim,
                                    weight=weights,
                                    trainable=True)
        token_embedders['tokens'] = token_embedding
    if embedder_type in ['elmo', 'elmo_and_glove']:
        elmo_token_embedder = ElmoTokenEmbedder(
            'embeddings/elmo_2x4096_512_2048cnn_2xhighway_options.json',
            'embeddings/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5',
            do_layer_norm=False,
            dropout=0.5)
        token_embedders['elmo'] = elmo_token_embedder
    if 'bert' in embedder_type:
        token_embedders['bert'] = BertEmbedder(bert_type=embedder_type,
                                               trainable=bert_trainable)

    word_embeddings = BasicTextFieldEmbedder(token_embedders)
    return word_embeddings
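Illustrative call for the factory above, assuming a built `vocab` and the module-level helpers it references (`valid_embedders`, `load_glove_weights`); note that the dataset reader's token_indexers must use the same 'tokens'/'elmo' keys this function assigns:
# Assumed usage sketch only.
word_embeddings = get_embeddings('elmo_and_glove', vocab, embedding_dim=300)
# Output dim is the sum of the GloVe and ELMo dimensions.
print(word_embeddings.get_output_dim())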
Example #7
    def test_forward_works_with_projection_layer(self):
        params = Params({
            'options_file': self.FIXTURES_ROOT / 'elmo' / 'options.json',
            'weight_file': self.FIXTURES_ROOT / 'elmo' / 'lm_weights.hdf5',
            'projection_dim': 20
        })
        word1 = [0] * 50
        word2 = [0] * 50
        word1[0] = 6
        word1[1] = 5
        word1[2] = 4
        word1[3] = 3
        word2[0] = 3
        word2[1] = 2
        word2[2] = 1
        word2[3] = 0
        embedding_layer = ElmoTokenEmbedder.from_params(vocab=None,
                                                        params=params)
        input_tensor = torch.LongTensor([[word1, word2]])
        embedded = embedding_layer(input_tensor).data.numpy()
        assert embedded.shape == (1, 2, 20)

        input_tensor = torch.LongTensor([[[word1]]])
        embedded = embedding_layer(input_tensor).data.numpy()
        assert embedded.shape == (1, 1, 1, 20)
Example #8
    def test_forward_works_with_projection_layer(self):
        params = Params({
            "options_file": self.FIXTURES_ROOT / "elmo" / "options.json",
            "weight_file": self.FIXTURES_ROOT / "elmo" / "lm_weights.hdf5",
            "projection_dim": 20,
        })
        word1 = [0] * 50
        word2 = [0] * 50
        word1[0] = 6
        word1[1] = 5
        word1[2] = 4
        word1[3] = 3
        word2[0] = 3
        word2[1] = 2
        word2[2] = 1
        word2[3] = 0
        embedding_layer = ElmoTokenEmbedder.from_params(vocab=None,
                                                        params=params)
        assert embedding_layer.get_output_dim() == 20

        input_tensor = torch.LongTensor([[word1, word2]])
        embedded = embedding_layer(input_tensor).data.numpy()
        assert embedded.shape == (1, 2, 20)

        input_tensor = torch.LongTensor([[[word1]]])
        embedded = embedding_layer(input_tensor).data.numpy()
        assert embedded.shape == (1, 1, 1, 20)
Example #9
    def _run_test(self, requires_grad):
        embedder = ElmoTokenEmbedder(self.options_file, self.weight_file, requires_grad=requires_grad)
        batch_size = 3
        seq_len = 4
        char_ids = torch.from_numpy(numpy.random.randint(0, 262, (batch_size, seq_len, 50)))
        embeddings = embedder(char_ids)
        loss = embeddings.sum()
        loss.backward()

        elmo_grads = [param.grad for name, param in embedder.named_parameters() if '_elmo_lstm' in name]
        if requires_grad:
            # None of the elmo grads should be None.
            assert all([grad is not None for grad in elmo_grads])
        else:
            # All of the elmo grads should be None.
            assert all([grad is None for grad in elmo_grads])
Example #10
    def test_forward_works_with_projection_layer(self):
        params = Params({
                'options_file': self.FIXTURES_ROOT / 'elmo' / 'options.json',
                'weight_file': self.FIXTURES_ROOT / 'elmo' / 'lm_weights.hdf5',
                'projection_dim': 20
                })
        word1 = [0] * 50
        word2 = [0] * 50
        word1[0] = 6
        word1[1] = 5
        word1[2] = 4
        word1[3] = 3
        word2[0] = 3
        word2[1] = 2
        word2[2] = 1
        word2[3] = 0
        embedding_layer = ElmoTokenEmbedder.from_params(vocab=None, params=params)
        assert embedding_layer.get_output_dim() == 20

        input_tensor = torch.LongTensor([[word1, word2]])
        embedded = embedding_layer(input_tensor).data.numpy()
        assert embedded.shape == (1, 2, 20)

        input_tensor = torch.LongTensor([[[word1]]])
        embedded = embedding_layer(input_tensor).data.numpy()
        assert embedded.shape == (1, 1, 1, 20)
Example #11
    def test_context_sequence_encoding(self):
        elmo_credbank_model_path = load_abs_path(
            os.path.join(
                os.path.dirname(__file__), '..', "resource", "embedding",
                "elmo_model",
                "elmo_credbank_2x4096_512_2048cnn_2xhighway_weights_10052019.hdf5"
            ))

        elmo_embedder = ElmoTokenEmbedder(
            options_file=
            "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
            weight_file=elmo_credbank_model_path,
            do_layer_norm=False,
            dropout=0.5)
        word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})

        EXPECTED_CONTEXT_INPUT_SIZE = 60

        rumor_classifier = RumorTweetsClassifer(
            word_embeddings,
            None,
            None,
            None,
            classifier_feedforward=None,
            cxt_content_encoder=None,
            cxt_metadata_encoder=None,
            social_context_self_attention_encoder=None,
            cuda_device=-1)

        tweet_id = "500327120770301952"
        single_source_tweet_tensor_1 = self.tweet_context_encoding_by_tweet_id(
            rumor_classifier, tweet_id)
        print(type(single_source_tweet_tensor_1))
        print(single_source_tweet_tensor_1.shape)
        assert type(single_source_tweet_tensor_1) == torch.Tensor
        assert single_source_tweet_tensor_1.shape == (
            97, EXPECTED_CONTEXT_INPUT_SIZE
        ), "expected shape is [19, %s]" % EXPECTED_CONTEXT_INPUT_SIZE

        tweet_id = "552806117328568321"  # with three replies
        single_source_tweet_tensor_2 = self.tweet_context_encoding_by_tweet_id(
            rumor_classifier, tweet_id)
        print(type(single_source_tweet_tensor_2))
        print(single_source_tweet_tensor_2.shape)
        assert type(single_source_tweet_tensor_2) == torch.Tensor
        assert single_source_tweet_tensor_2.shape == (
            94, EXPECTED_CONTEXT_INPUT_SIZE
        ), "expected shape is [3, %s]" % EXPECTED_CONTEXT_INPUT_SIZE

        tweet_id = "552806117328568321"  # with three replies
        print("social context encoding without numerical feature .")
        single_source_tweet_tensor_2 = self.tweet_context_encoding_by_tweet_id(
            rumor_classifier, tweet_id, disable_nf=True)
        print(type(single_source_tweet_tensor_2))
        print(single_source_tweet_tensor_2.shape)
        assert type(single_source_tweet_tensor_2) == torch.Tensor
        assert single_source_tweet_tensor_2.shape == (
            94, EXPECTED_CONTEXT_INPUT_SIZE
        ), "expected shape is [3, %s]" % EXPECTED_CONTEXT_INPUT_SIZE
Example #12
def build_elmo_model(vocab: Vocabulary) -> Model:
    print("Building the model")
    vocab_size = vocab.get_vocab_size("tokens")
    embedding = ElmoTokenEmbedder()
    embedder = BasicTextFieldEmbedder(token_embedders={'bert_tokens': embedding})
    encoder = BagOfEmbeddingsEncoder(embedding_dim=embedder.get_output_dim(), averaged=True)
    
    return SimpleClassifier(vocab, embedder, encoder)
Example #13
    def _run_test(self, requires_grad):
        embedder = ElmoTokenEmbedder(self.options_file, self.weight_file, requires_grad=requires_grad)
        batch_size = 3
        seq_len = 4
        char_ids = torch.from_numpy(numpy.random.randint(0, 262, (batch_size, seq_len, 50)))
        for _ in range(2):
            embeddings = embedder(char_ids)
            loss = embeddings.sum()
            loss.backward()

            elmo_grads = [param.grad for name, param in embedder.named_parameters() if '_elmo_lstm' in name]
            if requires_grad:
                # None of the elmo grads should be None.
                assert all([grad is not None for grad in elmo_grads])
            else:
                # All of the elmo grads should be None.
                assert all([grad is None for grad in elmo_grads])
Example #14
def run_ELMo_RSA(stim_file, header=False, filter_file=None):

    EXP = data.Stim(stim_file, header, filter_file, VOCAB_FILE)

    #Get tokenizer
    tokenizer = WhitespaceTokenizer()

    #Load model
    ##ELMo OG
    elmo_weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5'
    elmo_options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json'

    #ELMo Small
    #elmo_weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'
    #elmo_options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'

    #ELMo Medium
    #elmo_weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5'
    #elmo_options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_options.json'

    #ELMo OG (5.5B)
    #elmo_weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5'
    #elmo_options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json'

    elmo_embedding = ElmoTokenEmbedder(options_file=elmo_options_file,
                                       weight_file=elmo_weight_file,
                                       dropout=0.0)
    embedder = BasicTextFieldEmbedder(
        token_embedders={'elmo_tokens': elmo_embedding})

    for x in range(len(EXP.SENTS)):
        sentences = list(EXP.SENTS[x])
        target = sentences[0]
        sentence = sentences[1]

        #GET BASELINE
        token_indexer = ELMoTokenCharactersIndexer()
        vocab = Vocabulary()

        target_tokens = tokenizer.tokenize(target)
        target_text_field = TextField(target_tokens,
                                      {'elmo_tokens': token_indexer})
        target_text_field.index(vocab)
        target_token_tensor = target_text_field.as_tensor(
            target_text_field.get_padding_lengths())
        target_tensor_dict = target_text_field.batch_tensors(
            [target_token_tensor])

        target_embedding = embedder(target_tensor_dict)[0]
        baseline = target_embedding[-1].data.cpu().squeeze()

        #GET SIMS
        sims = get_ELMo_sims(sentence, baseline, tokenizer, embedder)
        values = get_dummy_values(sentence)

        EXP.load_IT('elmo', x, values, False, sims)

    return EXP
Example #15
def main():
    elmo_token_indexer = ELMoTokenCharactersIndexer()

    reader = StanfordSentimentTreeBankDatasetReader(
        token_indexers={'tokens': elmo_token_indexer})

    train_dataset = reader.read(
        'data/stanfordSentimentTreebank/trees/train.txt')
    dev_dataset = reader.read('data/stanfordSentimentTreebank/trees/dev.txt')

    # You can optionally specify the minimum count of tokens/labels.
    # `min_count={'tokens': 3}` here means that any token appearing fewer than three times
    # will be ignored and not included in the vocabulary.
    vocab = Vocabulary.from_instances(train_dataset + dev_dataset,
                                      min_count={'tokens': 3})

    # Use the 'Small' pre-trained model
    options_file = (
        'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo'
        '/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'
    )
    weight_file = (
        'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo'
        '/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'
    )

    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)

    # BasicTextFieldEmbedder takes a dict - we need an embedding just for tokens,
    # not for labels, which are used as-is as the "answer" of the sentence classification
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})

    # Seq2VecEncoder is a neural network abstraction that takes a sequence of something
    # (usually a sequence of embedded word vectors), processes it, and returns a single
    # vector. Oftentimes this is an RNN-based architecture (e.g., LSTM or GRU), but
    # AllenNLP also supports CNNs and other simple architectures (for example,
    # just averaging over the input vectors).
    encoder = PytorchSeq2VecWrapper(
        torch.nn.LSTM(elmo_embedding_dim, HIDDEN_DIM, batch_first=True))

    model = LstmClassifier(word_embeddings, encoder, vocab)
    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

    iterator = BucketIterator(batch_size=32,
                              sorting_keys=[("tokens", "num_tokens")])

    iterator.index_with(vocab)

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      validation_dataset=dev_dataset,
                      patience=10,
                      num_epochs=20)

    trainer.train()
Example #16
    def __init__(self,
                 options_file: str,
                 weight_file: str,
                 vocab_to_cache: List[str],
                 do_layer_norm: bool = False,
                 dropout: float = 0.5,
                 requires_grad: bool = False,
                 projection_dim: int = None) -> None:
        super(ElmoWordEmbedding, self).__init__()

        self._elmo = ElmoTokenEmbedder(options_file=options_file,
                                       weight_file=weight_file,
                                       do_layer_norm=do_layer_norm,
                                       dropout=dropout,
                                       requires_grad=requires_grad,
                                       projection_dim=projection_dim,
                                       vocab_to_cache=vocab_to_cache)

        self._projection = self._elmo._projection
Example #17
def build_model(options_file, weight_file):
    vocab = Vocabulary()
    iterator = BucketIterator(batch_size=config.batch_size, sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)

    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})
    encoder: Seq2VecEncoder = PytorchSeq2VecWrapper(nn.LSTM(word_embeddings.get_output_dim(), config.hidden_size, bidirectional=True, batch_first=True))
    model = BaselineModel(word_embeddings, encoder, vocab)

    return model, iterator, vocab
Example #18
    def test_cached_download(self):
        params = Params({
            "options_file":
            "hf://lysandre/test-elmo-tiny/options.json",
            "weight_file":
            "hf://lysandre/test-elmo-tiny/lm_weights.hdf5",
        })
        embedding_layer = ElmoTokenEmbedder.from_params(vocab=None,
                                                        params=params)

        assert isinstance(embedding_layer, ElmoTokenEmbedder
                          ), "Embedding layer badly instantiated from HF Hub."
        assert (embedding_layer.get_output_dim() == 32
                ), "Embedding layer badly instantiated from HF Hub."
Example #19
    def _run_test_with_vocab_to_cache(self, requires_grad):
        vocab_to_cache = ['<pad>', 'hello', 'world']
        embedder = ElmoTokenEmbedder(self.options_file,
                                     self.weight_file,
                                     requires_grad=requires_grad,
                                     vocab_to_cache=vocab_to_cache)
        word_tensor = torch.LongTensor([[[1, 2]]])
        for _ in range(2):
            embeddings = embedder(word_tensor, word_tensor)
            loss = embeddings.sum()
            loss.backward()

            elmo_grads = [param.grad for name, param in embedder.named_parameters()
                          if '_elmo_lstm' in name and '_token_embedder' not in name]
            if requires_grad:
                # None of the elmo grads should be None.
                assert all([grad is not None for grad in elmo_grads])
            else:
                # All of the elmo grads should be None.
                assert all([grad is None for grad in elmo_grads])

            assert all([param.grad is None for name, param in embedder.named_parameters()
                        if '_token_embedder' in name])
Example #20
    def test_vocab_extension_attempt_does_not_give_error(self):
        # It shouldn't raise an error if the TokenEmbedder does not implement `extend_vocab`.

        params = Params({
            'options_file':
            self.FIXTURES_ROOT / 'elmo' / 'options.json',
            'weight_file':
            self.FIXTURES_ROOT / 'elmo' / 'lm_weights.hdf5'
        })
        embedding_layer = ElmoTokenEmbedder.from_params(vocab=None,
                                                        params=params)

        vocab = Vocabulary()
        vocab.add_token_to_namespace('word1')
        vocab.add_token_to_namespace('word2')

        # This should just pass and be a no-op
        embedding_layer.extend_vocab(vocab)
Example #21
def main ():
	# Initializing the embeddings (ELMo)
	elmo_token_indexer = ELMoTokenCharactersIndexer()

	reader = AnalogyDatasetReader(token_indexers={'tokens':elmo_token_indexer})

	train_dataset, test_dataset, dev_dataset = (reader.read(DATA_ROOT + "/" + fname) for fname in ["train_all.txt", "test_all.txt", "val_all.txt"])

	# elmo_embedder = Elmo(options_file, weight_file, 2, dropout=0.5)
	elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
	
	vocab = Vocabulary.from_instances(train_dataset + test_dataset + dev_dataset)
	word_embeddings = BasicTextFieldEmbedder({'tokens': elmo_embedder})
	#Initializing the model
	#takes the hidden state at the last time step of the LSTM for every layer as one single output
	lstm_encoder = PytorchSeq2VecWrapper(torch.nn.LSTM(elmo_embedding_dim, hidden_dim, batch_first=True, bidirectional=True))
	model = LstmModel(word_embeddings, lstm_encoder, vocab)

	if USE_GPU: model.cuda()
	else: model

	# Training the model 
	optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
	iterator = BucketIterator(batch_size=32, sorting_keys=[("tokens", "num_tokens")])
	iterator.index_with(vocab)

	trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  train_dataset=train_dataset,
                  validation_dataset=dev_dataset,
                  patience=10,
                  cuda_device=0 if USE_GPU else -1,
                  num_epochs=20)

	trainer.train()

	#Saving the model
	with open("model.th", 'wb') as f:
		torch.save(model.state_dict(), f)

	vocab.save_to_files("vocabulary")
Example #22
def load_elmo_model():
    elmo_embedders = ElmoTokenEmbedder(OPTION_FILE, WEIGHT_FILE)
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedders})

    encoder = PytorchSeq2VecWrapper(
        torch.nn.LSTM(word_embeddings.get_output_dim(),
                      HIDDEN_DIM,
                      bidirectional=True,
                      batch_first=True))

    vocabulary = Vocabulary()

    model = BaseModel(word_embeddings=word_embeddings,
                      encoder=encoder,
                      vocabulary=vocabulary)

    output_elmo_model_file = os.path.join(PRETRAINED_ELMO,
                                          "lstm_elmo_model.bin")
    model.load_state_dict(torch.load(output_elmo_model_file))
    return model
Example #23
def sequence_labelling():

    # Index each token as a sequence of character Ids (ELMo)
    token_indexers = {"tokens": ELMoTokenCharactersIndexer()}

    # Read the data
    reader = SequenceLabellingDatasetReader(token_indexers)
    training_data = reader.read(path='data/sequence_labelling/train.txt')
    validation_data = reader.read(path='data/sequence_labelling/test.txt')
    test_data = reader.read(path='data/sequence_labelling/test.txt')

    # Create a vocabulary
    vocabulary = Vocabulary.from_instances(training_data + validation_data +
                                           test_data)

    # Use ELMo embeddings
    elmo = ElmoTokenEmbedder(options_file=ELMO_OPTIONS_FILE,
                             weight_file=ELMO_WEIGHTS_FILE)

    embedder = BasicTextFieldEmbedder(token_embedders={"tokens": elmo})

    # Our sequence labeller will use a bidirectional LSTM encoder

    lstm_layer = LSTM(input_size=ELMO_EMBEDDING_DIM,
                      hidden_size=HIDDEN_SIZE,
                      bidirectional=True,
                      batch_first=True)
    lstm_encoder = PytorchSeq2SeqWrapper(module=lstm_layer)

    model = SequenceLabeller(vocabulary=vocabulary,
                             embedder=embedder,
                             encoder=lstm_encoder)

    print("\nModel :\n")
    print(model)

    # Training
    train_model(model, training_data, validation_data, vocabulary)

    # Evaluation
    evaluate_sequence_labelling_model(model, test_data)
Example #24
def get_model(vocab, params):
    emb_d = params["embedding_dim"]
    hidden_d = params["hidden_dim"]

    use_elmo_embeddings = params['use_elmo']
    use_lstm = params['use_lstm']
    n_layers = params["num_layers"]

    bidirectional = params['bidirectional']

    if use_elmo_embeddings:
        token_embedder = ElmoTokenEmbedder(ELMO_OPTIONS_FILE,
                                           ELMO_WEIGHTS_FILE)
    else:
        token_embedder = Embedding(
            num_embeddings=vocab.get_vocab_size('tokens'), embedding_dim=emb_d)

    word_embedder = BasicTextFieldEmbedder({"tokens": token_embedder})
    emb_d = word_embedder.get_output_dim()

    if use_lstm:
        encoder = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(emb_d,
                          hidden_d,
                          num_layers=n_layers,
                          batch_first=True,
                          bidirectional=bidirectional))
    else:
        encoder = PytorchSeq2SeqWrapper(
            torch.nn.GRU(emb_d,
                         hidden_d,
                         num_layers=n_layers,
                         batch_first=True,
                         bidirectional=bidirectional))

    model = NerModel(word_embedder,
                     encoder,
                     vocab,
                     num_categories=(3 if params["dataset"] == "senti" else 4))
    return model
Example #25
def predict():
	elmo_token_indexer = ELMoTokenCharactersIndexer()

	reader = AnalogyDatasetReader(token_indexers={'tokens':elmo_token_indexer})

	train_dataset, test_dataset, dev_dataset = (reader.read(DATA_ROOT + "/" + fname) for fname in ["train_all.txt", "test_all.txt", "val_all.txt"])

	# elmo_embedder = Elmo(options_file, weight_file, 2, dropout=0.5)
	elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
	
	word_embeddings = BasicTextFieldEmbedder({'tokens': elmo_embedder})
	lstm_encoder = PytorchSeq2VecWrapper(torch.nn.LSTM(elmo_embedding_dim, hidden_dim, batch_first=True, bidirectional=True))

	vocab2 = Vocabulary.from_files("./vocabulary")
	model2 = LstmModel(word_embeddings, lstm_encoder, vocab2)

	if USE_GPU: model2.cuda()
	else: model2

	with open("./model.th", 'rb') as f:
		model2.load_state_dict(torch.load(f))
	
	predictor2 = SentenceClassifierPredictor(model2, dataset_reader=reader)
	with open('test.txt', 'w+') as f:
		top_10_words_list = []
		for analogy_test in test_dataset:
			logits = predictor2.predict_instance(analogy_test)['logits']
			label_id = np.argmax(logits)
			label_predict = model2.vocab.get_token_from_index(label_id, 'labels')

			top_10_ids = np.argsort(logits)[-10:]
			top_10_words = [model2.vocab.get_token_from_index(id, 'labels') for id in top_10_ids]
			top_10_words_list.append(top_10_words)
			f.write(label_predict + "\n")

	top_10_words_list = np.array(top_10_words_list)
	print(top_10_words_list.shape)
	np.save('elmo_top_10_words_list.npy', np.array(top_10_words_list))
Example #26
    def test_context_feature_encoder(self):
        elmo_credbank_model_path = load_abs_path(
            os.path.join(
                os.path.dirname(__file__), '..', "resource", "embedding",
                "elmo_model",
                "elmo_credbank_2x4096_512_2048cnn_2xhighway_weights_10052019.hdf5"
            ))

        # Test context feature encoding with small sample data
        # to make sure that source tweet contexts are sorted in temporal order.
        elmo_embedder = ElmoTokenEmbedder(
            options_file=
            "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
            weight_file=elmo_credbank_model_path,
            do_layer_norm=False,
            dropout=0.5)
        word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})
        rumor_classifier = RumorTweetsClassifer(word_embeddings, None, None,
                                                None, None)

        propagation_embeddings_tensor = rumor_classifier.batch_compute_context_feature_encoding(
            ['500294803402137600', '500327120770301952'])
        print("propagation_embeddings_tensor: ", propagation_embeddings_tensor)
Example #27
def get_predictor():
    EMBEDDING_DIM = 128
    HIDDEN_DIM = 60  #128
    MAX_LEN = 70
    dropout = 0.25
    lstm_layers = 2
    #  pre-trained model
    options_file = (
        'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo'
        '/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'
    )
    weight_file = (
        'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo'
        '/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'
    )

    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    vocab = Vocabulary.from_files(data_dir +
                                  "vocabulary_allennlp_imdb_twoclass")
    word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})

    elmo_embedding_dim = 256
    lstm = PytorchSeq2VecWrapper(
        torch.nn.LSTM(elmo_embedding_dim,
                      HIDDEN_DIM,
                      bidirectional=True,
                      num_layers=lstm_layers,
                      dropout=dropout,
                      batch_first=True))
    model = LstmTwoClassifier(word_embeddings, lstm, vocab)
    net = torch.load("model_allen_imdb_twoclass.th", map_location=str(device))
    model.load_state_dict(net)
    elmo_token_indexer = ELMoTokenCharactersIndexer()
    readerSentence = SentenceDatasetReader(
        token_indexers={'tokens': elmo_token_indexer})

    return SentimentPredictor(model, dataset_reader=readerSentence)
Example #28
def build_model(vocab: Vocabulary) -> Model:
    print("Building the model")
    vocab_size_tokens = vocab.get_vocab_size("tokens")
    vocab_size_chars = vocab.get_vocab_size("token_characters")

    embedder = BasicTextFieldEmbedder({"tokens": Embedding(embedding_dim=embedding_dim, pretrained_file=f"{cur_dir}/glove/glove.6B.200d.txt", trainable=False, num_embeddings=vocab_size_tokens, vocab=vocab),\
                                        "elmo": ElmoTokenEmbedder(weight_file="https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5", options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json", do_layer_norm=False, dropout=0.0),\
                                        "token_characters":TokenCharactersEncoder(embedding=Embedding(embedding_dim=16, num_embeddings=vocab_size_chars, vocab=vocab), \
                                                                                encoder=CnnEncoder(embedding_dim=16, num_filters=128, ngram_filter_sizes=[3]))})
    encoder = PytorchTransformer(input_dim=1352,
                                 num_layers=6,
                                 positional_encoding="sinusoidal")

    # embedder = BasicTextFieldEmbedder({"tokens": Embedding(embedding_dim=embedding_dim, num_embeddings=vocab_size)})
    # encoder = BagOfEmbeddingsEncoder(embedding_dim=embedding_dim)
    # embedder = BasicTextFieldEmbedder({"tokens": PretrainedTransformerMismatchedEmbedder("bert-large-uncased")})
    # encoder = LstmSeq2SeqEncoder(input_size=1024, hidden_size=1024, num_layers=2, dropout=0.5, bidirectional=True)

    if args.pseudo:
        return PseudoCrfTagger(vocab, embedder, encoder, \
                label_encoding="BIOUL", include_start_end_transitions=False, num_virtual_models = num_virtual_models)
    else:
        return CrfTagger(vocab, embedder, encoder, \
                label_encoding="BIOUL", include_start_end_transitions=False)
Example #29
                                                      stratify=y_full,
                                                      train_size=0.9,
                                                      test_size=0.1)
vocab = Vocabulary.from_instances(train_ds + test_ds)
iterator = BucketIterator(batch_size=32,
                          biggest_batch_first=True,
                          sorting_keys=[("tokens", "num_tokens")],
                          padding_noise=.15)
iterator.index_with(vocab)
batch = next(iter(iterator(train_ds)))
EMBEDDING_DIM = 256
HIDDEN_DIM = 64
# These files were trained by us; for pretrained ELMo, use the official pretrained files instead.
options_file = 'forELMO\\options.json'
weight_file = 'forELMO\\corp_trained.hdf5'
elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
word_embeddings = BasicTextFieldEmbedder({"tokens": elmo_embedder})
lstm = PytorchSeq2VecWrapper(
    torch.nn.LSTM(word_embeddings.get_output_dim(),
                  HIDDEN_DIM,
                  batch_first=True,
                  bidirectional=True))
model = BaselineModel(word_embeddings, lstm)
batch = nn_util.move_to_device(batch, 0)
train_dataset, val_dataset = train_test_split(train_ds,
                                              train_size=0.9,
                                              test_size=0.1,
                                              shuffle=True,
                                              stratify=y_train)
optimizer = optim.RMSprop(model.parameters(), lr=0.01)
if torch.cuda.is_available():
Example #30
def train_only_lee():
    # This is working!
    # Load the dataset reader; save logging to a local file.
    log.getLogger().addHandler(log.FileHandler(directory+"/log.log"))

    lr = 0.00001
    batch_size = 2
    epochs = 100
    max_seq_len = 512
    max_span_width = 30
    #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,)
    token_indexer = PretrainedBertIndexer("bert-base-cased", do_lowercase=False)
    reader = ConllCorefBertReader(max_span_width = max_span_width, token_indexers = {"tokens": token_indexer})

    EMBEDDING_DIM = 1024
    HIDDEN_DIM = 200
    processed_reader_dir = Path(directory+"processed/")
    
    train_ds, val_ds, test_ds = load_lee(reader, directory)
    # restore checkpoint here
    from allennlp.modules.token_embedders import ElmoTokenEmbedder
    #vocab = Vocabulary.from_instances(train_ds + val_ds)
    vocab = Vocabulary()
    iterator = BasicIterator(batch_size=batch_size)
    iterator.index_with(vocab)

    val_iterator = BasicIterator(batch_size=batch_size)
    val_iterator.index_with(vocab)
    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
    # here, allow_unmatched_keys = True since we don't pass in offsets, because
    # we allow word embeddings of the BERT-tokenized text, not necessarily the
    # original tokens
    # see the documentation for offsets here for more info:
    # https://github.com/allenai/allennlp/blob/master/allennlp/modules/token_embedders/bert_token_embedder.py
    options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'
 
    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embedding = BasicTextFieldEmbedder({"tokens": elmo_embedder})#, allow_unmatched_keys=True)

    #word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder}, allow_unmatched_keys=True)
    #BERT_DIM = word_embedding.get_output_dim()
    ELMO_DIM = word_embedding.get_output_dim()
    # at each batch, sample from the two, and load th eLSTM
    shared_layer = torch.nn.LSTM(ELMO_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True)
    seq2seq = PytorchSeq2SeqWrapper(shared_layer)
    mention_feedforward = FeedForward(input_dim =512, num_layers = 2, hidden_dims = 150, activations = torch.nn.ReLU())
    antecedent_feedforward = FeedForward(input_dim =2304, num_layers = 2, hidden_dims = 150, activations = torch.nn.ReLU())

    model = CoreferenceResolver(vocab=vocab, text_field_embedder=word_embedding,context_layer= seq2seq, mention_feedforward=mention_feedforward,antecedent_feedforward=antecedent_feedforward , feature_size=768,max_span_width=max_span_width,spans_per_word=0.4,max_antecedents=250,lexical_dropout= 0.2)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # and then we can do the shared loss
    # 
    # Get 
    USE_GPU = 1
    trainer = Trainer(
        model=model.cuda(),
        optimizer=optimizer,
        iterator=iterator,
        validation_iterator = val_iterator, 
        train_dataset=train_ds,
        validation_dataset = val_ds, 
        validation_metric = "+coref_f1",
        cuda_device=0 if USE_GPU else -1,
        serialization_dir= directory + "saved_models/only_lee",
        num_epochs=epochs,
    )    

    metrics = trainer.train()
    # save the model
    with open(directory + "saved_models/current_run_model_state", 'wb') as f:
        torch.save(model.state_dict(), f)
Example #31
def multitask_learning():
    # load datasetreader 
    # Save logging to a local file
    # Multitasking
    log.getLogger().addHandler(log.FileHandler(directory+"/log.log"))

    lr = 0.00001
    batch_size = 2
    epochs = 10 
    max_seq_len = 512
    max_span_width = 30

    #import pdb
    #pdb.set_trace()    

    #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,)
    #token_indexer = PretrainedBertIndexer("bert-base-cased", do_lowercase=False)
    from allennlp.data.token_indexers.elmo_indexer import ELMoTokenCharactersIndexer
    # the token indexer is responsible for mapping tokens to integers
    token_indexer = ELMoTokenCharactersIndexer()
    
    def tokenizer(x: str):
        return [w.text for w in SpacyWordSplitter(language='en_core_web_sm', pos_tags=False).split_words(x)[:max_seq_len]]


    #conll_reader = ConllCorefBertReader(max_span_width = max_span_width, token_indexers = {"tokens": token_indexer}) 
    conll_reader = ConllCorefReader(max_span_width = max_span_width, token_indexers = {"tokens": token_indexer})
    swag_reader = SWAGDatasetReader(tokenizer=tokenizer, token_indexers = token_indexer)
    EMBEDDING_DIM = 1024
    HIDDEN_DIM = 200
    conll_datasets, swag_datasets = load_datasets(conll_reader, swag_reader, directory)
    conll_vocab = Vocabulary()
    conll_iterator = BasicIterator(batch_size=batch_size)
    conll_iterator.index_with(conll_vocab)

    swag_vocab = Vocabulary()
    swag_iterator = BasicIterator(batch_size=batch_size)
    swag_iterator.index_with(swag_vocab)

    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
    from allennlp.modules.token_embedders import ElmoTokenEmbedder

    #bert_embedder = PretrainedBertEmbedder(pretrained_model="bert-base-cased",top_layer_only=True, requires_grad=True)

    options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'
 
    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embedding = BasicTextFieldEmbedder({"tokens": elmo_embedder})#, allow_unmatched_keys=True)

    #word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder}, allow_unmatched_keys=True)
    #BERT_DIM = word_embedding.get_output_dim()
    ELMO_DIM = word_embedding.get_output_dim()

    seq2seq = PytorchSeq2SeqWrapper(torch.nn.LSTM(ELMO_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True))
    seq2vec = PytorchSeq2VecWrapper(torch.nn.LSTM(ELMO_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True))
    mention_feedforward = FeedForward(input_dim = 2336, num_layers = 2, hidden_dims = 150, activations = torch.nn.ReLU())
    antecedent_feedforward = FeedForward(input_dim = 7776, num_layers = 2, hidden_dims = 150, activations = torch.nn.ReLU())
    model1 = CoreferenceResolver(vocab=conll_vocab, text_field_embedder=word_embedding,context_layer= seq2seq, mention_feedforward=mention_feedforward,antecedent_feedforward=antecedent_feedforward , feature_size=768,max_span_width=max_span_width,spans_per_word=0.4,max_antecedents=250,lexical_dropout= 0.2)

    model2 = SWAGExampleModel(vocab=swag_vocab, text_field_embedder=word_embedding, phrase_encoder=seq2vec)
    optimizer1 = optim.Adam(model1.parameters(), lr=lr)
    optimizer2 = optim.Adam(model2.parameters(), lr=lr)

    swag_train_iterator = swag_iterator(swag_datasets[0], num_epochs=1, shuffle=True)
    conll_train_iterator = conll_iterator(conll_datasets[0], num_epochs=1, shuffle=True)
    swag_val_iterator = swag_iterator(swag_datasets[1], num_epochs=1, shuffle=True)
    conll_val_iterator = conll_iterator(conll_datasets[1], num_epochs=1, shuffle=True)
    task_infos = {"swag": {"model": model2, "optimizer": optimizer2, "loss": 0.0, "iterator": swag_iterator, "train_data": swag_datasets[0], "val_data": swag_datasets[1], "num_train": len(swag_datasets[0]), "num_val": len(swag_datasets[1]), "lr": lr, "score": {"accuracy":0.0}}, \
                    "conll": {"model": model1, "iterator": conll_iterator, "loss": 0.0, "val_data": conll_datasets[1], "train_data": conll_datasets[0], "optimizer": optimizer1, "num_train": len(conll_datasets[0]), "num_val": len(conll_datasets[1]),"lr": lr, "score": {"coref_prediction": 0.0, "coref_recall": 0.0, "coref_f1": 0.0,"mention_recall": 0.0}}}
    USE_GPU = 1
    trainer = MultiTaskTrainer(
        task_infos=task_infos, 
        num_epochs=epochs,
        serialization_dir=directory + "saved_models/multitask/"
    ) 
    metrics = trainer.train()
Example #32
    def embeddings_returner(self, vocab=None):
        '''
        Either the name of the pretrained model to use (e.g. bert-base-uncased), or the path to the .tar.gz
        file with the model weights.
        :param vocab: needed only when pretrained (GloVe) embeddings are used.
        :return: embedder
        '''
        '''
        "bert-base-uncased", do_lower_case=True
        "bert-base-cased", do_lower_case=False
        https://github.com/huggingface/pytorch-transformers/issues/712
        https://qiita.com/uedake722/items/b7f4b75b4d77d9bd358b
        '''
        if self.embedding_strategy == 'bert':
            self.bertmodel_dir = ''
            if self.ifbert_use_whichmodel == 'general':
                self.bertmodel_dir += 'bert-base-uncased/'  # recommended version is uncased, per the original repository
                self.bertmodel_relative_dirpath = self.bert_src_dir + self.bertmodel_dir

                # included in pytorch_transformers, so we replace it with model name itself
                self.bert_weight_filepath = copy.copy('bert-base-uncased')

            elif self.ifbert_use_whichmodel == 'scibert':
                self.bertmodel_dir += 'scibert_scivocab_uncased/'  # recommended version is uncased, per the original repository
                self.bertmodel_relative_dirpath = self.bert_src_dir + self.bertmodel_dir
                self.bert_weight_filepath = self.bertmodel_relative_dirpath + 'weights.tar.gz'

            elif self.ifbert_use_whichmodel == 'biobert':
                self.bertmodel_dir += 'biobert_v1.1_pubmed/'  # currently only the cased version is supported
                self.bertmodel_relative_dirpath = self.bert_src_dir + self.bertmodel_dir
                self.bert_weight_filepath = self.bertmodel_relative_dirpath + 'weights.tar.gz'  # including bert_config.json and bin.

            # Load embedder
            bert_embedder = PretrainedBertEmbedder(
                pretrained_model=self.bert_weight_filepath,
                top_layer_only=self.bert_top_layer_only,
                requires_grad=self.emb_requires_grad)
            return bert_embedder, bert_embedder.get_output_dim(
            ), BasicTextFieldEmbedder({'tokens': bert_embedder},
                                      allow_unmatched_keys=True)

        elif self.embedding_strategy == 'elmo':
            if self.ifelmo_use_whichmodel == 'general':
                options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json'
                weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5'
            elif self.ifelmo_use_whichmodel == 'pubmed':
                options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/contributed/pubmed/elmo_2x4096_512_2048cnn_2xhighway_options.json'
                weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/contributed/pubmed/elmo_2x4096_512_2048cnn_2xhighway_weights_PubMed_only.hdf5'
            elif self.ifelmo_use_whichmodel == 'bioelmo':
                options_file = self.elmo_src_dir + 'BioELMo/weights/biomed_elmo_options.json'
                weight_file = self.elmo_src_dir + 'BioELMo/weights/biomed_elmo_weights.hdf5'
            else:
                options_file = -1
                weight_file = -1
            assert options_file != -1
            elmo_embedder = ElmoTokenEmbedder(
                options_file=options_file,
                weight_file=weight_file,
                requires_grad=self.emb_requires_grad)
            return elmo_embedder, elmo_embedder.get_output_dim(
            ), BasicTextFieldEmbedder({'tokens': elmo_embedder})

        elif self.embedding_strategy == 'pretrained':

            print('\nGloVe pretrained vocab loading\n')

            if 'glove' in self.args.ifpretrained_use_whichmodel:
                embedding_dim = 300
            else:
                embedding_dim = 200

            pretrain_emb_embedder = Embedding.from_params(
                vocab=vocab,
                params=Params({
                    'pretrained_file': self.glove_embeddings_file,
                    'embedding_dim': embedding_dim,
                    'trainable': False,
                    'padding_index': 0
                }))

            return pretrain_emb_embedder, pretrain_emb_embedder.get_output_dim(
            ), BasicTextFieldEmbedder({'tokens': pretrain_emb_embedder})