def __init__(self, model_name, num_labels: int, clf_dropout=0.2):
     super().__init__()
     self.transformer = GPT2Model.from_pretrained(model_name)
     self.dropout = nn.Dropout(clf_dropout)
     self.linear = nn.Linear(self.transformer.config.n_embd * 2, num_labels)
     nn.init.normal_(self.linear.weight, std=0.02)
     nn.init.zeros_(self.linear.bias)  # zero the bias; normal_(bias, 0) would only set the mean and keep std=1
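A hedged sketch of a forward pass that would match this head, assuming the doubled input width (n_embd * 2) comes from concatenating mean- and max-pooled hidden states and that torch is imported alongside torch.nn; the original forward method is not part of this excerpt:

def forward(self, input_ids):
    # pytorch_pretrained_bert's GPT2Model returns (hidden_states, presents)
    hidden, _ = self.transformer(input_ids)            # (batch, seq, n_embd)
    pooled = torch.cat([hidden.mean(dim=1),
                        hidden.max(dim=1)[0]], dim=1)  # (batch, n_embd * 2)
    return self.linear(self.dropout(pooled))           # (batch, num_labels)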
Example #2
def extract_gpt2_hidden_activations(text_path, save_activs_to):
    # read in the text samples to pass through the gpt2 model
    with open(text_path, "rb") as infile:
        text_inputs = pickle.load(infile)

    # Load pre-trained model tokenizer (vocabulary)
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

    # set up the model once, outside the loop - assumes a gpu is available
    model = GPT2Model.from_pretrained('gpt2')
    model.eval()
    model.to('cuda')

    # get the hidden activations
    layer_activs = []
    for text in text_inputs:
        # tokenize text
        indexed_tokens = tokenizer.encode(text)
        tokens_tensor = torch.tensor([indexed_tokens]).to('cuda')

        # grab the hidden activations and save them to layer_activs
        with torch.no_grad():
            hidden, _ = model(tokens_tensor)
            layer_activs.append(hidden.cpu().numpy().squeeze())

        # clear gpu memory in preparation for the next text sample
        torch.cuda.empty_cache()

    # save the activations
    with open(save_activs_to, "wb") as outfile:
        pickle.dump(layer_activs, outfile)
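A minimal usage sketch for the function above; the file names are illustrative only, and the input is assumed to be a pickled list of strings:

texts = ["GPT-2 produces one hidden state per token.",
         "Each entry of the saved list is a (num_tokens, 768) array for the base model."]
with open("texts.pkl", "wb") as f:
    pickle.dump(texts, f)

extract_gpt2_hidden_activations("texts.pkl", "activations.pkl")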
def download_model(name):
    if name not in MODELS:
        raise Exception(str(name) + ' not a model in the list')
    if not exists(PATH):
        print("# ", str(PATH), "not found, creating dir.")
        mkdir(PATH)
    print('# Downloading model: ' + str(name))
    name_path = MODEL_PATH_DICT[name]
    if name == 'word2vec':
        if not exists(join(PATH, name_path)):
            wget.download(
                'https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz'
            )
            shutil.move(name_path, join(PATH, name_path))
            print('# Downloaded word2vec')
        else:
            print('# Already downloaded')
    if name == 'glove':
        if not exists(join(PATH, name_path)):
            wget.download(
                'http://nlp.stanford.edu/data/wordvecs/glove.840B.300d.zip')
            zip_file = zipfile.ZipFile('./glove.840B.300d.zip')
            zip_file.extractall()
            _ = glove2word2vec('./glove.840B.300d.txt', join(PATH, name_path))
            print('# Downloaded glove')
        else:
            print('# Already downloaded')
    if name == 'dict2vec':
        if not exists(join(PATH, name_path)):
            wget.download(
                'https://dict2vec.s3.amazonaws.com/dict2vec300.tar.bz2')
            tar = tarfile.open("dict2vec300.tar.bz2")
            tar.extractall()
            tar.close()
            shutil.move(name_path, join(PATH, name_path))
            print('# Downloaded dict2vec')
        else:
            print('# Already downloaded')

    if name == 'conceptnet':
        if not exists(join(PATH, name_path)):
            wget.download(
                'https://conceptnet.s3.amazonaws.com/downloads/2019/numberbatch/numberbatch-en-19.08.txt.gz'
            )
            shutil.move(name_path, join(PATH, name_path))
            print('# Downloaded Conceptnet Numberbatch')
        else:
            print('# Already downloaded')
    if name == 'bert' or name == 'bert-context':
        _ = BertTokenizer.from_pretrained('bert-large-uncased')
        _ = BertModel.from_pretrained(
            'bert-large-uncased').embeddings.word_embeddings.weight.data.numpy(
            )
        print('# Downloaded bert')
    if name == 'gpt2' or name == 'gpt2-context':
        _ = GPT2Tokenizer.from_pretrained('gpt2')
        _ = GPT2LMHeadModel.from_pretrained('gpt2')
        _ = GPT2Model.from_pretrained('gpt2')
        print('# Downloaded gpt-2')
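download_model depends on module-level constants that are not part of the excerpt. A hedged sketch of what they might look like; the directory and file names are illustrative assumptions, not the repository's actual values:

from os import mkdir
from os.path import exists, join

PATH = './models'  # assumed download directory
MODELS = ['word2vec', 'glove', 'dict2vec', 'conceptnet',
          'bert', 'bert-context', 'gpt2', 'gpt2-context']
MODEL_PATH_DICT = {  # assumed mapping from model name to on-disk file name
    'word2vec': 'GoogleNews-vectors-negative300.bin.gz',
    'glove': 'glove.840B.300d.w2v.txt',
    'dict2vec': 'dict2vec-vectors-dim300.vec',
    'conceptnet': 'numberbatch-en-19.08.txt.gz',
    'bert': '', 'bert-context': '',  # cached by pytorch_pretrained_bert, no local file needed
    'gpt2': '', 'gpt2-context': '',
}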
Example #4
 def construct_encoder(self):
     model = GPT2Model.from_pretrained(self.model_name)
     model.cuda()
     model = torch.nn.DataParallel(model)
     model.eval()
     tokenizer = GPT2Tokenizer.from_pretrained(self.model_name)
     print("Model and tokenzier are constructed!")
     return model, tokenizer
Example #5
    def __init__(self, config, clf_dropout=0.4, n_class=8):
        super(GPT2ClassificationHeadModel, self).__init__(config)
        self.transformer = GPT2Model(config)
        self.dropout = nn.Dropout(clf_dropout)
        self.linear = nn.Linear(config.n_embd * 3, n_class)

        nn.init.normal_(self.linear.weight, std=0.02)
        nn.init.zeros_(self.linear.bias)  # zero the bias; normal_(bias, 0) would only set the mean and keep std=1

        self.apply(self.init_weights)
 def create_gpt2_model(self, config, input_ids, token_type_ids, position_ids,
                         mc_labels, lm_labels, mc_token_ids):
     model = GPT2Model(config)
     model.eval()
     hidden_states, presents = model(input_ids, position_ids, token_type_ids)
     outputs = {
         "hidden_states": hidden_states,
         "presents": presents,
     }
     return outputs
 def __init__(self, pretrain_path, dropout=0.1):
     super(Gpt2Model, self).__init__()
     self.bert = GPT2Model.from_pretrained(pretrain_path)
     self.dropout = nn.Dropout(dropout)
     self.aux_head = nn.Sequential(
         OrderedDict([
             ('dropout', nn.Dropout(dropout)),
             ('clf', nn.Linear(self.bert.config.n_embd, 6)),
         ]))
     self.main_head = nn.Sequential(
         OrderedDict([('dropout', nn.Dropout(dropout)),
                      ('clf', nn.Linear(self.bert.config.n_embd, 1))]))
 def __init__(self,
              config,
              cls_id,
              clf_dropout=0.4,
              n_class=8,
              head_start_layer=0):
     super(GPT2ClassificationHeadModel, self).__init__(config)
     self.transformer = GPT2Model(config)
     self.apply(self.init_weights)
     self.head = CustomHead(config,
                            n_class,
                            dropout=clf_dropout,
                            start_layer=head_start_layer)
     self.cls_id = cls_id
Example #9
 def from_opt(cls, opt):
     if 'pretrained' not in opt:
         return cls(opt['n_vocab'], d_word_vec=opt['d_word_vec'], d_model=opt['d_model'], len_max_seq=opt['len_max_seq'],
                    n_layer=opt['n_layer'], d_inner=opt['d_inner'], n_head=opt['n_head'], slf_attn=opt['slf_attn'],
                    d_k=opt['d_k'], d_v=opt['d_v'], feat_vocab=opt['feat_vocab'], d_feat_vec=opt['d_feat_vec'], 
                    layer_attn=opt['layer_attn'], slf_attn_mask=opt['mask_slf_attn'],
                    dropout=opt['dropout'], attn_dropout=opt['attn_dropout'])
     elif 'bert' in opt['pretrained']:
         pretrained = BertModel.from_pretrained(opt['pretrained'])
         return cls(opt['n_vocab'], pretrained=pretrained, layer_attn=opt['layer_attn'], model_name='bert')
     elif 'gpt2' in opt['pretrained']:
         pretrained = GPT2Model.from_pretrained(opt['pretrained'])
         return cls(opt['n_vocab'], pretrained=pretrained, model_name='gpt2')
     else:
         raise ValueError("Other pretrained models haven't been supported yet")
    def __init__(self, cuda_device=-1):
        super(GPT2Embedder, self).__init__()

        self.cuda_device = 'cpu' if cuda_device == -1 else f'cuda:{cuda_device}'

        # Load pre-trained model tokenizer (vocabulary)
        self.enc = GPT2Tokenizer.from_pretrained('gpt2')
        # Load pre-trained model (weights)
        self.model = GPT2Model.from_pretrained('gpt2')

        self.model.to(self.cuda_device)
        self.model.eval()  # we only use the evaluation mode of the pretrained model

        self._bos_id = self.enc.encoder['<|endoftext|>']
        self._bos_past = None
Example #11
 def __init__(self, config):
     super(GPT2NeuralNet, self).__init__(config)
     self.gpt2 = GPT2Model(config)
     self.dropout = nn.Dropout(0.3)
     dense_size = config.n_embd * 2
     # fully connected layers
     self.linear1 = nn.Linear(config.n_embd * 2, dense_size)
     self.linear2 = nn.Linear(config.n_embd * 2, dense_size)
     self.linear_gate = nn.Linear(config.n_embd * 2 + dense_size,
                                  config.n_embd * 2)
     # output layers
     self.linear_out = nn.Linear(dense_size, 1)
     self.linear_aux_out = nn.Linear(dense_size, 5)
     self.linear_identity_out = nn.Linear(dense_size, 9)
     self.linear_np_out = nn.Linear(dense_size, 4)
     self.linear_identity_hidden = nn.Linear(config.n_embd * 2, dense_size)
     self.apply(self.init_weights)
def fetch_objects():
    bert = BertModel.from_pretrained(
        'bert-base-uncased').embeddings.position_embeddings.weight.data
    gpt = OpenAIGPTModel.from_pretrained(
        'openai-gpt').positions_embed.weight.data
    gpt2 = GPT2Model.from_pretrained('gpt2').wpe.weight.data
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    gpt_tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt')
    gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    return {
        'bert': bert,
        'gpt': gpt,
        'gpt2': gpt2
    }, {
        'bert': bert_tokenizer,
        'gpt': gpt_tokenizer,
        'gpt2': gpt2_tokenizer
    }
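A short usage sketch for the two dictionaries returned above; the printed shape assumes the base gpt2 configuration (1024 positions, 768 dimensions):

embeddings, tokenizers = fetch_objects()
print(embeddings['gpt2'].shape)               # positional embeddings, torch.Size([1024, 768])
print(tokenizers['gpt2'].encode('position'))  # BPE token ids for the word "position"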
Example #13
    def transform(self, X):
        # Load pre-trained model tokenizer (vocabulary)
        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        # Load pre-trained model (weights)
        model = GPT2Model.from_pretrained('gpt2', cache_dir='tmp/gpt2/')
        model.eval()
        model.to('cuda')  # move the model to the gpu once, outside the per-row loop

        output = []
        for idx, row in tqdm(X.iterrows(), total=len(X)):
            # Encode some inputs
            indexed_tokens_1 = tokenizer.encode(row.text)

            # Convert inputs to PyTorch tensors and move them to the gpu
            tokens_tensor_1 = torch.tensor([indexed_tokens_1]).to('cuda')

            # Predict hidden states features for each layer
            with torch.no_grad():
                hidden_states_1, past = model(tokens_tensor_1)

            tokens = [
                tokenizer.decoder[token].replace('Ġ', '')
                for token in indexed_tokens_1
            ]
            output.append([tokens, hidden_states_1.cpu()[0]])

        output = pd.DataFrame(output, columns=['tokens', 'layer_-1'])
        res = []
        for idx, row in X.iterrows():
            res.append(self.get_sample_props(output.loc[idx], **row)[1:])

        res = pd.DataFrame(res,
                           columns=[
                               'tokens', 'pronoun_offset_token',
                               'a_offset_token', 'b_offset_token', 'a_span',
                               'b_span', 'pronoun_token', 'a_tokens',
                               'b_tokens', 'bert', 'cls'
                           ])

        cols = list(set(X.columns).difference(res.columns))
        return {'X': pd.concat([X[cols], res], axis=1)}
Example #14
def extract_gpt2_hidden_word_representations(word, save_activs_to):

    # Load pre-trained model tokenizer (vocabulary)
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

    # get the hidden activations for word -- assumes gpu is available
    word_vec = None  # initialize word vector object

    # tokenize word
    indexed_tokens = tokenizer.encode(word)
    num_tokens = len(indexed_tokens)
    tokens_tensor = torch.tensor([indexed_tokens]).to('cuda')
    # set up model
    model = GPT2Model.from_pretrained('gpt2')
    model.eval()
    model.to('cuda')

    # get word_vec
    with torch.no_grad():
        # get token-wise activations
        hidden, _ = model(tokens_tensor)
        hidden_np = hidden.cpu().numpy()

        # the token dimension of the hidden states (batch, num_tokens, n_embd) is dimension 1
        seq_dim = 1

        # sum the hidden layer element-wise along the token dimension to get word vector representation
        word_vec = np.sum(hidden_np, axis=seq_dim).squeeze()

    # clear gpu memory
    torch.cuda.empty_cache()

    # save word vector
    with open(save_activs_to, "wb") as outfile:
        pickle.dump(word_vec, outfile)
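A brief usage sketch; the word and file name are illustrative. The saved object is a single summed vector of size n_embd (768 for the base gpt2 model):

extract_gpt2_hidden_word_representations("serendipity", "serendipity_vec.pkl")

with open("serendipity_vec.pkl", "rb") as f:
    word_vec = pickle.load(f)
print(word_vec.shape)  # (768,)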
def load_model_fromlist(name):
    if name not in MODELS:
        raise Exception(str(name) + ' not a model in the list')
    print('# Loading model: ' + str(name))
    name_path = MODEL_PATH_DICT[name]
    if name == 'word2vec':
        if not exists(join(PATH, name_path)): download_model(name)
        return gensim.models.KeyedVectors.load_word2vec_format(
            join(PATH, name_path), binary=True)
    if name == 'glove':
        if not exists(join(PATH, name_path)): download_model(name)
        return (gensim.models.KeyedVectors.load_word2vec_format(
            join(PATH, name_path)))
    if name == 'dict2vec':
        if not exists(join(PATH, name_path)): download_model(name)
        return (gensim.models.KeyedVectors.load_word2vec_format(
            join(PATH, name_path), binary=False, unicode_errors="ignore"))
    if name == 'conceptnet':
        if not exists(join(PATH, name_path)): download_model(name)
        return (gensim.models.KeyedVectors.load_word2vec_format(
            join(PATH, name_path)))
    if name == 'bert':
        tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
        model = BertModel.from_pretrained(
            'bert-large-uncased').embeddings.word_embeddings.weight.data.numpy(
            )
        return ([model, tokenizer])
    if name == 'bert-context':
        tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
        model = BertModel.from_pretrained('bert-large-uncased',
                                          output_hidden_states=True)
        return ([model, tokenizer])
    if name == 'gpt2':
        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        model = GPT2LMHeadModel.from_pretrained(
            'gpt2').transformer.wte.weight.data.numpy()
        return ([model, tokenizer])
    if name == 'gpt2-context':
        tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
        model = GPT2Model.from_pretrained('gpt2', output_hidden_states=True)
        return ([model, tokenizer])
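A short usage sketch for load_model_fromlist; return types follow the branches above (a [model, tokenizer] pair for the transformer entries, a gensim KeyedVectors object otherwise):

glove = load_model_fromlist('glove')                          # gensim KeyedVectors
gpt2_emb, gpt2_tok = load_model_fromlist('gpt2')              # static wte embedding matrix + tokenizer
gpt2_ctx, gpt2_ctx_tok = load_model_fromlist('gpt2-context')  # contextual model + tokenizer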
Example #16
    def __init__(self, model_name, add_dense=True, trainable=False):
        super().__init__()

        self.model_name = model_name
        self.add_dense = add_dense
        self.trainable = trainable

        if self.model_name == 'GPT':
            self.encoder = OpenAIGPTModel.from_pretrained('openai-gpt')
        elif self.model_name == 'GPT-2':
            self.encoder = GPT2Model.from_pretrained('gpt2')
        else:
            raise NotImplementedError(f'{self.model_name} -- No such model')

        if not self.trainable:
            for p in self.encoder.parameters():
                p.requires_grad = False

        if self.add_dense:
            self.dense = nn.Linear(in_features=768, out_features=128)
Example #17
    def Get_GPT2_Representation(self, examples):
        for i, example in enumerate(examples):

            # example.gpt2_mat = np.zeros((pb.fgt_maxlength,768))
            # continue

            if self.gpt2_tokenizer is None:
                self.gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

            text = example.fgt_channels[0]
            indexed_tokens = self.gpt2_tokenizer.encode(text)
            tokens_tensor = torch.tensor([indexed_tokens])

            if self.gpt2 is None:
                self.gpt2 = GPT2Model.from_pretrained('gpt2')
                self.gpt2.eval()

            with torch.no_grad():
                hidden_states, past = self.gpt2(tokens_tensor)  # (1, 5, 768)
                shape = np.array(hidden_states).shape

                representation, count = [], 0

                a, b = shape[1], shape[2]
                representation = np.zeros((a, b))

                for layer in hidden_states:
                    for words in layer.numpy():
                        representation += words
                        count += 1
                if count > 0:
                    representation = representation * 1.0 / count

                representation = list(representation)
                while (len(representation) < pb.fgt_maxlength):
                    representation.append(np.zeros(b))

                example.gpt2_mat = representation[0:pb.fgt_maxlength]

            print("{:.2%}".format(i * 1.0 / len(examples)))
 def test_model_from_pretrained(self):
     cache_dir = "/tmp/pytorch_pretrained_bert_test/"
     for model_name in list(PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
         model = GPT2Model.from_pretrained(model_name, cache_dir=cache_dir)
         shutil.rmtree(cache_dir)
         self.assertIsNotNone(model)
Example #19
 def _init_model_tokenizer(self):
     from pytorch_pretrained_bert import GPT2Model, GPT2Tokenizer
     self._tokenizer = GPT2Tokenizer.from_pretrained(self.model_dir)
     self._model = GPT2Model.from_pretrained(self.model_dir)
     self._model.eval()
Example #20
    def __init__(self,
                 n_layers,
                 in_size,
                 out_size,
                 embed_size,
                 in_size_hier,
                 hidden_size,
                 proj_size,
                 dropout=0.5,
                 initialEmbW=None,
                 independent=False,
                 rnn_type='lstm',
                 classifier='baseline',
                 states_att=False,
                 state_size=-1,
                 embedding_init=None,
                 weights_init=None,
                 elmo_init=False,
                 elmo_num_outputs=1,
                 finetune_elmo=False,
                 bert_init=False,
                 bert_model=None,
                 finetune_bert=False,
                 add_word_emb=True,
                 pretrained_all=True):
        """Initialize encoder with structure parameters

        Args:
            n_layers (int): Number of layers.
            in_size (int): Dimensionality of input vectors.
            out_size (int): Dimensionality of output vectors.
            embed_size (int): Dimensionality of word embedding.
            hidden_size (int) : Dimensionality of hidden vectors.
            proj_size (int) : Dimensionality of projection before softmax.
            dropout (float): Dropout ratio.
        """
        #TODO
        att_size = 128
        self.rnn_type = rnn_type
        self.classifier = classifier
        super(HLSTMDecoder, self).__init__()
        self.embed = nn.Embedding(in_size, embed_size)
        if embedding_init is not None:
            self.embed.weight.data.copy_(torch.from_numpy(embedding_init))
        elif weights_init is not None:
            self.embed.weight.data.copy_(
                torch.from_numpy(weights_init['embed']))
        if rnn_type == 'lstm':
            self.lstm = nn.LSTM(embed_size + in_size_hier,
                                hidden_size,
                                n_layers,
                                batch_first=True,
                                dropout=dropout)
        elif rnn_type == 'gru':
            self.lstm = nn.GRU(embed_size + in_size_hier,
                               hidden_size,
                               n_layers,
                               batch_first=True,
                               dropout=dropout)
        if weights_init is not None:
            lstm_wt = weights_init['lstm']
            for k, v in lstm_wt.items():
                self.lstm.__getattr__(k).data.copy_(torch.from_numpy(v))

        self.elmo_init = elmo_init
        self.bert_init = bert_init
        self.pretrained_all = pretrained_all
        self.bert_model = bert_model
        self.add_word_emb = add_word_emb
        if False:
            #if pretrained_all and elmo_init:
            options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
            self.elmo = Elmo(options_file,
                             weight_file,
                             elmo_num_outputs,
                             requires_grad=finetune_elmo)
            elmo_layer = [
                nn.Linear(elmo_num_outputs * 1024, out_size),
                nn.ReLU()
            ]
            self.elmo_layer = nn.Sequential(*elmo_layer)
        elif False:
            #elif pretrained_all and bert_init:
            if 'bert' in bert_model:
                self.bert = BertModel.from_pretrained(bert_model)
            elif 'openai-gpt' in bert_model:
                self.bert = OpenAIGPTModel.from_pretrained(bert_model)
            elif 'gpt2' in bert_model:
                self.bert = GPT2Model.from_pretrained(bert_model)
            elif 'transfo-xl' in bert_model:
                self.bert = TransfoXLModel.from_pretrained(bert_model)
            self.finetune_bert = finetune_bert
            if not finetune_bert:
                for param in self.bert.parameters():
                    param.requires_grad = False
            if bert_model in ['bert-base-uncased', 'openai-gpt', 'gpt2']:
                bert_in = 768
            elif bert_model in [
                    'bert-large-uncased', 'gpt2-medium', 'transfo-xl-wt103'
            ]:
                bert_in = 1024
            bert_layer = [nn.Linear(bert_in, out_size), nn.ReLU()]
            self.bert_layer = nn.Sequential(*bert_layer)

        self.n_layers = n_layers
        self.dropout = dropout
        self.independent = independent
        self.states_att = states_att
        if states_att:
            self.ecW = nn.Linear(state_size, att_size)
            self.ysW = nn.Linear(hidden_size, att_size)
            hidden_size += state_size

        if classifier == 'baseline':
            layers = [
                nn.Linear(hidden_size, proj_size),
                nn.Linear(proj_size, out_size)
            ]
            self.y_classifier = nn.Sequential(*layers)
        elif classifier == 'weighted_norm':
            layers = [
                weight_norm(nn.Linear(hidden_size, proj_size), dim=None),
                nn.ReLU(),
                weight_norm(nn.Linear(proj_size, out_size), dim=None)
            ]
            self.y_classifier = nn.Sequential(*layers)
        elif classifier == 'logit':
            layers = [
                weight_norm(nn.Linear(hidden_size, proj_size), dim=None),
                nn.ReLU(),
                nn.Linear(proj_size, out_size)
            ]
            self.classifier_txt = nn.Sequential(*layers)
            layers = [
                weight_norm(nn.Linear(hidden_size, 2048), dim=None),
                nn.ReLU(),
                nn.Linear(2048, out_size)
            ]
            self.classifier_ft = nn.Sequential(*layers)
            if weights_init is not None:
                self.classifier_txt[0].weight.data.copy_(
                    torch.from_numpy(weights_init['classifier_txt']))
                self.classifier_ft[0].weight.data.copy_(
                    torch.from_numpy(weights_init['classifier_ft']))
print(tokenizer.decode(indexed_tokens_1))  # Who was Jim Henson ?
print(tokenizer.decode([8727, 373, 474, 320]))  # who was jim
print(tokenizer.decode([30963, 1559, 5633]))  #  henson ?
print(tokenizer.decode([30963]))  #  hen
print(tokenizer.decode([508, 8727, 373]))  # whowho was
indexed_tokens_1 = tokenizer.convert_tokens_to_ids(text_1)  # AttributeError: 'GPT2Tokenizer' object has no attribute 'convert_tokens_to_ids'
indexed_tokens_2 = tokenizer.encode(text_2); print(indexed_tokens_2)  # [18050, 367, 19069, 373, 257, 13595, 14471, 263]

## Convert inputs to PyTorch tensors
tokens_tensor_1 = torch.tensor([indexed_tokens_1]); print(tokens_tensor_1)
tokens_tensor_2 = torch.tensor([indexed_tokens_2])

##################################################################
## GPT2Model
## Load pre-trained model (weights)
model = GPT2Model.from_pretrained('/Users/coder352/datasets/WordVec/pytorch_pretrained_bert/gpt2/')
model.eval()
# INFO:pytorch_pretrained_bert.modeling_gpt2:Model config {
#   "initializer_range": 0.02,
#   "layer_norm_epsilon": 1e-05,
#   "n_ctx": 1024,
#   "n_embd": 768,
#   "n_head": 12,
#   "n_layer": 12,
#   "n_positions": 1024,
#   "vocab_size": 50257
# }

## Predict hidden states features for each layer
with torch.no_grad():
    hidden_states_1, past = model(tokens_tensor_1)
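A brief continuation sketch inspecting the returned objects; the shapes assume the 12-layer base gpt2 config printed above (pytorch_pretrained_bert's GPT2Model returns a hidden-state tensor plus a list of per-layer key/value caches):

print(hidden_states_1.shape)  # torch.Size([1, len(indexed_tokens_1), 768])
print(len(past))              # 12, one cached key/value pair per layer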
Example #22
                    output = output.reshape(output.shape[0], -1,
                                            output.shape[1])

                output = np.swapaxes(output, 0, 1)
                list_output.append(output)

                # ====== Construct Cache ====== #
                temp_cache = {}
                for i, sent in enumerate(mini_batch):
                    hask_key = hashlib.sha256(sent.encode()).hexdigest()
                    temp_cache[hask_key] = output[i]
                self.cache.update(temp_cache)

                idx += mini_batch_size
                self.count += mini_batch_size
            output = np.concatenate(list_output, 0)
            te = time.time()
            print('encoding with model', len(sentences), 'processed',
                  self.count, 'took', '{:4.1f}'.format(te - ts))

        te = time.time()
        embedding = self.get_multi_head_embedding(output, heads, head_size)
        return embedding


if __name__ == '__main__':
    model = GPT2Model('bert-base-uncased')
    model.prepare('Length')

    model.construct_encoder()
Example #23
    def __init__(self,
                 n_wlayers,
                 n_slayers,
                 in_size,
                 out_size,
                 embed_size,
                 hidden_size,
                 dropout=0.5,
                 ignore_label=None,
                 initialEmbW=None,
                 independent=False,
                 rnn_type='lstm',
                 embedding_init=None,
                 weights_init=None,
                 elmo_init=False,
                 elmo_num_outputs=1,
                 finetune_elmo=False,
                 bert_init=False,
                 bert_model=None,
                 finetune_bert=False,
                 add_word_emb=True,
                 pretrained_all=True,
                 concat_his=False):
        """Initialize encoder with structure parameters
        Args:
            n_layers (int): Number of layers.
            in_size (int): Dimensionality of input vectors.
            out_size (int) : Dimensionality of hidden vectors to be output.
            embed_size (int): Dimensionality of word embedding.
            dropout (float): Dropout ratio.
        """

        super(HLSTMEncoder, self).__init__()
        self.embed = nn.Embedding(in_size, embed_size)
        if embedding_init is not None:
            self.embed.weight.data.copy_(torch.from_numpy(embedding_init))
        elif weights_init is not None:
            self.embed.weight.data.copy_(
                torch.from_numpy(weights_init['embed']))
        if rnn_type == 'lstm':
            self.wlstm = nn.LSTM(embed_size,
                                 hidden_size,
                                 n_wlayers,
                                 batch_first=True,
                                 dropout=dropout)
            self.slstm = nn.LSTM(hidden_size,
                                 out_size,
                                 n_slayers,
                                 batch_first=True,
                                 dropout=dropout)
        elif rnn_type == 'gru':
            self.wlstm = nn.GRU(embed_size,
                                hidden_size,
                                n_wlayers,
                                batch_first=True,
                                dropout=dropout)
            self.slstm = nn.GRU(hidden_size,
                                out_size,
                                n_slayers,
                                batch_first=True,
                                dropout=dropout)
        self.elmo_init = elmo_init
        self.bert_init = bert_init
        self.pretrained_all = pretrained_all
        self.concat_his = concat_his
        self.bert_model = bert_model
        self.add_word_emb = add_word_emb
        if pretrained_all and elmo_init:
            options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
            self.elmo = Elmo(options_file,
                             weight_file,
                             elmo_num_outputs,
                             requires_grad=finetune_elmo)
            elmo_layer = [
                nn.Linear(elmo_num_outputs * 1024, out_size),
                nn.ReLU()
            ]
            self.elmo_layer = nn.Sequential(*elmo_layer)
        elif pretrained_all and bert_init:
            if 'bert' in bert_model:
                self.bert = BertModel.from_pretrained(bert_model)
            elif 'openai-gpt' in bert_model:
                self.bert = OpenAIGPTModel.from_pretrained(bert_model)
            elif 'gpt2' in bert_model:
                self.bert = GPT2Model.from_pretrained(bert_model)
            elif 'transfo-xl' in bert_model:
                self.bert = TransfoXLModel.from_pretrained(bert_model)
            self.finetune_bert = finetune_bert
            if not finetune_bert:
                for param in self.bert.parameters():
                    param.requires_grad = False
            if bert_model in ['bert-base-uncased', 'openai-gpt', 'gpt2']:
                bert_in = 768
            elif bert_model in [
                    'bert-large-uncased', 'gpt2-medium', 'transfo-xl-wt103'
            ]:
                bert_in = 1024
            bert_layer = [nn.Linear(bert_in, out_size), nn.ReLU()]
            self.bert_layer = nn.Sequential(*bert_layer)

        self.independent = independent
        self.rnn_type = rnn_type
Example #24
    def __init__(self,
                 n_layers,
                 in_size,
                 out_size,
                 embed_size,
                 dropout=0.5,
                 initialEmbW=None,
                 rnn_type='lstm',
                 attention=None,
                 q_size=-1,
                 embedding_init=None,
                 weights_init=None,
                 elmo_init=False,
                 elmo_num_outputs=1,
                 finetune_elmo=False,
                 bert_init=False,
                 bert_model=None,
                 finetune_bert=False,
                 add_word_emb=True):
        """Initialize encoder with structure parameters
        Args:
            n_layers (int): Number of layers.
            in_size (int): Dimensionality of input vectors.
            out_size (int) : Dimensionality of hidden vectors to be output.
            embed_size (int): Dimensionality of word embedding.
            dropout (float): Dropout ratio.
        """
        # TODO
        conv_out_size = 512

        super(LSTMEncoder, self).__init__()
        self.embed = nn.Embedding(in_size, embed_size)
        if embedding_init is not None:
            self.embed.weight.data.copy_(torch.from_numpy(embedding_init))
        elif weights_init is not None:
            self.embed.weight.data.copy_(
                torch.from_numpy(weights_init['embed']))
        self.elmo_init = elmo_init
        self.bert_init = bert_init
        self.bert_model = bert_model
        self.add_word_emb = add_word_emb
        if elmo_init:
            options_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
            self.elmo = Elmo(options_file,
                             weight_file,
                             elmo_num_outputs,
                             requires_grad=finetune_elmo)
            elmo_layer = [
                nn.Linear(elmo_num_outputs * 1024, out_size),
                nn.ReLU()
            ]
            self.elmo_layer = nn.Sequential(*elmo_layer)
        elif bert_init:
            if 'bert' in bert_model:
                self.bert = BertModel.from_pretrained(bert_model)
            elif 'openai-gpt' in bert_model:
                self.bert = OpenAIGPTModel.from_pretrained(bert_model)
            elif 'gpt2' in bert_model:
                self.bert = GPT2Model.from_pretrained(bert_model)
            elif 'transfo-xl' in bert_model:
                self.bert = TransfoXLModel.from_pretrained(bert_model)
            self.finetune_bert = finetune_bert
            if not finetune_bert:
                for param in self.bert.parameters():
                    param.requires_grad = False
            if bert_model in ['bert-base-uncased', 'openai-gpt', 'gpt2']:
                bert_in = 768
            elif bert_model in [
                    'bert-large-uncased', 'gpt2-medium', 'transfo-xl-wt103'
            ]:
                bert_in = 1024
            bert_layer = [nn.Linear(bert_in, out_size), nn.ReLU()]
            self.bert_layer = nn.Sequential(*bert_layer)
        if rnn_type == 'lstm':
            self.lstm = nn.LSTM(embed_size,
                                out_size,
                                n_layers,
                                batch_first=True,
                                dropout=dropout)
        elif rnn_type == 'gru':
            self.lstm = nn.GRU(embed_size,
                               out_size,
                               n_layers,
                               batch_first=True,
                               dropout=dropout)
        self.attention = attention
        if attention == 'conv' or attention == 'conv_sum':
            conv_in_size = out_size
            self.conv1 = nn.Conv1d(in_channels=conv_in_size,
                                   out_channels=conv_out_size,
                                   kernel_size=1,
                                   padding=0)
            self.conv2 = nn.Conv1d(in_channels=conv_out_size,
                                   out_channels=2,
                                   kernel_size=1,
                                   padding=0)
            if weights_init is not None:
                self.conv1.weight.data.copy_(
                    torch.from_numpy(weights_init['conv1']))
                self.conv2.weight.data.copy_(
                    torch.from_numpy(weights_init['conv2']))
        elif attention == 'c_conv_sum':
            hidden_size = 512
            conv_hidden_size = 256
            layers = [
                weight_norm(nn.Linear(out_size, hidden_size), dim=None),
                nn.ReLU()
            ]
            self.c_fa = nn.Sequential(*layers)
            layers = [
                weight_norm(nn.Linear(q_size, hidden_size), dim=None),
                nn.ReLU()
            ]
            self.q_fa = nn.Sequential(*layers)
            layers = [
                nn.Conv2d(in_channels=hidden_size,
                          out_channels=conv_hidden_size,
                          kernel_size=1),
                nn.ReLU(),
                nn.Conv2d(in_channels=conv_hidden_size,
                          out_channels=1,
                          kernel_size=1)
            ]
            self.cq_att = nn.Sequential(*layers)
            if weights_init is not None:
                self.c_fa[0].weight.data.copy_(
                    torch.from_numpy(weights_init['c_fa']))
                self.q_fa[0].weight.data.copy_(
                    torch.from_numpy(weights_init['q_fa']))
                self.cq_att[0].weight.data.copy_(
                    torch.from_numpy(weights_init['cq_att_conv1']))
                self.cq_att[2].weight.data.copy_(
                    torch.from_numpy(weights_init['cq_att_conv2']))
def main():
    """Main training program."""

    print('Evaluate GPT2 model')

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Timer.
    timers = Timers()

    # Arguments.
    args = get_args()

    # Pytorch distributed.
    initialize_distributed(args)

    # Random seeds for reproducibility.
    set_random_seed(args.seed)

    # Data stuff.
    eval_data = get_eval_data(args)

    # Model, optimizer, and learning rate.
    if args.eval_hf:
        from pytorch_pretrained_bert import GPT2LMHeadModel
        from pytorch_pretrained_bert import GPT2Model as HFGPT2Model
        if args.num_layers == 24:
            model_path = args.load
            #model_path = '/home/universal-lm-data.cosmos549/repos/gpt2_mp/models/345M'
            hfmodel = HFGPT2Model.from_pretrained(model_path, cache_dir='gpt2_weights', from_tf=True).cuda()
            model = GPT2LMHeadModel(hfmodel.config)
            model.transformer.load_state_dict(hfmodel.state_dict())
            model.cuda()
        else:
            model = GPT2LMHeadModel.from_pretrained('gpt2', cache_dir='gpt2_weights').cuda()
    else:
        if args.load_openai:
            from utils import move_weights
            model_path = args.load
            args.load = None
            model = setup_model(args)
            from pytorch_pretrained_bert import GPT2LMHeadModel
            from pytorch_pretrained_bert import GPT2Model as HFGPT2Model

            model_path = 'gpt2'
            from_tf = False
            print('loading openai weights')
            model.cpu()
            if args.num_layers == 24:
                #model_path = '/home/universal-lm-data.cosmos549/repos/gpt2_mp/models/345M'
                hfmodel = HFGPT2Model.from_pretrained(model_path, cache_dir='gpt2_weights', from_tf=True)
                gpt2model = GPT2LMHeadModel(hfmodel.config)
                gpt2model.transformer.load_state_dict(hfmodel.state_dict())
            else:
                gpt2model = GPT2LMHeadModel.from_pretrained('gpt2', cache_dir='gpt2_weights')
            model2fill = model
            while isinstance(model2fill, (DDP, FP16_Module)):
                model2fill = model2fill.module
            move_weights(model2fill, gpt2model)
            model.cuda()
        else:
            model = setup_model(args)

    # Run on test data.
    prefix = "wiki" #os.path.basename(args.valid_data)
    evaluate_and_print_results(prefix, eval_data,
                               model, args, timers)