Example #1
    def test_encoder_layer(self):
        embed_dim, n_heads, dropout_rate = 512, 8, 0.5
        encoder_layer = EncoderLayer(embed_dim, embed_dim // 2, n_heads, dropout_rate)

        batch_size, max_seq_len = 5, 10
        x = torch.randn(batch_size, max_seq_len, embed_dim)
        assert encoder_layer(x).shape == x.shape
Example #2
  def __init__(self, num_classes=3):
    """We have some of the best constructors in the world"""

    super(TransformerClassifier, self).__init__()

    tokenizer = CharBPETokenizer(
      '../Tokenize/thyme-tokenizer-vocab.json',
      '../Tokenize/thyme-tokenizer-merges.txt')
    vocab_size = tokenizer.get_vocab_size()

    self.embedding = nn.Embedding(
      num_embeddings=vocab_size,
      embedding_dim=cfg.getint('model', 'emb_dim'))

    self.position = PositionalEncoding(
      embedding_dim=cfg.getint('model', 'emb_dim'))

    trans_encoders = []
    for n in range(cfg.getint('model', 'num_layers')):
      trans_encoders.append(EncoderLayer(
        d_model=cfg.getint('model', 'emb_dim'),
        d_inner=cfg.getint('model', 'feedforw_dim'),
        n_head=cfg.getint('model', 'num_heads'),
        d_k=cfg.getint('model', 'emb_dim'),
        d_v=cfg.getint('model', 'emb_dim')))
    self.trans_encoders = nn.ModuleList(trans_encoders)

    self.dropout = nn.Dropout(cfg.getfloat('model', 'dropout'))

    self.linear = nn.Linear(
      in_features=cfg.getint('model', 'emb_dim'),
      out_features=num_classes)

    self.init_weights()
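
A forward pass is not shown in this snippet; below is a minimal sketch that would pair with this constructor, assuming each EncoderLayer returns an (output, attention) pair and that sequence-level classification mean-pools over tokens. Both are assumptions, not part of the original code.

  def forward(self, texts):
    # texts: (batch_size, seq_len) token ids from the CharBPE tokenizer
    output = self.embedding(texts)
    output = self.position(output)

    for trans_encoder in self.trans_encoders:
      # assumed interface: each layer returns (output, self_attention)
      output, _ = trans_encoder(output)

    # assumption: mean-pool token representations before classification
    output = self.dropout(output.mean(dim=1))
    return self.linear(output)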
Example #3
  def __init__(self, num_classes=2):
    """We have some of the best constructors in the world"""

    super(TransformerClassifier, self).__init__()

    self.embed = nn.Embedding(
      num_embeddings=cfg.getint('data', 'vocab_size'),
      embedding_dim=cfg.getint('model', 'emb_dim'))

    trans_encoders = []
    for n in range(cfg.getint('model', 'n_layers')):
      trans_encoders.append(EncoderLayer(
        d_model=cfg.getint('model', 'emb_dim'),
        d_inner=cfg.getint('model', 'feedforw_dim'),
        n_head=cfg.getint('model', 'n_heads'),
        d_k=cfg.getint('model', 'emb_dim'),
        d_v=cfg.getint('model', 'emb_dim')))
    self.trans_encoders = nn.ModuleList(trans_encoders)

    self.dropout = nn.Dropout(cfg.getfloat('model', 'dropout'))

    self.linear = nn.Linear(
      in_features=cfg.getint('model', 'emb_dim'),
      out_features=num_classes)

    self.init_weights()
Example #4
 def __init__(self, vocab_size, embed_model=None, emb_size=100, hidden_size=128,
              input_dropout_p=0, dropout_p=0, n_layers=1, bidirectional=False,
              rnn_cell=None, rnn_cell_name='gru', variable_lengths=True,
              d_ff=2048, dropout=0.3, N=1):
     super(EncoderRNN, self).__init__(vocab_size, emb_size, hidden_size,
                                      input_dropout_p, dropout_p, n_layers,
                                      rnn_cell_name)
     self.variable_lengths = variable_lengths
     self.bidirectional = bidirectional
     if bidirectional:
         self.d_model = 2 * hidden_size
     else:
         self.d_model = hidden_size
     ff = PositionwiseFeedForward(self.d_model, d_ff, dropout)
     if embed_model is None:
         self.embedding = nn.Embedding(vocab_size, emb_size)
     else:
         self.embedding = embed_model
     if rnn_cell is None:
         self.rnn = self.rnn_cell(emb_size,
                                  hidden_size,
                                  n_layers,
                                  batch_first=True,
                                  bidirectional=bidirectional,
                                  dropout=dropout_p)
     else:
         self.rnn = rnn_cell
     self.group_attention = GroupAttention(8, self.d_model)
     self.onelayer = Encoder(
         EncoderLayer(self.d_model, deepcopy(self.group_attention),
                      deepcopy(ff), dropout), N)
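
The d_model bookkeeping above matters because a bidirectional RNN concatenates forward and backward hidden states, so the Transformer layers downstream must be sized to 2 * hidden_size. A standalone sanity check of that shape, using a plain nn.GRU (an assumption; the actual cell comes from the base class via rnn_cell_name):

import torch
import torch.nn as nn

hidden_size, emb_size = 128, 100
rnn = nn.GRU(emb_size, hidden_size, batch_first=True, bidirectional=True)
out, _ = rnn(torch.randn(4, 12, emb_size))      # (batch, seq_len, emb_size)
assert out.shape == (4, 12, 2 * hidden_size)    # d_model = 2 * hidden_size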
Example #5
    def __init__(self,
                 input_vocab_size,
                 output_vocab_size,
                 d_model,
                 d_inner,
                 n_layers,
                 n_head,
                 d_k,
                 d_v,
                 dropout,
                 max_len,
                 save_config=True):
        """Constructor"""

        super(TransformerEncoder, self).__init__()

        self.embed = nn.Embedding(num_embeddings=input_vocab_size,
                                  embedding_dim=d_model)

        trans_encoders = []
        for n in range(n_layers):
            trans_encoders.append(
                EncoderLayer(d_model=d_model,
                             d_inner=d_inner,
                             n_head=n_head,
                             d_k=d_k,
                             d_v=d_v))
        self.trans_encoders = nn.ModuleList(trans_encoders)

        self.dropout = nn.Dropout(dropout)

        self.classifier = nn.Linear(in_features=d_model,
                                    out_features=output_vocab_size)

        # save configuration for loading later
        if save_config:
            config = dict(input_vocab_size=input_vocab_size,
                          output_vocab_size=output_vocab_size,
                          d_model=d_model,
                          d_inner=d_inner,
                          n_layers=n_layers,
                          n_head=n_head,
                          d_k=d_k,
                          d_v=d_v,
                          dropout=dropout,
                          max_len=max_len)

            # write the config with a context manager so the file is closed
            with open(config_path, 'wb') as pickle_file:
                pickle.dump(config, pickle_file)

        self.init_weights()
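
The pickled dictionary exists so the encoder can be rebuilt later; a loading sketch under the snippet's own assumptions (config_path defined in the surrounding module, and the saved keys matching this constructor's parameters):

import pickle

with open(config_path, 'rb') as pickle_file:      # config_path assumed, as above
    config = pickle.load(pickle_file)
model = TransformerEncoder(**config, save_config=False)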
Example #6
    def __init__(self, n_bins, ip_bin_size, hm, args):
        super(TransformerEnc, self).__init__()
        self.model_n_dim = args.bin_rnn_size

        self.attn = MultiAttn(args.num_heads, self.model_n_dim)
        self.ff = FeedForward(self.model_n_dim, args.dff, args.dropout)
        self.posit = Position(self.model_n_dim, args.dropout, n_bins)
        self.enc = EncoderLayer(self.model_n_dim, self.attn, self.ff,
                                args.dropout)

        self.transformer = TransfromerEncoder(args.num_t, self.enc)
        self.linear = nn.Linear(hm, self.model_n_dim)
        self.pooler = Pooler(args.bin_rnn_size)

        self.norm = None

        if args.norm is not None:
            self.norm = Norm(hm)
Example #7
def make_model_elmo(N=6, d_model=1024, d_ff=2048, h=8, dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), 
                             c(ff), dropout), N),
        nn.Sequential(Embedder(), c(position)),
        nn.Sequential(Embedder(), c(position)),
        generator=None)
    
    # This was important from their code. 
    # Initialize parameters with Glorot / fan_avg.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)  # in-place initializer; xavier_uniform is deprecated
    return model
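
Building the model is then a single call; the sketch below only assumes that the helper and its dependencies (Embedder, EncoderDecoder, and the other classes above) are importable from the same codebase:

model = make_model_elmo(N=6, d_model=1024, d_ff=2048, h=8, dropout=0.1)
n_params = sum(p.numel() for p in model.parameters())
print(f'{n_params:,} parameters')  # rough size check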
Example #8
def BuildModel(vocab_size, encoder_emb, decoder_emb, d_model=512, N=6, d_ff=2048, h=8, dropout=0.1):

    target_vocab = vocab_size
    c = copy.deepcopy

    attention = MultiHeadedAttention(h, d_model)
    feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)

    encoder_layer = EncoderLayer(d_model, c(attention), c(feed_forward), dropout)
    decoder_layer = DecoderLayer(d_model, c(attention), c(attention), c(feed_forward), dropout)

    encoder = Encoder(encoder_layer, N)
    decoder = Decoder(decoder_layer, N)

    model = EncoderDecoder( encoder, decoder,
        nn.Sequential(Embeddings(encoder_emb, d_model), c(position)),
        nn.Sequential(Embeddings(decoder_emb, d_model), c(position)),
        Generator(d_model, target_vocab))

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
Example #9
    # y = Dropout(0.1)(x)
    # y = Dense(500, activation='relu')(x)
    # y = Dropout(0.2)(y)
    # y = Dense(500, activation='relu')(y)
    # y = Dropout(0.2)(y)
    # y = Dense(500, activation='relu')(y)
    # y = Dropout(0.3)(y)
    # d_model = 1
    d_inner_hid = opt.d_inner_hid  #1  # d_inner_hid = 512
    n_head = opt.n_head  # 1  # n_head = 3
    d_k = opt.d_k  # 1 #64
    d_v = opt.d_v  # 1 #64
    layers = opt.layers  # 1
    dropout_rate = 0.1
    encodeLayerList = [
        EncoderLayer(1, d_inner_hid, n_head, d_k, d_v, dropout_rate)
        for _ in range(layers)
    ]
    y = None
    for enc_layer in encodeLayerList:
        if y is None:
            y, _ = enc_layer(x)
        else:
            y, _ = enc_layer(y)

    y_2dim = Reshape([int(y.shape[1])])(y)
    # y_2dim = Reshape([int(x.shape[1])])(x)

    out = Dense(nb_classes, activation='softmax')(y_2dim)

    model = Model(inputs=x, outputs=out)  # Keras 2 API uses inputs/outputs keywords
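
To run this Keras model it would still need to be compiled; a minimal continuation, assuming one-hot (categorical) labels:

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()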
Example #10
    def __init__(self,
                 input_vocab,
                 target_vocab,
                 d_model=512,
                 d_int=2048,
                 d_k=64,
                 h=8,
                 n_layers=6,
                 dropout_rate=0.1,
                 max_len_pe=200,
                 bert_name=None):
        """
        :param input_vocab: Vocab based on BERT tokenizer
        :param target_vocab: Vocab based on BERT tokenizer, requires embedding. Fields tokenizer, tokenizer.ids_to_tokens = ordered_dict
        pad=0, start=1, end=2
        :param size: Size of the BERT model: base or large
        :param d_model: dimension of transformer embeddings #TODO add linear layer to map BERT output to dim 512?
        :param dropout_rate:dropout, default 0.1
        """
        super(TSP, self).__init__()
        self.dropout_rate = dropout_rate
        self.input_vocab = input_vocab
        self.target_vocab = target_vocab
        self.model_embeddings_source = nn.Sequential(
            DecoderEmbeddings(vocab=self.input_vocab, embed_size=d_model),
            PositionalEncoding(d_model=d_model,
                               dropout=dropout_rate,
                               max_len=max_len_pe))
        self.model_embeddings_target = nn.Sequential(
            DecoderEmbeddings(vocab=self.target_vocab, embed_size=d_model),
            PositionalEncoding(d_model=d_model,
                               dropout=dropout_rate,
                               max_len=max_len_pe))
        self.encoder = TransformerEncoder(layer=EncoderLayer(
            d_model=d_model,
            d_int=d_int,
            d_k=d_k,
            d_v=d_k,
            h=h,
            p_drop=dropout_rate),
                                          n_layer=n_layers)
        self.decoder = Transformer(layer=DecoderLayer(d_model=d_model,
                                                      d_int=d_int,
                                                      d_k=d_k,
                                                      d_v=d_k,
                                                      h=h,
                                                      p_drop=dropout_rate),
                                   n_layer=n_layers)
        self.linear_projection = nn.Linear(
            d_model,
            len(self.target_vocab.tokenizer.ids_to_tokens),
            bias=False)
        self.dropout = nn.Dropout(self.dropout_rate)

        self.device = self.linear_projection.weight.device

        initialize_weights(self.encoder)
        initialize_weights(self.decoder)
        initialize_weights(self.linear_projection)
        initialize_weights(self.model_embeddings_source)
        initialize_weights(self.model_embeddings_target)
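
One caveat in this constructor: self.device is read from the projection weight once, at construction time, so it goes stale if the model is later moved with .to(device). A property (an alternative to the assignment above, not the original author's code) would stay in sync:

    @property
    def device(self):
        # always reflects where the parameters currently live
        return self.linear_projection.weight.device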