Code example #1
    def __init__(self,
                 in_dim,
                 out_dim,
                 N,
                 heads,
                 model_dim,
                 key_dim,
                 value_dim,
                 ff_dim,
                 max_len=10000,
                 batch_first=True):

        super().__init__()
        self.name = 'transformer'

        self.batch_first = batch_first
        self.model_dim = model_dim

        # define layers
        # embedding layers
        self.src_embed = nn.Linear(in_dim, model_dim)
        self.tgt_embed = nn.Linear(in_dim, model_dim)
        self.pos_enc = PositionalEncoding(model_dim, max_len)
        # encoder-decoder
        self.encoder = Encoder(N, heads, model_dim, key_dim, value_dim, ff_dim)
        self.decoder = Decoder(N, heads, model_dim, key_dim, value_dim, ff_dim)
        # final output layer
        self.fc = nn.Linear(model_dim, out_dim)

        # xavier initialization
        for p in self.parameters():
            if p.dim() > 1 and p.requires_grad:
                nn.init.xavier_uniform_(p)
Code example #2
 def __init__(self, vocab_size, embed_model=None, emb_size=100, hidden_size=128, \
              input_dropout_p=0, dropout_p=0, n_layers=1, bidirectional=False, \
              rnn_cell=None, rnn_cell_name='gru', variable_lengths=True, d_ff=2048, dropout=0.3, N=1):
     super(EncoderRNN, self).__init__(vocab_size, emb_size, hidden_size,
                                      input_dropout_p, dropout_p, n_layers,
                                      rnn_cell_name)
     self.variable_lengths = variable_lengths
     self.bidirectional = bidirectional
     if bidirectional:
         self.d_model = 2 * hidden_size
     else:
         self.d_model = hidden_size
     ff = PositionwiseFeedForward(self.d_model, d_ff, dropout)
     if embed_model is None:
         self.embedding = nn.Embedding(vocab_size, emb_size)
     else:
         self.embedding = embed_model
     if rnn_cell is None:
         self.rnn = self.rnn_cell(emb_size,
                                  hidden_size,
                                  n_layers,
                                  batch_first=True,
                                  bidirectional=bidirectional,
                                  dropout=dropout_p)
     else:
         self.rnn = rnn_cell
     self.group_attention = GroupAttention(8, self.d_model)
     self.onelayer = Encoder(
         EncoderLayer(self.d_model, deepcopy(self.group_attention),
                      deepcopy(ff), dropout), N)
Code example #3
File: GPT.py / Project: TempestWK1/NLP-Tutorials
    def __init__(self, model_dim, max_len, n_layer, n_head, n_vocab, lr, max_seg=3, drop_rate=0.1, padding_idx=0):
        super().__init__()
        self.padding_idx = padding_idx
        self.n_vocab = n_vocab
        self.max_len = max_len

        self.word_emb = keras.layers.Embedding(
            input_dim=n_vocab, output_dim=model_dim,  # [n_vocab, dim]
            embeddings_initializer=tf.initializers.RandomNormal(0., 0.01),
        )
        self.segment_emb = keras.layers.Embedding(
            input_dim=max_seg, output_dim=model_dim,  # [max_seg, dim]
            embeddings_initializer=tf.initializers.RandomNormal(0., 0.01),
        )
        self.position_emb = keras.layers.Embedding(
            input_dim=max_len, output_dim=model_dim,  # [step, dim]
            embeddings_initializer=tf.initializers.RandomNormal(0., 0.01),
        )
        # note: the Embedding assigned to self.position_emb above is immediately
        # overwritten by this learned weight matrix
        self.position_emb = self.add_weight(
            name="pos", shape=[max_len, model_dim], dtype=tf.float32,
            initializer=keras.initializers.RandomNormal(0., 0.01))
        self.position_space = tf.ones((1, max_len, max_len))
        self.encoder = Encoder(n_head, model_dim, drop_rate, n_layer)
        self.task_mlm = keras.layers.Dense(n_vocab)
        self.task_nsp = keras.layers.Dense(2)

        self.cross_entropy = keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction="none")
        self.opt = keras.optimizers.Adam(lr)
Code example #4
File: fastspeech2.py / Project: atkisonb/FastSpeech2-1
    def __init__(self, preprocess_config, model_config):
        super(FastSpeech2, self).__init__()
        self.model_config = model_config

        self.encoder = Encoder(model_config)
        self.variance_adaptor = VarianceAdaptor(preprocess_config, model_config)
        self.decoder = Decoder(model_config)
        self.mel_linear = nn.Linear(
            model_config["transformer"]["decoder_hidden"],
            preprocess_config["preprocessing"]["mel"]["n_mel_channels"],
        )
        self.postnet = PostNet()

        self.speaker_emb = None
        if model_config["multi_speaker"]:
            with open(
                os.path.join(
                    preprocess_config["path"]["preprocessed_path"], "speakers.json"
                ),
                "r",
            ) as f:
                n_speaker = len(json.load(f))
            self.speaker_emb = nn.Embedding(
                n_speaker,
                model_config["transformer"]["encoder_hidden"],
            )
Code example #5
File: model_new.py / Project: ljmzlh/transformer
def make_model(opt,
               src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab), opt)

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg
    # (a self-contained toy illustration of this loop follows the example).
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
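The Glorot / fan_avg loop at the end of make_model only initializes parameters with more than one dimension, i.e. weight matrices and not biases. A self-contained toy illustration of that loop (the stacked Linear layers below are placeholders introduced here, not the project's Encoder/Decoder):

import torch.nn as nn

# Placeholder module standing in for the project-specific EncoderDecoder.
toy = nn.Sequential(nn.Linear(512, 2048), nn.ReLU(), nn.Linear(2048, 512))

for p in toy.parameters():
    if p.dim() > 1:                 # weight matrices only; 1-D biases keep their default init
        nn.init.xavier_uniform_(p)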
Code example #6
    def test_encoder(self):
        max_seq_len, vocab_size, embed_dim, n_heads, dropout_rate, n_layers = 10, 200, 512, 8, 0.5, 6
        encoder = Encoder(vocab_size, embed_dim, max_seq_len, n_heads, dropout_rate, n_layers)

        batch_size = 5
        x = torch.randint(0, vocab_size, size=(batch_size, max_seq_len))
        assert encoder(x).shape == (batch_size, max_seq_len, embed_dim)
Code example #7
def create_model(seq_len, vocab_size, pad_id, N, d_model, d_ff, h, dropout):
    inp = Input((seq_len, ))
    embedding = Embedding(vocab_size, d_model, pad_id)(inp)
    encoding = PositionalEncoding(d_model)(inp)
    net = Add()([embedding, encoding])
    net = Dropout(dropout)(net)
    mask = Lambda(lambda t: create_padding_mask(t, pad_id),
                  name="input_mask")(inp)
    net = Encoder(N=N, d_model=d_model, d_ff=d_ff, h=h,
                  dropout=dropout)([net, mask])
    net = Flatten()(net)
    net = Dense(2, activation="softmax")(net)

    model = Model(inp, net)

    # NOTE: keras optimizers cannot be saved with optimizer state;
    # need to use an optimizer from `tf.train`
    # NOTE: this seems to be a TF 1.x thing; in 2.x the tf.train optimizers are
    # dropped and the keras versions are the only implementations
    # NOTE: this is not recommended for training; the paper authors describe
    # a variable learning-rate schedule that still needs to be implemented
    # (a sketch of that schedule follows this example).
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001,
                                       beta1=0.9,
                                       beta2=0.98,
                                       epsilon=1e-9)

    model.compile(optimizer=optimizer,
                  loss="categorical_crossentropy",
                  metrics=["acc"])

    return model
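The last NOTE above refers to the variable learning-rate schedule from "Attention Is All You Need" that the author has not implemented yet. A minimal sketch of that warmup schedule, assuming TF 2.x Keras (tf.keras.optimizers.schedules.LearningRateSchedule) rather than the tf.train API used in the example:

import tensorflow as tf

class NoamSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    # lrate = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5)
    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        return tf.math.rsqrt(self.d_model) * tf.minimum(
            tf.math.rsqrt(step), step * self.warmup_steps ** -1.5)

optimizer = tf.keras.optimizers.Adam(NoamSchedule(d_model=512),
                                     beta_1=0.9, beta_2=0.98, epsilon=1e-9)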
Code example #8
def transformer_pretrain(
    num_layers=4,
    d_model=128,
    num_heads=8,
    dff=256,
    maximum_position_encoding=2048,
):
    inp = Input((None, d_model))

    encoder = Encoder(
        num_layers=num_layers,
        d_model=d_model,
        num_heads=num_heads,
        dff=dff,
        maximum_position_encoding=maximum_position_encoding,
        rate=0.3,
    )

    x = encoder(inp)

    out = Dense(d_model, activation="linear", name="out_pretraining")(x)

    model = Model(inputs=inp, outputs=out)

    opt = Adam(0.0001)

    model.compile(optimizer=opt, loss=mae)

    model.summary()

    return model
Code example #9
    def __init__(self, src_vocab, tgt_vocab, hparams=None):
        super(GraphTransformer, self).__init__()
        self.hparams = dict(GraphTransformer.default_hparams)
        if hparams:
            for k, v in hparams.items():
                if k in self.hparams:
                    self.hparams[k] = v
        self.src_vocab = src_vocab
        self.src_vocab_size = len(src_vocab)
        self.tgt_vocab = tgt_vocab
        self.tgt_vocab_size = len(tgt_vocab)
        self.src_seq_len = self.hparams["num_src_tokens"]
        self.tgt_seq_len = self.hparams["num_tgt_tokens"]
        self.biaffine = self.hparams["biaffine"]

        self.encoder = Encoder(num_layers=self.hparams["num_layers"],
                               d_model=self.hparams["d_model"],
                               num_heads=self.hparams["num_heads"],
                               dff=self.hparams["dff"],
                               source_vocab_size=self.src_vocab_size,
                               maximum_position_encoding=self.src_seq_len,
                               rate=self.hparams["dropout"])

        self.decoder = GraphDecoder(num_layers=self.hparams["num_layers"],
                                    d_model=self.hparams["d_model"],
                                    num_heads=self.hparams["num_heads"],
                                    dff=self.hparams["dff"],
                                    tgt_vocab=self.tgt_vocab,
                                    src_seq_len=self.src_seq_len,
                                    maximum_position_encoding=self.tgt_seq_len,
                                    rate=self.hparams["dropout"],
                                    biaffine=self.biaffine)
Code example #10
    def __init__(self, dataset, params):
        super(Rel_time_emb, self).__init__()
        self.dataset = dataset
        self.params = params

        self.create_time_embedds()

        self.time_nl = torch.sin
        self.his_encoder = Encoder(self.params)
Code example #11
    def __init__(self, sentence_encoder, hidden, n_layers, n_head, d_k, d_v, d_model, d_inner, d_mlp, dropout=0.1):
        super(Model, self).__init__()

        self.PositionEncoder = PositionalEncoding(dropout, hidden*2)
        self.Transformer = Encoder(n_layers, n_head, d_k, d_v, d_model, d_inner)
        self.Dropoutlayer = nn.Dropout(p=dropout)
        self.Decoderlayer = self.build_decoder(hidden*2, d_mlp, dropout)
        self.sentence_encoder = sentence_encoder
        self.criterion = nn.CrossEntropyLoss()
Code example #12
    def __init__(self,
                 model_dim,
                 max_len,
                 n_layer,
                 n_head,
                 n_vocab,
                 lr,
                 max_seg=3,
                 drop_rate=0.1,
                 padding_idx=0):
        super().__init__()
        self.padding_idx = padding_idx
        self.n_vocab = n_vocab
        self.max_len = max_len

        # I think a task embedding is not necessary for pretraining,
        # because the aim of all tasks is to train a universal sentence embedding:
        # the body encoder is the same across all tasks, and the output layer defines each task.
        # Fine-tuning replaces the output layer and leaves the body encoder unchanged
        # (a sketch of that pattern follows this example).

        # self.task_emb = keras.layers.Embedding(
        #     input_dim=n_task, output_dim=model_dim,  # [n_task, dim]
        #     embeddings_initializer=tf.initializers.RandomNormal(0., 0.01),
        # )

        self.word_emb = keras.layers.Embedding(
            input_dim=n_vocab,
            output_dim=model_dim,  # [n_vocab, dim]
            embeddings_initializer=tf.initializers.RandomNormal(0., 0.01),
        )

        self.segment_emb = keras.layers.Embedding(
            input_dim=max_seg,
            output_dim=model_dim,  # [max_seg, dim]
            embeddings_initializer=tf.initializers.RandomNormal(0., 0.01),
        )
        self.position_emb = keras.layers.Embedding(
            input_dim=max_len,
            output_dim=model_dim,  # [step, dim]
            embeddings_initializer=tf.initializers.RandomNormal(0., 0.01),
        )
        # note: the Embedding assigned to self.position_emb above is immediately
        # overwritten by this learned weight matrix
        self.position_emb = self.add_weight(
            name="pos",
            shape=[max_len, model_dim],
            dtype=tf.float32,
            initializer=keras.initializers.RandomNormal(0., 0.01))
        self.position_space = tf.ones((1, max_len, max_len))
        self.encoder = Encoder(n_head, model_dim, drop_rate, n_layer)
        self.o_mlm = keras.layers.Dense(n_vocab)
        self.o_nsp = keras.layers.Dense(2)

        self.cross_entropy = keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction="none")
        self.opt = keras.optimizers.Adam(lr)
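The comment block above argues that fine-tuning should only replace the task-specific output layer and leave the body encoder unchanged. A hypothetical Keras sketch of that pattern (the small Dense stack and all names below are placeholders standing in for the pretrained embedding + Encoder body, not this project's API):

import tensorflow as tf
from tensorflow import keras

# Placeholder standing in for the pretrained body encoder (embeddings + Encoder stack).
pretrained_body = keras.Sequential([keras.layers.Dense(256, activation="relu")])
pretrained_body.trainable = False              # leave the body encoder unchanged

inputs = keras.Input(shape=(128,))
features = pretrained_body(inputs, training=False)
outputs = keras.layers.Dense(2)(features)      # new task-specific output layer
finetune_model = keras.Model(inputs, outputs)
finetune_model.compile(optimizer=keras.optimizers.Adam(1e-4),
                       loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True))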
Code example #13
    def __init__(self, vocab, config):
        super(NER_SOFTMAX_CHAR, self).__init__()
        word_emb_matrix = get_word_embd(vocab, config)
        embd_vector = torch.from_numpy(word_emb_matrix).float()

        self.word_embeds = nn.Embedding.from_pretrained(embd_vector,
                                                        freeze=False)
        self.char_embeds = nn.Embedding(len(vocab.char_to_id),
                                        config.char_embd_dim,
                                        padding_idx=Constants.PAD_ID)
        if config.is_caps:
            self.caps_embeds = nn.Embedding(vocab.get_caps_cardinality(),
                                            config.caps_embd_dim,
                                            padding_idx=Constants.PAD_ID)

        self.lstm_char = nn.LSTM(self.char_embeds.embedding_dim,
                                 config.char_lstm_dim,
                                 num_layers=1,
                                 bidirectional=True,
                                 batch_first=True)

        input_size = self.word_embeds.embedding_dim + config.char_embd_dim * 2

        if config.is_caps:
            input_size += config.caps_embd_dim

        model_dim = 128  #512
        num_head = 2  #8
        num_layer = 2  #6
        dropout_ratio = 0.1
        affine_dim = 256  #2048

        self.tx_proj = nn.Linear(input_size, model_dim)
        self.lstm = Encoder(num_layer, num_head, dropout_ratio, model_dim,
                            affine_dim)

        self.dropout = nn.Dropout(config.dropout_rate)
        self.hidden_layer = nn.Linear(model_dim, config.word_lstm_dim)
        self.tanh_layer = torch.nn.Tanh()

        self.hidden2tag = nn.Linear(config.word_lstm_dim, len(vocab.id_to_tag))

        self.config = config

        init_lstm_wt(self.lstm_char)

        init_linear_wt(self.hidden_layer)
        init_linear_wt(self.hidden2tag)
        self.char_embeds.weight.data.uniform_(-1., 1.)
        if config.is_caps:
            self.caps_embeds.weight.data.uniform_(-1., 1.)
Code example #14
    def __init__(self,
                 in_dim,
                 out_dim,
                 N,
                 heads,
                 embed_dim,
                 model_dim,
                 key_dim,
                 value_dim,
                 ff_dim,
                 dropout=0.1,
                 max_len=10000,
                 batch_first=True,
                 pretrained_vec=None):

        super().__init__()
        self.name = 'transformer'

        self.batch_first = batch_first
        self.model_dim = model_dim
        self.embed_dim = embed_dim

        # define layers
        self.embedding = nn.Embedding(in_dim, embed_dim)
        # do not train the embedding layer if a pretrained embedding is provided
        # (a small illustration of freeze=True follows this example)
        if pretrained_vec is not None:
            self.embedding = self.embedding.from_pretrained(pretrained_vec,
                                                            freeze=True)
        if embed_dim != model_dim:
            self.fc_in = nn.Linear(embed_dim, model_dim)
        self.pos_enc = PositionalEncoding(model_dim, max_len)
        self.encoder = Encoder(N,
                               heads,
                               model_dim,
                               key_dim,
                               value_dim,
                               ff_dim,
                               dropout=dropout)
        # final output layer
        self.fc = nn.Linear(model_dim, out_dim)

        # xavier initialization
        for p in self.parameters():
            if p.dim() > 1 and p.requires_grad:
                nn.init.xavier_uniform_(p)
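When pretrained vectors are supplied, the example above swaps in nn.Embedding.from_pretrained with freeze=True, so the embedding weights receive no gradient updates. A small self-contained illustration (the random 4x8 tensor is a stand-in for a real pretrained matrix such as GloVe):

import torch
import torch.nn as nn

pretrained_vec = torch.randn(4, 8)                   # stand-in for real pretrained vectors
emb = nn.Embedding.from_pretrained(pretrained_vec, freeze=True)
print(emb.weight.requires_grad)                      # False: excluded from training
print(emb(torch.tensor([0, 2])).shape)               # torch.Size([2, 8])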
Code example #15
def transformer_classifier(
    num_layers=4,
    d_model=128,
    num_heads=8,
    dff=256,
    maximum_position_encoding=2048,
    n_classes=16,
):
    inp = Input((None, d_model))

    encoder = Encoder(
        num_layers=num_layers,
        d_model=d_model,
        num_heads=num_heads,
        dff=dff,
        maximum_position_encoding=maximum_position_encoding,
        rate=0.3,
    )

    x = encoder(inp)

    x = Dropout(0.2)(x)

    x = GlobalAvgPool1D()(x)

    x = Dense(4 * n_classes, activation="selu")(x)

    out = Dense(n_classes, activation="sigmoid")(x)

    model = Model(inputs=inp, outputs=out)

    opt = Adam(0.00001)

    model.compile(optimizer=opt,
                  loss=custom_binary_crossentropy,
                  metrics=[custom_binary_accuracy])

    model.summary()

    return model
Code example #16
File: main.py / Project: edjah/CS287-Problem-Sets
def setup_self_attn_model():
    import torch.nn as nn
    from transformer import Encoder, Decoder, Transformer, EncoderLayer, DecoderLayer, SelfAttention, PositionwiseFeedforward

    device = torch.device('cuda:0')
    pad_idx = DE.vocab.stoi["<pad>"]

    hid_dim = 300
    n_layers = 3
    n_heads = 4
    pf_dim = 512  # 2048
    dropout = 0.1

    input_dim = len(DE.vocab)
    enc = Encoder(input_dim, hid_dim, n_layers, n_heads, pf_dim, EncoderLayer,
                  SelfAttention, PositionwiseFeedforward, dropout, device)

    output_dim = len(EN.vocab)
    dec = Decoder(output_dim, hid_dim, n_layers, n_heads, pf_dim, DecoderLayer,
                  SelfAttention, PositionwiseFeedforward, dropout, device)

    model = Transformer(enc, dec, pad_idx, device)

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    # model.load_state_dict(torch.load("weights/bigger_self_attn_weights"))

    train_model(model,
                num_epochs=100,
                learning_rate=0.001,
                weight_decay=0,
                log_freq=1,
                self_attn_hid_dim=hid_dim)
    torch.save(model.state_dict(), "weights/bigger_self_attn_weights")

    return model
Code example #17
 def __init__(self):
     super(EventEncoder, self).__init__()
     self.embedding = tf.keras.layers.Embedding(input_dim=len(vocab),
                                                output_dim=embedding_size)
     self.trajectory_encoder = tf.keras.models.Sequential([
         tf.keras.layers.Input(shape=(EVENT_SIZE, 3, 1)),
         tf.keras.layers.Conv2D(filters=32,
                                kernel_size=3,
                                strides=(2, 1),
                                padding='same',
                                activation='relu'),
         tf.keras.layers.MaxPool2D(2, strides=(2, 1)),
         tf.keras.layers.Conv2D(filters=64,
                                kernel_size=3,
                                strides=(2, 1),
                                padding='same',
                                activation='relu'),
         tf.keras.layers.MaxPool2D(2, strides=(2, 1)),
         tf.keras.layers.Flatten(),
         tf.keras.layers.Dropout(.5),
         tf.keras.layers.Dense(d_model, activation='relu')
     ])
     self.encoder = Encoder(1, d_model + embedding_size, 4, dff)
Code example #18
    def __init__(self,
                 dim,
                 src_n_vocab,
                 n_encod_layer,
                 tgt_n_vocab,
                 n_decode_layer,
                 max_len=512):
        super().__init__()  # assumed: needed if this class subclasses nn.Module
        self.src_emb = EmbeddingWithPositionalEncoding(dim, src_n_vocab,
                                                       max_len)
        self.tgt_emb = EmbeddingWithLearnedPositionalEncoding(
            dim, tgt_n_vocab, max_len)

        enc_layer = TransformerLayer(dim, MultiHeadAttention(6, dim, 0.1),
                                     None, nn.Linear(dim, dim), 0.1)
        self.encoder = Encoder(enc_layer, n_encod_layer)

        dec_layer = TransformerLayer(dim, MultiHeadAttention(6, dim, 0.1),
                                     MultiHeadAttention(6, dim, 0.1),
                                     nn.Linear(dim, dim), 0.1)
        self.decoder = Decoder(dec_layer, n_decode_layer)

        self.encoder_decoder = EncoderDecoder(self.encoder, self.decoder,
                                              self.src_emb, self.tgt_emb)
Code example #19
def build_model(n_tokens,
                len_limit,
                batch_size,
                d_model=256,
                d_inner_hid=512,
                n_head=4,
                d_k=64,
                d_v=64,
                layers=6,
                dropout=0.1,
                active_layers=999):
    d_emb = d_model

    pos_emb = Embedding(len_limit, d_emb, trainable=False, \
                        weights=[GetPosEncodingMatrix(len_limit, d_emb)], \
                            batch_input_shape=[batch_size, None])

    word_emb = Embedding(n_tokens, d_emb, batch_input_shape=[batch_size, None])

    encoder = Encoder(d_model, d_inner_hid, n_head, d_k, d_v, layers, dropout, \
                        word_emb=word_emb, pos_emb=pos_emb)
    target_layer = TimeDistributed(Dense(n_tokens, use_bias=False))

    def get_pos_seq(x):
        mask = K.cast(K.not_equal(x, 0), 'int32')
        pos = K.cumsum(K.ones_like(x, 'int32'), 1)
        return pos * mask

    src_seq = Input(shape=(None, ), dtype='int32')

    src_pos = Lambda(get_pos_seq)(src_seq)

    enc_output = encoder(src_seq, src_pos, active_layers=active_layers)
    final_output = target_layer(enc_output)

    model = Model(inputs=src_seq, outputs=final_output)
    return model
Code example #20
    def get_model(self):
        inp_exe = Input(shape=(H.executable_size, 3), dtype='int32', name='inp_exe')
        mask = Lambda(lambda x: x[:, :, 0])(inp_exe)
        mask = PaddingMask()(mask)
        print("##############", inp_exe, mask)
        encoding, enc_attention_weights = Encoder(
            num_layers=H.num_layers,
            d_model=H.d_model,
            num_heads=H.num_heads,
            d_ff=H.d_ff,
            vocab_size=H.real_vocab_size,
            dropout_rate=H.dropout_rate)(inp_exe, mask)
        encoding = GlobalAveragePooling1D()(encoding)
        
        inp_static = Input(shape=(H.static_feature_len,))
        concatenated_features = concatenate([encoding, inp_static])
        
        layer_256 = Dense(256, activation="relu")(concatenated_features)
        layer_16 = Dense(16, activation="relu")(layer_256)
        result = Dense(1, activation="sigmoid")(layer_16)

        model = Model(inputs=[inp_exe, inp_static], outputs=result)
        model.summary()
        return model
Code example #21
def BuildModel(vocab_size, encoder_emb, decoder_emb, d_model = 512, N = 6, d_ff = 2048, h = 8, dropout = 0.1):

    target_vocab = vocab_size
    c = copy.deepcopy

    attention = MultiHeadedAttention(h, d_model)
    feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)

    encoder_layer = EncoderLayer(d_model, c(attention), c(feed_forward), dropout)
    decoder_layer = DecoderLayer(d_model, c(attention), c(attention), c(feed_forward), dropout)

    encoder = Encoder(encoder_layer, N)
    decoder = Decoder(decoder_layer, N)

    model = EncoderDecoder( encoder, decoder,
        nn.Sequential(Embeddings(encoder_emb, d_model), c(position)),
        nn.Sequential(Embeddings(decoder_emb, d_model), c(position)),
        Generator(d_model, target_vocab))

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
Code example #22
File: GPT.py / Project: MorvanZhou/NLP-Tutorials
    def __init__(self,
                 model_dim,
                 max_len,
                 num_layer,
                 num_head,
                 n_vocab,
                 lr,
                 max_seg=3,
                 drop_rate=0.2,
                 padding_idx=0):
        super().__init__()
        self.padding_idx = padding_idx
        self.n_vocab = n_vocab
        self.max_len = max_len

        self.word_emb = nn.Embedding(n_vocab, model_dim)
        self.word_emb.weight.data.normal_(0, 0.1)

        self.segment_emb = nn.Embedding(num_embeddings=max_seg,
                                        embedding_dim=model_dim)
        self.segment_emb.weight.data.normal_(0, 0.1)
        self.position_emb = torch.empty(1, max_len, model_dim)
        nn.init.kaiming_normal_(self.position_emb,
                                mode='fan_out',
                                nonlinearity='relu')
        self.position_emb = nn.Parameter(self.position_emb)

        self.encoder = Encoder(n_head=num_head,
                               emb_dim=model_dim,
                               drop_rate=drop_rate,
                               n_layer=num_layer)
        self.task_mlm = nn.Linear(in_features=model_dim, out_features=n_vocab)
        self.task_nsp = nn.Linear(in_features=model_dim * self.max_len,
                                  out_features=2)

        self.opt = optim.Adam(self.parameters(), lr)
Code example #23
 def __init__(self):
     super(DualEventModel, self).__init__()
     self.event_encoder = EventEncoder()
     self.dual_event_encoder = Encoder(1, d_model, 4, 256, unordered=False)
     self.output_layer = tf.keras.layers.Dense(2, activation='softmax')
Code example #24
# embedding images
fcnn = ResnetV1_FCNN(img_shape, 20)
em_imgL = fcnn(imgL)
em_imgR = fcnn(imgR)
em_imgs = tf.keras.layers.Concatenate(axis=2)([em_imgL, em_imgR])

# embedding sentence
print("creating transformer encoder")
GloVe_embeddings = np.load("word_embeddings/embedding.npy")
print(GloVe_embeddings.shape)
enc_mask = create_padding_mask(sent)
encoder = Encoder(
    num_layers=4,
    d_model=300,  # also the word embedding dim
    num_heads=12,
    dff=512,
    input_vocab_size=GloVe_embeddings.shape[0],
    embeddings_initializer=Constant(GloVe_embeddings),
)
em_sent = encoder(sent, training=True, mask=enc_mask)

# getting prediction from the Relational Neural Network
print("creating relational network")
relation_matrix = RelationalProduct()([em_sent, em_imgs])
g = ConvolutionalPerceptron(relation_matrix.shape[1:], [256, 256])
em_relations = g(relation_matrix)
relation_out = ReduceMean(axis=-1)(em_relations)
f = Perceptron(relation_out.shape[1], [256, 256])
relation_out = f(relation_out)
pred = Dense(1, activation="sigmoid")(relation_out)
Code example #25
from transformer import Encoder
from LSTMEncoder import EncoderRNN
import torch

te = Encoder.TransformerEncoder(1000, 6)

a = torch.LongTensor([[1, 2, 3, 4, 0, 0], [10, 0, 0, 0, 0, 0],
                      [5, 6, 7, 8, 9, 0]])
#print(te.forward(a,torch.IntTensor([5,4])).shape)

ls = EncoderRNN(1000, 15, 4096, 300)
print(
    ls.forward(a, use_prob_vector=False,
               input_lengths=torch.Tensor([4, 1, 5])).shape)
Code example #26
 def __init__(self):
     super(TrajectoryEncoder, self).__init__()
     self.aux_embedding = tf.keras.layers.Embedding(
         input_dim=vocab_size, output_dim=embedding_size)
     self.encoder = Encoder(1, d_model, 4, 256)
Code example #27
 def __init__(self):
     super(FrameEncoder, self).__init__()
     self.aux_embedding = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_size)
     self.encoder = Encoder(1, d_model, 4, 256, unordered=True)
Code example #28
File: bst_model.py / Project: zuoxiaofan/deep_ctr
def bst_model(sparse_input_length = 1, \
    max_seq_length = 50, \
    vocab_size_dict = None, \
    embedding_dim = 512, \
    dnn_unit_list = [512, 128, 32], \
    activation = 'relu', \
    dropout_rate = 0.2, \
    n_layers = 2, \
    num_heads = 8, \
    middle_units = 1024, \
    training = False
    ):
    
    
    # 1. Input layer
    
    # 1.1 user 
    user_id_input_layer = Input(shape=(sparse_input_length, ), name="user_id_input_layer")
    gender_input_layer = Input(shape=(sparse_input_length, ), name="gender_input_layer")
    age_input_layer = Input(shape=(sparse_input_length, ), name="age_input_layer")
    
    
    user_click_item_seq_input_layer = Input(shape=(max_seq_length, ), name="user_click_item_seq_input_layer")
    user_click_cate_seq_input_layer = Input(shape=(max_seq_length, ), name="user_click_cate_seq_input_layer")
    
    
    # 1.2 item
    item_input_layer = Input(shape=(sparse_input_length, ), name="item_input_layer")
    cate_input_layer = Input(shape=(sparse_input_length, ), name="cate_input_layer")
    
    
    
    # 2. Embedding layer
    
    # 2.1 user
    user_id_embedding_layer = Embedding(vocab_size_dict["user_id"]+1, embedding_dim, \
                                        mask_zero=True, name='user_id_embedding_layer')(user_id_input_layer)
    gender_embedding_layer = Embedding(vocab_size_dict["gender"]+1, embedding_dim, \
                                       mask_zero=True, name='gender_embedding_layer')(gender_input_layer)
    age_embedding_layer = Embedding(vocab_size_dict["age"]+1, embedding_dim, \
                                    mask_zero=True, name='age_embedding_layer')(age_input_layer)
    
    
    item_id_embedding = Embedding(vocab_size_dict["item_id"]+1, embedding_dim, \
                                mask_zero=True, name='item_id_embedding')
    cate_id_embedding = Embedding(vocab_size_dict["cate_id"]+1, embedding_dim, \
                                mask_zero=True, name='cate_id_embedding')
    
    user_click_item_seq_embedding_layer = item_id_embedding(user_click_item_seq_input_layer)
    user_click_cate_seq_embedding_layer = cate_id_embedding(user_click_cate_seq_input_layer)
    
    
    # 2.2 item 
    target_item_embedding_layer = item_id_embedding(item_input_layer)
    target_cate_embedding_layer = cate_id_embedding(cate_input_layer)
    

    
    # 3. Concat layer
    
    # 3.1 user: other features
    other_features_concat_layer = concatenate([user_id_embedding_layer, gender_embedding_layer, \
                                               age_embedding_layer], axis=-1)
    
    
    # 3.1 user: sequence features
    input_transformer_layer = concatenate([user_click_item_seq_embedding_layer, \
                                           user_click_cate_seq_embedding_layer], axis=-1)
    
    
    # 3.2 item
    input_din_layer_query = concatenate([target_item_embedding_layer, \
                                         target_cate_embedding_layer], axis=-1)

    
    # 4. Transformer layer

    d_model = input_transformer_layer.shape[-1]
    padding_mask_list = padding_mask(user_click_item_seq_input_layer)
    #print("padding_mask_list.shape: ", padding_mask_list.shape)
    
    output_tranformer_layer = Encoder(n_layers, d_model, num_heads, 
                                middle_units, max_seq_length, training)([input_transformer_layer, padding_mask_list])

    #print("output_tranformer_layer.shape: ", output_tranformer_layer.shape)

    
    
    # 5. Din attention layer
    
    query = input_din_layer_query
    keys = output_tranformer_layer
    vecs = output_tranformer_layer
    
    din_padding_mask_list = din_padding_mask(user_click_item_seq_input_layer)
    #print("din_padding_mask_list.shape: ", din_padding_mask_list.shape)

    output_din_layer = DinAttentionLayer(d_model, middle_units, dropout_rate)([query, keys, vecs, din_padding_mask_list])
    #print("output_din_layer.shape: ", output_din_layer.shape)
    
    
    
    # 6. DNN layer
    input_dnn_layer = concatenate([other_features_concat_layer, output_din_layer], \
                                 axis=-1)
    
    input_dnn_layer = tf.squeeze(input=input_dnn_layer, axis=[1])
    
    
    for inx in range(len(dnn_unit_list)):
        input_dnn_layer = Dense(dnn_unit_list[inx], activation=activation, \
                                name="FC_{0}".format(inx+1))(input_dnn_layer)
        
        input_dnn_layer = Dropout(dropout_rate, name="dropout_{0}".format(inx+1))(input_dnn_layer)
        
    
    output = Dense(1, activation='sigmoid', \
                   name='Sigmoid_output_layer')(input_dnn_layer)
    
    
    
    # Output model
    
    inputs_list = [user_id_input_layer, gender_input_layer, age_input_layer, \
                   user_click_item_seq_input_layer, user_click_cate_seq_input_layer, \
                   item_input_layer, cate_input_layer]
    
    model = Model(inputs = inputs_list, outputs = output)
    
    
    return model