Example #1
 def __init__(self, input_vocab_size, opt_vocab_size, d_model, nhead,
              num_encoder_layers, dim_feedforward, position_embed_size=300,
              utter_n_layer=2, dropout=0.3, sos=0, pad=0, teach_force=1):
     super(Transformer, self).__init__()
     self.d_model = d_model
     self.hidden_size = d_model
     self.embed_src = nn.Embedding(input_vocab_size, d_model)
     # positional-encoding table capped at position_embed_size positions (default 300)
     self.pos_enc = PositionEmbedding(d_model, dropout=dropout,
                                      max_len=position_embed_size)
     self.input_vocab_size = input_vocab_size
     self.utter_n_layer = utter_n_layer
     self.opt_vocab_size = opt_vocab_size
     self.pad, self.sos = pad, sos
     self.teach_force = teach_force
     
     encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, 
                                                dim_feedforward=dim_feedforward, 
                                                dropout=dropout, activation='gelu')
     self.encoder = nn.TransformerEncoder(encoder_layer,
                                          num_layers=num_encoder_layers)
     
     self.decoder = Decoder(d_model, d_model, opt_vocab_size, 
                            n_layers=utter_n_layer, dropout=dropout, nhead=nhead)
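A quick sketch of how an encoder assembled this way is usually driven; the sqrt(d_model) scaling and the padding mask are conventional PyTorch usage, not taken from this example's own forward (assumes `import math`):

    # hypothetical forward pass; assumes seq-first inputs, src: (seq_len, batch) of token ids
    def forward(self, src):
        pad_mask = (src == self.pad).t()  # (batch, seq_len); True positions are ignored
        x = self.pos_enc(self.embed_src(src) * math.sqrt(self.d_model))
        return self.encoder(x, src_key_padding_mask=pad_mask)  # (seq_len, batch, d_model)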
Example #2
    def __init__(self, device):
        super().__init__()
        self.bert_tokenizer = BertTokenizer.from_pretrained(
            "bert-base-uncased", do_lower_case=True)
        self.bert_model = BertModel.from_pretrained("bert-base-uncased")
        self.bert_feature_dim = self.bert_model.pooler.dense.in_features
        # create the Parameter directly on the target device: calling .to(device)
        # on a Parameter returns a plain (non-leaf) tensor, so the module would
        # silently fail to register it as a parameter
        self.indicator_vector = torch.nn.parameter.Parameter(
            torch.rand(self.bert_feature_dim,
                       dtype=torch.double,
                       device=device))
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.bert_feature_dim, nhead=8)

        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer,
                                                         num_layers=6)
        self.norm_layer = nn.LayerNorm(self.bert_feature_dim)
        self.projection_layer = nn.Linear(self.bert_feature_dim, 2)
        self.device = device
        #self.loss = nn.NLLLoss(weight=torch.tensor([0.17105,1]),reduction='mean', ignore_index=-1)
        self.loss = nn.NLLLoss(weight=torch.tensor([1.0, 1.0]),
                               reduction='mean',
                               ignore_index=-1)
        self.lsm = nn.LogSoftmax(dim=2)
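Note that nn.TransformerEncoder deep-copies encoder_layer num_layers times, so keeping self.encoder_layer around as an attribute (as here) does not mean its weights are shared with the six stacked copies.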
Example #3
    def __init__(self, conf):
        super().__init__()
        self.save_hyperparameters(conf)

        self.ke = nn.Embedding(4, 2, max_norm=1.)

        self.conv1 = ConvBlock(3, 256, 13, stride=3, padding=6)
        self.conv2 = ConvBlock(256, 256, 7, stride=1, padding=3)
        self.conv3 = ConvBlock(256, 256, 3, stride=2, padding=1)

        self.pos_encoder = PositionalEncoding(256, self.hparams.dropout)

        encoder_layer = nn.TransformerEncoderLayer(256,
                                                   self.hparams.nhead,
                                                   self.hparams.dim_ff,
                                                   self.hparams.dropout,
                                                   activation='gelu')
        self.encoder = nn.TransformerEncoder(encoder_layer,
                                             self.hparams.nlayers)

        self.fc1 = nn.Linear(256, 1)

        self.train_acc = Accuracy()
        self.val_acc = Accuracy(compute_on_step=False)
Example #4
    def __init__(self, id2char, model_para):
        super(Transformer, self).__init__()

        self.idim = model_para['encoder']['idim']

        #FIXME: need to remove these hardcoded things later
        self.odim = len(id2char) + 2
        self.sos_id = len(id2char) + 1
        self.eos_id = len(id2char) + 1
        self.blank_id = 0
        self.space_id = -1 #FIXME: what is this
        self.vgg_ch_dim = 128
        
        self.feat_extractor = nn.Sequential(
                nn.Conv2d(1, 64, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.Conv2d(64, 64, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, stride=2),
                nn.Conv2d(64, self.vgg_ch_dim, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.Conv2d(self.vgg_ch_dim, self.vgg_ch_dim, 3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, stride=2),
        )
        self.vgg_o_dim = self.vgg_ch_dim * floor(self.idim/4)

        self.vgg2enc = nn.Linear(self.vgg_o_dim, model_para['encoder']['d_model'])
        self.pos_encoder = PositionalEncoding(model_para['encoder']['d_model'], model_para['encoder']['dropout'])
        encoder_layer = nn.TransformerEncoderLayer(model_para['encoder']['d_model'], #512
                                                   model_para['encoder']['nhead'], # 2
                                                   model_para['encoder']['dim_inner'], # 2048
                                                   model_para['encoder']['dropout'] # 0.1
                                                   ) 
        self.encoder = nn.TransformerEncoder(encoder_layer, model_para['encoder']['nlayers'])
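The floor(self.idim / 4) factor in vgg_o_dim comes from the two stride-2 MaxPool2d layers, each of which halves the feature axis before it is flattened into the channel dimension.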
Example #5
    def __init__(self, config: BertConfig):
        super(Bert, self).__init__()
        self.token_embeddings = nn.Embedding(config.vocab_size,
                                             config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size,
                                                  config.hidden_size)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                                config.hidden_size)
        self.embedding_layer_norm = nn.LayerNorm(config.hidden_size)
        self.embedding_dropout = nn.Dropout(p=config.hidden_dropout_prob)

        self.encoders = nn.TransformerEncoder(
            encoder_layer=nn.TransformerEncoderLayer(
                d_model=config.hidden_size,
                nhead=config.num_attention_heads,
                dim_feedforward=config.intermediate_size,
                dropout=config.attention_probs_dropout_prob,
                activation=config.hidden_act,
            ),
            num_layers=config.num_hidden_layers,
        )

        self.pooler_layer = nn.Linear(config.hidden_size, config.hidden_size)
        self.pooled_output_activate = nn.Tanh()
Example #6
    def __init__(self,
                 d_model=512,
                 nhead=8,
                 num_encoder_layers=6,
                 num_decoder_layers=6,
                 dim_feedforward=2048,
                 dropout=0.1,
                 activation="relu",
                 custom_encoder=None,
                 custom_decoder=None):
        super().__init__()

        if custom_encoder is not None:
            self.encoder = custom_encoder
        else:
            encoder_layer = nn.TransformerEncoderLayer(d_model, nhead,
                                                       dim_feedforward,
                                                       dropout, activation)
            encoder_norm = nn.LayerNorm(d_model)
            self.encoder = nn.TransformerEncoder(encoder_layer,
                                                 num_encoder_layers,
                                                 encoder_norm)

        if custom_decoder is not None:
            self.decoder = custom_decoder
        else:
            decoder_layer = TransformerDecoderLayerWithFastDecode(
                d_model, nhead, dim_feedforward, dropout, activation)
            decoder_norm = nn.LayerNorm(d_model)
            self.decoder = TransformerDecoderWithFastDecode(
                decoder_layer, num_decoder_layers, decoder_norm)

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead
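The _reset_parameters call is not shown above; a minimal sketch that mirrors the convention used by torch.nn.Transformer (Xavier-uniform on every parameter with more than one dimension):

    def _reset_parameters(self):
        # Xavier-uniform init for all weight matrices, as in torch.nn.Transformer
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)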
Example #7
 def __init__(self, config):
     super(TransformerLM, self).__init__()
     vocabSize = config.data.vocabSize
     self.nemd = config.model.transformer.nemd
     emd_drop_ratio = config.model.transformer.emd_drop_ratio
     hidden_drop_ratio = config.model.transformer.hidden_drop_ratio
     nhead = config.model.transformer.nhead
     nhid = config.model.transformer.nhid
     nlayer = config.model.transformer.nlayer
     self.src_mask = None
     tie_weight = config.model.transformer.tie_weight
     
     self.embedding = nn.Embedding(vocabSize, self.nemd)
     self.pos_encoder = PositionalEncoding(config)
     self.dropout = nn.Dropout(emd_drop_ratio)
     encoder_layers = nn.TransformerEncoderLayer(self.nemd, nhead, nhid, hidden_drop_ratio)
     self.transformer_encoder = nn.TransformerEncoder(encoder_layers, nlayer)
     self.out = nn.Linear(self.nemd, vocabSize)
     
     # initialize weights (skip when loading pretrained embeddings)
     self.init_weights()
     
     if tie_weight:
         self.out.weight = self.embedding.weight
Example #8
    def __init__(self,
                 input_dim,
                 hidden_dim,
                 num_layers=1,
                 dropout=0,
                 use_categories=False):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.dropout = dropout
        self.use_categories = use_categories

        enc_layer = nn.TransformerEncoderLayer(hidden_dim,
                                               2,
                                               dim_feedforward=hidden_dim * 4,
                                               dropout=dropout,
                                               activation='gelu')
        self.enc = nn.TransformerEncoder(enc_layer, num_layers)

        self.input_fc = nn.Linear(input_dim, hidden_dim)
        if use_categories:
            self.dropout_mod = nn.Dropout(dropout)
            self.stroke_cat_fc = nn.Linear(input_dim + hidden_dim, hidden_dim)
Example #9
    def __init__(self, args):
        '''
        transformer encoder for language, frames and action inputs
        '''
        super(EncoderVL, self).__init__()

        # transformer layers
        encoder_layer = nn.TransformerEncoderLayer(
            args.demb, args.encoder_heads, args.demb,
            args.dropout['transformer']['encoder'])
        self.enc_transformer = nn.TransformerEncoder(encoder_layer,
                                                     args.encoder_layers)

        # how many last actions to attend to
        self.num_input_actions = args.num_input_actions

        # encodings
        self.enc_pos = PosEncoding(args.demb) if args.enc['pos'] else None
        self.enc_pos_learn = PosLearnedEncoding(
            args.demb) if args.enc['pos_learn'] else None
        self.enc_token = TokenLearnedEncoding(
            args.demb) if args.enc['token'] else None
        self.enc_layernorm = nn.LayerNorm(args.demb)
        self.enc_dropout = nn.Dropout(args.dropout['emb'], inplace=True)
Example #10
    def __init__(self,
                 input_size,
                 emb_size,
                 hidden_size,
                 num_layer,
                 max_len=64):
        super().__init__()
        self.emb_size = emb_size
        self.hidden_size = hidden_size
        self.num_layer = num_layer
        self.scale = math.sqrt(emb_size)

        self.embedding = nn.Embedding(input_size, emb_size)
        # additional length for sos and eos
        self.pos_encoder = PositionEncoder(max_len + 10, emb_size)
        encoder_layer = nn.TransformerEncoderLayer(d_model=emb_size,
                                                   nhead=8,
                                                   dim_feedforward=hidden_size,
                                                   dropout=0.1,
                                                   activation='gelu')
        encoder_norm = nn.LayerNorm(emb_size)
        self.encoder = nn.TransformerEncoder(encoder_layer,
                                             num_layers=num_layer,
                                             norm=encoder_norm)
Example #11
    def __init__(
        self,
        d_model: int,
        nhead: int,
        vocab_size: int,
        max_len: int,
        num_encoder_layers: int = 6,
        num_decoder_layers: int = 6,
    ) -> None:
        super(Transformer2, self).__init__()

        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model,
                                                   nhead=nhead)
        decoder_layer = nn.TransformerDecoderLayer(d_model=d_model,
                                                   nhead=nhead)

        self.encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
        self.decoder = nn.TransformerDecoder(decoder_layer, num_decoder_layers)

        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_len)

        self.output_bias = Parameter(torch.Tensor(vocab_size))
        self._init_bias()
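_init_bias is not shown, and torch.Tensor(vocab_size) allocates uninitialized memory, so some explicit init is required before use. A minimal sketch; zero-initialization is an assumption, not the author's code:

    def _init_bias(self) -> None:
        # hypothetical: zero-initialize the output bias
        nn.init.zeros_(self.output_bias)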
Example #12
    def __init__(self, num_slots, slot_dim, hidden_dim, num_heads, num_layers,
                 **kwargs):

        super().__init__()

        self.num_slots = num_slots
        self.slot_dim = slot_dim
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads

        # TODO: for now we simply embed the slots to higher dim, create
        # TODO: custom transformer layer

        # TODO: non-relational Transformer is not directly comparable
        # TODO: to non-relational GNN

        self.slot_encode = nn.Linear(slot_dim, hidden_dim)
        self.slot_decode = nn.Linear(hidden_dim, slot_dim)

        transformer_layer = nn.TransformerEncoderLayer(d_model=hidden_dim,
                                                       nhead=num_heads)

        self.transformer = nn.TransformerEncoder(
            encoder_layer=transformer_layer, num_layers=num_layers)
Example #13
    def __init__(self,
                 input_window,
                 output_window,
                 num_layers=1,
                 dropout=0.1,
                 longueur_serie=23):
        """
        Init.

        Parameters
        ----------
        input_window: int
            Représente le nombre de jour de la séquence d'entrée
            Longueur de la séquence d'entrée: 24 * input_window
        output_window: int
            Représente le nombre d'heure de la séquence de sortie
            Longueur de la séquence de sortie: output_window
        """
        super(Transformer, self).__init__()

        self.name_model = 'Transformer'
        self.input_window = input_window
        self.output_window = output_window
        self.feature_size = self.output_window * 4

        self.src_mask = None
        self.pos_encoder = PositionalEncoding(self.feature_size)
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.feature_size,
            dim_feedforward=self.feature_size * 4,
            nhead=self.output_window,
            dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(
            self.encoder_layer, num_layers=num_layers).float()
        self.decoder = nn.Linear(self.feature_size, 1)
        self.init_weights()
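self.src_mask starts as None and is conventionally filled lazily in forward with a square causal mask; a sketch mirroring nn.Transformer.generate_square_subsequent_mask:

    def _generate_square_subsequent_mask(self, sz):
        # upper-triangular -inf mask so position i cannot attend to any j > i
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)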
Example #14
    def __init__(self, numberTokens, embeddingSize, maxLength,
                 numberEncoderLayers, numberDecoderLayers, attentionHeadCount,
                 transformerHiddenDenseSize, batch_size=32):
        # Based on https://pytorch.org/tutorials/beginner/transformer_tutorial.html
        super(Transformer, self).__init__()
        self.batch_size = batch_size
        self.model_type = 'Transformer'
        self.embeddingSize = embeddingSize
        self.numberTokens = numberTokens

        self.encoderEmbedding = nn.Embedding(numberTokens, embeddingSize)
        self.maxLength = maxLength 
        
        encoderLayer = nn.TransformerEncoderLayer(embeddingSize, attentionHeadCount, transformerHiddenDenseSize)

        self.encoder = nn.TransformerEncoder(encoderLayer, numberEncoderLayers)


        self.decoderEmbedding = nn.Embedding(numberTokens, embeddingSize)
        
        decoderLayer = nn.TransformerDecoderLayer(embeddingSize, attentionHeadCount, transformerHiddenDenseSize)

        self.decoder = nn.TransformerDecoder(decoderLayer, numberDecoderLayers)

        self.decoderLinear = nn.Linear(embeddingSize, numberTokens)
        self.decoderSoftmax = nn.Softmax(dim=2)
Example #15
    def __init__(self, d_model, seq_len, nhead, dim_feedforward, dropout,
                 num_layers):
        super(Encoder, self).__init__()

        # input projection for the time-series data
        self.embedding_layer = nn.Linear(51, d_model)

        # positional encoding
        self.positionalencoding_layer = PositionalEncoding(d_model, seq_len)

        # dropout
        self.dropout_layer = nn.Dropout(p=dropout)

        # Transformer encoder stack (default dim_feedforward = 2048)
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(
            self.encoder_layer, num_layers)

        # flattened-sequence projection fed to the discriminator
        self.linear_layer_dis = nn.Linear(seq_len * d_model, d_model)
Example #16
    def __init__(self,
                 input_dim,
                 rnn_hidden_dims,
                 max_ponder=3,
                 epsilon=0.05,
                 last_relu=True,
                 act_steps=3,
                 act_fixed=False):
        super(ATC_TFencoder, self).__init__()
        self.rnn_hidden_dim = rnn_hidden_dims[-1]
        self.epsilon = epsilon
        # self.rnn_cell = GRUEXND(input_dim, rnn_hidden_dims, last_relu)
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(
            d_model=50, nhead=2, dim_feedforward=150)
        self.transformer_encoder = nn.TransformerEncoder(
            self.transformer_encoder_layer, num_layers=1)
        #TransformerEncoder(source_dims=13, k_dims=16, v_dims=16, n_heads=3, layer_cnt=1)
        self.transition_layer = nn.Linear(13, rnn_hidden_dims[-1])

        self.max_ponder = max_ponder
        self.ponder_linear = nn.Linear(rnn_hidden_dims[-1], 1)

        self.act_fixed = act_fixed
        self.act_steps = act_steps
Example #17
    def __init__(self,
                 n_skill,
                 max_seq=100,
                 embed_dim=128,
                 num_heads=8,
                 dropout=0.2):
        super(SAKTModel, self).__init__()
        self.n_skill = n_skill
        # six 32-dim categorical embeddings plus the 256-dim exercise embedding;
        # assign self.embed_dim after the override so it matches the layers below
        embed_dim = 32 * 6 + 256
        self.embed_dim = embed_dim

        self.embedding = nn.Embedding(4, 32)
        self.user_answer_embedding = nn.Embedding(6, 32)
        self.prior_question_had_explanation_embedding = nn.Embedding(4, 32)
        self.e_embedding = nn.Embedding(n_skill + 1, 256)
        self.part_embedding = nn.Embedding(8, 32)
        self.elapsed_time_embedding = nn.Embedding(302, 32)
        self.duration_previous_content_embedding = nn.Embedding(302, 32)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim,
                                                   nhead=num_heads,
                                                   dropout=dropout)
        self.transformer_enc = nn.TransformerEncoder(
            encoder_layer=encoder_layer, num_layers=4)
        self.gru = nn.GRU(input_size=embed_dim, hidden_size=embed_dim)

        self.continuous_embedding = nn.Sequential(nn.BatchNorm1d(99),
                                                  nn.Linear(1, embed_dim // 2),
                                                  nn.LayerNorm(embed_dim // 2))
        self.cat_embedding = nn.Sequential(
            nn.Linear(embed_dim, embed_dim // 2), nn.LayerNorm(embed_dim // 2))

        self.layer_normal = nn.LayerNorm(embed_dim)

        self.ffn = FFN(embed_dim)
        self.dropout = nn.Dropout(dropout / 2)
        self.pred = nn.Linear(embed_dim, 1)
Example #18
    def __init__(self,
                 d_model: int = 300,
                 nhead: int = 6,
                 num_encoder_layers: int = 4,
                 num_decoder_layers: int = 4,
                 dim_feedforward: int = 1024,
                 dropout: float = 0.1,
                 activation: str = "relu",
                 words_num: int = 0) -> None:
        super(Transformer, self).__init__()

        self.source_embedding = nn.Embedding(words_num, d_model)
        self.pos_encoder = PositionalEncoding(d_model=d_model,
                                              dropout=dropout,
                                              max_len=75)
        self.pos_decoder = PositionalEncoding(d_model=d_model,
                                              dropout=dropout,
                                              max_len=74)
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead,
                                                   dim_feedforward, dropout,
                                                   activation)
        encoder_norm = nn.LayerNorm(d_model)
        self.encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers,
                                             encoder_norm)

        self.target_embedding = nn.Embedding(words_num, d_model)
        decoder_layer = nn.TransformerDecoderLayer(d_model, nhead,
                                                   dim_feedforward, dropout,
                                                   activation)
        decoder_norm = nn.LayerNorm(d_model)
        self.decoder = nn.TransformerDecoder(decoder_layer, num_decoder_layers,
                                             decoder_norm)
        self.out = nn.Linear(d_model, words_num)
        self._reset_parameters()
        self.d_model = d_model
        self.nhead = nhead
Example #19
    def __init__(self,
                 input_size,
                 encoder_size,
                 n_head,
                 feedforward_size,
                 n_layers,
                 dropout=0):
        '''
        input_size:

        '''
        super(EncoderTransformer, self).__init__()

        # self.model_type = 'Transformer'
        # self.src_mask = None

        # self.pos_encoder = PositionalEncoding(ninp, dropout)

        encoder_layers = nn.TransformerEncoderLayer(encoder_size,
                                                    n_head,
                                                    feedforward_size,
                                                    dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layers, n_layers)
Example #20
 def __init__(self):
     super(sequence_encoder, self).__init__()
     self.embedding_layer = nn.Embedding(5, 512)
     self.encoder_layer = nn.TransformerEncoderLayer(512, 8, 1024)
     self.encoder = nn.TransformerEncoder(self.encoder_layer, 5)
Example #21
    def __init__(self, config: Config):
        super().__init__(config)
        conv_H = config.experiment.cmax_h
        conv_W = config.experiment.cmax_w
        conv_layers = []
        in_channels = 1
        for index, filters in enumerate(config.experiment.cnn_filters):
            out_channels = filters
            conv_layers.extend([
                nn.Conv2d(in_channels=in_channels,
                          out_channels=out_channels,
                          kernel_size=(3, 3),
                          stride=(2, 2),
                          padding=1),
                nn.ReLU(),
                nn.BatchNorm2d(num_features=out_channels),
            ])
            if index != len(config.experiment.cnn_filters) - 1:
                conv_layers.append(nn.Dropout(config.experiment.dropout))
            conv_W = math.ceil(conv_W / 2)
            conv_H = math.ceil(conv_H / 2)
            in_channels = out_channels

        self.conv = nn.Sequential(
            *conv_layers, nn.Flatten(),
            nn.Linear(in_features=conv_W * conv_H * out_channels,
                      out_features=conv_W * conv_H * out_channels))
        self.conv_time_distributed = TimeDistributed(self.conv)

        self.embed_dim = self.features_len * (
            config.experiment.time2vec_embedding_size +
            1) + conv_W * conv_H * out_channels
        if config.experiment.use_all_gfs_as_input:
            self.time_2_vec_time_distributed = TimeDistributed(
                Time2Vec(
                    self.features_len + len(
                        process_config(
                            config.experiment.train_parameters_config_file)),
                    config.experiment.time2vec_embedding_size),
                batch_first=True)
            self.embed_dim += len(
                process_config(
                    config.experiment.train_parameters_config_file)) * (
                        config.experiment.time2vec_embedding_size + 1)

        self.pos_encoder = PositionalEncoding(self.embed_dim, self.dropout,
                                              self.sequence_length)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.embed_dim,
            nhead=config.experiment.transformer_attention_heads,
            dim_feedforward=config.experiment.transformer_ff_dim,
            dropout=config.experiment.dropout,
            batch_first=True)
        encoder_norm = nn.LayerNorm(self.embed_dim)
        self.encoder = nn.TransformerEncoder(
            encoder_layer, config.experiment.transformer_attention_layers,
            encoder_norm)

        dense_layers = []
        features = self.embed_dim + 1
        for neurons in config.experiment.transformer_head_dims:
            dense_layers.append(
                nn.Linear(in_features=features, out_features=neurons))
            features = neurons
        dense_layers.append(nn.Linear(in_features=features, out_features=1))
        self.classification_head = nn.Sequential(*dense_layers)
        self.classification_head_time_distributed = TimeDistributed(
            self.classification_head, batch_first=True)
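Unlike the other examples here, this encoder is built with batch_first=True, so it expects (batch, seq, feature) inputs rather than the (seq, batch, feature) layout that the seq-first default assumes.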
Example #22
 def __init__(self):
     super(TextTansformer, self).__init__()
     # nn.TransformerEncoder/Decoder need a layer instance and a layer count;
     # d_model=512 and nhead=8 are placeholder values, not from the source
     encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
     self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
     decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
     self.decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
Example #23
    def __init__(self,
                 vocab,
                 feature_dim=(1024, 14, 14),
                 stem_num_layers=2,
                 stem_batchnorm=False,
                 module_dim=128,
                 text_dim=1,
                 module_residual=True,
                 module_batchnorm=False,
                 classifier_proj_dim=512,
                 classifier_downsample='maxpool2',
                 classifier_fc_layers=(1024, ),
                 classifier_batchnorm=False,
                 classifier_dropout=0,
                 verbose=True):
        super(ModuleNet, self).__init__()

        self.stem = build_stem(feature_dim[0],
                               module_dim,
                               num_layers=stem_num_layers,
                               with_batchnorm=stem_batchnorm)
        if verbose:
            print('Here is my stem:')
            print(self.stem)
        self.char_lstm = nn.LSTM(input_size=28,
                                 hidden_size=98,
                                 bidirectional=True,
                                 batch_first=True)
        encoder_layer = nn.TransformerEncoderLayer(d_model=28, nhead=7)
        self.char_transformer = nn.TransformerEncoder(
            encoder_layer=encoder_layer, num_layers=3)
        self.char_linear = nn.Linear(28, 196)
        num_answers = len(vocab['answer_idx_to_token'])
        module_H, module_W = feature_dim[1], feature_dim[2]
        self.classifier = build_classifier(module_dim + text_dim,
                                           module_H,
                                           module_W,
                                           num_answers,
                                           classifier_fc_layers,
                                           classifier_proj_dim,
                                           classifier_downsample,
                                           with_batchnorm=classifier_batchnorm,
                                           dropout=classifier_dropout)
        if verbose:
            print('Here is my classifier:')
            print(self.classifier)
        self.stem_times = []
        self.module_times = []
        self.classifier_times = []
        self.timing = False

        self.function_modules = {}
        self.function_modules_num_inputs = {}
        self.vocab = vocab
        self.module_list = []
        for idx, fn_str in enumerate(vocab['program_token_to_idx']):
            num_inputs = iep.programs.get_num_inputs(fn_str)
            self.function_modules_num_inputs[fn_str] = num_inputs
            if fn_str == 'scene' or num_inputs == 1:
                mod = ResidualBlock(module_dim + text_dim,
                                    with_residual=module_residual,
                                    with_batchnorm=module_batchnorm)
            elif num_inputs == 2:
                mod = ConcatBlock(module_dim + text_dim,
                                  with_residual=module_residual,
                                  with_batchnorm=module_batchnorm)
            self.add_module(fn_str, mod)
            self.module_list.append(mod)
            self.function_modules[fn_str] = idx
        self.module_list = nn.ModuleList(self.module_list)
        self.save_module_outputs = False
Example #24
 def __init__(self):
     super().__init__()  # required before assigning submodules to an nn.Module
     layer = nn.TransformerEncoderLayer(128, 8)
     self.encoder = nn.TransformerEncoder(layer, 8)
     self.linear = nn.Linear(128, 2048)
Example #25
    def __init__(self,
                 char_embedding_dim: int,
                 out_dim: int,
                 image_feature_dim: int = 512,
                 nheaders: int = 8,
                 nlayers: int = 6,
                 feedforward_dim: int = 2048,
                 dropout: float = 0.1,
                 max_len: int = 100,
                 image_encoder: str = 'resnet50',
                 roi_pooling_mode: str = 'roi_align',
                 roi_pooling_size: Tuple[int, int] = (7, 7)):
        '''
        convert image segments and text segments to node embedding.
        :param char_embedding_dim:
        :param out_dim:
        :param image_feature_dim:
        :param nheaders:
        :param nlayers:
        :param feedforward_dim:
        :param dropout:
        :param max_len:
        :param image_encoder:
        :param roi_pooling_mode:
        :param roi_pooling_size:
        '''
        super().__init__()

        self.dropout = dropout
        assert roi_pooling_mode in [
            'roi_align', 'roi_pool'
        ], 'roi pooling mode: {} not supported.'.format(roi_pooling_mode)
        self.roi_pooling_mode = roi_pooling_mode
        assert roi_pooling_size and len(
            roi_pooling_size) == 2, 'roi_pooling_size is not set properly.'
        self.roi_pooling_size = tuple(roi_pooling_size)  # (h, w)

        transformer_encoder_layer = nn.TransformerEncoderLayer(
            d_model=char_embedding_dim,
            nhead=nheaders,
            dim_feedforward=feedforward_dim,
            dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(
            transformer_encoder_layer, num_layers=nlayers)

        if image_encoder == 'resnet18':
            self.cnn = resnet.resnet18(output_channels=out_dim)
        elif image_encoder == 'resnet34':
            self.cnn = resnet.resnet34(output_channels=out_dim)
        elif image_encoder == 'resnet50':
            self.cnn = resnet.resnet50(output_channels=out_dim)
        elif image_encoder == 'resnet101':
            self.cnn = resnet.resnet101(output_channels=out_dim)
        elif image_encoder == 'resnet152':
            self.cnn = resnet.resnet152(output_channels=out_dim)
        else:
            raise NotImplementedError()

        self.conv = nn.Conv2d(image_feature_dim, out_dim,
                              self.roi_pooling_size)
        self.bn = nn.BatchNorm2d(out_dim)

        self.projection = nn.Linear(2 * out_dim, out_dim)
        self.norm = nn.LayerNorm(out_dim)

        # Compute the positional encodings once in log space.
        position_embedding = torch.zeros(max_len, char_embedding_dim)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(
            torch.arange(0, char_embedding_dim, 2).float() *
            -(math.log(10000.0) / char_embedding_dim))
        position_embedding[:, 0::2] = torch.sin(position * div_term)
        position_embedding[:, 1::2] = torch.cos(position * div_term)
        position_embedding = position_embedding.unsqueeze(0).unsqueeze(
            0)  # 1, 1, max_len, char_embedding_dim
        self.register_buffer('position_embedding', position_embedding)

        self.pe_dropout = nn.Dropout(self.dropout)
Example #26
 def __init__(self, input_dim=40, hidden_dim=128, output_dim=2, num_layers=2):
     super(LSTMTrans1_deep, self).__init__()
     self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, dropout=0.2, batch_first=True)
     encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=2)
     self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=4)
     self.hidden2out = nn.Linear(hidden_dim, output_dim)
Example #27
 def __init__(self, hidden_size=20, num_heads=4):
     super(TransformerModel, self).__init__()
     # nhead must evenly divide d_model (20 % 4 == 0); 16 heads would make
     # nn.TransformerEncoderLayer raise for a 20-dim model
     self.encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size,
                                                     nhead=num_heads)
     self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer,
                                                      num_layers=4)
Example #28
    def __init__(self, hps, obs_config):
        super(TransformerPolicy7, self).__init__()
        assert obs_config.drones > 0 or obs_config.minerals > 0,\
            'Must have at least one mineral or drone observation'
        assert obs_config.drones >= obs_config.allies
        assert not hps.use_privileged or (
            hps.nmineral > 0 and hps.nally > 0 and
            (hps.nenemy > 0 or hps.ally_enemy_same))

        assert hps.nally == obs_config.allies
        assert hps.nenemy == obs_config.drones - obs_config.allies
        assert hps.nmineral == obs_config.minerals
        assert hps.ntile == obs_config.tiles

        self.version = 'transformer_v7'

        self.kwargs = dict(hps=hps, obs_config=obs_config)

        self.hps = hps
        self.obs_config = obs_config
        self.agents = hps.agents
        self.nally = hps.nally
        self.nenemy = hps.nenemy
        self.nmineral = hps.nmineral
        self.nconstant = hps.nconstant
        self.ntile = hps.ntile
        self.nitem = hps.nally + hps.nenemy + hps.nmineral + hps.nconstant + hps.ntile
        self.fp16 = hps.fp16
        self.d_agent = hps.d_agent
        self.d_item = hps.d_item
        self.naction = hps.objective.naction() + obs_config.extra_actions()

        if hasattr(obs_config, 'global_drones'):
            self.global_drones = obs_config.global_drones
        else:
            self.global_drones = 0

        if hps.norm == 'none':
            norm_fn = lambda n: nn.Sequential()
        elif hps.norm == 'batchnorm':
            norm_fn = lambda n: nn.BatchNorm2d(n)
        elif hps.norm == 'layernorm':
            norm_fn = lambda n: nn.LayerNorm(n)
        else:
            raise Exception(f'Unexpected normalization layer {hps.norm}')

        endglobals = self.obs_config.endglobals()
        endallies = self.obs_config.endallies()
        endenemies = self.obs_config.endenemies()
        endmins = self.obs_config.endmins()
        endtiles = self.obs_config.endtiles()
        endallenemies = self.obs_config.endallenemies()

        self.agent_embedding = ItemBlock(
            obs_config.dstride() + obs_config.global_features(),
            hps.d_agent,
            hps.d_agent * hps.dff_ratio,
            norm_fn,
            True,
            mask_feature=7,  # Feature 7 is hitpoints
        )
        self.relpos_net = ItemBlock(3, hps.d_item // 2,
                                    hps.d_item // 2 * hps.dff_ratio, norm_fn,
                                    hps.item_ff)

        self.item_nets = nn.ModuleList()
        if hps.ally_enemy_same:
            self.item_nets.append(
                PosItemBlock(
                    obs_config.dstride(),
                    hps.d_item // 2,
                    hps.d_item // 2 * hps.dff_ratio,
                    norm_fn,
                    hps.item_ff,
                    mask_feature=7,  # Feature 7 is hitpoints
                    count=obs_config.drones,
                    start=endglobals,
                    end=endenemies,
                ))
        else:
            if self.nally > 0:
                self.item_nets.append(
                    PosItemBlock(
                        obs_config.dstride(),
                        hps.d_item // 2,
                        hps.d_item // 2 * hps.dff_ratio,
                        norm_fn,
                        hps.item_ff,
                        mask_feature=7,  # Feature 7 is hitpoints
                        count=obs_config.allies,
                        start=endglobals,
                        end=endallies,
                    ))
            if self.nenemy > 0:
                self.item_nets.append(
                    PosItemBlock(
                        obs_config.dstride(),
                        hps.d_item // 2,
                        hps.d_item // 2 * hps.dff_ratio,
                        norm_fn,
                        hps.item_ff,
                        mask_feature=7,  # Feature 7 is hitpoints
                        count=obs_config.drones - self.obs_config.allies,
                        start=endallies,
                        end=endenemies,
                        start_privileged=endtiles
                        if hps.use_privileged else None,
                        end_privileged=endallenemies
                        if hps.use_privileged else None,
                    ))
        if hps.nmineral > 0:
            self.item_nets.append(
                PosItemBlock(
                    obs_config.mstride(),
                    hps.d_item // 2,
                    hps.d_item // 2 * hps.dff_ratio,
                    norm_fn,
                    hps.item_ff,
                    mask_feature=2,  # Feature 2 is size
                    count=obs_config.minerals,
                    start=endenemies,
                    end=endmins,
                ))
        if hps.ntile > 0:
            self.item_nets.append(
                PosItemBlock(
                    obs_config.tstride(),
                    hps.d_item // 2,
                    hps.d_item // 2 * hps.dff_ratio,
                    norm_fn,
                    hps.item_ff,
                    mask_feature=2,  # Feature 2 is time elapsed since the last visit
                    count=obs_config.tiles,
                    start=endmins,
                    end=endtiles,
                ))
        if hps.nconstant > 0:
            self.constant_items = nn.Parameter(
                torch.normal(0, 1, (hps.nconstant, hps.d_item)))

        if hps.item_item_attn_layers > 0:
            encoder_layer = nn.TransformerEncoderLayer(d_model=hps.d_item,
                                                       nhead=8)
            self.item_item_attn = nn.TransformerEncoder(
                encoder_layer, num_layers=hps.item_item_attn_layers)
        else:
            self.item_item_attn = None

        self.multihead_attention = MultiheadAttention(
            embed_dim=hps.d_agent,
            kdim=hps.d_item,
            vdim=hps.d_item,
            num_heads=hps.nhead,
            dropout=hps.dropout,
        )
        self.linear1 = nn.Linear(hps.d_agent, hps.d_agent * hps.dff_ratio)
        self.linear2 = nn.Linear(hps.d_agent * hps.dff_ratio, hps.d_agent)
        self.norm1 = nn.LayerNorm(hps.d_agent)
        self.norm2 = nn.LayerNorm(hps.d_agent)

        self.map_channels = hps.d_agent // (hps.nm_nrings * hps.nm_nrays)
        map_item_channels = self.map_channels - 2 if self.hps.map_embed_offset else self.map_channels
        self.downscale = nn.Linear(hps.d_item, map_item_channels)
        self.norm_map = norm_fn(map_item_channels)
        self.conv1 = spatial.ZeroPaddedCylindricalConv2d(self.map_channels,
                                                         hps.dff_ratio *
                                                         self.map_channels,
                                                         kernel_size=3)
        self.conv2 = spatial.ZeroPaddedCylindricalConv2d(hps.dff_ratio *
                                                         self.map_channels,
                                                         self.map_channels,
                                                         kernel_size=3)
        self.norm_conv = norm_fn(self.map_channels)

        final_width = hps.d_agent
        if hps.nearby_map:
            final_width += hps.d_agent
        self.final_layer = nn.Sequential(
            nn.Linear(final_width, hps.d_agent * hps.dff_ratio),
            nn.ReLU(),
        )

        self.policy_head = nn.Linear(hps.d_agent * hps.dff_ratio, self.naction)
        if hps.small_init_pi:
            self.policy_head.weight.data *= 0.01
            self.policy_head.bias.data.fill_(0.0)

        if hps.use_privileged:
            self.value_head = nn.Linear(
                hps.d_agent * hps.dff_ratio + hps.d_item, 1)
        else:
            self.value_head = nn.Linear(hps.d_agent * hps.dff_ratio, 1)
        if hps.zero_init_vf:
            self.value_head.weight.data.fill_(0.0)
            self.value_head.bias.data.fill_(0.0)

        self.epsilon = 1e-4 if hps.fp16 else 1e-8
Example #29
    def __init__(self,
                 early_fusion,
                 d_model,
                 n_head,
                 dim_feedforward,
                 dropout,
                 num_layers,
                 layer_norm,
                 embed_dropout,
                 output_dim,
                 out_dropout,
                 multimodal_transformer=True):
        super(Transformer, self).__init__()
        self.multimodal_transformer = multimodal_transformer
        if self.multimodal_transformer:
            self.d_mult = d_model
            self.mult = mult_model.MULTModel(
                orig_d_l=300,
                orig_d_a=74,
                orig_d_v=35,
                d_l=self.d_mult,  # different from MulT 
                d_a=self.d_mult,  # different from MulT
                d_v=self.d_mult,  # different from MulT
                vonly=True,
                aonly=True,
                lonly=True,
                num_heads=n_head,
                layers=num_layers,
                attn_dropout=0.1,
                attn_dropout_a=0.0,
                attn_dropout_v=0.0,
                relu_dropout=0.1,
                res_dropout=0.1,
                out_dropout=out_dropout,
                embed_dropout=embed_dropout,
                attn_mask=True)
            self.t_in_dim = self.v_in_dim = self.a_in_dim = self.d_mult * 2
            combined_dim = 6 * d_model

        else:
            self.early_fusion = early_fusion
            # Only late fusion is implemented; early fusion will be implemented later
            # Late fusion
            if not self.early_fusion:
                self.orig_d_t = 300
                self.orig_d_a = 74
                self.orig_d_v = 35
                self.d_t = self.d_a = self.d_v = d_model
                # Temporal convolutional layers
                self.proj_t = nn.Conv1d(self.orig_d_t,
                                        self.d_t,
                                        kernel_size=1,
                                        padding=0,
                                        bias=False)
                self.proj_a = nn.Conv1d(self.orig_d_a,
                                        self.d_a,
                                        kernel_size=1,
                                        padding=0,
                                        bias=False)
                self.proj_v = nn.Conv1d(self.orig_d_v,
                                        self.d_v,
                                        kernel_size=1,
                                        padding=0,
                                        bias=False)

                # Transformer Layers
                self.encoder_layer_t = nn.TransformerEncoderLayer(
                    d_model, nhead=n_head, dim_feedforward=dim_feedforward)
                self.encoder_layer_a = nn.TransformerEncoderLayer(
                    d_model, nhead=n_head, dim_feedforward=dim_feedforward)
                self.encoder_layer_v = nn.TransformerEncoderLayer(
                    d_model, nhead=n_head, dim_feedforward=dim_feedforward)

                # TODO: implement the layer-norm option here
                if layer_norm:
                    raise NotImplementedError(
                        'layer norm not implemented yet for vanilla transformer')
                else:
                    self.transformer_encoder_t = nn.TransformerEncoder(
                        self.encoder_layer_t, num_layers=num_layers)
                    self.transformer_encoder_a = nn.TransformerEncoder(
                        self.encoder_layer_a, num_layers=num_layers)
                    self.transformer_encoder_v = nn.TransformerEncoder(
                        self.encoder_layer_v, num_layers=num_layers)

                self.embed_dropout = embed_dropout

                # change for other datasets: the number of modalities may differ
                combined_dim = 3 * d_model

        self.proj1 = nn.Linear(combined_dim, combined_dim)
        self.proj2 = nn.Linear(combined_dim, combined_dim)
        self.out_layer = nn.Linear(combined_dim, output_dim)
        self.out_dropout = out_dropout
Example #30
    def __init__(self, config):
        super(MISA, self).__init__()

        self.config = config
        self.text_size = config.embedding_size
        self.visual_size = config.visual_size
        self.acoustic_size = config.acoustic_size

        self.input_sizes = input_sizes = [
            self.text_size, self.visual_size, self.acoustic_size
        ]
        self.hidden_sizes = hidden_sizes = [
            int(self.text_size),
            int(self.visual_size),
            int(self.acoustic_size)
        ]
        self.output_size = output_size = config.num_classes
        self.dropout_rate = dropout_rate = config.dropout
        self.activation = self.config.activation()
        self.tanh = nn.Tanh()

        rnn = nn.LSTM if self.config.rnncell == "lstm" else nn.GRU
        # defining modules - two layer bidirectional LSTM with layer norm in between

        if self.config.use_bert:
            # Initializing a BERT bert-base-uncased style configuration
            bertconfig = BertConfig.from_pretrained('bert-base-uncased',
                                                    output_hidden_states=True)
            self.bertmodel = BertModel.from_pretrained('bert-base-uncased',
                                                       config=bertconfig)
        else:
            self.embed = nn.Embedding(len(config.word2id), input_sizes[0])
            self.trnn1 = rnn(input_sizes[0],
                             hidden_sizes[0],
                             bidirectional=True)
            self.trnn2 = rnn(2 * hidden_sizes[0],
                             hidden_sizes[0],
                             bidirectional=True)

        self.vrnn1 = rnn(input_sizes[1], hidden_sizes[1], bidirectional=True)
        self.vrnn2 = rnn(2 * hidden_sizes[1],
                         hidden_sizes[1],
                         bidirectional=True)

        self.arnn1 = rnn(input_sizes[2], hidden_sizes[2], bidirectional=True)
        self.arnn2 = rnn(2 * hidden_sizes[2],
                         hidden_sizes[2],
                         bidirectional=True)

        ##########################################
        # mapping modalities to same sized space
        ##########################################
        if self.config.use_bert:
            self.project_t = nn.Sequential()
            self.project_t.add_module(
                'project_t',
                nn.Linear(in_features=768, out_features=config.hidden_size))
            self.project_t.add_module('project_t_activation', self.activation)
            self.project_t.add_module('project_t_layer_norm',
                                      nn.LayerNorm(config.hidden_size))
        else:
            self.project_t = nn.Sequential()
            self.project_t.add_module(
                'project_t',
                nn.Linear(in_features=hidden_sizes[0] * 4,
                          out_features=config.hidden_size))
            self.project_t.add_module('project_t_activation', self.activation)
            self.project_t.add_module('project_t_layer_norm',
                                      nn.LayerNorm(config.hidden_size))

        self.project_v = nn.Sequential()
        self.project_v.add_module(
            'project_v',
            nn.Linear(in_features=hidden_sizes[1] * 4,
                      out_features=config.hidden_size))
        self.project_v.add_module('project_v_activation', self.activation)
        self.project_v.add_module('project_v_layer_norm',
                                  nn.LayerNorm(config.hidden_size))

        self.project_a = nn.Sequential()
        self.project_a.add_module(
            'project_a',
            nn.Linear(in_features=hidden_sizes[2] * 4,
                      out_features=config.hidden_size))
        self.project_a.add_module('project_a_activation', self.activation)
        self.project_a.add_module('project_a_layer_norm',
                                  nn.LayerNorm(config.hidden_size))

        ##########################################
        # private encoders
        ##########################################
        self.private_t = nn.Sequential()
        self.private_t.add_module(
            'private_t_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.private_t.add_module('private_t_activation_1', nn.Sigmoid())

        self.private_v = nn.Sequential()
        self.private_v.add_module(
            'private_v_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.private_v.add_module('private_v_activation_1', nn.Sigmoid())

        self.private_a = nn.Sequential()
        self.private_a.add_module(
            'private_a_3',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.private_a.add_module('private_a_activation_3', nn.Sigmoid())

        ##########################################
        # shared encoder
        ##########################################
        self.shared = nn.Sequential()
        self.shared.add_module(
            'shared_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.shared.add_module('shared_activation_1', nn.Sigmoid())

        ##########################################
        # reconstruct
        ##########################################
        self.recon_t = nn.Sequential()
        self.recon_t.add_module(
            'recon_t_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.recon_v = nn.Sequential()
        self.recon_v.add_module(
            'recon_v_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))
        self.recon_a = nn.Sequential()
        self.recon_a.add_module(
            'recon_a_1',
            nn.Linear(in_features=config.hidden_size,
                      out_features=config.hidden_size))

        ##########################################
        # shared space adversarial discriminator
        ##########################################
        if not self.config.use_cmd_sim:
            self.discriminator = nn.Sequential()
            self.discriminator.add_module(
                'discriminator_layer_1',
                nn.Linear(in_features=config.hidden_size,
                          out_features=config.hidden_size))
            self.discriminator.add_module('discriminator_layer_1_activation',
                                          self.activation)
            self.discriminator.add_module('discriminator_layer_1_dropout',
                                          nn.Dropout(dropout_rate))
            self.discriminator.add_module(
                'discriminator_layer_2',
                nn.Linear(in_features=config.hidden_size,
                          out_features=len(hidden_sizes)))

        ##########################################
        # shared-private collaborative discriminator
        ##########################################

        self.sp_discriminator = nn.Sequential()
        self.sp_discriminator.add_module(
            'sp_discriminator_layer_1',
            nn.Linear(in_features=config.hidden_size, out_features=4))

        self.fusion = nn.Sequential()
        self.fusion.add_module(
            'fusion_layer_1',
            nn.Linear(in_features=self.config.hidden_size * 6,
                      out_features=self.config.hidden_size * 3))
        self.fusion.add_module('fusion_layer_1_dropout',
                               nn.Dropout(dropout_rate))
        self.fusion.add_module('fusion_layer_1_activation', self.activation)
        self.fusion.add_module(
            'fusion_layer_3',
            nn.Linear(in_features=self.config.hidden_size * 3,
                      out_features=output_size))

        self.tlayer_norm = nn.LayerNorm((hidden_sizes[0] * 2, ))
        self.vlayer_norm = nn.LayerNorm((hidden_sizes[1] * 2, ))
        self.alayer_norm = nn.LayerNorm((hidden_sizes[2] * 2, ))

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.config.hidden_size, nhead=2)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer,
                                                         num_layers=1)