Example #1
    def __init__(self,
                 d_feat=6,
                 hidden_size=64,
                 num_layers=2,
                 dropout=0.0,
                 base_model="GRU"):
        super().__init__()

        if base_model == "GRU":
            self.rnn = nn.GRU(
                input_size=d_feat,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True,
                dropout=dropout,
            )
        elif base_model == "LSTM":
            self.rnn = nn.LSTM(
                input_size=d_feat,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True,
                dropout=dropout,
            )
        else:
            raise ValueError("unknown base model name `%s`" % base_model)

        self.hidden_size = hidden_size
        self.d_feat = d_feat
        self.transformation = nn.Linear(self.hidden_size, self.hidden_size)
        self.a = nn.Parameter(torch.randn(self.hidden_size * 2, 1))  # requires_grad defaults to True
        self.fc = nn.Linear(self.hidden_size, self.hidden_size)
        self.fc_out = nn.Linear(hidden_size, 1)
        self.leaky_relu = nn.LeakyReLU()
        self.softmax = nn.Softmax(dim=1)
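A minimal usage sketch (not from the source) of the batch_first GRU configured above; the batch size, sequence length, and variable names are illustrative assumptions:

import torch
import torch.nn as nn

rnn = nn.GRU(input_size=6, hidden_size=64, num_layers=2, batch_first=True)
x = torch.randn(32, 60, 6)        # (batch, seq_len, d_feat)
out, h_n = rnn(x)                 # out: (32, 60, 64); h_n: (2, 32, 64)
last = out[:, -1, :]              # per-sample hidden state at the final step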
Example #2
    def __init__(self,
                 n_skill,
                 max_seq=100,
                 embed_dim=128,
                 num_heads=8,
                 dropout=0.2):
        super(SAKTModel, self).__init__()
        self.n_skill = n_skill
        self.embed_dim = embed_dim
        embed_dim = 32 * 6 + 256  # width of the concatenated embeddings (448); intentionally shadows the argument

        self.embedding = nn.Embedding(4, 32)
        self.user_answer_embedding = nn.Embedding(6, 32)
        self.prior_question_had_explanation_embedding = nn.Embedding(4, 32)
        self.e_embedding = nn.Embedding(n_skill + 1, 256)
        self.part_embedding = nn.Embedding(8, 32)
        self.elapsed_time_embedding = nn.Embedding(302, 32)
        self.duration_previous_content_embedding = nn.Embedding(302, 32)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim,
                                                   nhead=num_heads,
                                                   dropout=dropout)
        self.transformer_enc = nn.TransformerEncoder(
            encoder_layer=encoder_layer, num_layers=4)
        self.gru = nn.GRU(input_size=embed_dim, hidden_size=embed_dim)

        self.continuous_embedding = nn.Sequential(nn.BatchNorm1d(99),
                                                  nn.Linear(1, embed_dim // 2),
                                                  nn.LayerNorm(embed_dim // 2))
        self.cat_embedding = nn.Sequential(
            nn.Linear(embed_dim, embed_dim // 2), nn.LayerNorm(embed_dim // 2))

        self.layer_normal = nn.LayerNorm(embed_dim)

        self.ffn = FFN(embed_dim)
        self.dropout = nn.Dropout(dropout / 2)
        self.pred = nn.Linear(embed_dim // 4, 1)
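A shape check (assumptions, not from the source): the six 32-dim categorical embeddings plus the 256-dim exercise embedding concatenate to 32 * 6 + 256 = 448 features, which is the d_model both the transformer encoder and the GRU above are built with:

import torch
import torch.nn as nn

embed_dim = 32 * 6 + 256                    # 448, divisible by num_heads=8
layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=8, dropout=0.2)
enc = nn.TransformerEncoder(encoder_layer=layer, num_layers=4)
x = torch.randn(100, 4, embed_dim)          # (seq_len, batch, d_model)
out = enc(x)                                # same shape: (100, 4, 448)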
Example #3
    def __init__(self, input_size, hidden_size, output_size, num_layers=4, attn_dim=64, fc_dim=512, attention=False):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        self.num_layers = num_layers
        self.fc_dim = fc_dim

        self.attention = attention  # whether to apply the attention module
        
        self.gru = nn.GRU(
                    input_size=self.input_size,
                    hidden_size=self.hidden_size,
                    num_layers=self.num_layers,
                    batch_first=True,
                    dropout=.5,
                    bidirectional=True)

        if attention:
            self.attn = Attention(self.input_size, self.hidden_size, attn_dim)  # attn_dim defaults to 64
        
        self.fc1 = nn.Linear(hidden_size*2, self.fc_dim)
        self.bn1 = nn.BatchNorm1d(self.fc_dim)
        self.prelu1 = nn.PReLU(self.fc_dim)
        self.dp1 = nn.Dropout(.5)
        
        self.fc2 = nn.Linear(self.fc_dim, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.prelu2 = nn.PReLU(64)
        
        self.fc3 = nn.Linear(64, self.output_size)
        self.bn3 = nn.BatchNorm1d(self.output_size)
        self.prelu3 = nn.PReLU(self.output_size)
        
        self.init_weights()
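Usage sketch (shapes are assumptions): the bidirectional GRU doubles the feature dimension, which is why fc1 above takes hidden_size*2 inputs:

import torch
import torch.nn as nn

gru = nn.GRU(input_size=40, hidden_size=128, num_layers=4,
             batch_first=True, dropout=.5, bidirectional=True)
x = torch.randn(16, 50, 40)       # (batch, seq_len, input_size)
out, h_n = gru(x)                 # out: (16, 50, 256) -- 2 * hidden_size
pooled = out.mean(dim=1)          # (16, 256), matching fc1's in_features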
Example #4
    def __init__(self,
                 vocab,
                 n_layers,
                 hidden_size,
                 batch_size,
                 embedding_dim=10):
        super(Gru, self).__init__()
        self.vocab_size = len(vocab.values())
        self.vocab = vocab
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.dropout = nn.Dropout(p=0.5)
        self.embedding_dim = embedding_dim

        # Recurrent layer

        padding_idx = self.vocab['<pad>']
        self.word_embedding = nn.Embedding(num_embeddings=self.vocab_size,
                                           embedding_dim=self.embedding_dim,
                                           padding_idx=padding_idx)

        self.linear1 = nn.Linear(in_features=self.embedding_dim,
                                 out_features=30)
        self.linear2 = nn.Linear(in_features=30, out_features=40)

        self.gru = nn.GRU(input_size=40,
                          hidden_size=self.hidden_size,
                          num_layers=self.n_layers,
                          bidirectional=False,
                          dropout=0.5)

        # Output layer
        self.l_out = nn.Linear(in_features=self.hidden_size,
                               out_features=self.vocab_size,
                               bias=False)
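Usage sketch (vocabulary and sizes are assumptions): this GRU is not batch_first, so inputs are laid out (seq_len, batch, features):

import torch
import torch.nn as nn

emb = nn.Embedding(num_embeddings=1000, embedding_dim=10, padding_idx=0)
tokens = torch.randint(1, 1000, (35, 8))    # (seq_len, batch)
x = emb(tokens)                             # (35, 8, 10)
x = nn.Linear(10, 30)(x)                    # stand-ins for linear1/linear2
x = nn.Linear(30, 40)(x)
gru = nn.GRU(input_size=40, hidden_size=64, num_layers=2, dropout=0.5)
out, h_n = gru(x)                           # out: (35, 8, 64)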
Example #5
    def __init__(self,
                 input_size,
                 embedding_size,
                 hidden_size,
                 dropout=0.5,
                 n_layer=1,
                 pretrained=False):
        super(Utterance_encoder_ggcn, self).__init__()

        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.n_layer = n_layer

        self.embed = nn.Embedding(input_size, self.embedding_size)
        self.gru = nn.GRU(self.embedding_size,
                          self.hidden_size,
                          num_layers=n_layer,
                          dropout=dropout,
                          bidirectional=True)
        # self.hidden_proj = nn.Linear(n_layer * 2 * self.hidden_size, hidden_size)
        # self.bn = nn.BatchNorm1d(num_features=hidden_size)

        self.init_weight()
Example #6
  def __init__(self, input_features, rnn_features, num_layers=1, drop=0.0,
               rnn_type='LSTM', rnn_bidirectional=False):
    super(MaxoutRNN, self).__init__()
    self.bidirectional = rnn_bidirectional

    if rnn_type == 'LSTM':
      self.rnn = nn.LSTM(input_size=input_features,
              hidden_size=rnn_features, dropout=drop,
              num_layers=num_layers, batch_first=True,
              bidirectional=rnn_bidirectional)
    elif rnn_type == 'GRU':
      self.rnn = nn.GRU(input_size=input_features,
              hidden_size=rnn_features, dropout=drop,
              num_layers=num_layers, batch_first=True,
              bidirectional=rnn_bidirectional)
    else:
      raise ValueError('Unsupported RNN type')

    self.features = rnn_features

    self._init_rnn(self.rnn.weight_ih_l0)
    self._init_rnn(self.rnn.weight_hh_l0)
    self.rnn.bias_ih_l0.data.zero_()
    self.rnn.bias_hh_l0.data.zero_()
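A hedged sketch of the per-tensor initialization pattern above; _init_rnn's body is not shown in this example, so Xavier initialization here is an assumption:

import torch.nn as nn
import torch.nn.init as init

rnn = nn.GRU(input_size=300, hidden_size=512, num_layers=1, batch_first=True)
init.xavier_uniform_(rnn.weight_ih_l0)   # input-to-hidden weights, layer 0
init.xavier_uniform_(rnn.weight_hh_l0)   # hidden-to-hidden weights, layer 0
rnn.bias_ih_l0.data.zero_()
rnn.bias_hh_l0.data.zero_()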
Example #7
    def __init__(self, input_size, hidden_size, dropout=0.25):
        super(CNNRNNBaseline, self).__init__()
        self.rnn = nn.GRU(512,
                          hidden_size,
                          num_layers=3,
                          batch_first=True,
                          dropout=dropout)
        self.linear1 = nn.Linear(hidden_size, input_size)
        self.linear2 = nn.Linear(hidden_size, input_size)

        self.drop1 = nn.Dropout(dropout)
        self.drop2 = nn.Dropout(dropout)
        # self.linear_mask1 = nn.Linear(input_size, input_size)
        # self.linear_mask2 = nn.Linear(input_size, input_size)

        # conv
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 2))

        self.conv1 = nn.Conv2d(in_channels=1,
                               out_channels=256,
                               kernel_size=(3, 3))
        self.conv2 = nn.ConvTranspose2d(in_channels=256,
                                        out_channels=1,
                                        kernel_size=(3, 3))
Example #8
    def __init__(self,
                 embedding_size,
                 hidden_size,
                 tgt_vocab_size,
                 embedding=None,
                 num_layers=4,
                 dropout=0.5):
        super(GruDecoder, self).__init__()

        # Keep for reference
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.tgt_vocab_size = tgt_vocab_size

        if embedding is not None:
            self.embedding = embedding
        else:
            self.embedding = nn.Embedding(tgt_vocab_size,
                                          embedding_size,
                                          padding_idx=PAD)
        self.embedding_dropout = nn.Dropout(dropout)

        self.attn = BilinearAttention(query_size=hidden_size,
                                      key_size=2 * hidden_size,
                                      hidden_size=hidden_size,
                                      dropout=dropout,
                                      coverage=False)

        self.gru = nn.GRU(2 * hidden_size + embedding_size,
                          hidden_size,
                          bidirectional=False,
                          num_layers=num_layers,
                          dropout=dropout)

        self.readout = nn.Linear(
            embedding_size + hidden_size + 2 * hidden_size, hidden_size)
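A shape sketch (sizes assumed) of why the readout layer takes embedding_size + hidden_size + 2*hidden_size inputs: it concatenates the previous target-word embedding, the decoder state, and a context vector attended over a bidirectional encoder:

import torch
import torch.nn as nn

hidden_size, embedding_size = 256, 128
emb_t = torch.randn(8, embedding_size)       # previous target-word embedding
dec_h = torch.randn(8, hidden_size)          # current decoder state
context = torch.randn(8, 2 * hidden_size)    # biGRU-encoder context vector
readout = nn.Linear(embedding_size + hidden_size + 2 * hidden_size, hidden_size)
r = readout(torch.cat([emb_t, dec_h, context], dim=-1))   # (8, 256)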
Example #9
 def __init__(self,
              input_dim: int,
              hidden_dim: int,
              num_layers: int,
              dropout: float,
              batch_size: int,
              use_gpu: bool,
              no_dropout=False):
     super().__init__()
     self.input_dim = input_dim
     self.hidden_dim = hidden_dim
     self.num_layers = num_layers
     self.dropout = dropout
     self.batch_size = batch_size
     self.use_gpu = use_gpu
     self.model = nn.GRU(input_size=self.input_dim,
                         hidden_size=self.hidden_dim,
                         num_layers=self.num_layers,
                         batch_first=True,
                         bidirectional=False,
                         dropout=self.dropout if not no_dropout else 0)
     if self.use_gpu:
         self.model.cuda()
     self.init_hidden()
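One PyTorch caveat this example's no_dropout switch works around: the dropout argument only applies between stacked layers, so with num_layers=1 it is a no-op and triggers a warning:

import torch.nn as nn

# Warns: "dropout option adds dropout after all but last recurrent layer ..."
rnn = nn.GRU(input_size=8, hidden_size=16, num_layers=1, dropout=0.5)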
Example #10
    def __init__(self,
                 loss_func=None,
                 teacher_forcing_ratio=0.5,
                 num_feats=3,
                 dropout=0.2):
        """
        Args:
            lost_func -- pytorch loss function. Note only loss functions where
            lower is better are supported currently. (default L1Loss aka MAE)
            teacher_forcing_ratio -- float between 0 and 1. Percentage of time 
            to force the model to train on target data (rather than its own 
            recursive output
            embedding_in -- int number of indices the embedding maps from
            (default 145603 - number of series)
            embedding_out -- int number of dimensions embedding maps to (default
            20)
            num_feats -- int number of self made features (ie age, day of
            week, week of year) (default 3)
        """
        super().__init__()

        self.hidden_units = 128
        self.n_layers = 2

        self.num_feats = num_feats

        self.rnn = nn.GRU(input_size=1 + num_feats,
                          hidden_size=self.hidden_units,
                          num_layers=self.n_layers,
                          batch_first=True,
                          dropout=dropout).cuda()

        self.out = nn.Linear(self.hidden_units, 1).cuda()

        self.loss_func = nn.L1Loss() if loss_func is None else loss_func
        self.teacher_forcing_ratio = teacher_forcing_ratio
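A minimal sketch (the training loop itself is not shown in this example) of the teacher-forcing decision this module presumably makes at each decoding step:

import random

teacher_forcing_ratio = 0.5
use_target = random.random() < teacher_forcing_ratio
# if use_target: feed the ground-truth value at step t back into the GRU
# else:          feed the model's own previous prediction back in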
Example #11
    def __init__(
        self,
        numerical_input_dim,
        cat_vocab_sizes,
        cat_embedding_dim,
        embedding_dim,
    ):
        # only 1 categorical feature for now
        super(Encoder, self).__init__()
        self.numerical_input_dim = numerical_input_dim
        self.embedding_dim = embedding_dim
        self.cat_vocab_sizes = cat_vocab_sizes
        # TODO: experiment with out dim
        self.cat_embedding_dim = cat_embedding_dim

        self.num_event_encoder = nn.BatchNorm1d(numerical_input_dim)

        self.cat_encoder = nn.Embedding(cat_vocab_sizes[0],
                                        self.cat_embedding_dim)

        self.sequence_encoder = nn.GRU(numerical_input_dim +
                                       self.cat_embedding_dim,
                                       embedding_dim,
                                       batch_first=False)
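Usage sketch (all sizes are assumptions): numerical features and the embedded categorical feature are concatenated per time step before the GRU, which runs seq-first (batch_first=False):

import torch
import torch.nn as nn

num = torch.randn(20, 4, 7)                  # (seq_len, batch, numerical_input_dim)
cat = torch.randint(0, 50, (20, 4))          # one categorical feature
cat_emb = nn.Embedding(50, 8)(cat)           # (20, 4, 8)
x = torch.cat([num, cat_emb], dim=-1)        # (20, 4, 15)
gru = nn.GRU(7 + 8, 32, batch_first=False)
out, h = gru(x)                              # out: (20, 4, 32)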
Example #12
    def __init__(self, in_dim, K=16, projections=[128, 128]):
        super(CBHG, self).__init__()
        self.in_dim = in_dim
        self.relu = nn.ReLU()
        self.conv1d_banks = nn.ModuleList([
            BatchNormConv1d(in_dim,
                            in_dim,
                            kernel_size=k,
                            stride=1,
                            padding=k // 2,
                            activation=self.relu) for k in range(1, K + 1)
        ])
        self.max_pool1d = nn.MaxPool1d(kernel_size=2, stride=1, padding=1)

        in_sizes = [K * in_dim] + projections[:-1]
        activations = [self.relu] * (len(projections) - 1) + [None]
        self.conv1d_projections = nn.ModuleList([
            BatchNormConv1d(in_size,
                            out_size,
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            activation=ac)
            for (in_size, out_size,
                 ac) in zip(in_sizes, projections, activations)
        ])

        self.pre_highway = nn.Linear(projections[-1], in_dim, bias=False)
        self.highways = nn.ModuleList(
            [Highway(in_dim, in_dim) for _ in range(4)])

        self.gru = nn.GRU(in_dim,
                          in_dim,
                          1,
                          batch_first=True,
                          bidirectional=True)
Example #13
    def __init__(self, args, vocab_size, pretrain_embedding=None):
        super(Network, self).__init__()
        self.args = args
        self.logger = logging.getLogger("ntcir14")
        self.embed_size = args.embed_size   # 300 as default
        self.hidden_size = args.hidden_size # 150 as default
        self.vocab_size = vocab_size
        self.dropout_rate = args.dropout_rate
        self.encode_gru_num_layer = 1

        self.embedding = nn.Embedding(self.vocab_size, self.embed_size)

        if pretrain_embedding is not None:
            self.embedding.weight = nn.Parameter(torch.from_numpy(pretrain_embedding))
            self.logger.info('model loaded pretrained embedding successfully.')

        self.question_encode_gru = nn.GRU(self.hidden_size, self.hidden_size, bidirectional=True,
                                          batch_first=True, dropout=self.dropout_rate, num_layers=self.encode_gru_num_layer)
        self.passage_encode_gru = nn.GRU(self.hidden_size, self.hidden_size, bidirectional=True,
                                         batch_first=True, dropout=self.dropout_rate, num_layers=self.encode_gru_num_layer)
        self.early_match_gru = nn.GRU(self.hidden_size, self.hidden_size, bidirectional=True,
                                           batch_first=True, dropout=self.dropout_rate)
        self.late_match_gru = nn.GRU(self.hidden_size * 4, self.hidden_size, bidirectional=True,
                                      batch_first=True, dropout=self.dropout_rate)
        self.early_late_gru = nn.GRU(self.hidden_size * 2, self.hidden_size, bidirectional=True,
                                            batch_first=True, dropout=self.dropout_rate)
        self.question_passage_gru = nn.GRU(self.hidden_size * 4, self.hidden_size, bidirectional=True,
                                           batch_first=True, dropout=self.dropout_rate)

        # self.dot_attention_question = DotAttention(self.hidden_size * 2, self.hidden_size, self.dropout_rate)

        self.dot_attention_late_passage = DotAttention(self.embed_size, self.hidden_size, self.dropout_rate)
        self.dot_attention_late_question = DotAttention(self.embed_size, self.hidden_size, self.dropout_rate)
        self.dot_attention_late_match = DotAttention(self.hidden_size * 2, self.hidden_size, self.dropout_rate)

        self.dot_attention_early_match = DotAttention(self.embed_size, self.hidden_size, self.dropout_rate)
        self.dot_attention_early_passage = DotAttention(self.hidden_size * 2, self.hidden_size, self.dropout_rate)

        self.summ_question = Summ(self.embed_size, self.hidden_size, self.dropout_rate)
        self.summ_passage = Summ(self.hidden_size * 2, self.hidden_size, self.dropout_rate)
        self.linear_score = nn.Linear(self.hidden_size * 2, 1)
Example #14
    def __init__(self,
                 obs_space,
                 action_space,
                 image_dim=128,
                 memory_dim=128,
                 instr_dim=128,
                 use_instr=False,
                 lang_model="gru",
                 use_memory=False,
                 arch="cnn1",
                 aux_info=None,
                 vocabulary=None,
                 learner=False,
                 corrector=False,
                 corr_length=3,
                 corr_own_vocab=False,
                 corr_vocab_size=0,
                 pretrained_corrector=False,
                 use_critic=False,
                 dropout=0.5,
                 corrector_frozen=False,
                 random_corrector=False,
                 var_corr_len=False,
                 weigh_corrections=False,
                 return_internal_repr=False):
        super().__init__()

        self.vocab = vocabulary  # Vocabulary object, from obss_preprocessor

        # Decide which components are enabled
        self.use_instr = use_instr
        self.use_memory = use_memory
        self.arch = arch
        self.lang_model = lang_model
        self.aux_info = aux_info
        self.image_dim = image_dim
        self.memory_dim = memory_dim
        self.instr_dim = instr_dim

        self.use_learner = learner
        self.use_corrector = corrector
        self.corr_own_vocab = corr_own_vocab
        self.pretrained_corrector = pretrained_corrector

        self.obs_space = obs_space

        self.policy_input_size = 0
        self.use_critic = use_critic

        if self.use_learner:
            if arch == "cnn1":
                self.image_conv = nn.Sequential(
                    nn.Conv2d(in_channels=3,
                              out_channels=16,
                              kernel_size=(2, 2)), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                    nn.Conv2d(in_channels=16,
                              out_channels=32,
                              kernel_size=(2, 2)), nn.ReLU(),
                    nn.Conv2d(in_channels=32,
                              out_channels=image_dim,
                              kernel_size=(2, 2)), nn.ReLU())
            elif arch == "cnn2":
                self.image_conv = nn.Sequential(
                    nn.Conv2d(in_channels=3,
                              out_channels=16,
                              kernel_size=(3, 3)), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=(2, 2), stride=2, ceil_mode=True),
                    nn.Conv2d(in_channels=16,
                              out_channels=image_dim,
                              kernel_size=(3, 3)), nn.ReLU())
            elif arch == "filmcnn":
                if not self.use_instr:
                    raise ValueError(
                        "FiLM architecture can only be used when instructions are enabled"
                    )

                self.image_conv_1 = nn.Sequential(
                    nn.Conv2d(in_channels=64,
                              out_channels=32,
                              kernel_size=(2, 2)), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=(2, 2), stride=2))
                self.image_conv_2 = nn.Sequential(
                    nn.Conv2d(in_channels=64,
                              out_channels=32,
                              kernel_size=(2, 2)), nn.ReLU(),
                    nn.Conv2d(in_channels=32,
                              out_channels=128,
                              kernel_size=(2, 2)), nn.ReLU())
            elif arch.startswith("expert_filmcnn"):
                if not self.use_instr:
                    raise ValueError(
                        "FiLM architecture can only be used when instructions are enabled"
                    )

                self.image_conv = nn.Sequential(
                    nn.Conv2d(in_channels=3,
                              out_channels=128,
                              kernel_size=(2, 2),
                              padding=1), nn.BatchNorm2d(128), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                    nn.Conv2d(in_channels=128,
                              out_channels=128,
                              kernel_size=(3, 3),
                              padding=1), nn.BatchNorm2d(128), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=(2, 2), stride=2))
                self.film_pool = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
            elif arch == 'embcnn1':
                self.image_conv = nn.Sequential(
                    ImageBOWEmbedding(obs_space["image"],
                                      embedding_dim=16,
                                      padding_idx=0,
                                      reduce_fn=torch.mean), nn.ReLU(),
                    nn.Conv2d(in_channels=16,
                              out_channels=32,
                              kernel_size=(3, 3)), nn.ReLU(),
                    nn.Conv2d(in_channels=32,
                              out_channels=32,
                              kernel_size=(3, 3)), nn.ReLU(),
                    nn.Conv2d(in_channels=32,
                              out_channels=image_dim,
                              kernel_size=(3, 3)), nn.ReLU())
            else:
                raise ValueError(
                    "Incorrect architecture name: {}".format(arch))

            # Define instruction embedding
            if self.use_instr:
                if self.lang_model in ['gru', 'conv', 'bigru', 'attgru']:
                    self.word_embedding = nn.Embedding(obs_space["instr"],
                                                       self.instr_dim)
                    if self.lang_model in ['gru', 'bigru', 'attgru']:
                        gru_dim = self.instr_dim
                        if self.lang_model in ['bigru', 'attgru']:
                            gru_dim //= 2
                        self.instr_rnn = nn.GRU(
                            self.instr_dim,
                            gru_dim,
                            batch_first=True,
                            bidirectional=(self.lang_model
                                           in ['bigru', 'attgru']))
                        self.final_instr_dim = self.instr_dim
                    else:
                        kernel_dim = 64
                        kernel_sizes = [3, 4]
                        self.instr_convs = nn.ModuleList([
                            nn.Conv2d(1, kernel_dim, (K, self.instr_dim))
                            for K in kernel_sizes
                        ])
                        self.final_instr_dim = kernel_dim * len(kernel_sizes)

                elif self.lang_model == 'bow':
                    hidden_units = [
                        obs_space["instr"], self.instr_dim, self.instr_dim
                    ]
                    layers = []
                    for n_in, n_out in zip(hidden_units, hidden_units[1:]):
                        layers.append(nn.Linear(n_in, n_out))
                        layers.append(nn.ReLU())
                    self.instr_bow = nn.Sequential(*layers)
                    self.final_instr_dim = instr_dim

                if self.lang_model == 'attgru':
                    self.memory2key = nn.Linear(self.memory_size,
                                                self.final_instr_dim)

            # Define memory
            if self.use_memory:
                self.memory_rnn = nn.LSTMCell(self.image_dim, self.memory_dim)
                self.policy_input_size += self.memory_dim

            # Resize image embedding
            self.embedding_size = self.semi_memory_size
            if self.use_instr and arch != "filmcnn" and not arch.startswith(
                    "expert_filmcnn"):
                self.embedding_size += self.final_instr_dim

            if arch == "filmcnn":
                self.controller_1 = AgentControllerFiLM(
                    in_features=self.final_instr_dim,
                    out_features=64,
                    in_channels=3,
                    imm_channels=16)
                self.controller_2 = AgentControllerFiLM(
                    in_features=self.final_instr_dim,
                    out_features=64,
                    in_channels=32,
                    imm_channels=32)

            if arch.startswith("expert_filmcnn"):
                if arch == "expert_filmcnn":
                    num_module = 2
                else:
                    num_module = int(arch[(arch.rfind('_') + 1):])
                self.controllers = []
                for ni in range(num_module):
                    if ni < num_module - 1:
                        mod = ExpertControllerFiLM(
                            in_features=self.final_instr_dim,
                            out_features=128,
                            in_channels=128,
                            imm_channels=128)
                    else:
                        mod = ExpertControllerFiLM(
                            in_features=self.final_instr_dim,
                            out_features=self.image_dim,
                            in_channels=128,
                            imm_channels=128)
                    self.controllers.append(mod)
                    self.add_module('FiLM_Controler_' + str(ni), mod)

        # Initialize parameters correctly.
        # Put this here, because otherwise pretrained corrector's linear weights are overwritten
        self.apply(initialize_parameters)

        if self.use_corrector:
            self.corr_vocab_size = corr_vocab_size
            self.corr_length = corr_length
            self.var_corr_len = var_corr_len

            if not self.pretrained_corrector:
                if self.corr_own_vocab:
                    num_corr_embeddings = corr_vocab_size
                    vocabulary_corr = None
                else:
                    num_corr_embeddings = self.obs_space['instr']
                    vocabulary_corr = self.vocab

                self.corrector = Corrector(image_dim=self.image_dim,
                                           memory_dim=self.memory_dim,
                                           instr_dim=self.instr_dim,
                                           num_embeddings=num_corr_embeddings,
                                           num_rnn_layers=1,
                                           vocabulary=vocabulary_corr,
                                           corr_length=self.corr_length,
                                           obs_space=self.obs_space,
                                           var_len=self.var_corr_len)

            else:
                self.load_pretrained_corrector(
                    self.pretrained_corrector,
                    corrector_frozen=corrector_frozen)
                corr_vocab_size = self.corrector.word_embedding_corrector.num_embeddings

            if self.corr_own_vocab:
                if self.var_corr_len:
                    num_corr_embeddings = corr_vocab_size + 1
                else:
                    num_corr_embeddings = corr_vocab_size

                self.word_embedding_corrections = nn.Embedding(
                    num_corr_embeddings, self.instr_dim)
                corr_rnn_hidden = 512  # currently constant
                self.corr_rnn = nn.GRU(input_size=self.instr_dim,
                                       hidden_size=corr_rnn_hidden,
                                       batch_first=True)
            else:
                if self.use_learner:
                    self.word_embedding_corrections = self.word_embedding
                    self.corr_rnn = self.instr_rnn
                else:
                    self.word_embedding_corrections = self.corrector.instr_embedding
                    self.corr_rnn = self.corrector.instr_rnn

            self.policy_input_size += self.corr_rnn.hidden_size

            self.corr_dropout = nn.Dropout(p=dropout)

            if random_corrector:
                self.corrector.randomize()

            self.weigh_corrections = weigh_corrections
            if self.weigh_corrections:
                # parameter to determine weight of corrections from previous entropy
                self.entropy_weight = nn.Linear(
                    1, 1)  #nn.Parameter(torch.randn(1))

        if self.use_critic:
            # Define critic's model (used in PPO, not in IL)
            self.critic = nn.Sequential(nn.Linear(self.policy_input_size, 64),
                                        nn.Tanh(), nn.Linear(64, 1))

        # Define actor's model
        self.actor = nn.Sequential(nn.Linear(self.policy_input_size, 64),
                                   nn.Tanh(), nn.Linear(64, action_space.n))

        # Define head for extra info
        if self.aux_info:
            self.extra_heads = None
            self.add_heads()

        self.return_internal_repr = return_internal_repr
Example #15
 def __init__(self, input_dim, embed_dim, dropout):
     super(GRUBlock, self).__init__()
     self.gru = nn.GRU(input_size=input_dim, hidden_size=embed_dim)
     self.layer_norm = nn.LayerNorm(embed_dim)
     self.dropout = nn.Dropout(dropout)
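Usage sketch for GRUBlock (forward() is not shown in the source; GRU -> LayerNorm -> Dropout is an assumed ordering):

import torch

block = GRUBlock(input_dim=64, embed_dim=64, dropout=0.1)
x = torch.randn(30, 8, 64)                   # (seq_len, batch, input_dim)
out, _ = block.gru(x)
out = block.dropout(block.layer_norm(out))   # (30, 8, 64)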
Example #16
 def __init__(self, vocab_size, hidden_size):
     super(QuestionModule, self).__init__()
     self.vocab_size = vocab_size  # Size of the vocabulary used in word embedding
     self.hidden_size = hidden_size  # Size of the hidden state of GRU
     self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
Example #17
    def __init__(self,
                 hidden_size,
                 K=16,
                 projection_size=128,
                 num_gru_layers=2,
                 max_pool_kernel_size=2,
                 is_post=False):
        """
        :param hidden_size: dimension of hidden unit
        :param K: # of convolution banks
        :param projection_size: dimension of projection unit
        :param num_gru_layers: # of layers of GRUcell
        :param max_pool_kernel_size: max pooling kernel size
        :param is_post: whether post processing or not
        """
        super(CBHG, self).__init__()
        self.hidden_size = hidden_size
        self.num_gru_layers = num_gru_layers
        self.projection_size = projection_size
        self.convbank_list = nn.ModuleList()
        self.convbank_list.append(
            nn.Conv1d(in_channels=projection_size,
                      out_channels=hidden_size,
                      kernel_size=1,
                      padding=int(np.floor(1 / 2))))

        for i in range(2, K + 1):
            self.convbank_list.append(
                nn.Conv1d(in_channels=hidden_size,
                          out_channels=hidden_size,
                          kernel_size=i,
                          padding=int(np.floor(i / 2))))

        self.batchnorm_list = nn.ModuleList()
        for i in range(1, K + 1):
            self.batchnorm_list.append(nn.BatchNorm1d(hidden_size))

        convbank_outdim = hidden_size * K
        if is_post:
            self.conv_projection_1 = nn.Conv1d(in_channels=convbank_outdim,
                                               out_channels=hidden_size * 2,
                                               kernel_size=3,
                                               padding=int(np.floor(3 / 2)))
            self.conv_projection_2 = nn.Conv1d(in_channels=hidden_size * 2,
                                               out_channels=projection_size,
                                               kernel_size=3,
                                               padding=int(np.floor(3 / 2)))
            self.batchnorm_proj_1 = nn.BatchNorm1d(hidden_size * 2)

        else:
            self.conv_projection_1 = nn.Conv1d(in_channels=convbank_outdim,
                                               out_channels=hidden_size,
                                               kernel_size=3,
                                               padding=int(np.floor(3 / 2)))
            self.conv_projection_2 = nn.Conv1d(in_channels=hidden_size,
                                               out_channels=projection_size,
                                               kernel_size=3,
                                               padding=int(np.floor(3 / 2)))
            self.batchnorm_proj_1 = nn.BatchNorm1d(hidden_size)

        self.batchnorm_proj_2 = nn.BatchNorm1d(projection_size)

        self.max_pool = nn.MaxPool1d(max_pool_kernel_size, stride=1, padding=1)
        self.highway = Highwaynet(self.projection_size)
        self.gru = nn.GRU(self.projection_size,
                          self.hidden_size,
                          num_layers=2,
                          batch_first=True,
                          bidirectional=True)
Example #18
 def __init__(self, config, num_rnn = 1):
     super().__init__()
     self.hidden_size = config.hidden_size
     self.num_layers = num_rnn
     self.biGRU = nn.GRU(config.hidden_size, config.hidden_size, self.num_layers, batch_first = True, bidirectional = True)
Example #19
 def init_mesh_module(self):
     self.mesh_h0    = autograd.Variable(torch.randn(1, 1, self.embedding_dim))
     self.mesh_gru   = nn.GRU(self.embedding_dim, self.embedding_dim)
     if(use_cuda):
         self.mesh_h0    = self.mesh_h0.cuda()
         self.mesh_gru   = self.mesh_gru.cuda()
Example #20
File: model.py Project: fzwqq/JRTS
    def __init__(
        self,
        map_size,
        input_channel=50,
        # unit_feature_size is computed from map_size below
        recurrent=False,
        hidden_size=256,
    ):
        """[summary]
        
        Arguments:
            map_size {tuple} -- (map_height, map_width)
        
        Keyword Arguments:
            input_channel {int} -- [description] (default: {21})
            unit_feature_size {int} -- [description] (default: {18})
        """
        super(ActorCritic, self).__init__()

        self.recurrent = recurrent
        map_height, map_width = map_size
        unit_feature_size = 23 + map_height + map_width  #+ map_height * map_width
        self.shared_out_size = 256
        self.conv_out_size = 16
        self.shared_to_actor_size = 128
        self.hsz = hidden_size
        self.conv_flatten_size = self.conv_out_size * map_height * map_width

        self.activated_agents = [
            UNIT_TYPE_NAME_BASE,
            UNIT_TYPE_NAME_BARRACKS,
            UNIT_TYPE_NAME_WORKER,
            UNIT_TYPE_NAME_HEAVY,
            UNIT_TYPE_NAME_LIGHT,
            # UNIT_TYPE_NAME_RANGED,
        ]

        init_ = lambda m: init(m, nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0),
                               nn.init.calculate_gain('relu'))
        self.critic_conv = NNBase(map_size, input_channel, hidden_size,
                                  self.conv_out_size)

        # self.actor_conv  = nn.ModuleDict({
        #     UNIT_TYPE_NAME_WORKER: NNBase(map_size,input_channel,hidden_size,self.conv_out_size),
        #     UNIT_TYPE_NAME_BASE: NNBase(map_size,input_channel,hidden_size,self.conv_out_size),
        #     UNIT_TYPE_NAME_LIGHT:NNBase(map_size,input_channel,hidden_size,self.conv_out_size),
        #     UNIT_TYPE_NAME_BARRACKS: NNBase(map_size,input_channel,hidden_size,self.conv_out_size),
        #     UNIT_TYPE_NAME_HEAVY: NNBase(map_size,input_channel,hidden_size,self.conv_out_size)
        # })
        self.actor_conv = NNBase(map_size, input_channel, hidden_size,
                                 self.conv_out_size)

        # self.shared_conv = nn.Sequential(
        #     init_(nn.Conv2d(in_channels=input_channel, out_channels=64, kernel_size=1)), #nn.ReLU(),
        #     # nn.BatchNorm2d(64), nn.ReLU(),
        #     init_(nn.Conv2d(64, 32, 1)), #nn.ReLU(),
        #     # nn.BatchNorm2d(32), nn.ReLU(),
        #     init_(nn.Conv2d(32, self.conv_out_size, 1)),# nn.ReLU(),
        #     # nn.BatchNorm2d(self.conv_out_size), nn.ReLU(),

        #     # nn.BatchNorm2d(16,affine=False), nn.ReLU(),
        #     # init_(nn.Conv2d(32, 16, 1)), nn.ReLU(),
        #     # init_(nn.Conv2d(64, 32, 2)), nn.ReLU(),
        #     # nn.Conv2d(64, 32, 2), nn.ReLU(),
        #     nn.AdaptiveMaxPool2d((map_height, map_width)),  # n * 64 * map_height * map_width
        #     Flatten(),
        #     # nn.LayerNorm(self.conv_flatten_size)
        # )

        # self.self_attn = nn.Sequential(
        #     Pic2Vector(),
        #     # nn.TransformerEncoderLayer(d_model=16, nhead=4, dim_feedforward=64, dropout=0, activation="relu"),
        # )
        # self.self_attn = nn.MultiheadAttention(embed_dim=16,num_heads=8)
        self.p2v = Pic2Vector(channel_size=self.conv_out_size)
        self.self_attn = MultiHeadAttention(n_head=2,
                                            d_model=16,
                                            d_k=16,
                                            d_v=16,
                                            dropout=0)
        # self.self_attn.share_memory()

        init_ = lambda m: init(m, nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0))
        # self.shared_linear = nn.Sequential(
        #     Flatten(),
        #     init_(nn.Linear(16 * (map_height) * (map_width), hidden_size)), nn.ReLU(),
        #     # nn.BatchNorm1d(hidden_size,affine=False), nn.ReLU(),
        #     # init_(nn.Linear(256, 256)), nn.ReLU(),
        #     # init_(nn.Linear(hidden_size, hidden_size)),# nn.ReLU()
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     # nn.BatchNorm1d(hidden_size,affine=False), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     # init_(nn.Linear(128, 128)), nn.ReLU(),
        #     # init_(nn.Linear(128, self.shared_out_size)), nn.ReLU(),
        # )

        # self.critic  = nn.ModuleDict({
        #     UNIT_TYPE_NAME_WORKER: nn.Sequential(
        #     init_(nn.Linear(self.conv_flatten_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, 1)),
        # ),
        #     UNIT_TYPE_NAME_BASE: nn.Sequential(
        #     init_(nn.Linear(self.conv_flatten_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, 1)),
        # ),
        #     UNIT_TYPE_NAME_LIGHT:nn.Sequential(
        #     init_(nn.Linear(self.conv_flatten_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        #     init_(nn.Linear(hidden_size, 1)),,
        #     # UNIT_TYPE_NAME_BARRACKS: ,
        #     # UNIT_TYPE_NAME_HEAVY:
        # })
        self.critic_mlps = nn.Sequential(
            init_(nn.Linear(self.conv_flatten_size, hidden_size)),
            nn.ReLU(),
            # nn.BatchNorm1d(hidden_size,affine=False), nn.ReLU(),
            # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
            # nn.BatchNorm1d(hidden_size,affine=False), nn.ReLU(),
            # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)),
            nn.ReLU(),
            # nn.BatchNorm1d(hidden_size), nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)),
            nn.ReLU(),
            # nn.BatchNorm1d(hidden_size), nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)),
            nn.ReLU(),
            # init_(nn.Linear(64, 64)), nn.ReLU(),
            # init_(nn.Linear(128, 128)), nn.ReLU(),
            # init_(nn.Linear(256, 256)), nn.ReLU(),
            # init_(nn.Linear(256, 256)), nn.ReLU(),
        )
        self.critic_out = init_(nn.Linear(hidden_size, 1))

        #-------------------------------------------#
        # self.shared_to_actor = nn.Sequential(init_(nn.Linear(hidden_size, self.shared_to_actor_size)), nn.ReLU())

        self.actor_mlps = nn.Sequential(
            # init_(nn.Linear(self.shared_out_size + unit_feature_size + encoded_utt_feature_size, hidden_size)), nn.ReLU(),
            init_(
                nn.Linear(self.conv_flatten_size + unit_feature_size,
                          hidden_size)),
            nn.ReLU(),
            # nn.BatchNorm1d(hidden_size,affine=False), nn.ReLU(),
            # init_(nn.Linear(64, 64)), nn.ReLU(),
            # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
            # nn.BatchNorm1d(hidden_size,affine=False),nn.ReLU(),
            # nn.LayerNorm(normalized_shape=(64),elementwise_affine=False),
            # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
            # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
            # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
            # nn.LayerNorm(normalized_shape=(64),elementwise_affine=False),
            # init_(nn.Linear(hidden_size, hidden_size)),nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)),
            nn.ReLU(),
            # nn.BatchNorm1d(hidden_size), nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)),
            nn.ReLU()
            # nn.BatchNorm1d(hidden_size), nn.ReLU(),
            # nn.LayerNorm(normalized_shape=(64),elementwise_affine=False),
            # init_(nn.Linear(256, 256)), nn.ReLU(),
        )
        if recurrent:
            self.gru = nn.GRU(hidden_size, hidden_size)
            for name, param in self.gru.named_parameters():
                if 'bias' in name:
                    nn.init.constant_(param, 0)
                elif 'weight' in name:
                    nn.init.orthogonal_(param)
        # self.layer_norm = nn.LayerNorm(normalized_shape=(hidden_size),elementwise_affine=True)

        self.actor_out = nn.ModuleDict({
            UNIT_TYPE_NAME_WORKER:
            nn.Sequential(
                # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
                init_(
                    nn.Linear(self.conv_flatten_size + unit_feature_size,
                              hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
                # nn.BatchNorm1d(hidden_size), nn.ReLU(),

                # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
                # nn.BatchNorm1d(hidden_size), nn.ReLU(),
                init_(
                    nn.Linear(hidden_size,
                              WorkerAction.__members__.items().__len__())),
                nn.Softmax(dim=1)),
            UNIT_TYPE_NAME_BASE:
            nn.Sequential(
                # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
                init_(
                    nn.Linear(self.conv_flatten_size + unit_feature_size,
                              hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
                # nn.BatchNorm1d(hidden_size), nn.ReLU(),

                # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
                # nn.BatchNorm1d(hidden_size), nn.ReLU(),
                init_(
                    nn.Linear(hidden_size,
                              BaseAction.__members__.items().__len__())),
                nn.Softmax(dim=1),
            ),
            UNIT_TYPE_NAME_LIGHT:
            nn.Sequential(
                # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
                init_(
                    nn.Linear(self.conv_flatten_size + unit_feature_size,
                              hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
                # nn.BatchNorm1d(hidden_size), nn.ReLU(),

                # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
                # nn.BatchNorm1d(hidden_size), nn.ReLU(),
                init_(
                    nn.Linear(hidden_size,
                              LightAction.__members__.items().__len__())),
                nn.Softmax(dim=1),
            ),
            UNIT_TYPE_NAME_BARRACKS:
            nn.Sequential(
                # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
                init_(
                    nn.Linear(self.conv_flatten_size + unit_feature_size,
                              hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
                # nn.BatchNorm1d(hidden_size), nn.ReLU(),

                # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
                # nn.BatchNorm1d(hidden_size), nn.ReLU(),
                init_(
                    nn.Linear(hidden_size,
                              BarracksAction.__members__.items().__len__())),
                nn.Softmax(dim=1),
            ),
            UNIT_TYPE_NAME_HEAVY:
            nn.Sequential(
                # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
                init_(
                    nn.Linear(self.conv_flatten_size + unit_feature_size,
                              hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
                init_(nn.Linear(hidden_size, hidden_size)),
                nn.ReLU(),
                # nn.BatchNorm1d(hidden_size), nn.ReLU(),

                # init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
                # nn.BatchNorm1d(hidden_size), nn.ReLU(),
                init_(
                    nn.Linear(hidden_size,
                              HeavyAction.__members__.items().__len__())),
                nn.Softmax(dim=1),
            )
        })
Example #21
 def __init__(self, input_size, hidden_size, num_layers, opt, dictionary):
     super().__init__()
     self.dict = dictionary
     self.h2o = nn.Linear(hidden_size, len(dictionary))
     self.dropout = nn.Dropout(opt['dropout'])
     self.rnn = nn.GRU(input_size, hidden_size, num_layers)
Example #22
 def __init__(self, feature_num):
     super(Sequence, self).__init__()
     self.lstm = nn.LSTM(input_size=feature_num, hidden_size=256, num_layers=3)
     self.gru = nn.GRU(input_size=feature_num, hidden_size=256, num_layers=3)
     self.rnn = nn.RNN(input_size=feature_num, hidden_size=256, num_layers=3)
     self.linear = nn.Linear(256, 1)
Example #23
    def __init__(self,
                 input_size,
                 hidden_size,
                 output_size,
                 layer_type='GRU',
                 n_layers=1,
                 is_bidirectional=False,
                 has_stack=False,
                 stack_width=None,
                 stack_depth=None,
                 ignore_idx=0,
                 use_cuda=None,
                 optimizer_instance=torch.optim.Adadelta,
                 lr=0.01):
        """
        Constructor for the StackAugmentedRNN object.

        Parameters
        ----------
        input_size: int
            number of characters in the alphabet

        hidden_size: int
            size of the RNN layer(s)

        output_size: int
            again number of characters in the alphabet

        layer_type: str (default 'GRU')
            type of the RNN layer to be used. Could be either 'LSTM' or 'GRU'.

        n_layers: int (default 1)
            number of RNN layers

        is_bidirectional: bool (default False)
            parameter specifying if RNN is bidirectional

        has_stack: bool (default False)
            parameter specifying if augmented memory stack is used

        stack_width: int (default None)
            if has_stack is True then this parameter defines width of the
            augmented stack memory

        stack_depth: int (default None)
            if has_stack is True then this parameter defines the depth of the
            augmented stack memory. Hint: the stack depth does not need to be
            larger than the length of the longest sequence you plan to generate

        use_cuda: bool (default None)
            parameter specifying if GPU is used for computations. If left
            unspecified, GPU will be used if available

        optimizer_instance: torch.optim object (default torch.optim.Adadelta)
            optimizer to be used for training

        lr: float (default 0.01)
            learning rate for the optimizer

        """
        super(StackAugmentedRNN, self).__init__()

        if layer_type not in ['GRU', 'LSTM']:
            raise ValueError('Layer type must be GRU or LSTM')
        self.layer_type = layer_type
        self.is_bidirectional = is_bidirectional
        if self.is_bidirectional:
            self.num_dir = 2
        else:
            self.num_dir = 1
        if layer_type == 'LSTM':
            self.has_cell = True
        else:
            self.has_cell = False
        self.has_stack = has_stack
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        if self.has_stack:
            self.stack_width = stack_width
            self.stack_depth = stack_depth

        self.use_cuda = use_cuda
        if self.use_cuda is None:
            self.use_cuda = torch.cuda.is_available()

        self.n_layers = n_layers

        if self.has_stack:
            self.stack_controls_layer = nn.Linear(
                in_features=self.hidden_size * self.num_dir, out_features=3)

            self.stack_input_layer = nn.Linear(in_features=self.hidden_size *
                                               self.num_dir,
                                               out_features=self.stack_width)

        self.encoder = nn.Embedding(input_size, hidden_size)
        if self.has_stack:
            rnn_input_size = hidden_size + stack_width
        else:
            rnn_input_size = hidden_size
        if self.layer_type == 'LSTM':
            self.rnn = nn.LSTM(rnn_input_size,
                               hidden_size,
                               n_layers,
                               bidirectional=self.is_bidirectional)
            self.decoder = nn.Linear(hidden_size * self.num_dir, output_size)
        elif self.layer_type == 'GRU':
            self.rnn = nn.GRU(rnn_input_size,
                              hidden_size,
                              n_layers,
                              bidirectional=self.is_bidirectional)
            self.decoder = nn.Linear(hidden_size * self.num_dir, output_size)
        self.log_softmax = torch.nn.LogSoftmax(dim=1)
        self.ignore_idx = ignore_idx
        self.criterion = nn.CrossEntropyLoss(ignore_index=self.ignore_idx)
        self.lr = lr
        self.optimizer_instance = optimizer_instance
        self.optimizer = self.optimizer_instance(self.parameters(),
                                                 lr=lr,
                                                 weight_decay=0.00001)
        if self.use_cuda:
            self = self.cuda()
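A shape sketch (sizes assumed) of the stack augmentation above: when has_stack is True, each step's RNN input is the character embedding concatenated with the top of the memory stack, hence rnn_input_size = hidden_size + stack_width:

import torch

hidden_size, stack_width = 64, 10
emb_t = torch.randn(1, 1, hidden_size)       # embedded current character
stack_top = torch.randn(1, 1, stack_width)   # top entry of the augmented stack
rnn_input = torch.cat([emb_t, stack_top], dim=-1)   # (1, 1, 74)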
Example #24
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
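Usage sketch (token ids and sizes are assumptions): this is the classic seq2seq-tutorial encoder, fed one token at a time, seq-first:

import torch

enc = EncoderRNN(input_size=10, hidden_size=256)
token = torch.tensor([[3]])                 # (seq_len=1, batch=1)
hidden = torch.zeros(1, 1, 256)
out, hidden = enc.gru(enc.embedding(token), hidden)   # both (1, 1, 256)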
Example #25
 def __init__(self, args, activation):
     super().__init__(args, activation)
     self.rnn = nn.GRU(input_size=args.layer_2_feats,
                       hidden_size=args.lstm_l2_feats,
                       num_layers=args.lstm_l2_layers)
Example #26
    def __init__(self):
        super(VariableLengthSequences, self).__init__()

        self.embedding = nn.Embedding(50, 100)
        self.rnn = nn.GRU(100, 256)
        self.fc = nn.Linear(256, 2)
Example #27
 def __init__(self, hidden_size):
     super(QuestionModule, self).__init__()
     self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
Example #28
    def __init__(self, config):
        super(AttentionDecoder, self).__init__()

        self.decoder_type = config['decoder_type']
        self.word_emb_dim = config['word_emb_dim']
        self.dec_rnn_dim = config['dec_rnn_dim']
        self.enc_rnn_dim = config['enc_rnn_dim']
        self.dpout_dec = config['dpout_dec']
        self.n_vocab = config['n_vocab']
        self.word_index = config['word_index']
        self.word_vec = config['word_vec']
        self.max_T_decoder = config['max_T_decoder']
        self.max_T_encoder = config['max_T_encoder']
        self.n_layers_dec = config['n_layers_dec']
        # for decoder initial state
        self.use_init = config['use_init']
        # attention type: dot product or linear layer
        self.att_type = config['att_type']  # 'lin' or 'dot'
        # attention hidden dimension
        self.att_hid_dim = config['att_hid_dim']

        self.sent_dim = 2 * config['enc_rnn_dim']
        if config['encoder_type'] in [
                "ConvNetEncoder", "InnerAttentionMILAEncoder"
        ]:
            self.sent_dim = 4 * self.sent_dim
        if config['encoder_type'] == "LSTMEncoder":
            self.sent_dim = self.sent_dim // 2  # integer division: sent_dim sizes nn.Linear layers

        assert self.sent_dim == 4096, str(self.sent_dim)
        # TODO: remove this when implemented linear attention
        assert self.att_type == 'dot'

        self.context_proj = nn.Linear(4 * self.sent_dim, self.dec_rnn_dim)

        self.att_ht_proj1 = nn.Sequential(
            nn.Linear(self.sent_dim, self.att_hid_dim),
            nn.Tanh(),
        )

        self.att_context_proj1 = nn.Sequential(
            nn.Linear(self.dec_rnn_dim, self.att_hid_dim),
            nn.Tanh(),
        )

        self.att_ht_before_weighting_proj1 = nn.Sequential(
            nn.Linear(self.sent_dim, self.att_hid_dim),
            nn.Tanh(),
        )

        self.att_ht_proj2 = nn.Sequential(
            nn.Linear(self.sent_dim, self.att_hid_dim),
            nn.Tanh(),
        )

        self.att_context_proj2 = nn.Sequential(
            nn.Linear(self.dec_rnn_dim, self.att_hid_dim),
            nn.Tanh(),
        )

        self.att_ht_before_weighting_proj2 = nn.Sequential(
            nn.Linear(self.sent_dim, self.att_hid_dim),
            nn.Tanh(),
        )

        self.proj_inp_dec = nn.Linear(2 * self.att_hid_dim + self.word_emb_dim,
                                      self.dec_rnn_dim)
        if self.decoder_type == 'gru':
            self.decoder_rnn = nn.GRU(self.dec_rnn_dim,
                                      self.dec_rnn_dim,
                                      self.n_layers_dec,
                                      bidirectional=False,
                                      dropout=self.dpout_dec)
        else:  # 'lstm'
            self.decoder_rnn = nn.LSTM(self.dec_rnn_dim,
                                       self.dec_rnn_dim,
                                       self.n_layers_dec,
                                       bidirectional=False,
                                       dropout=self.dpout_dec)

        # att softmax
        self.softmax_att = nn.Softmax(2)

        # vocab layer
        self.vocab_layer = nn.Linear(self.dec_rnn_dim, self.n_vocab)
Example #29
    def __init__(self, config, x_embed):
        super().__init__()

        self.num_layers_rnn = 1
        self.x_embed = x_embed.x_embed

        self.wdrop = config.wdrop
        self.dropoute = config.dropoute
        self.encoder_out_size = config.rnn_cell_size
        self.rnn_cell_type = config.rnn_cell_type

        self.training = True

        import warnings
        warnings.filterwarnings("ignore")

        self.model = None
        if self.rnn_cell_type.lower() == "lstm":
            self.rnn = nn.LSTM(input_size=x_embed.embedding_dim,
                               hidden_size=config.rnn_cell_size,
                               num_layers=self.num_layers_rnn,
                               bidirectional=False,
                               dropout=config.dropout,
                               batch_first=True,
                               bias=True)
            self.model = WeightDrop(self.rnn, ['weight_hh_l0'], dropout=self.wdrop)

        elif self.rnn_cell_type.lower() == "gru":
            self.rnn = nn.GRU(input_size=x_embed.embedding_dim,
                              hidden_size=config.rnn_cell_size,
                              num_layers=self.num_layers_rnn,
                              bidirectional=False,
                              dropout=config.dropout,
                              batch_first=True,
                              bias=True)
            self.model = WeightDrop(self.rnn, ['weight_hh_l0'], dropout=self.wdrop)

        elif self.rnn_cell_type.lower() == "qrnn":
            from torchqrnn import QRNNLayer
            self.model = QRNNLayer(input_size=x_embed.embedding_dim,
                                   hidden_size=config.rnn_cell_size,
                                   save_prev_x=True,
                                   zoneout=0,
                                   window=1,
                                   output_gate=True,
                                   use_cuda=config.use_gpu)
            self.model.linear = WeightDrop(self.model.linear, ['weight'], dropout=self.wdrop)
            # self.encoders.reset()

        self.lockdrop = LockedDropout()
        self.dropouti = 0.1

        # temporal averaging
        self.beta_ema = config.beta_ema
        if self.beta_ema > 0:
            self.avg_param = deepcopy(list(p.data for p in self.parameters()))
            if config.use_gpu:
                self.avg_param = [a.cuda() for a in self.avg_param]
            self.steps_ema = 0.

        return
Example #30
 def __init__(self):
     super(GRU_Layer, self).__init__()
     self.gru = nn.GRU(input_size=300,
                       hidden_size=gru_len,  # gru_len: module-level constant in the source file
                       bidirectional=True)