def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.0, base_model="GRU"):
    super().__init__()
    if base_model == "GRU":
        self.rnn = nn.GRU(
            input_size=d_feat,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
    elif base_model == "LSTM":
        self.rnn = nn.LSTM(
            input_size=d_feat,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
    else:
        raise ValueError("unknown base model name `%s`" % base_model)

    self.hidden_size = hidden_size
    self.d_feat = d_feat
    self.transformation = nn.Linear(self.hidden_size, self.hidden_size)
    # Attention vector over pairs of transformed hidden states, hence 2 * hidden_size.
    # nn.Parameter already sets requires_grad=True, so no extra flag is needed.
    self.a = nn.Parameter(torch.randn(self.hidden_size * 2, 1))
    self.fc = nn.Linear(self.hidden_size, self.hidden_size)
    self.fc_out = nn.Linear(hidden_size, 1)
    self.leaky_relu = nn.LeakyReLU()
    self.softmax = nn.Softmax(dim=1)
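# A minimal standalone sketch of the recurrent core above (shapes are assumptions;
# only the constructor is shown in the source). batch_first=True means the GRU
# consumes (batch, seq_len, d_feat) and returns per-step hidden states.
import torch
from torch import nn

rnn = nn.GRU(input_size=6, hidden_size=64, num_layers=2, batch_first=True)
x = torch.randn(32, 60, 6)        # (batch, seq_len, d_feat)
out, h_n = rnn(x)
print(out.shape)                  # torch.Size([32, 60, 64]): hidden state per time step
print(h_n.shape)                  # torch.Size([2, 32, 64]): final state per layer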
def __init__(self, n_skill, max_seq=100, embed_dim=128, num_heads=8, dropout=0.2):
    super(SAKTModel, self).__init__()
    self.n_skill = n_skill
    self.embed_dim = embed_dim
    # NOTE: the constructor argument is overridden locally. Six 32-dim categorical
    # embeddings are concatenated with a 256-dim exercise embedding below, so every
    # layer is built with 32 * 6 + 256 = 448 dims, not the `embed_dim` passed in.
    embed_dim = 32 * 6 + 256
    self.embedding = nn.Embedding(4, 32)
    self.user_answer_embedding = nn.Embedding(6, 32)
    self.prior_question_had_explanation_embedding = nn.Embedding(4, 32)
    self.e_embedding = nn.Embedding(n_skill + 1, 256)
    self.part_embedding = nn.Embedding(8, 32)
    self.elapsed_time_embedding = nn.Embedding(302, 32)
    self.duration_previous_content_embedding = nn.Embedding(302, 32)
    encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads,
                                               dropout=dropout)
    self.transformer_enc = nn.TransformerEncoder(encoder_layer=encoder_layer,
                                                 num_layers=4)
    self.gru = nn.GRU(input_size=embed_dim, hidden_size=embed_dim)
    self.continuous_embedding = nn.Sequential(
        nn.BatchNorm1d(99),
        nn.Linear(1, embed_dim // 2),
        nn.LayerNorm(embed_dim // 2))
    self.cat_embedding = nn.Sequential(
        nn.Linear(embed_dim, embed_dim // 2),
        nn.LayerNorm(embed_dim // 2))
    self.layer_normal = nn.LayerNorm(embed_dim)
    self.ffn = FFN(embed_dim)
    self.dropout = nn.Dropout(dropout / 2)
    self.pred = nn.Linear(embed_dim // 4, 1)
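# Where 32 * 6 + 256 = 448 comes from: the six 32-dim categorical embeddings are
# presumably concatenated with the 256-dim exercise embedding on the feature axis
# in forward(). This is a hypothetical illustration; the real forward is not shown.
import torch

parts = [torch.randn(8, 100, 32) for _ in range(6)]  # six categorical embeddings
exercise = torch.randn(8, 100, 256)                  # e_embedding output
x = torch.cat(parts + [exercise], dim=-1)
assert x.shape[-1] == 32 * 6 + 256 == 448            # d_model; 448 / 8 heads = 56 per head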
def __init__(self, input_size, hidden_size, output_size, num_layers=4,
             attn_dim=64, fc_dim=512, attention=False):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.num_layers = num_layers
    self.fc_dim = fc_dim
    self.attention = attention
    self.gru = nn.GRU(
        input_size=self.input_size,
        hidden_size=self.hidden_size,
        num_layers=self.num_layers,
        batch_first=True,
        dropout=0.5,
        bidirectional=True)
    if attention:
        self.attn = Attention(self.input_size, self.hidden_size, attn_dim)
    # The GRU is bidirectional, so the classifier head sees 2 * hidden_size features.
    self.fc1 = nn.Linear(hidden_size * 2, self.fc_dim)
    self.bn1 = nn.BatchNorm1d(self.fc_dim)
    self.prelu1 = nn.PReLU(self.fc_dim)
    self.dp1 = nn.Dropout(0.5)
    self.fc2 = nn.Linear(self.fc_dim, 64)
    self.bn2 = nn.BatchNorm1d(64)
    self.prelu2 = nn.PReLU(64)
    self.fc3 = nn.Linear(64, self.output_size)
    self.bn3 = nn.BatchNorm1d(self.output_size)
    self.prelu3 = nn.PReLU(self.output_size)
    self.init_weights()
def __init__(self, vocab, n_layers, hidden_size, batch_size, embedding_dim=10):
    super(Gru, self).__init__()
    self.vocab_size = len(vocab)
    self.vocab = vocab
    self.n_layers = n_layers
    self.hidden_size = hidden_size
    self.batch_size = batch_size
    self.dropout = nn.Dropout(p=0.5)
    self.embedding_dim = embedding_dim
    # Embedding + feed-forward stack feeding the recurrent layer
    padding_idx = self.vocab['<pad>']
    self.word_embedding = nn.Embedding(num_embeddings=self.vocab_size,
                                       embedding_dim=self.embedding_dim,
                                       padding_idx=padding_idx)
    self.linear1 = nn.Linear(in_features=self.embedding_dim, out_features=30)
    self.linear2 = nn.Linear(in_features=30, out_features=40)
    self.gru = nn.GRU(input_size=40,
                      hidden_size=self.hidden_size,
                      num_layers=self.n_layers,
                      bidirectional=False,
                      dropout=0.5)  # inter-layer dropout; no effect when n_layers == 1
    # Output layer
    self.l_out = nn.Linear(in_features=self.hidden_size,
                           out_features=self.vocab_size,
                           bias=False)
def __init__(self, input_size, embedding_size, hidden_size,
             dropout=0.5, n_layer=1, pretrained=False):
    super(Utterance_encoder_ggcn, self).__init__()
    self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.input_size = input_size
    self.n_layer = n_layer
    self.embed = nn.Embedding(input_size, self.embedding_size)
    self.gru = nn.GRU(self.embedding_size, self.hidden_size,
                      num_layers=n_layer, dropout=dropout,
                      bidirectional=True)  # dropout only applies between layers (n_layer > 1)
    # self.hidden_proj = nn.Linear(n_layer * 2 * self.hidden_size, hidden_size)
    # self.bn = nn.BatchNorm1d(num_features=hidden_size)
    self.init_weight()
def __init__(self, input_features, rnn_features, num_layers=1, drop=0.0,
             rnn_type='LSTM', rnn_bidirectional=False):
    super(MaxoutRNN, self).__init__()
    self.bidirectional = rnn_bidirectional
    if rnn_type == 'LSTM':
        self.rnn = nn.LSTM(input_size=input_features, hidden_size=rnn_features,
                           dropout=drop, num_layers=num_layers,
                           batch_first=True, bidirectional=rnn_bidirectional)
    elif rnn_type == 'GRU':
        self.rnn = nn.GRU(input_size=input_features, hidden_size=rnn_features,
                          dropout=drop, num_layers=num_layers,
                          batch_first=True, bidirectional=rnn_bidirectional)
    else:
        raise ValueError('Unsupported RNN type')
    self.features = rnn_features
    self._init_rnn(self.rnn.weight_ih_l0)
    self._init_rnn(self.rnn.weight_hh_l0)
    self.rnn.bias_ih_l0.data.zero_()
    self.rnn.bias_hh_l0.data.zero_()
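# `_init_rnn` is not shown in the source; a plausible implementation (an assumption,
# not the author's code) initializes each gate's block of the stacked weight matrix
# separately, since nn.LSTM / nn.GRU stack their gate weights along dim 0
# (4 gates for LSTM, 3 for GRU):
def _init_rnn(self, weight):
    for w in weight.chunk(weight.size(0) // self.features, 0):
        nn.init.xavier_uniform_(w)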
def __init__(self, input_size, hidden_size, dropout=0.25):
    super(CNNRNNBaseline, self).__init__()
    # NOTE: the GRU input size (512) is hard-coded rather than derived from input_size.
    self.rnn = nn.GRU(512, hidden_size, num_layers=3,
                      batch_first=True, dropout=dropout)
    self.linear1 = nn.Linear(hidden_size, input_size)
    self.linear2 = nn.Linear(hidden_size, input_size)
    self.drop1 = nn.Dropout(dropout)
    self.drop2 = nn.Dropout(dropout)
    # self.linear_mask1 = nn.Linear(input_size, input_size)
    # self.linear_mask2 = nn.Linear(input_size, input_size)
    # Convolutional front end: conv + pool downsample, transposed conv upsamples back.
    self.pool1 = nn.MaxPool2d(kernel_size=(2, 2))
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=256, kernel_size=(3, 3))
    self.conv2 = nn.ConvTranspose2d(in_channels=256, out_channels=1, kernel_size=(3, 3))
def __init__(self, embedding_size, hidden_size, tgt_vocab_size,
             embedding=None, num_layers=4, dropout=0.5):
    super(GruDecoder, self).__init__()
    self.hidden_size = hidden_size
    self.embedding_size = embedding_size
    self.tgt_vocab_size = tgt_vocab_size
    # Share the caller's embedding if given, otherwise build our own.
    if embedding is not None:
        self.embedding = embedding
    else:
        self.embedding = nn.Embedding(tgt_vocab_size, embedding_size, padding_idx=PAD)
    self.embedding_dropout = nn.Dropout(dropout)
    self.attn = BilinearAttention(query_size=hidden_size, key_size=2 * hidden_size,
                                  hidden_size=hidden_size, dropout=dropout,
                                  coverage=False)
    # GRU input is [attention context (2 * hidden); previous-token embedding].
    self.gru = nn.GRU(2 * hidden_size + embedding_size, hidden_size,
                      bidirectional=False, num_layers=num_layers, dropout=dropout)
    self.readout = nn.Linear(embedding_size + hidden_size + 2 * hidden_size,
                             hidden_size)
def __init__(self, input_dim: int, hidden_dim: int, num_layers: int,
             dropout: float, batch_size: int, use_gpu: bool, no_dropout=False):
    super().__init__()
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.dropout = dropout
    self.batch_size = batch_size
    self.use_gpu = use_gpu
    self.model = nn.GRU(input_size=self.input_dim,
                        hidden_size=self.hidden_dim,
                        num_layers=self.num_layers,
                        batch_first=True,
                        bidirectional=False,
                        dropout=self.dropout if not no_dropout else 0)
    if self.use_gpu:
        self.model.cuda()
    self.init_hidden()
def __init__(self, loss_func=None, teacher_forcing_ratio=0.5, num_feats=3, dropout=0.2):
    """
    Args:
        loss_func -- pytorch loss function. Note that only loss functions where
            lower is better are currently supported. (default: L1Loss, aka MAE)
        teacher_forcing_ratio -- float between 0 and 1. Fraction of the time the
            model is trained on target data rather than its own recursive output.
        num_feats -- int, number of hand-made features (e.g. age, day of week,
            week of year) (default: 3)
    """
    super().__init__()
    self.hidden_units = 128
    self.n_layers = 2
    self.num_feats = num_feats
    self.rnn = nn.GRU(input_size=1 + num_feats,
                      hidden_size=self.hidden_units,
                      num_layers=self.n_layers,
                      batch_first=True,
                      dropout=dropout).cuda()
    self.out = nn.Linear(self.hidden_units, 1).cuda()
    self.loss_func = nn.L1Loss() if loss_func is None else loss_func
    self.teacher_forcing_ratio = teacher_forcing_ratio
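# A hypothetical decoding loop showing how teacher_forcing_ratio is typically used
# (the real loop is not shown above): with probability teacher_forcing_ratio, the
# ground-truth target is fed back as the next input instead of the model's own
# prediction.
import random
import torch

def decode(self, x, targets, feats):
    # x: (batch, 1, 1) first value; feats: (batch, T, num_feats); targets: (batch, T, 1)
    outputs, hidden = [], None
    for t in range(targets.size(1)):
        inp = torch.cat([x, feats[:, t:t + 1]], dim=-1)  # (batch, 1, 1 + num_feats)
        out, hidden = self.rnn(inp, hidden)
        pred = self.out(out)                             # (batch, 1, 1)
        outputs.append(pred)
        teacher = random.random() < self.teacher_forcing_ratio
        x = targets[:, t:t + 1] if teacher else pred
    return torch.cat(outputs, dim=1)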
def __init__(self, numerical_input_dim, cat_vocab_sizes, cat_embedding_dim,
             embedding_dim):
    # Only one categorical feature is supported for now.
    super(Encoder, self).__init__()
    self.numerical_input_dim = numerical_input_dim
    self.embedding_dim = embedding_dim
    self.cat_vocab_sizes = cat_vocab_sizes
    # TODO: experiment with the output dimension
    self.cat_embedding_dim = cat_embedding_dim
    self.num_event_encoder = nn.BatchNorm1d(numerical_input_dim)
    self.cat_encoder = nn.Embedding(cat_vocab_sizes[0], self.cat_embedding_dim)
    self.sequence_encoder = nn.GRU(numerical_input_dim + self.cat_embedding_dim,
                                   embedding_dim, batch_first=False)
def __init__(self, in_dim, K=16, projections=[128, 128]):
    super(CBHG, self).__init__()
    self.in_dim = in_dim
    self.relu = nn.ReLU()
    # Bank of K 1-D convolutions with kernel sizes 1..K.
    self.conv1d_banks = nn.ModuleList([
        BatchNormConv1d(in_dim, in_dim, kernel_size=k, stride=1,
                        padding=k // 2, activation=self.relu)
        for k in range(1, K + 1)
    ])
    self.max_pool1d = nn.MaxPool1d(kernel_size=2, stride=1, padding=1)
    in_sizes = [K * in_dim] + projections[:-1]
    activations = [self.relu] * (len(projections) - 1) + [None]
    self.conv1d_projections = nn.ModuleList([
        BatchNormConv1d(in_size, out_size, kernel_size=3, stride=1,
                        padding=1, activation=ac)
        for (in_size, out_size, ac) in zip(in_sizes, projections, activations)
    ])
    self.pre_highway = nn.Linear(projections[-1], in_dim, bias=False)
    self.highways = nn.ModuleList([Highway(in_dim, in_dim) for _ in range(4)])
    self.gru = nn.GRU(in_dim, in_dim, 1, batch_first=True, bidirectional=True)
def __init__(self, args, vocab_size, pretrain_embedding=None):
    super(Network, self).__init__()
    self.args = args
    self.logger = logging.getLogger("ntcir14")
    self.embed_size = args.embed_size      # 300 by default
    self.hidden_size = args.hidden_size    # 150 by default
    self.vocab_size = vocab_size
    self.dropout_rate = args.dropout_rate
    self.encode_gru_num_layer = 1
    self.embedding = nn.Embedding(self.vocab_size, self.embed_size)
    if pretrain_embedding is not None:
        self.embedding.weight = nn.Parameter(torch.from_numpy(pretrain_embedding))
        self.logger.info('model loaded pretrained embedding successfully.')
    self.question_encode_gru = nn.GRU(self.hidden_size, self.hidden_size,
                                      bidirectional=True, batch_first=True,
                                      dropout=self.dropout_rate,
                                      num_layers=self.encode_gru_num_layer)
    self.passage_encode_gru = nn.GRU(self.hidden_size, self.hidden_size,
                                     bidirectional=True, batch_first=True,
                                     dropout=self.dropout_rate,
                                     num_layers=self.encode_gru_num_layer)
    self.early_match_gru = nn.GRU(self.hidden_size, self.hidden_size,
                                  bidirectional=True, batch_first=True,
                                  dropout=self.dropout_rate)
    self.late_match_gru = nn.GRU(self.hidden_size * 4, self.hidden_size,
                                 bidirectional=True, batch_first=True,
                                 dropout=self.dropout_rate)
    self.early_late_gru = nn.GRU(self.hidden_size * 2, self.hidden_size,
                                 bidirectional=True, batch_first=True,
                                 dropout=self.dropout_rate)
    self.question_passage_gru = nn.GRU(self.hidden_size * 4, self.hidden_size,
                                       bidirectional=True, batch_first=True,
                                       dropout=self.dropout_rate)
    # self.dot_attention_question = DotAttention(self.hidden_size * 2, self.hidden_size, self.dropout_rate)
    self.dot_attention_late_passage = DotAttention(self.embed_size, self.hidden_size, self.dropout_rate)
    self.dot_attention_late_question = DotAttention(self.embed_size, self.hidden_size, self.dropout_rate)
    self.dot_attention_late_match = DotAttention(self.hidden_size * 2, self.hidden_size, self.dropout_rate)
    self.dot_attention_early_match = DotAttention(self.embed_size, self.hidden_size, self.dropout_rate)
    self.dot_attention_early_passage = DotAttention(self.hidden_size * 2, self.hidden_size, self.dropout_rate)
    self.summ_question = Summ(self.embed_size, self.hidden_size, self.dropout_rate)
    self.summ_passage = Summ(self.hidden_size * 2, self.hidden_size, self.dropout_rate)
    self.linear_score = nn.Linear(self.hidden_size * 2, 1)
def __init__(self, obs_space, action_space, image_dim=128, memory_dim=128,
             instr_dim=128, use_instr=False, lang_model="gru", use_memory=False,
             arch="cnn1", aux_info=None, vocabulary=None, learner=False,
             corrector=False, corr_length=3, corr_own_vocab=False,
             corr_vocab_size=0, pretrained_corrector=False, use_critic=False,
             dropout=0.5, corrector_frozen=False, random_corrector=False,
             var_corr_len=False, weigh_corrections=False,
             return_internal_repr=False):
    super().__init__()
    self.vocab = vocabulary  # Vocabulary object, from obss_preprocessor

    # Decide which components are enabled
    self.use_instr = use_instr
    self.use_memory = use_memory
    self.arch = arch
    self.lang_model = lang_model
    self.aux_info = aux_info
    self.image_dim = image_dim
    self.memory_dim = memory_dim
    self.instr_dim = instr_dim
    self.use_learner = learner
    self.use_corrector = corrector
    self.corr_own_vocab = corr_own_vocab
    self.pretrained_corrector = pretrained_corrector
    self.obs_space = obs_space
    self.policy_input_size = 0
    self.use_critic = use_critic

    if self.use_learner:
        if arch == "cnn1":
            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(2, 2)),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(2, 2)),
                nn.ReLU(),
                nn.Conv2d(in_channels=32, out_channels=image_dim, kernel_size=(2, 2)),
                nn.ReLU())
        elif arch == "cnn2":
            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=16, kernel_size=(3, 3)),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2, ceil_mode=True),
                nn.Conv2d(in_channels=16, out_channels=image_dim, kernel_size=(3, 3)),
                nn.ReLU())
        elif arch == "filmcnn":
            if not self.use_instr:
                raise ValueError(
                    "FiLM architecture can only be used when instructions are enabled")
            self.image_conv_1 = nn.Sequential(
                nn.Conv2d(in_channels=64, out_channels=32, kernel_size=(2, 2)),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2))
            self.image_conv_2 = nn.Sequential(
                nn.Conv2d(in_channels=64, out_channels=32, kernel_size=(2, 2)),
                nn.ReLU(),
                nn.Conv2d(in_channels=32, out_channels=128, kernel_size=(2, 2)),
                nn.ReLU())
        elif arch.startswith("expert_filmcnn"):
            if not self.use_instr:
                raise ValueError(
                    "FiLM architecture can only be used when instructions are enabled")
            self.image_conv = nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=128, kernel_size=(2, 2), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2),
                nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=(2, 2), stride=2))
            self.film_pool = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        elif arch == 'embcnn1':
            self.image_conv = nn.Sequential(
                ImageBOWEmbedding(obs_space["image"], embedding_dim=16,
                                  padding_idx=0, reduce_fn=torch.mean),
                nn.ReLU(),
                nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3)),
                nn.ReLU(),
                nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3)),
                nn.ReLU(),
                nn.Conv2d(in_channels=32, out_channels=image_dim, kernel_size=(3, 3)),
                nn.ReLU())
        else:
            raise ValueError("Incorrect architecture name: {}".format(arch))

    # Define instruction embedding
    if self.use_instr:
        if self.lang_model in ['gru', 'conv', 'bigru', 'attgru']:
            self.word_embedding = nn.Embedding(obs_space["instr"], self.instr_dim)
            if self.lang_model in ['gru', 'bigru', 'attgru']:
                gru_dim = self.instr_dim
                if self.lang_model in ['bigru', 'attgru']:
                    gru_dim //= 2  # each direction contributes half of instr_dim
                self.instr_rnn = nn.GRU(
                    self.instr_dim, gru_dim, batch_first=True,
                    bidirectional=(self.lang_model in ['bigru', 'attgru']))
                self.final_instr_dim = self.instr_dim
            else:
                kernel_dim = 64
                kernel_sizes = [3, 4]
                self.instr_convs = nn.ModuleList([
                    nn.Conv2d(1, kernel_dim, (K, self.instr_dim))
                    for K in kernel_sizes
                ])
                self.final_instr_dim = kernel_dim * len(kernel_sizes)
        elif self.lang_model == 'bow':
            hidden_units = [obs_space["instr"], self.instr_dim, self.instr_dim]
            layers = []
            for n_in, n_out in zip(hidden_units, hidden_units[1:]):
                layers.append(nn.Linear(n_in, n_out))
                layers.append(nn.ReLU())
            self.instr_bow = nn.Sequential(*layers)
            self.final_instr_dim = instr_dim

        if self.lang_model == 'attgru':
            self.memory2key = nn.Linear(self.memory_size, self.final_instr_dim)

    # Define memory
    if self.use_memory:
        self.memory_rnn = nn.LSTMCell(self.image_dim, self.memory_dim)
        self.policy_input_size += self.memory_dim

    # Resize image embedding
    self.embedding_size = self.semi_memory_size
    if self.use_instr and arch != "filmcnn" and not arch.startswith("expert_filmcnn"):
        self.embedding_size += self.final_instr_dim

    if arch == "filmcnn":
        self.controller_1 = AgentControllerFiLM(
            in_features=self.final_instr_dim, out_features=64,
            in_channels=3, imm_channels=16)
        self.controller_2 = AgentControllerFiLM(
            in_features=self.final_instr_dim, out_features=64,
            in_channels=32, imm_channels=32)
    if arch.startswith("expert_filmcnn"):
        if arch == "expert_filmcnn":
            num_module = 2
        else:
            num_module = int(arch[(arch.rfind('_') + 1):])
        self.controllers = []
        for ni in range(num_module):
            if ni < num_module - 1:
                mod = ExpertControllerFiLM(
                    in_features=self.final_instr_dim, out_features=128,
                    in_channels=128, imm_channels=128)
            else:
                mod = ExpertControllerFiLM(
                    in_features=self.final_instr_dim, out_features=self.image_dim,
                    in_channels=128, imm_channels=128)
            self.controllers.append(mod)
            self.add_module('FiLM_Controler_' + str(ni), mod)

    # Initialize parameters correctly. Do this here, because otherwise a
    # pretrained corrector's linear weights would be overwritten.
    self.apply(initialize_parameters)

    if self.use_corrector:
        self.corr_vocab_size = corr_vocab_size
        self.corr_length = corr_length
        self.var_corr_len = var_corr_len
        if not self.pretrained_corrector:
            if self.corr_own_vocab:
                num_corr_embeddings = corr_vocab_size
                vocabulary_corr = None
            else:
                num_corr_embeddings = self.obs_space['instr']
                vocabulary_corr = self.vocab
            self.corrector = Corrector(image_dim=self.image_dim,
                                       memory_dim=self.memory_dim,
                                       instr_dim=self.instr_dim,
                                       num_embeddings=num_corr_embeddings,
                                       num_rnn_layers=1,
                                       vocabulary=vocabulary_corr,
                                       corr_length=self.corr_length,
                                       obs_space=self.obs_space,
                                       var_len=self.var_corr_len)
        else:
            self.load_pretrained_corrector(self.pretrained_corrector,
                                           corrector_frozen=corrector_frozen)
            corr_vocab_size = self.corrector.word_embedding_corrector.num_embeddings

        if self.corr_own_vocab:
            if self.var_corr_len:
                num_corr_embeddings = corr_vocab_size + 1
            else:
                num_corr_embeddings = corr_vocab_size
            self.word_embedding_corrections = nn.Embedding(num_corr_embeddings,
                                                           self.instr_dim)
            corr_rnn_hidden = 512  # currently constant
            self.corr_rnn = nn.GRU(input_size=self.instr_dim,
                                   hidden_size=corr_rnn_hidden,
                                   batch_first=True)
        else:
            if self.use_learner:
                self.word_embedding_corrections = self.word_embedding
                self.corr_rnn = self.instr_rnn
            else:
                self.word_embedding_corrections = self.corrector.instr_embedding
                self.corr_rnn = self.corrector.instr_rnn
        self.policy_input_size += self.corr_rnn.hidden_size
        self.corr_dropout = nn.Dropout(p=dropout)

        if random_corrector:
            self.corrector.randomize()

    self.weigh_corrections = weigh_corrections
    if self.weigh_corrections:
        # Learned scalar mapping from the previous entropy to a correction weight.
        self.entropy_weight = nn.Linear(1, 1)

    if self.use_critic:
        # Define critic's model (used in PPO, not in IL)
        self.critic = nn.Sequential(
            nn.Linear(self.policy_input_size, 64),
            nn.Tanh(),
            nn.Linear(64, 1))

    # Define actor's model
    self.actor = nn.Sequential(
        nn.Linear(self.policy_input_size, 64),
        nn.Tanh(),
        nn.Linear(64, action_space.n))

    # Define head for extra info
    if self.aux_info:
        self.extra_heads = None
        self.add_heads()

    self.return_internal_repr = return_internal_repr
def __init__(self, input_dim, embed_dim, dropout):
    super(GRUBlock, self).__init__()
    self.gru = nn.GRU(input_size=input_dim, hidden_size=embed_dim)
    self.layer_norm = nn.LayerNorm(embed_dim)
    self.dropout = nn.Dropout(dropout)
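# A plausible companion forward for a block like this (an assumption; only the
# constructor appears in the source). The GRU was built without batch_first, so
# inputs use nn.GRU's default (seq_len, batch, input_dim) layout.
def forward(self, x, hidden=None):
    out, hidden = self.gru(x, hidden)
    out = self.layer_norm(self.dropout(out))
    return out, hidden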
def __init__(self, vocab_size, hidden_size):
    super(QuestionModule, self).__init__()
    self.vocab_size = vocab_size    # size of the vocabulary used by the word embedding
    self.hidden_size = hidden_size  # size of the GRU hidden state
    self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
def __init__(self, hidden_size, K=16, projection_size=128, num_gru_layers=2,
             max_pool_kernel_size=2, is_post=False):
    """
    :param hidden_size: dimension of hidden unit
    :param K: number of convolution banks
    :param projection_size: dimension of projection unit
    :param num_gru_layers: number of GRU layers
    :param max_pool_kernel_size: max-pooling kernel size
    :param is_post: whether this is the post-processing net
    """
    super(CBHG, self).__init__()
    self.hidden_size = hidden_size
    self.num_gru_layers = num_gru_layers
    self.projection_size = projection_size
    # Convolution bank: kernel sizes 1..K, each padded with kernel_size // 2.
    self.convbank_list = nn.ModuleList()
    self.convbank_list.append(
        nn.Conv1d(in_channels=projection_size, out_channels=hidden_size,
                  kernel_size=1, padding=0))
    for i in range(2, K + 1):
        self.convbank_list.append(
            nn.Conv1d(in_channels=hidden_size, out_channels=hidden_size,
                      kernel_size=i, padding=i // 2))
    self.batchnorm_list = nn.ModuleList(
        [nn.BatchNorm1d(hidden_size) for _ in range(K)])
    convbank_outdim = hidden_size * K
    if is_post:
        self.conv_projection_1 = nn.Conv1d(in_channels=convbank_outdim,
                                           out_channels=hidden_size * 2,
                                           kernel_size=3, padding=1)
        self.conv_projection_2 = nn.Conv1d(in_channels=hidden_size * 2,
                                           out_channels=projection_size,
                                           kernel_size=3, padding=1)
        self.batchnorm_proj_1 = nn.BatchNorm1d(hidden_size * 2)
    else:
        self.conv_projection_1 = nn.Conv1d(in_channels=convbank_outdim,
                                           out_channels=hidden_size,
                                           kernel_size=3, padding=1)
        self.conv_projection_2 = nn.Conv1d(in_channels=hidden_size,
                                           out_channels=projection_size,
                                           kernel_size=3, padding=1)
        self.batchnorm_proj_1 = nn.BatchNorm1d(hidden_size)
    self.batchnorm_proj_2 = nn.BatchNorm1d(projection_size)
    self.max_pool = nn.MaxPool1d(max_pool_kernel_size, stride=1, padding=1)
    self.highway = Highwaynet(self.projection_size)
    self.gru = nn.GRU(self.projection_size, self.hidden_size,
                      num_layers=self.num_gru_layers,  # was hard-coded to 2, ignoring the argument
                      batch_first=True, bidirectional=True)
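# A subtlety in the conv bank above: for an even kernel size i with padding i // 2,
# a stride-1 Conv1d returns length L + 1 rather than L (L_out = L + 2 * (i // 2) - i + 1),
# so implementations typically trim each output back to the input length before
# concatenating on the channel axis. A sketch under that assumption, not the
# repository's actual forward:
def convbank_forward(self, x):            # x: (batch, projection_size, seq_len)
    seq_len = x.size(-1)
    outputs = []
    for conv, bn in zip(self.convbank_list, self.batchnorm_list):
        outputs.append(torch.relu(bn(conv(x)[:, :, :seq_len])))
    return torch.cat(outputs, dim=1)      # (batch, hidden_size * K, seq_len)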
def __init__(self, config, num_rnn=1):
    super().__init__()
    self.hidden_size = config.hidden_size
    self.num_layers = num_rnn
    self.biGRU = nn.GRU(config.hidden_size, config.hidden_size,
                        self.num_layers, batch_first=True, bidirectional=True)
def init_mesh_module(self):
    # torch.autograd.Variable is deprecated; plain tensors carry autograd state.
    self.mesh_h0 = torch.randn(1, 1, self.embedding_dim)
    self.mesh_gru = nn.GRU(self.embedding_dim, self.embedding_dim)
    if use_cuda:
        self.mesh_h0 = self.mesh_h0.cuda()
        self.mesh_gru = self.mesh_gru.cuda()
def __init__(
        self,
        map_size,
        input_channel=50,
        recurrent=False,
        hidden_size=256,
):
    """
    Arguments:
        map_size {tuple} -- (map_height, map_width)

    Keyword Arguments:
        input_channel {int} -- number of input feature planes (default: {50})
        recurrent {bool} -- whether to add a GRU on top of the actor MLP
        hidden_size {int} -- width of the MLP / GRU layers (default: {256})
    """
    super(ActorCritic, self).__init__()
    self.recurrent = recurrent
    map_height, map_width = map_size
    # Per-unit features: 23 base features plus one-hot x / y coordinates.
    unit_feature_size = 23 + map_height + map_width
    self.shared_out_size = 256
    self.conv_out_size = 16
    self.shared_to_actor_size = 128
    self.hsz = hidden_size
    self.conv_flatten_size = self.conv_out_size * (map_height * map_width)

    self.activated_agents = [
        UNIT_TYPE_NAME_BASE,
        UNIT_TYPE_NAME_BARRACKS,
        UNIT_TYPE_NAME_WORKER,
        UNIT_TYPE_NAME_HEAVY,
        UNIT_TYPE_NAME_LIGHT,
        # UNIT_TYPE_NAME_RANGED,
    ]

    # Orthogonal init with ReLU gain for the convolutional trunks.
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0),
                           nn.init.calculate_gain('relu'))
    self.critic_conv = NNBase(map_size, input_channel, hidden_size, self.conv_out_size)
    self.actor_conv = NNBase(map_size, input_channel, hidden_size, self.conv_out_size)

    self.p2v = Pic2Vector(channel_size=self.conv_out_size)
    self.self_attn = MultiHeadAttention(n_head=2, d_model=16, d_k=16, d_v=16,
                                        dropout=0)

    # Orthogonal init without a gain for the linear heads.
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0))

    self.critic_mlps = nn.Sequential(
        init_(nn.Linear(self.conv_flatten_size, hidden_size)), nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
    )
    self.critic_out = init_(nn.Linear(hidden_size, 1))

    self.actor_mlps = nn.Sequential(
        init_(nn.Linear(self.conv_flatten_size + unit_feature_size, hidden_size)),
        nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
        init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
    )

    if recurrent:
        self.gru = nn.GRU(hidden_size, hidden_size)
        for name, param in self.gru.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0)
            elif 'weight' in name:
                nn.init.orthogonal_(param)

    # One action head per unit type: flattened map features plus per-unit
    # features mapped to a distribution over that unit type's actions.
    def make_head(action_enum):
        return nn.Sequential(
            init_(nn.Linear(self.conv_flatten_size + unit_feature_size, hidden_size)),
            nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
            init_(nn.Linear(hidden_size, hidden_size)), nn.ReLU(),
            init_(nn.Linear(hidden_size, len(action_enum.__members__))),
            nn.Softmax(dim=1))

    self.actor_out = nn.ModuleDict({
        UNIT_TYPE_NAME_WORKER: make_head(WorkerAction),
        UNIT_TYPE_NAME_BASE: make_head(BaseAction),
        UNIT_TYPE_NAME_LIGHT: make_head(LightAction),
        UNIT_TYPE_NAME_BARRACKS: make_head(BarracksAction),
        UNIT_TYPE_NAME_HEAVY: make_head(HeavyAction),
    })
def __init__(self, input_size, hidden_size, num_layers, opt, dictionary):
    super().__init__()
    self.dict = dictionary
    self.h2o = nn.Linear(hidden_size, len(dictionary))
    self.dropout = nn.Dropout(opt['dropout'])
    self.rnn = nn.GRU(input_size, hidden_size, num_layers)
def __init__(self, feature_num):
    super(Sequence, self).__init__()
    self.lstm = nn.LSTM(input_size=feature_num, hidden_size=256, num_layers=3)
    self.gru = nn.GRU(input_size=feature_num, hidden_size=256, num_layers=3)
    self.rnn = nn.RNN(input_size=feature_num, hidden_size=256, num_layers=3)
    self.linear = nn.Linear(256, 1)
def __init__(self, input_size, hidden_size, output_size, layer_type='GRU',
             n_layers=1, is_bidirectional=False, has_stack=False,
             stack_width=None, stack_depth=None, ignore_idx=0,
             use_cuda=None, optimizer_instance=torch.optim.Adadelta, lr=0.01):
    """
    Constructor for the StackAugmentedRNN object.

    Parameters
    ----------
    input_size: int
        number of characters in the alphabet
    hidden_size: int
        size of the RNN layer(s)
    output_size: int
        number of characters in the alphabet (same as input_size)
    layer_type: str (default 'GRU')
        type of the RNN layer to be used; either 'LSTM' or 'GRU'
    n_layers: int (default 1)
        number of RNN layers
    is_bidirectional: bool (default False)
        whether the RNN is bidirectional
    has_stack: bool (default False)
        whether the augmented memory stack is used
    stack_width: int (default None)
        if has_stack is True, the width of the augmented stack memory
    stack_depth: int (default None)
        if has_stack is True, the depth of the augmented stack memory.
        Hint: the stack depth never needs to be larger than the length of
        the longest sequence you plan to generate
    use_cuda: bool (default None)
        whether the GPU is used for computation. If left unspecified, the
        GPU will be used if available
    optimizer_instance: torch.optim object (default torch.optim.Adadelta)
        optimizer to be used for training
    lr: float (default 0.01)
        learning rate for the optimizer
    """
    super(StackAugmentedRNN, self).__init__()

    if layer_type not in ['GRU', 'LSTM']:
        raise InvalidArgumentError('Layer type must be GRU or LSTM')
    self.layer_type = layer_type
    self.is_bidirectional = is_bidirectional
    self.num_dir = 2 if self.is_bidirectional else 1
    # Only LSTMs carry a separate cell state.
    self.has_cell = (layer_type == 'LSTM')
    self.has_stack = has_stack
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    if self.has_stack:
        self.stack_width = stack_width
        self.stack_depth = stack_depth

    self.use_cuda = use_cuda
    if self.use_cuda is None:
        self.use_cuda = torch.cuda.is_available()

    self.n_layers = n_layers

    if self.has_stack:
        self.stack_controls_layer = nn.Linear(
            in_features=self.hidden_size * self.num_dir, out_features=3)
        self.stack_input_layer = nn.Linear(
            in_features=self.hidden_size * self.num_dir,
            out_features=self.stack_width)

    self.encoder = nn.Embedding(input_size, hidden_size)
    # The stack read is concatenated to the embedded input when a stack is used.
    if self.has_stack:
        rnn_input_size = hidden_size + stack_width
    else:
        rnn_input_size = hidden_size
    if self.layer_type == 'LSTM':
        self.rnn = nn.LSTM(rnn_input_size, hidden_size, n_layers,
                           bidirectional=self.is_bidirectional)
    elif self.layer_type == 'GRU':
        self.rnn = nn.GRU(rnn_input_size, hidden_size, n_layers,
                          bidirectional=self.is_bidirectional)
    self.decoder = nn.Linear(hidden_size * self.num_dir, output_size)
    self.log_softmax = torch.nn.LogSoftmax(dim=1)

    self.ignore_idx = ignore_idx
    self.criterion = nn.CrossEntropyLoss(ignore_index=self.ignore_idx)
    self.lr = lr
    self.optimizer_instance = optimizer_instance
    self.optimizer = self.optimizer_instance(self.parameters(), lr=lr,
                                             weight_decay=0.00001)
    if self.use_cuda:
        self.cuda()
def __init__(self, input_size, hidden_size):
    super(EncoderRNN, self).__init__()
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(input_size, hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size)
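# This matches the classic PyTorch seq2seq tutorial encoder, which embeds one
# token at a time and reshapes it to nn.GRU's (seq_len=1, batch=1, hidden) layout.
# The companion forward (assumed, not shown in the source) usually looks like:
def forward(self, input, hidden):
    embedded = self.embedding(input).view(1, 1, -1)
    output, hidden = self.gru(embedded, hidden)
    return output, hidden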
def __init__(self, args, activation):
    super().__init__(args, activation)
    self.rnn = nn.GRU(input_size=args.layer_2_feats,
                      hidden_size=args.lstm_l2_feats,
                      num_layers=args.lstm_l2_layers)
def __init__(self):
    super(VariableLengthSequences, self).__init__()
    self.embedding = nn.Embedding(50, 100)
    self.rnn = nn.GRU(100, 256)
    self.fc = nn.Linear(256, 2)
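# Given the class name, the interesting part is presumably how variable-length
# batches reach the GRU: the standard recipe packs padded sequences so padding
# steps are skipped. A standalone sketch under that assumption (batch_first=True
# here for readability; the constructor's GRU uses the default layout):
import torch
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

ids = torch.randint(1, 50, (4, 12))           # (batch, max_len), vocab of 50
lengths = torch.tensor([12, 9, 7, 3])         # true length of each sequence

emb = nn.Embedding(50, 100)(ids)              # (batch, max_len, 100)
packed = pack_padded_sequence(emb, lengths, batch_first=True, enforce_sorted=False)
out, h_n = nn.GRU(100, 256, batch_first=True)(packed)
out, _ = pad_packed_sequence(out, batch_first=True)   # back to (batch, max_len, 256)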
def __init__(self, hidden_size):
    super(QuestionModule, self).__init__()
    self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
def __init__(self, config):
    super(AttentionDecoder, self).__init__()
    self.decoder_type = config['decoder_type']
    self.word_emb_dim = config['word_emb_dim']
    self.dec_rnn_dim = config['dec_rnn_dim']
    self.enc_rnn_dim = config['enc_rnn_dim']
    self.dpout_dec = config['dpout_dec']
    self.n_vocab = config['n_vocab']
    self.word_index = config['word_index']
    self.word_vec = config['word_vec']
    self.max_T_decoder = config['max_T_decoder']
    self.max_T_encoder = config['max_T_encoder']
    self.n_layers_dec = config['n_layers_dec']
    # for the decoder initial state
    self.use_init = config['use_init']
    # attention type: dot product ('dot') or linear layer ('lin')
    self.att_type = config['att_type']
    self.att_hid_dim = config['att_hid_dim']

    self.sent_dim = 2 * config['enc_rnn_dim']
    if config['encoder_type'] in ["ConvNetEncoder", "InnerAttentionMILAEncoder"]:
        self.sent_dim = 4 * self.sent_dim
    if config['encoder_type'] == "LSTMEncoder":
        # Integer division: this value is used as a layer size below.
        self.sent_dim = self.sent_dim // 2
    assert self.sent_dim == 4096, str(self.sent_dim)

    # TODO: remove this once linear attention is implemented
    assert self.att_type == 'dot'

    self.context_proj = nn.Linear(4 * self.sent_dim, self.dec_rnn_dim)
    self.att_ht_proj1 = nn.Sequential(
        nn.Linear(self.sent_dim, self.att_hid_dim), nn.Tanh())
    self.att_context_proj1 = nn.Sequential(
        nn.Linear(self.dec_rnn_dim, self.att_hid_dim), nn.Tanh())
    self.att_ht_before_weighting_proj1 = nn.Sequential(
        nn.Linear(self.sent_dim, self.att_hid_dim), nn.Tanh())
    self.att_ht_proj2 = nn.Sequential(
        nn.Linear(self.sent_dim, self.att_hid_dim), nn.Tanh())
    self.att_context_proj2 = nn.Sequential(
        nn.Linear(self.dec_rnn_dim, self.att_hid_dim), nn.Tanh())
    self.att_ht_before_weighting_proj2 = nn.Sequential(
        nn.Linear(self.sent_dim, self.att_hid_dim), nn.Tanh())
    self.proj_inp_dec = nn.Linear(2 * self.att_hid_dim + self.word_emb_dim,
                                  self.dec_rnn_dim)
    if self.decoder_type == 'gru':
        self.decoder_rnn = nn.GRU(self.dec_rnn_dim, self.dec_rnn_dim,
                                  self.n_layers_dec, bidirectional=False,
                                  dropout=self.dpout_dec)
    else:  # 'lstm'
        self.decoder_rnn = nn.LSTM(self.dec_rnn_dim, self.dec_rnn_dim,
                                   self.n_layers_dec, bidirectional=False,
                                   dropout=self.dpout_dec)
    # attention softmax over the time axis
    self.softmax_att = nn.Softmax(2)
    # vocabulary projection
    self.vocab_layer = nn.Linear(self.dec_rnn_dim, self.n_vocab)
def __init__(self, config, x_embed):
    super().__init__()
    self.num_layers_rnn = 1
    self.x_embed = x_embed.x_embed
    self.wdrop = config.wdrop
    self.dropoute = config.dropoute
    self.encoder_out_size = config.rnn_cell_size
    self.rnn_cell_type = config.rnn_cell_type
    self.training = True

    import warnings
    warnings.filterwarnings("ignore")

    self.model = None
    if self.rnn_cell_type.lower() == "lstm":
        self.rnn = nn.LSTM(input_size=x_embed.embedding_dim,
                           hidden_size=config.rnn_cell_size,
                           num_layers=self.num_layers_rnn,
                           bidirectional=False,
                           dropout=config.dropout,
                           batch_first=True,
                           bias=True)
        self.model = WeightDrop(self.rnn, ['weight_hh_l0'], dropout=self.wdrop)
    elif self.rnn_cell_type.lower() == "gru":
        self.rnn = nn.GRU(input_size=x_embed.embedding_dim,
                          hidden_size=config.rnn_cell_size,
                          num_layers=self.num_layers_rnn,
                          bidirectional=False,
                          dropout=config.dropout,
                          batch_first=True,
                          bias=True)
        self.model = WeightDrop(self.rnn, ['weight_hh_l0'], dropout=self.wdrop)
    elif self.rnn_cell_type.lower() == "qrnn":
        from torchqrnn import QRNNLayer
        self.model = QRNNLayer(input_size=x_embed.embedding_dim,
                               hidden_size=config.rnn_cell_size,
                               save_prev_x=True,
                               zoneout=0,
                               window=1,
                               output_gate=True,
                               use_cuda=config.use_gpu)
        self.model.linear = WeightDrop(self.model.linear, ['weight'],
                                       dropout=self.wdrop)
        # self.encoders.reset()

    self.lockdrop = LockedDropout()
    self.dropouti = 0.1

    # temporal averaging (exponential moving average of the parameters)
    self.beta_ema = config.beta_ema
    if self.beta_ema > 0:
        self.avg_param = deepcopy(list(p.data for p in self.parameters()))
        if config.use_gpu:
            self.avg_param = [a.cuda() for a in self.avg_param]
        self.steps_ema = 0.
def __init__(self):
    super(GRU_Layer, self).__init__()
    # `gru_len` is assumed to be a module-level constant defining the hidden size.
    self.gru = nn.GRU(input_size=300, hidden_size=gru_len, bidirectional=True)