def decode(self, trg_input, src_map, oov_list, enc_context, enc_hidden, trg_mask, ctx_mask):
    """Run the attentional decoder with a copy mechanism over a batch of targets.

    Depending on ``self.do_teacher_forcing()`` the decoder is either fed the
    gold target tokens in one pass (teacher forcing) or unrolled one step at a
    time while feeding back its own greedy prediction (all sampling).

    :param trg_input: (batch_size, trg_len) target word indices; the first
        token of every row is used as the start symbol when sampling.
    :param src_map: (batch_size, src_len) source indices in which OOV words
        carry temporary indices >= vocab_size; forwarded to
        ``merge_copy_probs`` so copied probabilities can be mapped to words.
    :param oov_list: forwarded untouched to ``merge_copy_probs``.
    :param enc_context: (batch_size, src_len, hidden_size * num_direction)
        encoder outputs.
    :param enc_hidden: indexable pair whose two items are handed to
        ``init_decoder_state`` (presumably final encoder h and c -- confirm).
    :param trg_mask: unused here; kept for interface compatibility.
    :param ctx_mask: unused here; kept for interface compatibility.
    :returns: tuple ``(decoder_log_probs, decoder_outputs, attn_weights, copy_weights)``
        decoder_log_probs : (batch_size, trg_len - 1, vocab_size + max_oov_number)
        decoder_outputs   : (batch_size, trg_len - 1, trg_hidden_dim)
        attn_weights      : (batch_size, trg_len - 1, src_len)
        copy_weights      : (batch_size, trg_len - 1, src_len); equal to
            ``attn_weights`` when no dedicated copy attention layer exists.
    """
    batch_size = trg_input.size(0)
    src_len = enc_context.size(1)
    context_dim = enc_context.size(2)
    trg_hidden_dim = self.trg_hidden_dim

    # prepare the init hidden vector,
    # (batch_size, dec_hidden_dim) -> 2 * (1, batch_size, dec_hidden_dim)
    init_hidden = self.init_decoder_state(enc_hidden[0], enc_hidden[1])

    # enc_context has to be projected before dot attention:
    # (batch_size, src_len, context_dim) -> (batch_size, src_len, trg_hidden_dim)
    enc_context = nn.Tanh()(
        self.encoder2decoder_hidden(enc_context.contiguous().view(-1, context_dim))
    ).view(batch_size, src_len, trg_hidden_dim)

    # maximum length to unroll: the last target word has nothing left to predict
    max_length = trg_input.size(1) - 1

    self.current_batch += 1
    if self.do_teacher_forcing():
        logging.info("Training batches with Teacher Forcing")

        # --- Normal RNN procedure ---
        # truncate the last word, as there's no further word after it to predict
        trg_input = trg_input[:, :-1]
        trg_emb = self.embedding(trg_input)   # (batch_size, trg_len, embed_dim)
        trg_emb = trg_emb.permute(1, 0, 2)    # (trg_len, batch_size, embed_dim)

        # both input and output of the decoder are time-first:
        # (trg_len, batch_size, trg_hidden_dim)
        decoder_outputs, hidden = self.decoder(trg_emb, init_hidden)

        # h_tildes (batch_size, trg_len, trg_hidden_dim),
        # attention weights/logits (batch_size, trg_len, src_len)
        h_tildes, attn_weights, attn_logits = self.attention_layer(
            decoder_outputs.permute(1, 0, 2), enc_context)

        # (batch_size * trg_len, vocab_size) -> (batch_size, trg_len, vocab_size)
        decoder_logits = self.decoder2vocab(
            h_tildes.view(-1, trg_hidden_dim)).view(batch_size, max_length, -1)

        # --- Copy Mechanism ---
        # copy_weights and copy_logits are (batch_size, trg_len, src_len)
        if self.copy_attention_layer:
            _, copy_weights, copy_logits = self.copy_attention_layer(
                decoder_outputs.permute(1, 0, 2), enc_context)
        else:
            # Reuse the regular attention, exactly as the sampling branch does;
            # without this `copy_weights` would be undefined at `return`.
            copy_weights = attn_weights
            copy_logits = attn_logits

        # merge generative and copying probs,
        # (batch_size, trg_len, vocab_size + max_oov_number)
        decoder_log_probs = self.merge_copy_probs(decoder_logits, copy_logits, src_map, oov_list)
        decoder_outputs = decoder_outputs.permute(1, 0, 2)  # (batch_size, trg_len, trg_hidden_dim)
    else:
        logging.info("Training batches with All Sampling")

        # --- Normal RNN procedure ---
        # take the first word (should be BOS <s>) of each target: (batch_size, 1)
        trg_input = trg_input[:, 0].unsqueeze(1)
        decoder_log_probs = []
        decoder_outputs = []
        attn_weights = []
        copy_weights = []

        # The recurrent state must be threaded through the steps; restarting
        # from `init_hidden` each time would discard everything decoded so far.
        hidden = init_hidden

        for _ in range(max_length):
            trg_emb = self.embedding(trg_input)   # (batch_size, 1, embed_dim)
            trg_emb = trg_emb.permute(1, 0, 2)    # (1, batch_size, embed_dim)

            # decoder_output is time-first: (1, batch_size, trg_hidden_dim)
            decoder_output, hidden = self.decoder(trg_emb, hidden)

            # h_tilde (batch_size, 1, trg_hidden_dim),
            # attn_weight & attn_logit (batch_size, 1, src_len)
            h_tilde, attn_weight, attn_logit = self.attention_layer(
                decoder_output.permute(1, 0, 2), enc_context)

            # (batch_size * 1, vocab_size) -> (batch_size, 1, vocab_size)
            decoder_logit = self.decoder2vocab(
                h_tilde.view(-1, trg_hidden_dim)).view(batch_size, 1, -1)

            # --- Copy Mechanism ---
            if self.copy_attention_layer:
                _, copy_weight, copy_logit = self.copy_attention_layer(
                    decoder_output.permute(1, 0, 2), enc_context)
            else:
                copy_weight = attn_weight
                copy_logit = attn_logit

            # (batch_size, 1, vocab_size + max_oov_number)
            decoder_log_prob = self.merge_copy_probs(decoder_logit, copy_logit, src_map, oov_list)

            # --- Find the next word (greedy) ---
            _, top_idx = decoder_log_prob.data.topk(1, dim=-1)
            # copied OOV words cannot be embedded, so feed <unk> instead
            top_idx[top_idx >= self.vocab_size] = self.unk_word
            top_idx = Variable(top_idx.squeeze(2))  # (batch_size, 1)
            trg_input = top_idx.cuda() if torch.cuda.is_available() else top_idx

            # store time-first so the final cat along dim 0 stacks time steps
            decoder_log_probs.append(decoder_log_prob.permute(1, 0, 2))
            decoder_outputs.append(decoder_output)
            attn_weights.append(attn_weight.permute(1, 0, 2))
            copy_weights.append(copy_weight.permute(1, 0, 2))

        # convert outputs back to batch-first
        decoder_log_probs = torch.cat(decoder_log_probs, 0).permute(1, 0, 2)
        decoder_outputs = torch.cat(decoder_outputs, 0).permute(1, 0, 2)
        attn_weights = torch.cat(attn_weights, 0).permute(1, 0, 2)
        copy_weights = torch.cat(copy_weights, 0).permute(1, 0, 2)

    # log-probabilities, decoder hidden outputs and attention weights (for visualization)
    return decoder_log_probs, decoder_outputs, attn_weights, copy_weights
def __init__(self, input_nc, output_nc, ngf=32, n_downsampling=4, norm_layer=nn.BatchNorm2d):
    """Assemble a convolutional encoder-decoder stored in ``self.model``.

    Layout: 7x7 stem conv -> ``n_downsampling`` stride-2 convs that double the
    channel count -> ``n_downsampling`` stride-2 transposed convs that halve
    it again -> 7x7 output conv followed by Tanh.

    Args:
        input_nc: number of input channels.
        output_nc: number of output channels (also kept as ``self.output_nc``).
        ngf: channel count produced by the stem convolution.
        n_downsampling: number of down- and up-sampling stages.
        norm_layer: callable building a normalization layer from a channel count.
    """
    super(Encoder, self).__init__()
    self.output_nc = output_nc

    # Stem: reflection padding keeps the spatial size under the 7x7 kernel.
    layers = [
        nn.ReflectionPad2d(3),
        nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0),
        norm_layer(ngf),
        nn.ReLU(True),
    ]

    # Downsampling: each stage halves resolution and doubles channels.
    for level in range(n_downsampling):
        in_ch = ngf * 2 ** level
        out_ch = in_ch * 2
        layers.extend([
            nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=2, padding=1),
            norm_layer(out_ch),
            nn.ReLU(True),
        ])

    # Upsampling: mirror of the stages above, from widest back to `ngf`.
    for level in range(n_downsampling, 0, -1):
        in_ch = ngf * 2 ** level
        out_ch = int(in_ch / 2)
        layers.extend([
            nn.ConvTranspose2d(
                in_ch, out_ch, kernel_size=3, stride=2, padding=1, output_padding=1
            ),
            norm_layer(out_ch),
            nn.ReLU(True),
        ])

    # Output head.
    layers.extend([
        nn.ReflectionPad2d(3),
        nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
        nn.Tanh(),
    ])
    self.model = nn.Sequential(*layers)
def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False,
             n_blocks=6, gpu_ids=None, padding_type='reflect'):
    """Build a ResNet generator split into a trunk, a middle part and two heads.

    Four ``nn.Sequential`` containers are created:

    * ``self.model``       -- 7x7 stem, two stride-2 convs, four ResnetBlocks.
    * ``self.model_pre``   -- three further ResnetBlocks.
    * ``self.model_post1`` / ``self.model_post2`` -- two structurally identical
      heads: ``n_blocks - 7`` extra ResnetBlocks (none unless ``n_blocks > 7``),
      two stride-2 transposed convs, then a 7x7 conv and Tanh.

    How the containers are wired together is decided by ``forward``, which is
    not part of this block.

    Args:
        input_nc: number of input channels.
        output_nc: number of output channels of each head.
        ngf: channel count produced by the stem convolution.
        norm_layer: normalization layer class or ``functools.partial`` of one.
        use_dropout: forwarded to every ``ResnetBlock``.
        n_blocks: only values above 7 add ResnetBlocks (to the two heads).
        gpu_ids: stored as ``self.gpu_ids``; ``None`` means an empty list.
        padding_type: forwarded to every ``ResnetBlock``.
    """
    assert(n_blocks >= 0)
    super(ResnetGeneratorMMReverse, self).__init__()
    self.input_nc = input_nc
    self.output_nc = output_nc
    self.ngf = ngf
    # A literal `[]` default would be one list shared by every instance.
    self.gpu_ids = [] if gpu_ids is None else gpu_ids

    # InstanceNorm has no affine shift by default, so the convs keep a bias.
    if isinstance(norm_layer, functools.partial):
        use_bias = norm_layer.func == nn.InstanceNorm2d
    else:
        use_bias = norm_layer == nn.InstanceNorm2d

    def make_block(channels):
        # One residual block with the shared configuration.
        return ResnetBlock(channels, padding_type=padding_type, norm_layer=norm_layer,
                           use_dropout=use_dropout, use_bias=use_bias)

    def make_upsample(in_ch):
        # Stride-2 transposed conv that halves the channel count.
        out_ch = int(in_ch / 2)
        return [nn.ConvTranspose2d(in_ch, out_ch, kernel_size=3, stride=2,
                                   padding=1, output_padding=1, bias=use_bias),
                norm_layer(out_ch),
                nn.ReLU(True)]

    model = [nn.ReflectionPad2d(3),
             nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
             norm_layer(ngf),
             nn.ReLU(True)]

    n_downsampling = 2
    for i in range(n_downsampling):
        mult = 2**i
        model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
                            stride=2, padding=1, bias=use_bias),
                  norm_layer(ngf * mult * 2),
                  nn.ReLU(True)]

    pre_f_blocks = 4   # residual blocks living in the trunk (`model`)
    pre_l_blocks = 7   # trunk + `model_pre` blocks; the rest go to the heads
    mult = 2**n_downsampling
    for _ in range(pre_f_blocks):
        model += [make_block(ngf * mult)]

    model_pre = []
    for _ in range(pre_f_blocks, pre_l_blocks):
        model_pre += [make_block(ngf * mult)]

    # The two heads are built in lockstep (post1 layer, then post2 layer) so
    # that parameter initialisation order matches the historical behaviour.
    model_post1 = []
    model_post2 = []
    for _ in range(pre_l_blocks, n_blocks):
        model_post1 += [make_block(ngf * mult)]
        model_post2 += [make_block(ngf * mult)]

    for i in range(n_downsampling):
        mult = 2**(n_downsampling - i)
        model_post1 += make_upsample(ngf * mult)
        model_post2 += make_upsample(ngf * mult)

    model_post1 += [nn.ReflectionPad2d(3)]
    model_post1 += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
    model_post1 += [nn.Tanh()]

    model_post2 += [nn.ReflectionPad2d(3)]
    model_post2 += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
    model_post2 += [nn.Tanh()]

    self.model_post1 = nn.Sequential(*model_post1)
    self.model_post2 = nn.Sequential(*model_post2)
    self.model_pre = nn.Sequential(*model_pre)
    self.model = nn.Sequential(*model)
def __init__(self, vocabs, opt, predictor_tgt=None, predictor_src=None, PreModelClass='TransformerPredictor'):
    """Build an estimator on top of a target-side and a source-side predictor.

    Args:
        vocabs: forwarded to the base class and to newly built predictors;
            also assigned to both predictors when ``opt.token_level`` is set.
        opt: options object; the attributes read here are ``load_pred_target``,
            ``load_pred_source``, ``token_level``, ``hidden_pred``,
            ``out_embeddings_size``, ``mlp_est``, ``hidden_est``,
            ``rnn_layers_est``, ``dropout_est``, ``sentence_ll`` and
            ``start_stop``.
        predictor_tgt: ready-made target predictor; when falsy one is loaded
            from ``opt.load_pred_target`` or constructed from scratch.
        predictor_src: same as above for the source side (built with
            ``predict_inverse=True``).
        PreModelClass: *name* of the predictor class, resolved with ``eval``.
    """
    super().__init__(vocabs=vocabs, opt=opt)

    # NOTE(review): `eval(PreModelClass)` executes an arbitrary expression.
    # It is only safe while the class name comes from trusted code/config --
    # confirm no user-controlled value can reach this parameter.
    if not predictor_tgt:
        if opt.load_pred_target:
            predictor_tgt = eval(PreModelClass).from_file(
                opt.load_pred_target, opt)
        else:
            predictor_tgt = eval(PreModelClass)(vocabs, opt, predict_inverse=False)

    if not predictor_src:
        if opt.load_pred_source:
            predictor_src = eval(PreModelClass).from_file(
                opt.load_pred_source, opt)
        else:
            predictor_src = eval(PreModelClass)(vocabs, opt, predict_inverse=True)

    # Overwrite the predictors' vocabularies with the estimator's own ones.
    if opt.token_level:
        if predictor_src:
            predictor_src.vocabs = vocabs
        if predictor_tgt:
            predictor_tgt.vocabs = vocabs

    self.predictor_tgt = predictor_tgt
    self.predictor_src = predictor_src

    # Optional sub-modules start as None and are filled in below.
    self.mlp = None
    self.sentence_pred = None
    self.sentence_sigma = None

    # Width of the per-token feature vector fed to the LSTM.
    self.lstm_input_size = 2 * opt.hidden_pred + opt.out_embeddings_size
    if opt.mlp_est:
        # Optional projection; afterwards the LSTM sees `hidden_est` features.
        self.mlp = nn.Sequential(
            nn.Linear(self.lstm_input_size, opt.hidden_est), nn.Tanh())
        self.lstm_input_size = opt.hidden_est

    # NOTE(review): `self.opt` is read here but only assigned on the last line
    # of this method, so this relies on the base-class __init__ having set it
    # already -- confirm.
    self.lstm = nn.LSTM(
        input_size=self.lstm_input_size,
        hidden_size=self.opt.hidden_est,
        num_layers=self.opt.rnn_layers_est,
        batch_first=True,
        dropout=self.opt.dropout_est,
        bidirectional=True,
    )

    # 2 directions * layers * hidden units (presumably the concatenated final
    # LSTM states -- confirm against forward()).
    sentence_input_size = 2 * opt.rnn_layers_est * opt.hidden_est

    # Three-layer regressor producing one sentence-level score.
    self.sentence_pred = nn.Sequential(
        nn.Linear(sentence_input_size, sentence_input_size // 2),
        nn.Sigmoid(),
        nn.Linear(sentence_input_size // 2, sentence_input_size // 4),
        nn.Sigmoid(),
        nn.Linear(sentence_input_size // 4, 1),
    )
    if self.opt.sentence_ll:
        # Predict truncated Gaussian distribution
        self.sentence_sigma = nn.Sequential(
            nn.Linear(sentence_input_size, sentence_input_size // 2),
            nn.Sigmoid(),
            nn.Linear(sentence_input_size // 2, sentence_input_size // 4),
            nn.Sigmoid(),
            nn.Linear(sentence_input_size // 4, 1),
            nn.Sigmoid(),
        )

    self.mse_loss = nn.MSELoss(reduction='sum')

    if opt.start_stop:
        # Learnable vectors of width `out_embeddings_size`, zero-initialised
        # (presumably start/stop position features -- confirm).
        self.start_PreQEFV = nn.Parameter(
            torch.zeros(1, 1, opt.out_embeddings_size))
        self.end_PreQEFV = nn.Parameter(
            torch.zeros(1, 1, opt.out_embeddings_size))

    self.opt = opt
def __init__(self, cell='gru', use_baseline=True, n_actions=10, n_units=64, fusion_dim=128, n_input=76,
             n_hidden=128, demo_dim=17, n_output=1, dropout=0.0, lamda=0.5, device='cpu'):
    """Create the two policy heads, the recurrent cell and the output layers.

    Args:
        cell: ``'lstm'`` selects ``nn.LSTMCell``; any other value ``nn.GRUCell``.
        use_baseline: when equal to ``True`` a scalar value layer is added
            for each of the two agents.
        n_actions: output width of both agents' second linear layer.
        n_units: hidden width of both agents' linear layers.
        fusion_dim: output width of the fusion layer.
        n_input: input width of the recurrent cell (and of agent 2, together
            with ``demo_dim``).
        n_hidden: hidden width of the recurrent cell (and input of agent 1,
            together with ``demo_dim``).
        demo_dim: width of the extra feature vector concatenated in several
            layers (presumably demographics -- confirm).
        n_output: output width of the final linear layer.
        dropout: when > 0 an ``nn.Dropout`` is stored as ``self.nn_dropout``.
        lamda: stored unchanged as ``self.lamda``.
        device: stored unchanged as ``self.device``.
    """
    super(Agent, self).__init__()
    self.cell = cell
    self.use_baseline = use_baseline
    self.n_actions = n_actions
    self.n_units = n_units
    self.n_input = n_input
    self.n_hidden = n_hidden
    self.n_output = n_output
    self.dropout = dropout
    self.lamda = lamda
    self.fusion_dim = fusion_dim
    self.demo_dim = demo_dim
    self.device = device

    # Per-agent episode buffers, filled elsewhere in the class.
    self.agent1_action = []
    self.agent1_prob = []
    self.agent1_entropy = []
    self.agent1_baseline = []
    self.agent2_action = []
    self.agent2_prob = []
    self.agent2_entropy = []
    self.agent2_baseline = []

    # Policy networks: agent 1 consumes hidden-state-sized input, agent 2
    # raw-input-sized input, each concatenated with the demo features.
    self.agent1_fc1 = nn.Linear(self.n_hidden + self.demo_dim, self.n_units)
    self.agent2_fc1 = nn.Linear(self.n_input + self.demo_dim, self.n_units)
    self.agent1_fc2 = nn.Linear(self.n_units, self.n_actions)
    self.agent2_fc2 = nn.Linear(self.n_units, self.n_actions)
    # Equality test kept on purpose so that only True/1 enable the value heads.
    if use_baseline == True:  # noqa: E712
        self.agent1_value = nn.Linear(self.n_units, 1)
        self.agent2_value = nn.Linear(self.n_units, 1)

    if self.cell == 'lstm':
        self.rnn = nn.LSTMCell(self.n_input, self.n_hidden)
    else:
        self.rnn = nn.GRUCell(self.n_input, self.n_hidden)

    # Zero biases and orthogonal weights for the recurrent cell. The in-place
    # `constant_` replaces the deprecated `nn.init.constant` alias.
    for name, param in self.rnn.named_parameters():
        if 'bias' in name:
            nn.init.constant_(param, 0.0)
        elif 'weight' in name:
            nn.init.orthogonal_(param)

    if dropout > 0.0:
        self.nn_dropout = nn.Dropout(p=dropout)

    # Layers mapping the demo features to recurrent-state-sized vectors.
    self.init_h = nn.Linear(self.demo_dim, self.n_hidden)
    self.init_c = nn.Linear(self.demo_dim, self.n_hidden)
    self.fusion = nn.Linear(self.n_hidden + self.demo_dim, self.fusion_dim)
    self.output = nn.Linear(self.fusion_dim, self.n_output)

    self.sigmoid = nn.Sigmoid()
    self.softmax = nn.Softmax()
    self.tanh = nn.Tanh()
    self.relu = nn.ReLU()
out = (out > 0.5) * 1 return out.data.numpy() # 绘制出决策面 plot_decision_boundary(lambda x: plot_network(x), x.numpy(), y.numpy()) plt.title("two layer network") plt.show() """下面介绍Sequential与Module""" # Sequential 允许我们构建序列化的模块,而 Module 是一种更加灵活的模型定义方式 # Sequential方法构建模型 seq_net = nn.Sequential( nn.Linear(2, 4), # 输入层是2个神经元,隐藏层是4个神经元 xw + b nn.Tanh(), nn.Linear(4, 1) # 隐藏层是4个神经元,输出层是1个神经元 ) # 序列模块可以通过索引访问每一层 print(seq_net[0]) # 打印出第一层的权重 print(seq_net[0].weight) # 通过parameters可以获得模型的参数 param = seq_net.parameters() # 定义优化算法 optimizer = optim.SGD(param, 1.) # 训练10000次 for epoch in range(10000): out = seq_net(Variable(x)) loss = loss_func(out, Variable(y)) optimizer.zero_grad()
def __init__(self, cfg):
    """Build the VQA model: image encoder, question encoder, DNC and classifier.

    Args:
        cfg: nested configuration mapping. Keys read here:
            ``cfg["hyperparameters"]``: ``commun_embed_size``, ``dropout``,
            ``gpu_id``, ``ans_vocab_size``;
            ``cfg["dnc_c"]``: ``nonlinearity`` (``tanh`` | ``relu`` |
            ``sigmoid``), ``type`` (``MLP`` | ``LSTM``), ``output_size``,
            ``hidden_dim``, ``num_layers_hidden``, ``dropout``, ``n``, ``w``,
            ``r``, ``debug``, ``concat_out_rv`` and, for the LSTM controller,
            ``rnn_type``, ``num_layers``, ``bidirectional``.

    Raises:
        ValueError: if the non-linearity or the controller type is unknown.
    """
    super(VqaModelDncQC, self).__init__()
    self.cfg = cfg
    hyper = cfg["hyperparameters"]
    dnc_cfg = cfg["dnc_c"]

    self.img_encoder = ImgEncoder(hyper["commun_embed_size"])
    self.qst_encoder = QstEncoderDnc(cfg)

    if dnc_cfg["nonlinearity"] == "tanh":
        self.nonlinearity = nn.Tanh()
    elif dnc_cfg["nonlinearity"] == "relu":
        self.nonlinearity = nn.ReLU()
    elif dnc_cfg["nonlinearity"] == "sigmoid":
        self.nonlinearity = nn.Sigmoid()
    else:
        raise ValueError(
            "<{}> is not a valid non-linearity function.".format(
                dnc_cfg["nonlinearity"]))

    self.tanh = nn.Tanh()
    self.dropout = nn.Dropout(hyper["dropout"])

    if dnc_cfg["type"] == "MLP":
        self.dnc = DNC_MLP(
            input_size=hyper["commun_embed_size"],
            output_size=dnc_cfg["output_size"],
            hidden_size=dnc_cfg["hidden_dim"],
            num_hidden_layers=dnc_cfg["num_layers_hidden"],
            dropout=dnc_cfg["dropout"],
            nr_cells=dnc_cfg["n"],
            cell_size=dnc_cfg["w"],
            read_heads=dnc_cfg["r"],
            nonlinearity=self.nonlinearity,
            gpu_id=hyper["gpu_id"],
            debug=dnc_cfg["debug"],
            clip=20,
        )
    elif dnc_cfg["type"] == "LSTM":
        self.dnc = DNC(
            input_size=hyper["commun_embed_size"],
            output_size=dnc_cfg["output_size"],
            hidden_size=dnc_cfg["hidden_dim"],
            rnn_type=dnc_cfg["rnn_type"],
            num_layers=dnc_cfg["num_layers"],
            num_hidden_layers=dnc_cfg["num_layers_hidden"],
            bias=True,
            batch_first=True,
            dropout=dnc_cfg["dropout"],
            bidirectional=dnc_cfg["bidirectional"],
            nr_cells=dnc_cfg["n"],
            cell_size=dnc_cfg["w"],
            read_heads=dnc_cfg["r"],
            gpu_id=hyper["gpu_id"],
            independent_linears=True,
            share_memory=True,
            debug=dnc_cfg["debug"],
            clip=20)
    else:
        # Report the same key that was tested above; the previous message
        # looked up cfg["dnc"]["dnc_c_type"], a different path than the one
        # used for dispatch, so a KeyError could mask the intended ValueError.
        raise ValueError("dnc controller type <{}> is not defined".format(
            dnc_cfg["type"]))

    # When read vectors are concatenated to the controller output, the
    # classifier input grows by cell_size * read_heads.
    if dnc_cfg["concat_out_rv"]:
        in_fc_1 = dnc_cfg["output_size"] + dnc_cfg["w"] * dnc_cfg["r"]
    else:
        in_fc_1 = dnc_cfg["output_size"]

    self.fc_1 = nn.Linear(in_fc_1, hyper["ans_vocab_size"])
    self.fc_2 = nn.Linear(hyper["ans_vocab_size"], hyper["ans_vocab_size"])
def __init__(self, input_size, hidden_size, output_size):
    """Build a Linear -> Tanh -> Linear network wrapped in ``nn.Sequential``.

    Args:
        input_size: number of input features.
        hidden_size: width of the hidden layer.
        output_size: number of output features.
    """
    super(TwoLayerLRSeq, self).__init__()
    hidden_layer = nn.Linear(in_features=input_size, out_features=hidden_size)
    squash = nn.Tanh()
    output_layer = nn.Linear(in_features=hidden_size, out_features=output_size)
    self.model = nn.Sequential(hidden_layer, squash, output_layer)
def __init__(self, input_size, use_stn=False, use_attention=False):
    """Build the 1-D convolutional encoder-decoder and its optional add-ons.

    Args:
        input_size: signal length; sizes the STN fully connected head (via
            ``self.get_stn_conv_out``) and the attention layers.
        use_stn: when true, create the spatial-transformer sub-network
            (``stn_conv``, ``stn_fc`` and the ``restriction_const`` buffer).
        use_attention: when true, create the two-layer attention network.
    """
    super(PPG2ECG, self).__init__()
    self.use_stn = use_stn
    self.use_attention = use_attention

    # build main transformer: every conv shares kernel 31 / padding 15
    kernel, pad = 31, 15
    # (in_channels, out_channels, stride)
    encoder_plan = [(1, 32, 2), (32, 64, 1), (64, 128, 2), (128, 256, 1), (256, 512, 2)]
    # (in_channels, out_channels, stride, output_padding)
    decoder_plan = [(512, 256, 2, 1), (256, 128, 1, 0), (128, 64, 2, 1), (64, 32, 1, 0)]

    layers = []
    for c_in, c_out, stride in encoder_plan:
        layers.append(nn.Conv1d(c_in, c_out, kernel_size=kernel, stride=stride, padding=pad))
        layers.append(nn.PReLU(c_out))
    for c_in, c_out, stride, out_pad in decoder_plan:
        layers.append(nn.ConvTranspose1d(
            c_in, c_out, kernel_size=kernel, stride=stride, padding=pad, output_padding=out_pad))
        layers.append(nn.PReLU(c_out))
    # last decoder stage maps back to one channel and squashes with Tanh
    layers.append(nn.ConvTranspose1d(
        32, 1, kernel_size=kernel, stride=2, padding=pad, output_padding=1))
    layers.append(nn.Tanh())
    self.main = nn.Sequential(*layers)

    # build stn (optional)
    if use_stn:
        # pylint: disable=not-callable
        self.restriction = torch.tensor(
            [1, 0, 0, 0], dtype=torch.float, requires_grad=False)
        self.register_buffer('restriction_const', self.restriction)

        self.stn_conv = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=8, kernel_size=7, stride=1),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.Conv1d(in_channels=8, out_channels=10, kernel_size=5, stride=1),
            nn.MaxPool1d(kernel_size=2, stride=2),
        )

        flat_features = self.get_stn_conv_out(input_size)
        flatten = Flatten()
        fc_hidden = nn.Linear(flat_features, 32)
        fc_act = nn.ReLU(True)
        fc_theta = nn.Linear(32, 4)
        self.stn_fc = nn.Sequential(flatten, fc_hidden, fc_act, fc_theta)

        # Zero weights plus a fixed bias make the head output [1, 0, 1, 0]
        # regardless of its input until training changes it.
        fc_theta.weight.data.zero_()
        fc_theta.bias.data = torch.FloatTensor([1, 0, 1, 0])

    # build attention network (optional)
    if use_attention:
        attn_in = nn.Linear(input_size, input_size)
        attn_act = nn.ReLU()
        attn_out = nn.Linear(input_size, input_size)
        self.attn = nn.Sequential(attn_in, attn_act, attn_out)
        self.attn_len = input_size
def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm3d, use_dropout=False,
             n_blocks=6, padding_type='replicate'):
    """Construct a 3D Resnet-based generator.

    The network is: 7x7x7 stem conv -> two stride-2 convs that each *triple*
    the channel count -> ``n_blocks`` ResNet blocks -> two stride-2 transposed
    convs that each divide the channel count by three -> 7x7x7 conv + Tanh.

    Parameters:
        input_nc (int)      -- the number of channels in input images
        output_nc (int)     -- the number of channels in output images
        ngf (int)           -- the number of filters after the stem conv and
                               before the final conv
        norm_layer          -- normalization layer (class or functools.partial)
        use_dropout (bool)  -- if use dropout layers
        n_blocks (int)      -- the number of ResNet blocks
        padding_type (str)  -- the name of padding layer in conv layers:
                               reflect | replicate | zero
    """
    assert (n_blocks >= 0)
    super(ResnetGenerator, self).__init__()
    if isinstance(norm_layer, functools.partial):
        use_bias = norm_layer.func == nn.InstanceNorm3d
    else:
        use_bias = norm_layer == nn.InstanceNorm3d

    growth = 3  # channel multiplier per down-sampling stage

    model = [
        nn.ReplicationPad3d(3),
        nn.Conv3d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
        norm_layer(ngf),
        nn.ReLU(True)
    ]

    n_downsampling = 2
    for i in range(n_downsampling):  # add downsampling layers
        mult = growth**i
        model += [
            nn.Conv3d(ngf * mult, ngf * mult * growth, kernel_size=3, stride=2,
                      padding=1, bias=use_bias),
            norm_layer(ngf * mult * growth),
            nn.ReLU(True)
        ]

    mult = growth**n_downsampling
    for i in range(n_blocks):  # add ResNet blocks
        model += [
            ResnetBlock3D(ngf * mult, padding_type=padding_type, norm_layer=norm_layer,
                          use_dropout=use_dropout, use_bias=use_bias)
        ]

    for i in range(n_downsampling):  # add upsampling layers
        mult = growth**(n_downsampling - i)
        # Undo exactly one down-sampling stage. Halving here (as in the 2x
        # variant of this network) would leave consecutive layers with
        # mismatched channel counts and the forward pass would fail.
        out_ch = ngf * mult // growth
        model += [
            nn.ConvTranspose3d(ngf * mult, out_ch, kernel_size=3, stride=2,
                               padding=1, output_padding=1, bias=use_bias),
            norm_layer(out_ch),
            nn.ReLU(True)
        ]

    model += [nn.ReplicationPad3d(3)]
    model += [nn.Conv3d(ngf, output_nc, kernel_size=7, padding=0)]
    model += [nn.Tanh()]

    self.model = nn.Sequential(*model)
def __init__(self, input_size, hidden_size, output_size):
    """Create the layers of a two-layer perceptron with a Tanh in between.

    Args:
        input_size: number of input features of ``fc1``.
        hidden_size: output width of ``fc1`` and input width of ``fc2``.
        output_size: number of output features of ``fc2``.
    """
    super(TwoLayerLR, self).__init__()
    # input -> hidden
    self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
    # non-linearity stored as a module attribute
    self.tanh = nn.Tanh()
    # hidden -> output
    self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)
def __init__(self, outer_nc, inner_nc, input_nc=None, submodule=None, outermost=False,
             innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
    """Construct one level of a U-Net: down path, nested submodule, up path.

    Parameters:
        outer_nc (int)    -- number of filters produced by the up-convolution
        inner_nc (int)    -- number of filters produced by the down-convolution
        input_nc (int)    -- number of input channels; defaults to ``outer_nc``
        submodule         -- the block nested between the down and up paths
        outermost (bool)  -- outermost level: no norm layers, Tanh output
        innermost (bool)  -- innermost level: no nested submodule
        norm_layer        -- normalization layer (class or functools.partial)
        use_dropout (bool) -- append Dropout(0.5) on intermediate levels
    """
    super(UnetSkipConnectionBlock, self).__init__()
    self.outermost = outermost

    # Convs keep their bias only when followed by InstanceNorm.
    norm_cls = norm_layer.func if type(norm_layer) is functools.partial else norm_layer
    use_bias = norm_cls == nn.InstanceNorm2d

    if input_nc is None:
        input_nc = outer_nc

    downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
                         stride=2, padding=1, bias=use_bias)
    downrelu = nn.LeakyReLU(0.2, True)
    downnorm = norm_layer(inner_nc)
    uprelu = nn.ReLU(True)
    upnorm = norm_layer(outer_nc)

    # Levels that wrap a submodule take twice `inner_nc` channels on the way
    # up; only the innermost level takes `inner_nc`. The outermost
    # up-convolution always has a bias.
    takes_single_width = innermost and not outermost
    up_in_ch = inner_nc if takes_single_width else inner_nc * 2
    up_bias = True if outermost else use_bias
    upconv = nn.ConvTranspose2d(up_in_ch, outer_nc, kernel_size=4,
                                stride=2, padding=1, bias=up_bias)

    if outermost:
        layers = [downconv, submodule, uprelu, upconv, nn.Tanh()]
    elif innermost:
        layers = [downrelu, downconv, uprelu, upconv, upnorm]
    else:
        layers = [downrelu, downconv, downnorm, submodule, uprelu, upconv, upnorm]
        if use_dropout:
            layers.append(nn.Dropout(0.5))

    self.model = nn.Sequential(*layers)
def forward(self, input):
    """Generate a batch of heightfields and render each one to an image.

    The input is mapped by ``self.preprocess`` to a 4x4 feature map, refined
    to 8x8 and 16x16, and four tanh-squashed maps (from the 4x4, 8x8, 16x16
    scales and a 32x32 branch) are averaged into one heightfield per sample.
    Each heightfield displaces the module-level ``shape_plane`` and the scene
    is rendered through ``render_pytorch``.

    Relies on module-level names defined outside this method:
    ``heightfield_res``, ``plane_vertices``, ``shape_plane``, ``materials``,
    ``shapes``, ``lights``, ``camera``, ``image``, ``render_pytorch``,
    ``compute_vertex_normal`` and ``iteration``.

    Returns:
        Tensor of shape (input.shape[0], 1, 32, 32) holding the first channel
        of every rendered image.
    """
    tanh = nn.Tanh()
    height_batch = self.preprocess(input)
    height_batch = height_batch.view(-1, 4 * self.ngf, 4, 4)

    # Feature pyramid: 4x4 -> 8x8 -> 16x16.
    _4x4 = height_batch
    _8x8 = self._4_to_8(_4x4)
    _16x16 = self._8_to_16(_8x8)

    # Average of four tanh-squashed maps; the coarser ones are bilinearly
    # upsampled to 32x32 first.
    upsample = nn.Upsample(size=(32, 32), mode='bilinear')
    height_batch = (tanh(self._16_to_32(_16x16)) + \
                    upsample(tanh(self._16_to_16(_16x16))) + \
                    upsample(tanh(self._8_to_8(_8x8))) + \
                    upsample(tanh(self._4_to_4(_4x4)))) / 4.0
    # Move the channel axis last (and swap the two spatial axes).
    height_batch = height_batch.permute(0, 3, 2, 1)

    # Abort the whole process as soon as the generator produces NaNs.
    if np.any(np.isnan(height_batch.data.numpy())):
        print('NANNANNAN')
        exit()

    if self.save_heightfield:
        # Tile the first 64 heightfields into an 8x8 contact sheet.
        # NOTE(review): indexes samples 0..63, so this needs batch size >= 64.
        height_batch_np = height_batch.data.numpy()
        height_flatten = np.zeros([32 * 8, 32 * 8, 1])
        for i in range(8):
            for j in range(8):
                img = height_batch_np[8 * i + j, :, :, :]
                height_flatten[32 * i:32 * (i + 1), 32 * j:32 * (j + 1), :] = img
        # NOTE(review): uses a bare `iteration` here but `self.iteration` for
        # the .obj files below -- confirm which one is intended.
        image.imwrite(
            height_flatten.squeeze(),
            'results/heightfield_gan/heightfield_%06d.png' % iteration)

    output = Variable(torch.zeros([input.shape[0], 1, 32, 32]))
    for i in range(input.shape[0]):
        # Build per-vertex offsets (0, height, 0): only the middle component
        # is displaced.
        height = torch.stack([\
            Variable(torch.from_numpy(np.zeros(heightfield_res, dtype=np.float32))),
            height_batch[i, :, :, 0],
            Variable(torch.from_numpy(np.zeros(heightfield_res, dtype=np.float32)))], dim=-1)
        height = height.view([-1, 3])
        # Mutates the shared module-level plane shape in place.
        shape_plane.vertices = plane_vertices + height

        if self.save_heightfield:
            # Dump the displaced mesh as Wavefront OBJ (face indices are 1-based).
            v = shape_plane.vertices.data.numpy()
            ind = shape_plane.indices.data.numpy() + 1
            with open('results/heightfield_gan/model_%06d_%03d.obj' \
                      % (self.iteration, i), 'w') as f:
                for vid in range(v.shape[0]):
                    f.write('v %f %f %f\n' % (v[vid, 0], v[vid, 1], v[vid, 2]))
                for iid in range(ind.shape[0]):
                    f.write('f %d %d %d\n' % (ind[iid, 0], ind[iid, 1], ind[iid, 2]))

        shape_plane.normals = compute_vertex_normal(
            shape_plane.vertices, shape_plane.indices)

        # Camera at height 3 above the (x, z) position stored for this
        # sample, looking at the origin.
        cam = camera.Camera(\
            position = Variable(torch.from_numpy(np.array([self.xz[i][0], 3, self.xz[i][1]], dtype=np.float32))),
            look_at = Variable(torch.from_numpy(np.array([0, 0, 0], dtype=np.float32))),
            up = Variable(torch.from_numpy(np.array([0, 1, 0], dtype=np.float32))),
            cam_to_world = None,
            fov = Variable(torch.from_numpy(np.array([45.0], dtype=np.float32))),
            clip_near = Variable(torch.from_numpy(np.array([0.01], dtype=np.float32))),
            clip_far = Variable(torch.from_numpy(np.array([10000.0], dtype=np.float32))),
            resolution = self.resolution)
        # NOTE(review): the trailing 4 and 1 are presumably sample count and
        # bounce depth -- confirm against serialize_scene.
        args = render_pytorch.RenderFunction.serialize_scene(\
            cam,materials,shapes,lights,self.resolution,4,1)
        render = render_pytorch.RenderFunction.apply
        # A fresh random integer is passed as first argument on every call
        # (presumably the renderer seed -- confirm).
        img = render(random.randint(0, 1048576), *args)
        # Reverse the axis order and keep only the first slice.
        img = img.permute([2, 1, 0])
        output[i, :, :, :] = img[0, :, :]
    return output
def __init__(self, input_size):
    """Create a square linear projection and a Tanh activation.

    Args:
        input_size: number of input *and* output features of ``self.dense``.
    """
    super().__init__()
    # Same width in and out.
    self.dense = nn.Linear(in_features=input_size, out_features=input_size)
    self.activation = nn.Tanh()
def __init__(
    self,
    in_channels: int,
    latent_dim: int,
    hidden_dims: Optional[List] = None,
    hidden_dims2: Optional[List] = None,
    lr: float = 0.005,
    weight_decay: Optional[float] = 0,
    scheduler_gamma: Optional[float] = 0.95,
) -> None:
    """Build both stages of the two-stage VAE.

    Stage one is a convolutional VAE on images; stage two is a fully
    connected VAE operating on stage one's latent vectors.

    Args:
        in_channels: number of channels of the input images.
        latent_dim: size of the latent vector (shared by both stages).
        hidden_dims: channel widths of the stage-one encoder; defaults to
            ``[32, 64, 128, 256, 512]``. The decoder uses them reversed.
        hidden_dims2: layer widths of the stage-two encoder; defaults to
            ``[1024, 1024]``. The stage-two decoder uses them reversed.
        lr: forwarded to the base class.
        weight_decay: forwarded to the base class.
        scheduler_gamma: forwarded to the base class.
    """
    super(TwoStageVAE, self).__init__(lr=lr, weight_decay=weight_decay, scheduler_gamma=scheduler_gamma)

    self.latent_dim = latent_dim

    # Work on private copies: the lists are reversed in place below and the
    # caller's objects must not be modified as a side effect.
    hidden_dims = [32, 64, 128, 256, 512] if hidden_dims is None else list(hidden_dims)
    hidden_dims2 = [1024, 1024] if hidden_dims2 is None else list(hidden_dims2)

    # Build Encoder
    modules = []
    for h_dim in hidden_dims:
        modules.append(
            nn.Sequential(
                nn.Conv2d(
                    in_channels,
                    out_channels=h_dim,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                ),
                nn.BatchNorm2d(h_dim),
                nn.LeakyReLU(),
            ))
        in_channels = h_dim

    self.encoder = nn.Sequential(*modules)
    # The factor 4 hard-codes a 2x2 spatial map after the last encoder stage.
    self.fc_mu = nn.Linear(hidden_dims[-1] * 4, latent_dim)
    self.fc_var = nn.Linear(hidden_dims[-1] * 4, latent_dim)

    # Build Decoder
    modules = []
    self.decoder_input = nn.Linear(latent_dim, hidden_dims[-1] * 4)

    hidden_dims.reverse()

    for i in range(len(hidden_dims) - 1):
        modules.append(
            nn.Sequential(
                nn.ConvTranspose2d(
                    hidden_dims[i],
                    hidden_dims[i + 1],
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    output_padding=1,
                ),
                nn.BatchNorm2d(hidden_dims[i + 1]),
                nn.LeakyReLU(),
            ))

    self.decoder = nn.Sequential(*modules)

    # Last upsampling step followed by a projection to 3 output channels.
    self.final_layer = nn.Sequential(
        nn.ConvTranspose2d(
            hidden_dims[-1],
            hidden_dims[-1],
            kernel_size=3,
            stride=2,
            padding=1,
            output_padding=1,
        ),
        nn.BatchNorm2d(hidden_dims[-1]),
        nn.LeakyReLU(),
        nn.Conv2d(hidden_dims[-1], out_channels=3, kernel_size=3, padding=1),
        nn.Tanh(),
    )

    # ---------------------- Second VAE ---------------------------#
    encoder2 = []
    in_channels = self.latent_dim
    for h_dim in hidden_dims2:
        encoder2.append(
            nn.Sequential(nn.Linear(in_channels, h_dim), nn.BatchNorm1d(h_dim), nn.LeakyReLU()))
        in_channels = h_dim
    self.encoder2 = nn.Sequential(*encoder2)
    self.fc_mu2 = nn.Linear(hidden_dims2[-1], self.latent_dim)
    self.fc_var2 = nn.Linear(hidden_dims2[-1], self.latent_dim)

    decoder2 = []
    hidden_dims2.reverse()

    in_channels = self.latent_dim
    for h_dim in hidden_dims2:
        decoder2.append(
            nn.Sequential(nn.Linear(in_channels, h_dim), nn.BatchNorm1d(h_dim), nn.LeakyReLU()))
        in_channels = h_dim
    self.decoder2 = nn.Sequential(*decoder2)
def __init__(self, norm_layer=nn.BatchNorm2d, classes=529):
    """Create all sub-networks of the generator.

    Ten convolutional stages (``model1`` .. ``model10``), three transposed
    convolutions (``model8up`` .. ``model10up``), three shortcut convolutions
    (``model3short8``, ``model2short9``, ``model1short10``), a classification
    head, a two-channel Tanh regression head, a 4x bilinear upsampler and a
    channel-wise softmax are registered. How they are connected is defined
    in ``forward``, which is not part of this block.

    Args:
        norm_layer: callable building a normalization layer from a channel count.
        classes: number of output channels of the classification head.
    """
    super(SIGGRAPHGenerator, self).__init__()

    def conv3x3(in_ch, out_ch, dilation=1):
        # 3x3 conv that preserves spatial size (padding equals dilation).
        return nn.Conv2d(in_ch, out_ch, kernel_size=3, dilation=dilation,
                         stride=1, padding=dilation, bias=True)

    def up2x(in_ch, out_ch):
        # 4x4 transposed conv with stride 2.
        return nn.ConvTranspose2d(in_ch, out_ch, kernel_size=4, stride=2, padding=1, bias=True)

    def stage(widths, dilation=1):
        # conv+ReLU for each consecutive width pair, then one norm layer.
        layers = []
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.append(conv3x3(in_ch, out_ch, dilation))
            layers.append(nn.ReLU(True))
        layers.append(norm_layer(widths[-1]))
        return layers

    # Conv1 - Conv4: plain 3x3 stacks with growing width
    conv1 = stage((4, 64, 64))
    conv2 = stage((64, 128, 128))
    conv3 = stage((128, 256, 256, 256))
    conv4 = stage((256, 512, 512, 512))

    # Conv5 - Conv6: dilated stacks
    conv5 = stage((512, 512, 512, 512), dilation=2)
    conv6 = stage((512, 512, 512, 512), dilation=2)

    # Conv7: plain stack
    conv7 = stage((512, 512, 512, 512))

    # Conv8: upsample, shortcut from the 256-wide stage, then refine
    conv8_up = [up2x(512, 256)]
    short_3_to_8 = [conv3x3(256, 256)]
    conv8 = [nn.ReLU(True)] + stage((256, 256, 256))

    # Conv9: upsample, shortcut from the 128-wide stage, then refine
    conv9_up = [up2x(256, 128)]
    short_2_to_9 = [conv3x3(128, 128)]
    conv9 = [nn.ReLU(True)] + stage((128, 128))

    # Conv10: upsample, shortcut from the 64-wide stage, then refine
    conv10_up = [up2x(128, 128)]
    short_1_to_10 = [conv3x3(64, 128)]
    conv10 = [
        nn.ReLU(True),
        conv3x3(128, 128, dilation=1),
        nn.LeakyReLU(negative_slope=.2),
    ]

    # classification output
    class_head = [
        nn.Conv2d(256, classes, kernel_size=1, padding=0, dilation=1, stride=1, bias=True),
    ]

    # regression output
    out_head = [
        nn.Conv2d(128, 2, kernel_size=1, padding=0, dilation=1, stride=1, bias=True),
        nn.Tanh(),
    ]

    self.model1 = nn.Sequential(*conv1)
    self.model2 = nn.Sequential(*conv2)
    self.model3 = nn.Sequential(*conv3)
    self.model4 = nn.Sequential(*conv4)
    self.model5 = nn.Sequential(*conv5)
    self.model6 = nn.Sequential(*conv6)
    self.model7 = nn.Sequential(*conv7)
    self.model8up = nn.Sequential(*conv8_up)
    self.model8 = nn.Sequential(*conv8)
    self.model9up = nn.Sequential(*conv9_up)
    self.model9 = nn.Sequential(*conv9)
    self.model10up = nn.Sequential(*conv10_up)
    self.model10 = nn.Sequential(*conv10)
    self.model3short8 = nn.Sequential(*short_3_to_8)
    self.model2short9 = nn.Sequential(*short_2_to_9)
    self.model1short10 = nn.Sequential(*short_1_to_10)

    self.model_class = nn.Sequential(*class_head)
    self.model_out = nn.Sequential(*out_head)

    self.upsample4 = nn.Sequential(nn.Upsample(scale_factor=4, mode='bilinear'))
    self.softmax = nn.Sequential(nn.Softmax(dim=1))
def __init__(self, config):
    """Create a square linear layer sized by ``config.hidden_size`` and a Tanh.

    Args:
        config: object exposing a ``hidden_size`` attribute.
    """
    super().__init__()
    width = config.hidden_size
    self.dense = nn.Linear(in_features=width, out_features=width)
    self.activation = nn.Tanh()
import numpy as np import csv import math import os.path import timeit from collections import deque import pickle from multiprocessing import Pool N, D_in, H, D_out = 4361, 22, 100, 10 # = Variable(torch.zeros(N, D_in),requires_grad=False) #y = Variable(torch.zeros(N, 3), requires_grad=False) relu = nn.ReLU() sig = nn.Sigmoid() tanh = nn.Tanh() loss_fn = nn.MSELoss() learning_rate = 0.0001 class CarData: def __init__(self, dataList): self.outputdata = [] self.sensordata = [] self.outputdata.append(dataList[0]) self.outputdata.append(dataList[1]) self.outputdata.append(dataList[2]) for i in range(3, len(dataList)): self.sensordata.append(dataList[i]) def get_output_data(self):
def __init__(self, num_input, num_hidden, num_output):
    """Declare the three layers: linear, tanh, linear.

    Args:
        num_input: input width of the first linear layer.
        num_hidden: output width of the first and input width of the second
            linear layer.
        num_output: output width of the second linear layer.
    """
    super(Module_model, self).__init__()
    self.layer1 = nn.Linear(in_features=num_input, out_features=num_hidden)
    self.layer2 = nn.Tanh()
    self.layer3 = nn.Linear(in_features=num_hidden, out_features=num_output)
def __init__(self, out_dim):
    """Create the tanh activation and the self-attention pooling layer.

    Args:
        out_dim: forwarded as the only argument to ``SelfAttentionPooling``.
    """
    super(SAP, self).__init__()

    # Activation first, pooling second (registration order is preserved).
    self.act_fn = nn.Tanh()
    self.sap_layer = SelfAttentionPooling(out_dim)
def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12,
             num_heads=12, mlp_ratio=4., qkv_bias=True, qk_scale=None, representation_size=None,
             drop_rate=0., attn_drop_rate=0., drop_path_rate=0., hybrid_backbone=None, norm_layer=None):
    """Assemble patch embedding, transformer blocks, optional pre-logits and head.

    Args:
        img_size (int, tuple): input image size
        patch_size (int, tuple): patch size (only used without a hybrid backbone)
        in_chans (int): number of input channels
        num_classes (int): number of classes; a value <= 0 makes the head an identity
        embed_dim (int): embedding dimension
        depth (int): number of transformer blocks
        num_heads (int): number of attention heads
        mlp_ratio (int): ratio of mlp hidden dim to embedding dim
        qkv_bias (bool): enable bias for qkv if True
        qk_scale (float): passed through to every block
        representation_size (Optional[int]): when truthy, a linear + tanh
            pre-logits layer of this width is inserted before the head
        drop_rate (float): dropout rate
        attn_drop_rate (float): attention dropout rate
        drop_path_rate (float): upper end of the per-block drop-path ramp
        hybrid_backbone (nn.Module): CNN backbone to use in place of PatchEmbed
        norm_layer: (nn.Module): normalization layer factory; defaults to
            LayerNorm with eps=1e-6
    """
    super().__init__()
    self.num_classes = num_classes
    # num_features starts equal to embed_dim; the representation layer may override it
    self.num_features = embed_dim
    self.embed_dim = embed_dim
    if not norm_layer:
        norm_layer = partial(nn.LayerNorm, eps=1e-6)

    if hybrid_backbone is None:
        self.patch_embed = PatchEmbed(
            img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
    else:
        self.patch_embed = HybridEmbed(
            hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim)
    num_patches = self.patch_embed.num_patches

    # One extra position for the class token.
    self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
    self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
    self.pos_drop = nn.Dropout(p=drop_rate)

    # stochastic depth decay rule: linearly spaced drop-path rate per block
    drop_path_schedule = [rate.item() for rate in torch.linspace(0, drop_path_rate, depth)]
    transformer_blocks = []
    for block_drop_path in drop_path_schedule:
        transformer_blocks.append(
            Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
                  qk_scale=qk_scale, drop=drop_rate, attn_drop=attn_drop_rate,
                  drop_path=block_drop_path, norm_layer=norm_layer))
    self.blocks = nn.ModuleList(transformer_blocks)
    self.norm = norm_layer(embed_dim)

    # Representation layer
    if representation_size:
        self.num_features = representation_size
        pre_logits_layers = OrderedDict([
            ('fc', nn.Linear(embed_dim, representation_size)),
            ('act', nn.Tanh()),
        ])
        self.pre_logits = nn.Sequential(pre_logits_layers)
    else:
        self.pre_logits = nn.Identity()

    # Classifier head
    if num_classes > 0:
        self.head = nn.Linear(self.num_features, num_classes)
    else:
        self.head = nn.Identity()

    trunc_normal_(self.pos_embed, std=.02)
    trunc_normal_(self.cls_token, std=.02)
    self.apply(self._init_weights)
def __init__(self, conv_dim=64):
    """Build the makeup MLP, the blend stack, four residual blocks and the up-sampler.

    Args:
        conv_dim: accepted for interface compatibility; not read by this
            constructor.
    """
    super(Generator, self).__init__()

    base_dim = 256

    # Makeup representation fully connected layer: 32 -> 256 -> 256 -> 512
    self.makeup = nn.Sequential(
        nn.Linear(32, 256),
        nn.ReLU(inplace=True),
        nn.Linear(256, 256),
        nn.ReLU(inplace=True),
        nn.Linear(256, 512),
    )

    # Pose and Face blend: four residual blocks at double width, then a conv
    # that reduces the channel count back to base_dim.
    doubled_dim = base_dim * 2
    blend_layers = [ResidualBlock(dim_in=doubled_dim, dim_out=doubled_dim) for _ in range(4)]
    blend_layers += [
        nn.Conv2d(doubled_dim, base_dim, kernel_size=3, stride=1, padding=1, bias=False),
        nn.InstanceNorm2d(base_dim, affine=True),
        nn.ReLU(inplace=True),
    ]
    self.blend = nn.Sequential(*blend_layers)

    # Main Generator
    self.res_1 = ResidualBlock(dim_in=base_dim, dim_out=base_dim)
    self.res_2 = ResidualBlock(dim_in=base_dim, dim_out=base_dim)
    self.res_3 = ResidualBlock(dim_in=base_dim, dim_out=base_dim)
    self.res_4 = ResidualBlock(dim_in=base_dim, dim_out=base_dim)

    # Up-Sampling: two stride-2 transposed convs, each halving the channels
    upsample_layers = []
    width = base_dim
    for _ in range(2):
        halved = width // 2
        upsample_layers.extend([
            nn.ConvTranspose2d(width, halved, kernel_size=4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(halved, affine=True),
            nn.ReLU(inplace=True),
        ])
        width = halved

    # Final projection to 3 channels followed by tanh
    upsample_layers.extend([
        nn.Conv2d(width, 3, kernel_size=7, stride=1, padding=3, bias=False),
        nn.InstanceNorm2d(3, affine=True),
        nn.Tanh(),
    ])
    self.main = nn.Sequential(*upsample_layers)
def __init__(self, outer_nc, inner_nc, input_nc=None, submodule=None, outermost=False, innermost=False,
             norm_layer=nn.BatchNorm2d, use_dropout=False):
    """Build one down/up level wrapped around an optional inner submodule.

    Args:
        outer_nc: channel count produced by the up-convolution.
        inner_nc: channel count produced by the down-convolution.
        input_nc: channel count fed to the down-convolution; defaults to
            ``outer_nc`` when None.
        submodule: module placed between the down and up halves (not used
            when ``innermost`` is set).
        outermost: if True, the block ends in tanh and uses no norm layers.
        innermost: if True, the block has no submodule and the up-convolution
            takes ``inner_nc`` input channels instead of ``inner_nc * 2``.
        norm_layer: normalization factory, optionally a ``functools.partial``.
        use_dropout: append ``nn.Dropout(0.5)`` on intermediate blocks.
    """
    super(UnetSkipConnectionBlock, self).__init__()
    self.outermost = outermost

    # Convolution bias is enabled only when the norm layer is InstanceNorm2d.
    norm_cls = norm_layer.func if type(norm_layer) == functools.partial else norm_layer
    use_bias = norm_cls == nn.InstanceNorm2d

    if input_nc is None:
        input_nc = outer_nc

    # All shared pieces are instantiated up front, in a fixed order, even if
    # a particular variant does not end up using some of them.
    downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, stride=2, padding=1, bias=use_bias)
    downrelu = nn.LeakyReLU(0.2, True)
    downnorm = norm_layer(inner_nc)
    uprelu = nn.ReLU(True)
    upnorm = norm_layer(outer_nc)

    if outermost:
        upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1)
        layers = [downconv, submodule, uprelu, upconv, nn.Tanh()]
    elif innermost:
        upconv = nn.ConvTranspose2d(inner_nc, outer_nc, kernel_size=4, stride=2, padding=1, bias=use_bias)
        layers = [downrelu, downconv, uprelu, upconv, upnorm]
    else:
        upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1, bias=use_bias)
        layers = [downrelu, downconv, downnorm, submodule, uprelu, upconv, upnorm]
        if use_dropout:
            layers.append(nn.Dropout(0.5))

    self.model = nn.Sequential(*layers)
def __init__(self, receptive_field=1024, mgc_size=60, upsample_size=200, filter_size=256, num_blocks=4):
    """Create the per-block WaveNet stacks, the conditioning layer and output heads.

    Args:
        receptive_field: stored as ``RECEPTIVE_FIELD`` and passed to each ``WaveNet``.
        mgc_size: stored as ``MGC_SIZE``; input width of the conditioning layer.
        upsample_size: stored as ``UPSAMPLE_SIZE``; the conditioning layer
            outputs ``mgc_size * upsample_size`` values.
        filter_size: passed to each ``WaveNet``; input width of every
            ``pre_output`` layer.
        num_blocks: number of parallel entries in each module list.
    """
    super(VocoderNetwork, self).__init__()

    self.RECEPTIVE_FIELD = receptive_field
    self.NUM_NETWORKS = 1
    self.MGC_SIZE = mgc_size
    self.UPSAMPLE_SIZE = upsample_size
    self.NUM_BLOCKS = num_blocks

    def one_per_block(factory):
        # Build num_blocks independent modules from the same factory.
        return torch.nn.ModuleList([factory() for _ in range(num_blocks)])

    self.convolutions = one_per_block(lambda: WaveNet(self.RECEPTIVE_FIELD, mgc_size, filter_size))
    self.conditioning = nn.Sequential(
        nn.Linear(mgc_size, mgc_size * upsample_size),
        nn.Tanh(),
    )
    self.pre_output = one_per_block(lambda: nn.Linear(filter_size, 256))
    self.mean_layer = one_per_block(lambda: nn.Linear(256, 1))
    self.stdev_layer = one_per_block(lambda: nn.Linear(256, 1))
def __init__(self, input_dim, output_dim, kernel_size, stride, padding=0, conv_padding=0,
             norm='none', activation='relu', pad_type='zero', transpose=False):
    """Configure padding, normalization, activation and the (transposed) convolution.

    Args:
        input_dim: input channels of the convolution.
        output_dim: output channels of the convolution and width of the norm layer.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: amount given to the explicit padding layer.
        conv_padding: ``padding`` of the convolution itself; for the transposed
            variant it is also used as ``output_padding``.
        norm: one of 'bn', 'in', 'none'.
        activation: one of 'relu', 'lrelu', 'prelu', 'selu', 'tanh', 'none'.
        pad_type: one of 'reflect', 'replicate', 'zero', 'none'.
        transpose: build ``nn.ConvTranspose2d`` instead of ``nn.Conv2d`` when truthy.

    Raises:
        AssertionError: if ``pad_type``, ``norm`` or ``activation`` is not one
            of the names listed above.
    """
    super(Conv2dBlock, self).__init__()
    self.use_bias = True

    # initialize padding
    pad_builders = (
        ('reflect', lambda: nn.ReflectionPad2d(padding)),
        ('replicate', lambda: nn.ReplicationPad2d(padding)),
        ('zero', lambda: nn.ZeroPad2d(padding)),
        ('none', lambda: None),
    )
    for label, build in pad_builders:
        if pad_type == label:
            self.pad = build()
            break
    else:
        assert 0, "Unsupported padding type: {}".format(pad_type)

    # initialize normalization
    norm_dim = output_dim
    norm_builders = (
        ('bn', lambda: nn.BatchNorm2d(norm_dim)),
        ('in', lambda: nn.InstanceNorm2d(norm_dim)),
        ('none', lambda: None),
    )
    for label, build in norm_builders:
        if norm == label:
            self.norm = build()
            break
    else:
        assert 0, "Unsupported normalization: {}".format(norm)

    # initialize activation
    activation_builders = (
        ('relu', lambda: nn.ReLU(inplace=True)),
        ('lrelu', lambda: nn.LeakyReLU(0.2, inplace=True)),
        ('prelu', lambda: nn.PReLU()),
        ('selu', lambda: nn.SELU(inplace=True)),
        ('tanh', lambda: nn.Tanh()),
        ('none', lambda: None),
    )
    for label, build in activation_builders:
        if activation == label:
            self.activation = build()
            break
    else:
        assert 0, "Unsupported activation: {}".format(activation)

    # initialize convolution
    if not transpose:
        self.conv = nn.Conv2d(input_dim, output_dim, kernel_size, stride,
                              padding=conv_padding, bias=self.use_bias)
    else:
        self.conv = nn.ConvTranspose2d(input_dim, output_dim, kernel_size, stride,
                                       padding=conv_padding, output_padding=conv_padding,
                                       bias=self.use_bias)
def __init__(self, device, w2v_weights, tag_to_itx, hidden_dim, drop_rate, bidirectional=False, freeze=True, embedding_norm=6, c2v_weights=None, pad_word_length=16, embedder="none", more_features=False):
    """Build the embedding, recurrent, projection, CRF and optional char-CNN layers.

    Args:
        device: forwarded unchanged to ``CRF``.
        w2v_weights: pretrained word vectors; ``shape[1]`` is taken as the
            embedding width and the values seed ``nn.Embedding`` when
            ``embedder == "none"``.
        tag_to_itx: sized collection of tags; only its length is used here.
        hidden_dim: input width of ``self.fc``; the LSTM hidden size is this
            value, halved when ``bidirectional`` is set.
        drop_rate: probability for ``nn.Dropout`` and the ``nn.Dropout2d`` layers.
        bidirectional: whether the LSTM is bidirectional.
        freeze: passed to ``nn.Embedding.from_pretrained`` for the word table.
        embedding_norm: assigned to the ``max_norm`` attribute of the
            embedding tables.
        c2v_weights: optional pretrained character vectors; when not None the
            character convolution layers are created.
        pad_word_length: height used for the max-pool kernels of the n-gram
            convolutions.
        embedder: "none" uses the pretrained word table; "elmo-combined" and
            "elmo" create an ``ElmoCombiner``; any other value leaves
            ``self.embedding`` as None.
        more_features: when truthy, 58 + 18 is added to the embedding width.
    """
    super(LstmCrf, self).__init__()
    self.device = device
    self.hidden_dim = hidden_dim
    self.tagset_size = len(tag_to_itx)
    self.embedding_dim = w2v_weights.shape[1]
    self.w2v_weights = w2v_weights
    self.c2v_weights = c2v_weights
    self.pad_word_length = pad_word_length
    self.bidirectional = bidirectional
    self.embedder = embedder
    self.more_features = more_features

    self.drop_rate = drop_rate
    self.drop = nn.Dropout(self.drop_rate)

    # Use the Elmo embedder instead of the classical ones.
    if self.embedder != "none":
        if self.embedder == "elmo-combined":
            self.embedding = ElmoCombiner()
        elif self.embedder == "elmo":
            self.embedding = ElmoCombiner(freeze=True)
        else:
            # No embedding module is created for other embedder names.
            self.embedding = None
        # Overrides the width taken from w2v_weights: 768 for "bert", 1024 otherwise.
        self.embedding_dim = 768 if self.embedder == "bert" else 1024
    else:
        self.embedding = nn.Embedding.from_pretrained(
            torch.FloatTensor(w2v_weights), freeze=freeze)
        self.embedding.max_norm = embedding_norm

    # We add the dimensionality of the other features (POS and spaCy).
    if self.more_features:
        self.embedding_dim += 58 + 18

    # recurrent and mapping to tagset
    self.recurrent = nn.LSTM(input_size=self.embedding_dim, hidden_size=self.hidden_dim // (1 if not self.bidirectional else 2), bidirectional=self.bidirectional, batch_first=True)
    self.bnorm = nn.BatchNorm2d(1)

    self.fc = nn.Linear(self.hidden_dim, self.tagset_size + 2)  # + 2 because of start and end token
    self.bnorm2 = nn.BatchNorm2d(1)

    # crf for scoring at a global level
    self.crf = CRF(self.device, self.tagset_size)

    # setup convolution on characters if c2v_weights are passed
    if self.c2v_weights is not None:
        self.char_embedding_dim = c2v_weights.shape[1]
        self.char_embedding = nn.Embedding.from_pretrained(
            torch.FloatTensor(c2v_weights), freeze=True)
        self.char_embedding.max_norm = embedding_norm
        self.feats = 20  # for the output channels of the conv layers

        # NOTE(review): this replaces the LSTM created above; the first one is
        # discarded. The extra 50 inputs presumably match the concatenated
        # fc1/fc2/fc3 outputs (10 + 20 + 20) - confirm against forward().
        self.recurrent = nn.LSTM(self.embedding_dim + 50, self.hidden_dim // (1 if not self.bidirectional else 2), batch_first=True, bidirectional=self.bidirectional)

        # conv layers for single character, pairs of characters, 3x characters
        # Each kernel spans the full character-embedding width; the pool height
        # shrinks by one per extra character in the n-gram.
        self.ngram1 = nn.Sequential(
            nn.Conv2d(1, self.feats * 1, kernel_size=(1, self.char_embedding_dim), stride=(1, self.char_embedding_dim), padding=0),
            nn.Dropout2d(p=self.drop_rate),
            nn.MaxPool2d(kernel_size=(self.pad_word_length, 1)),
            nn.Tanh(),
        )

        self.ngram2 = nn.Sequential(
            nn.Conv2d(1, self.feats * 2, kernel_size=(2, self.char_embedding_dim), stride=(1, self.char_embedding_dim), padding=0),
            nn.Dropout2d(p=self.drop_rate),
            nn.MaxPool2d(kernel_size=(self.pad_word_length - 1, 1)),
            nn.Tanh(),
        )

        self.ngram3 = nn.Sequential(
            nn.Conv2d(1, self.feats * 3, kernel_size=(3, self.char_embedding_dim), stride=(1, self.char_embedding_dim), padding=0),
            nn.Dropout2d(p=self.drop_rate),
            nn.MaxPool2d(kernel_size=(self.pad_word_length - 2, 1)),
            nn.Tanh(),
        )

        # seq layers to elaborate on the output of conv layers
        self.fc1 = nn.Sequential(nn.Linear(self.feats, 10), )
        self.fc2 = nn.Sequential(nn.Linear(self.feats * 2, 20), )
        self.fc3 = nn.Sequential(nn.Linear(self.feats * 3, 20), )
def __init__(
    self,
    input_nc,
    output_nc,
    ngf=64,
    k_size=3,
    n_downsampling=8,
    norm_layer=nn.BatchNorm2d,
    padding_type="reflect",
    opt=None,
):
    """Build ``self.encoder`` and ``self.decoder`` as two ``nn.Sequential`` stacks.

    Every channel count is capped at ``opt.mc``.

    Args:
        input_nc: channels of the input fed to the encoder stem.
        output_nc: channels produced by the final decoder convolution.
        ngf: base channel count before capping.
        k_size: kernel size of the strided (transposed) convolutions; the
            transposed convolutions use ``output_padding`` 0 when it is 4 and
            1 otherwise.
        n_downsampling: number of resolution levels used for channel scaling.
        norm_layer: normalization layer factory taking a channel count.
        padding_type: forwarded to every ``ResnetBlock``.
        opt: options object. Despite the ``None`` default it is required: the
            attributes ``mc``, ``start_r``, ``spatio_size``, ``feat_dim`` and
            ``use_segmentation_model`` are read here, and it is also forwarded
            to ``ResnetBlock``.
    """
    super(GlobalGenerator_DCDCv2, self).__init__()

    activation = nn.ReLU(True)

    def cap(channels):
        # Clamp a channel count to the configured maximum.
        return min(channels, opt.mc)

    def res_block(channels):
        # One residual block at a fixed channel count.
        return ResnetBlock(
            channels,
            padding_type=padding_type,
            activation=activation,
            norm_layer=norm_layer,
            opt=opt,
        )

    def down_stage(mult):
        # Stride-2 conv doubling the (capped) channels, followed by norm and activation.
        return [
            nn.Conv2d(cap(ngf * mult), cap(ngf * mult * 2), kernel_size=k_size, stride=2, padding=1),
            norm_layer(cap(ngf * mult * 2)),
            activation,
        ]

    def up_stage(mult):
        # Stride-2 transposed conv halving the (capped) channels, followed by norm and activation.
        return [
            nn.ConvTranspose2d(
                cap(ngf * mult),
                cap(int(ngf * mult / 2)),
                kernel_size=k_size,
                stride=2,
                padding=1,
                output_padding=o_pad,
            ),
            norm_layer(cap(int(ngf * mult / 2))),
            activation,
        ]

    # ---- encoder ----
    model = [
        nn.ReflectionPad2d(3),
        nn.Conv2d(input_nc, cap(ngf), kernel_size=7, padding=0),
        # Fix: the norm must match the conv's capped output width; the former
        # norm_layer(ngf) mismatched the feature map whenever opt.mc < ngf.
        norm_layer(cap(ngf)),
        activation,
    ]

    ### downsample
    for i in range(opt.start_r):
        model += down_stage(2 ** i)
    # From level start_r on, every down stage is followed by two residual blocks.
    for i in range(opt.start_r, n_downsampling - 1):
        mult = 2 ** i
        model += down_stage(mult)
        model += [res_block(cap(ngf * mult * 2))]
        model += [res_block(cap(ngf * mult * 2))]

    mult = 2 ** (n_downsampling - 1)
    if opt.spatio_size == 32:
        model += down_stage(mult)
    if opt.spatio_size == 64:
        model += [res_block(cap(ngf * mult * 2))]
    model += [res_block(cap(ngf * mult * 2))]
    if opt.feat_dim > 0:
        # Optional 1x1 projection to the feature dimension.
        model += [nn.Conv2d(cap(ngf * mult * 2), opt.feat_dim, 1, 1)]
    self.encoder = nn.Sequential(*model)

    # ---- decoder ----
    model = []
    if opt.feat_dim > 0:
        # Inverse of the encoder's 1x1 projection.
        model += [nn.Conv2d(opt.feat_dim, cap(ngf * mult * 2), 1, 1)]

    o_pad = 0 if k_size == 4 else 1
    mult = 2 ** n_downsampling
    model += [res_block(cap(ngf * mult))]

    if opt.spatio_size == 32:
        model += up_stage(mult)
    if opt.spatio_size == 64:
        model += [res_block(cap(ngf * mult))]

    # Mirror of the encoder: two residual blocks before each up stage.
    for i in range(1, n_downsampling - opt.start_r):
        mult = 2 ** (n_downsampling - i)
        model += [res_block(cap(ngf * mult))]
        model += [res_block(cap(ngf * mult))]
        model += up_stage(mult)
    for i in range(n_downsampling - opt.start_r, n_downsampling):
        model += up_stage(2 ** (n_downsampling - i))

    # Output head; tanh is skipped when opt.use_segmentation_model is set.
    model += [
        nn.ReflectionPad2d(3),
        nn.Conv2d(cap(ngf), output_nc, kernel_size=7, padding=0),
    ]
    if not opt.use_segmentation_model:
        model += [nn.Tanh()]
    self.decoder = nn.Sequential(*model)
def __init__(self, channels=3):
    """Create seven ``UNetDown`` stages, six ``UNetUp`` stages and the output head.

    Args:
        channels: input channels of the first down stage and output channels
            of the final transposed convolution.
    """
    super(Generator, self).__init__()

    # (in_channels, out_channels, extra keyword arguments) for down1..down7
    down_specs = (
        (channels, 64, {"normalize": False}),
        (64, 128, {}),
        (128, 256, {}),
        (256, 512, {"dropout": 0.5}),
        (512, 512, {"dropout": 0.5}),
        (512, 512, {"dropout": 0.5}),
        (512, 512, {"dropout": 0.5, "normalize": False}),
    )
    for index, (in_size, out_size, extra) in enumerate(down_specs, start=1):
        setattr(self, "down{}".format(index), UNetDown(in_size, out_size, **extra))

    # (in_channels, out_channels, extra keyword arguments) for up1..up6
    up_specs = (
        (512, 512, {"dropout": 0.5}),
        (1024, 512, {"dropout": 0.5}),
        (1024, 512, {"dropout": 0.5}),
        (1024, 256, {}),
        (512, 128, {}),
        (256, 64, {}),
    )
    for index, (in_size, out_size, extra) in enumerate(up_specs, start=1):
        setattr(self, "up{}".format(index), UNetUp(in_size, out_size, **extra))

    # Output head: stride-2 transposed conv back to `channels`, then tanh.
    head_conv = nn.ConvTranspose2d(128, channels, 4, stride=2, padding=1)
    self.final = nn.Sequential(head_conv, nn.Tanh())
def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6,
             gpu_ids=None, padding_type='reflect'):
    """Build two parallel encoder branches, a fusion conv and the shared pre/post stacks.

    Produces ``model1`` and ``model2`` (identically shaped branches),
    ``model_fusion`` (conv taking twice the branch width), ``model_pre`` and
    ``model_post`` (residual blocks, up-sampling and the tanh output head).

    Args:
        input_nc: input channels of each branch.
        output_nc: channels of the final output convolution.
        ngf: base channel count.
        norm_layer: normalization factory, optionally a ``functools.partial``.
        use_dropout: forwarded to every ``ResnetBlock``.
        n_blocks: upper bound of the residual-block index range; blocks with
            index 7 and above go to ``model_post``, so the default of 6 adds
            none there.
        gpu_ids: list stored on the instance; ``None`` (the default) means a
            fresh empty list.
        padding_type: forwarded to every ``ResnetBlock``.

    Raises:
        AssertionError: if ``n_blocks`` is negative.
    """
    assert(n_blocks >= 0)
    super(ResnetGeneratorMM, self).__init__()
    self.input_nc = input_nc
    self.output_nc = output_nc
    self.ngf = ngf
    # Fix: the old `gpu_ids=[]` default was a single list object shared by every
    # instance built without the argument; create a new list per instance instead.
    self.gpu_ids = [] if gpu_ids is None else gpu_ids

    # Convolution bias is enabled only when the norm layer is InstanceNorm2d.
    if isinstance(norm_layer, functools.partial):
        use_bias = norm_layer.func == nn.InstanceNorm2d
    else:
        use_bias = norm_layer == nn.InstanceNorm2d

    # Stems of the two branches (built in the same order as before so that
    # parameter initialisation consumes random numbers identically).
    model1 = [nn.ReflectionPad2d(3),
              nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
              norm_layer(ngf),
              nn.ReLU(True)]
    model2 = [nn.ReflectionPad2d(3),
              nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
              norm_layer(ngf),
              nn.ReLU(True)]

    # Two stride-2 stages per branch, doubling the channels each time.
    n_downsampling = 2
    for i in range(n_downsampling):
        mult = 2**i
        model1 += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=use_bias),
                   norm_layer(ngf * mult * 2),
                   nn.ReLU(True)]
        model2 += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=use_bias),
                   norm_layer(ngf * mult * 2),
                   nn.ReLU(True)]

    # Residual-block index boundaries: [0, 4) per branch, [4, 7) in model_pre,
    # [7, n_blocks) in model_post.
    pre_f_blocks = 4
    pre_l_blocks = 7
    mult = 2**n_downsampling

    for _ in range(pre_f_blocks):
        model1 += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer,
                               use_dropout=use_dropout, use_bias=use_bias)]
        model2 += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer,
                               use_dropout=use_dropout, use_bias=use_bias)]

    model_pre = []
    model_post = []
    for _ in range(pre_f_blocks, pre_l_blocks):
        model_pre += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer,
                                  use_dropout=use_dropout, use_bias=use_bias)]
    for _ in range(pre_l_blocks, n_blocks):
        model_post += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer,
                                   use_dropout=use_dropout, use_bias=use_bias)]

    # Fusion conv takes twice the branch width and reduces it back; it uses
    # the conv's own zero padding regardless of padding_type.
    model_fusion = [nn.Conv2d(ngf * mult * 2, ngf * mult, kernel_size=3, padding=1, bias=use_bias),
                    norm_layer(ngf * mult),
                    nn.ReLU(True)]

    # Up-sampling back to ngf channels, then the 7x7 output conv and tanh.
    for i in range(n_downsampling):
        mult = 2**(n_downsampling - i)
        model_post += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
                                          kernel_size=3, stride=2,
                                          padding=1, output_padding=1,
                                          bias=use_bias),
                       norm_layer(int(ngf * mult / 2)),
                       nn.ReLU(True)]
    model_post += [nn.ReflectionPad2d(3)]
    model_post += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
    model_post += [nn.Tanh()]

    self.model1 = nn.Sequential(*model1)
    self.model2 = nn.Sequential(*model2)
    self.model_pre = nn.Sequential(*model_pre)
    self.model_post = nn.Sequential(*model_post)
    self.model_fusion = nn.Sequential(*model_fusion)
def generate(self, trg_input, dec_hidden, enc_context, src_map=None, oov_list=None, max_len=1, return_attention=False):
    '''
    Run the decoder for max_len steps, feeding the top-1 word of each step back in
    as the next input, and return the log-probabilities of every step.
    :param trg_input: word indexes of target texts to start from (usually zeros indicating BOS <s>)
    :param dec_hidden: hidden states for decoder RNN to start with
    :param enc_context: context encoding vectors, (batch_size, src_len, context_dim)
    :param src_map: passed to merge_copy_probs; required if it's copy model
    :param oov_list: passed to merge_copy_probs; required if it's copy model
    :param max_len: number of decoding steps to run
    :param return_attention: also return the attention weights?
    :return: (log_probs, dec_hidden), plus attn_weights when return_attention is set
             (a tuple (attn_weights, copy_weights) if the model has a 'copy_model' attribute)
    '''
    # assert isinstance(input_list, list) or isinstance(input_list, tuple)
    # assert isinstance(input_list[0], list) or isinstance(input_list[0], tuple)
    batch_size = trg_input.size(0)
    src_len = enc_context.size(1)
    trg_len = trg_input.size(1)  # NOTE(review): not used below
    context_dim = enc_context.size(2)
    trg_hidden_dim = self.trg_hidden_dim

    # per-step results, concatenated after the loop
    attn_weights = []
    copy_weights = []
    log_probs = []

    # enc_context has to be reshaped before dot attention (batch_size, src_len, context_dim) -> (batch_size, src_len, trg_hidden_dim)
    enc_context = nn.Tanh()(self.encoder2decoder_hidden(enc_context.contiguous().view(-1, context_dim))).view(batch_size, src_len, trg_hidden_dim)

    for i in range(max_len):
        # print('TRG_INPUT: %s' % str(trg_input.size()))
        # print(trg_input.data.numpy())
        trg_emb = self.embedding(trg_input)  # (batch_size, trg_len = 1, emb_dim)
        trg_emb = trg_emb.permute(1, 0, 2)  # (trg_len, batch_size, embed_dim)

        # (seq_len, batch_size, hidden_size * num_directions)
        decoder_output, dec_hidden = self.decoder(
            trg_emb, dec_hidden
        )

        # Get the h_tilde (hidden after attention) and attention weights
        h_tilde, attn_weight, attn_logit = self.attention_layer(decoder_output.permute(1, 0, 2), enc_context)

        # compute the output decode_logit and read-out as probs: p_x = Softmax(W_s * h_tilde)
        # (batch_size, trg_len, trg_hidden_size) -> (batch_size, 1, vocab_size)
        decoder_logit = self.decoder2vocab(h_tilde.view(-1, trg_hidden_dim))

        # The presence of a 'copy_model' attribute selects between the plain
        # softmax read-out and the copy-augmented one.
        if not hasattr(self, 'copy_model'):
            decoder_log_prob = torch.nn.functional.log_softmax(decoder_logit, dim=-1).view(batch_size, 1, self.vocab_size)
        else:
            decoder_logit = decoder_logit.view(batch_size, 1, self.vocab_size)
            # copy_weights and copy_logits is (batch_size, trg_len, src_len)
            if self.copy_attention_layer:
                _, copy_weight, copy_logit = self.copy_attention_layer(decoder_output.permute(1, 0, 2), enc_context)
            else:
                # no dedicated copy attention: reuse the regular attention
                copy_weight = attn_weight
                copy_logit = attn_logit
            copy_weights.append(copy_weight.permute(1, 0, 2))  # (1, batch_size, src_len)
            # merge the generative and copying probs (batch_size, 1, vocab_size + max_unk_word)
            decoder_log_prob = self.merge_copy_probs(decoder_logit, copy_logit, src_map, oov_list)

        # Prepare for the next iteration: the top-1 index becomes the next input
        top_1_v, top_1_idx = decoder_log_prob.data.topk(1, dim=-1)  # top_1_v is not used below
        trg_input = Variable(top_1_idx.squeeze(2))  # (batch_size, 1)
        # trg_input = Variable(top_1_idx).cuda() if torch.cuda.is_available() else Variable(top_1_idx)

        # append to return lists
        log_probs.append(decoder_log_prob.permute(1, 0, 2))  # (1, batch_size, vocab_size)
        attn_weights.append(attn_weight.permute(1, 0, 2))  # (1, batch_size, src_len)

    # permute to trg_len first, otherwise the cat operation would mess up things
    log_probs = torch.cat(log_probs, 0).permute(1, 0, 2)  # (batch_size, max_len, vocab_size), wider for the copy model
    attn_weights = torch.cat(attn_weights, 0).permute(1, 0, 2)  # (batch_size, max_len, src_seq_len)

    # Only return the hidden vectors of the last time step.
    #   tuple of (num_layers * num_directions, batch_size, trg_hidden_dim)=(1, batch_size, trg_hidden_dim)

    # Return final outputs, hidden states, and attention weights (for visualization)
    if return_attention:
        if not hasattr(self, 'copy_model'):
            return log_probs, dec_hidden, attn_weights
        else:
            copy_weights = torch.cat(copy_weights, 0).permute(1, 0, 2)  # (batch_size, max_len, src_seq_len)
            return log_probs, dec_hidden, (attn_weights, copy_weights)
    else:
        return log_probs, dec_hidden