def __init__(self, inference_network_type, src_embeddings, tgt_embeddings,
             rnn_type, src_layers, tgt_layers, rnn_size, dropout,
             attn_type="mlp", dist_type="none", norm_alpha=1.0, norm_beta=1.0,
             normalization="bn"):
    """Build the target-side encoder and the attention/distribution layers.

    Args:
        inference_network_type: 'embedding_only', 'brnn' or 'rnn'; selects
            what ``self.tgt_encoder`` is. Any other value leaves
            ``self.tgt_encoder`` unset.
        src_embeddings: Only referenced by commented-out code here.
        tgt_embeddings: Used directly as the encoder ('embedding_only') or
            passed to ``RNNEncoder``.
        rnn_type: Forwarded to ``RNNEncoder``.
        src_layers: Only referenced by commented-out code here.
        tgt_layers: Forwarded to ``RNNEncoder``.
        rnn_size: Hidden width; sizes every linear layer created below.
        dropout: Forwarded to ``RNNEncoder``.
        attn_type: 'general', 'mlpadd', 'mlp' or 'dotmlp'; selects which
            scoring layers are created.
        dist_type: 'none' or 'normal'; 'normal' adds the extra layers
            (``W_sigma``, ``v_sigma``, the MLP heads, the norm layers).
        norm_alpha: Accepted but not read in this constructor.
        norm_beta: Accepted but not read in this constructor.
        normalization: 'bn', 'ln' or 'lnsigma'; selects the normalisation
            modules/parameters created when ``dist_type == "normal"``.

    NOTE(review): this block was recovered from whitespace-collapsed text;
    the nesting of the ``if``/``elif`` bodies is a reconstruction and
    should be confirmed against the original file.
    """
    super(InferenceNetwork, self).__init__()
    self.attn_type = attn_type
    self.dist_type = dist_type
    self.inference_network_type = inference_network_type
    self.normalization = normalization

    # trainable alpha and beta (scalar scale initialised to 1, shift to 0)
    self.mean_norm_alpha = nn.Parameter(torch.FloatTensor([1.]))
    self.mean_norm_beta = nn.Parameter(torch.FloatTensor([0.]))
    self.std_norm_alpha = nn.Parameter(torch.FloatTensor([1.]))
    self.std_norm_beta = nn.Parameter(torch.FloatTensor([0.]))

    # Value stored for masking: -inf without a distribution, 1e-2 otherwise.
    if dist_type == "none":
        self.mask_val = float("-inf")
    else:
        self.mask_val = 1e-2

    if inference_network_type == 'embedding_only':
        #self.src_encoder = src_embeddings
        self.tgt_encoder = tgt_embeddings
    elif inference_network_type == 'brnn':
        #self.src_encoder = RNNEncoder(rnn_type, True, src_layers, rnn_size,
        #                              dropout, src_embeddings, False)
        # Second positional argument is True here and False for 'rnn' --
        # presumably the bidirectional flag; confirm against RNNEncoder.
        self.tgt_encoder = RNNEncoder(rnn_type, True, tgt_layers, rnn_size,
                                      dropout, tgt_embeddings, False)
    elif inference_network_type == 'rnn':
        #self.src_encoder = RNNEncoder(rnn_type, False, src_layers, rnn_size,
        #                              dropout, src_embeddings, False)
        self.tgt_encoder = RNNEncoder(rnn_type, False, tgt_layers, rnn_size,
                                      dropout, tgt_embeddings, False)

    self.W = torch.nn.Linear(rnn_size, rnn_size)
    self.rnn_size = rnn_size

    # to parametrize log normal distribution
    H = rnn_size
    if self.attn_type == "general":
        self.linear_in = nn.Linear(H, H, bias=False)
        if self.dist_type == "normal":
            # W_mu is the same module object as linear_in (shared weights).
            self.W_mu = self.linear_in
            self.W_sigma = nn.Linear(H, H, bias=False)
    elif self.attn_type == "mlpadd":
        self.linear_context = nn.Linear(H, H, bias=False)
        self.linear_query = nn.Linear(H, H, bias=True)
        self.v = nn.Linear(H, 1, bias=False)
        if self.dist_type == "normal":
            # v_mu is the same module object as v (shared weights).
            self.v_mu = self.v
            self.v_sigma = nn.Linear(H, 1, bias=False)
    elif self.attn_type == "mlp":
        if self.dist_type == "normal":
            # TODO(demi): make the hidden width configurable (the old note
            # said 100, but the value hard-coded below is 500)
            self.linear_1 = nn.Linear(rnn_size + rnn_size, 500)
            self.linear_2 = nn.Linear(500, 500)
            self.mean_out = nn.Linear(500, 1)
            self.std_out = nn.Linear(500, 1)
            self.softplus = nn.Softplus()
    elif self.attn_type == "dotmlp":
        self.linear_in = nn.Linear(H, H, bias=False)
        if self.dist_type == "normal":
            self.W_mu = self.linear_in
            pass # unfinished

    if self.normalization == "bn":
        if self.dist_type == "normal":
            self.bn_mu = nn.BatchNorm1d(1, affine=True)
            self.bn_std = nn.BatchNorm1d(1, affine=True)
    elif self.normalization == "ln":
        if self.dist_type == "normal":
            # Re-creates the four scalars defined at the top of the
            # constructor with the same initial values.
            self.mean_norm_alpha = nn.Parameter(torch.Tensor([1]))
            self.std_norm_alpha = nn.Parameter(torch.Tensor([1]))
            self.mean_norm_beta = nn.Parameter(torch.Tensor([0]))
            self.std_norm_beta = nn.Parameter(torch.Tensor([0]))
    elif self.normalization == "lnsigma":
        if self.dist_type == "normal":
            # Only the two shift scalars are re-created in this mode.
            self.mean_norm_beta = nn.Parameter(torch.Tensor([0]))
            self.std_norm_beta = nn.Parameter(torch.Tensor([0]))
class Net(torch.nn.Module):
    """Two-layer regressor: Linear -> ReLU -> Linear with a single output."""

    def __init__(self, n_feature, n_hidden):
        """Create the layers.

        Args:
            n_feature: Input width of the hidden layer.
            n_hidden: Output width of the hidden layer.
        """
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # hidden layer
        self.predict = torch.nn.Linear(n_hidden, 1)   # output layer

    def forward(self, x):
        """Return ``predict(relu(hidden(x)))``."""
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return x


# Bare expression: has no effect when run as a script (notebook residue).
X_train.shape
N = 1236
p = 3
softplus = nn.Softplus()
# NOTE(review): this binding is overwritten by the next statement; `model`
# must still exist or the line raises NameError.
regression_model = model #Net(p,p)
regression_model = RegressionModel(2)
loss_fn = torch.nn.MSELoss()#MSELoss(reduction='sum')
optim = torch.optim.SGD(regression_model.parameters(), lr=0.01)#torch.optim.Adam(regression_model.parameters(), lr=0.1)
num_iterations = 200000
# NOTE(review): only the forward pass, the loss and zero_grad() are visible
# in this excerpt; no backward()/step() call appears in the loop body here.
for j in range(num_iterations):
    # run the model forward on the data
    y_pred = regression_model(x_data).squeeze(-1)
    # calculate the mse loss
    loss = loss_fn(y_pred, y_data)
    # initialize gradients to zero
    optim.zero_grad()
def __init__(self, block, num_blocks, num_classes=10, activation='ReLU',
             softplus_beta=1, out_dim=10, use_BN=False, along=False):
    """Build the residual backbone, the three side branches and the dense head.

    Args:
        block: Residual block class; ``block.expansion`` scales the widths
            of the final norm layer, ``linear`` and ``dense``.
        num_blocks: Sequence of four counts, one per ``_make_layer`` stage.
        num_classes: Output width of ``self.linear``.
        activation: 'ReLU' or 'Softplus'; selects ``self.relu``. Any other
            value leaves ``self.relu`` unset.
        softplus_beta: ``beta`` passed to ``nn.Softplus``.
        out_dim: Output width of ``self.dense``.
        use_BN: If true, ``self.dense`` contains a ``BatchNorm1d`` layer.
        along: Stored on the instance; not otherwise used here.

    NOTE(review): ``normal_func``, ``track_running_stats`` and ``affine``
    are not parameters of this method; they must be provided by an
    enclosing scope that is not visible in this excerpt.
    """
    super(PreActResNet_twobranch_DenseV1Multi, self).__init__()
    self.in_planes = 64
    self.activation = activation
    self.softplus_beta = softplus_beta
    self.along = along
    self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
    self.bn = normal_func(512 * block.expansion, track_running_stats=track_running_stats, affine=affine)
    self.linear = nn.Linear(512 * block.expansion, num_classes)

    ### Multi
    # Side branches: stride-2 conv stacks mapping 64 / 128 / 256 input
    # channels to 512 channels (three, two and one conv stage respectively).
    self.conv_layer1 = nn.Sequential(
        nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU())
        #nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0))

    self.conv_layer2 = nn.Sequential(
        nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU())
        #nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0))

    self.conv_layer3 = nn.Sequential(
        nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU())
        #nn.Conv2d(512, 512, kernel_size=1, stride=1, padding=0))

    # Dense head: input width is 4 * 512 * block.expansion.
    if use_BN:
        self.dense = nn.Sequential(
            nn.Linear(512 * block.expansion * 4, 512 * block.expansion),
            nn.BatchNorm1d(512 * block.expansion),
            nn.ReLU(),
            nn.Linear(512 * block.expansion, out_dim))
        print('with BN')
    else:
        self.dense = nn.Sequential(
            nn.Linear(512 * block.expansion * 4, 512 * block.expansion),
            nn.ReLU(),
            nn.Linear(512 * block.expansion, out_dim))

    # The attribute is called `relu` in both cases, even for Softplus.
    if activation == 'ReLU':
        # self.relu = nn.ReLU(inplace=True)
        self.relu = nn.ReLU()
        print('ReLU')
    elif activation == 'Softplus':
        self.relu = nn.Softplus(beta=softplus_beta, threshold=20)
        print('Softplus')
    print('Use activation of ' + activation)
def trainModelNoPrint(r_model):
    """Return the mean per-class F-score of ``r_model`` on the global ``X``/``Y``.

    Despite its name this function does not train anything: it runs one
    forward pass on the module-level tensor ``X``, takes the arg-max over
    axis 1 as the predicted class and compares it with the module-level
    target tensor ``Y``.

    Args:
        r_model: Callable (e.g. an ``nn.Module``) mapping ``X`` to a
            2-D tensor of per-class scores.

    Returns:
        The unweighted mean of the per-class F-scores (element 2 of the
        tuple returned by ``precision_recall_fscore_support``).
    """
    predicted = np.argmax(r_model(X).detach().numpy(), axis=1)
    target = Y.numpy()
    # scikit-learn's signature is (y_true, y_pred). The original call passed
    # the arguments the other way round, which swaps precision with recall
    # and reports the wrong support; the F-score itself is symmetric, so the
    # value returned here is unchanged.
    results = precision_recall_fscore_support(target, predicted)
    return np.mean(results[2])


# define our possible activation functions
possibleActivations = [
    nn.LeakyReLU(),
    nn.Sigmoid(),
    nn.Tanh(),
    nn.ReLU(),
    nn.PReLU(),
    nn.SELU(),
    nn.Softplus()
]

# An example of a representation of our neural network
nnHypers = dict()
nnHypers['lr'] = 0.08
nnHypers['neurons'] = np.array([25, 30])
nnHypers['dropout'] = np.array([0, 0.25])
nnHypers['bias'] = True
nnHypers['epochs'] = 2500
nnHypers['inputsize'] = train_input.shape[1]
# presumably an index into possibleActivations (6 -> nn.Softplus) -- confirm
nnHypers['activation'] = 6
nnHypers['outputsize'] = 4
nnHypers['lasthidden'] = 30
def main(opts):
    """Train the style generator/discriminator pair and checkpoint every epoch.

    The discriminator minimises ``softplus(D(G(z))) + softplus(-D(x))``
    plus optional R1/R2 penalties; the generator minimises
    ``softplus(-D(G(z)))``.

    After every epoch the function:
      * saves a sample grid generated from a fixed latent batch to
        ``<opts.det>/images/<epoch>.png``;
      * saves the model weights and loss history to
        ``<opts.det>/models/latest.pth``.

    Args:
        opts: Options object. Attributes read here: ``path``,
            ``batch_size``, ``device``, ``resume``, ``epoch``,
            ``r1_gamma``, ``r2_gamma`` and ``det``.
    """
    # Create the data loader
    loader = sunnerData.DataLoader(
        sunnerData.ImageDataset(
            root=[[opts.path]],
            transform=transforms.Compose([
                sunnertransforms.Resize((1024, 1024)),
                sunnertransforms.ToTensor(),
                sunnertransforms.ToFloat(),
                sunnertransforms.Transpose(sunnertransforms.BHWC2BCHW),
                sunnertransforms.Normalize(),
            ])),
        batch_size=opts.batch_size,
        shuffle=True,
    )

    # Create the model
    start_epoch = 0
    G = StyleGenerator().to(opts.device)
    D = StyleDiscriminator().to(opts.device)

    # Load the pre-trained weight
    if os.path.exists(opts.resume):
        INFO("Load the pre-trained weight!")
        # map_location lets a checkpoint written on a GPU machine be
        # restored on whatever device this run uses (including CPU-only).
        state = torch.load(opts.resume, map_location=opts.device)
        G.load_state_dict(state['G'])
        D.load_state_dict(state['D'])
        start_epoch = state['start_epoch']
    else:
        INFO("Pre-trained weight cannot load successfully, train from scratch!")

    # Create the criterion, optimizer and scheduler
    # NOTE(review): optimizer and scheduler state are not checkpointed, so
    # the learning-rate decay restarts from 1e-4 after a resume.
    optim_D = optim.Adam(D.parameters(), lr=0.0001, betas=(0.5, 0.999))
    optim_G = optim.Adam(G.parameters(), lr=0.0001, betas=(0.5, 0.999))
    scheduler_D = optim.lr_scheduler.ExponentialLR(optim_D, gamma=0.99)
    scheduler_G = optim.lr_scheduler.ExponentialLR(optim_G, gamma=0.99)

    # Train
    fix_z = torch.randn([opts.batch_size, 512]).to(opts.device)
    softplus = nn.Softplus()
    # The leading 0.0 is a placeholder that is stripped before plotting.
    Loss_D_list = [0.0]
    Loss_G_list = [0.0]
    for ep in range(start_epoch, opts.epoch):
        bar = tqdm(loader)
        loss_D_list = []
        loss_G_list = []
        for i, (real_img,) in enumerate(bar):
            # =======================================================================================================
            #   (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            # =======================================================================================================
            # Compute adversarial loss toward discriminator
            D.zero_grad()
            real_img = real_img.to(opts.device)
            real_logit = D(real_img)
            fake_img = G(torch.randn([real_img.size(0), 512]).to(opts.device))
            # detach(): the discriminator step must not back-propagate into G
            fake_logit = D(fake_img.detach())
            d_loss = softplus(fake_logit).mean()
            d_loss = d_loss + softplus(-real_logit).mean()

            if opts.r1_gamma != 0.0:
                r1_penalty = R1Penalty(real_img.detach(), D)
                d_loss = d_loss + r1_penalty * (opts.r1_gamma * 0.5)

            if opts.r2_gamma != 0.0:
                r2_penalty = R2Penalty(fake_img.detach(), D)
                d_loss = d_loss + r2_penalty * (opts.r2_gamma * 0.5)

            loss_D_list.append(d_loss.item())

            # Update discriminator
            d_loss.backward()
            optim_D.step()

            # =======================================================================================================
            #   (2) Update G network: maximize log(D(G(z)))
            # =======================================================================================================
            G.zero_grad()
            # Gradients that reach D here are discarded by D.zero_grad() at
            # the start of the next iteration.
            fake_logit = D(fake_img)
            g_loss = softplus(-fake_logit).mean()
            loss_G_list.append(g_loss.item())

            # Update generator
            g_loss.backward()
            optim_G.step()

            # Output training stats
            bar.set_description("Epoch {} [{}, {}] [G]: {} [D]: {}".format(ep, i+1, len(loader), loss_G_list[-1], loss_D_list[-1]))

        # Save the result
        Loss_G_list.append(np.mean(loss_G_list))
        Loss_D_list.append(np.mean(loss_D_list))

        # Check how the generator is doing by saving G's output on fixed_noise
        with torch.no_grad():
            fake_img = G(fix_z).detach().cpu()
            save_image(fake_img, os.path.join(opts.det, 'images', str(ep) + '.png'), nrow=4, normalize=True)

        # Save model
        state = {
            'G': G.state_dict(),
            'D': D.state_dict(),
            'Loss_G': Loss_G_list,
            'Loss_D': Loss_D_list,
            # Epoch `ep` is complete at this point, so a resumed run must
            # continue with the next one; storing `ep` made every resume
            # repeat the finished epoch and overwrite its sample image.
            'start_epoch': ep + 1,
        }
        torch.save(state, os.path.join(opts.det, 'models', 'latest.pth'))

        scheduler_D.step()
        scheduler_G.step()

    # Plot the total loss curve
    Loss_D_list = Loss_D_list[1:]
    Loss_G_list = Loss_G_list[1:]
    plotLossCurve(opts, Loss_D_list, Loss_G_list)
def __init__(self, n_data=100, T = 10, factor_dim=10, z_dim=5,
             u_dim=2, zF_dim=5, n_class=5,
             sigma_obs = 1e-2, image_dims = None,
             maxima_locs = None, voxel_locations = None,
             use_cuda=False):
    """Create the sub-modules, priors and variational parameters.

    Args:
        n_data: Number of data items; leading size of the per-item
            variational tensors (``q_c``, ``q_z_0_*``, ``q_z_*``, ``q_w_*``).
        T: Second dimension of ``q_z_*`` and ``q_w_*``.
        factor_dim: Number of factors; passed to ``SpatialFactors`` and
            used to size ``q_w_*`` and the factor-location tensors.
        z_dim: Last dimension of the ``z`` tensors.
        u_dim: Not read anywhere in this constructor.
        zF_dim: Size of the ``z_F`` tensors; passed to ``SpatialFactors``.
        n_class: Number of classes for ``p_c``, ``q_c`` and ``z_0_*``.
        sigma_obs: Stored as ``self.sig_obs`` (observation noise).
        image_dims: Stored as ``self.im_dims`` and converted with
            ``torch.FloatTensor(image_dims)`` below, so the default
            ``None`` is not usable on those code paths.
        maxima_locs: Optional sequence of candidate factor locations;
            sub-sampled with a stride of ``len(maxima_locs)//factor_dim``.
        voxel_locations: Stored as ``self.voxl_locs``.
        use_cuda: If true, ``self.cuda()`` is called at the end.

    NOTE(review): this block was recovered from whitespace-collapsed text;
    statement nesting is a reconstruction (see the note on
    ``q_F_loc_mu.data`` below).

    NOTE(review): several attributes described as "trainable" in the
    original notes (``z_0_mu``, ``z_0_sig``, ``q_c``, ``q_F_loc_sig``,
    ``q_F_scale_sig``) are plain tensors here, not ``nn.Parameter`` or
    registered buffers, so ``parameters()`` and ``self.cuda()`` will not
    include them -- confirm this is intended.
    """
    super(DMFA, self).__init__()

    self.im_dims = image_dims
    # observation noise
    self.sig_obs = sigma_obs
    # 3D coordinates of voxels: # of voxels times 3
    self.voxl_locs = voxel_locations
    # instantiate pytorch modules used in the model and guide below
    self.temp = TemporalFactors()
    self.trans = GatedTransition()
    self.spat = SpatialFactors(factor_dim, zF_dim)

    # uniform prior p(c)
    """ # define uniform pior p(c) """
    self.p_c = torch.ones(n_class) / n_class

    # standard Gaussian prior p(z_F)
    """ # define Gaussian pior p(z_F) """
    self.p_z_F_mu = torch.zeros(zF_dim)
    self.p_z_F_sig = torch.ones(zF_dim)

    """ # define trainable parameters that help define # the probability distribution p(z_0|c) """
    self.z_0_mu = torch.zeros(n_class, z_dim)
    self.z_0_sig = torch.ones(n_class, z_dim)

    """ # define trainable parameters that help define # the probability distributions for inference # q(c), q(z_0)...q(z_T), q(w_1)...q(w_T), q(z_F), q(F_loc), q(F_scale) """
    self.softmax = nn.Softmax(dim = 1)
    self.softplus = nn.Softplus()

    self.q_c = torch.ones(n_data, n_class) / n_class

    # Every `init_sig` below has the form log(exp(v) - 1), i.e. the value
    # whose softplus equals v. Presumably the *_sig tensors are passed
    # through self.softplus where they are used -- confirm in model/guide.
    self.q_z_0_mu = nn.Parameter(torch.rand(n_data, z_dim)- 1/2)
    init_sig = ((torch.ones(n_data, z_dim) / (2 * n_class) * 0.1).exp() - 1).log()
    self.q_z_0_sig = nn.Parameter(init_sig)

    self.q_z_mu = nn.Parameter(torch.rand(n_data, T, z_dim) - 1/2)
    init_sig = ((torch.ones(n_data, T, z_dim) / (2 * n_class) * 0.1).exp() - 1).log()
    self.q_z_sig = nn.Parameter(init_sig)

    self.q_w_mu = nn.Parameter(torch.rand(n_data, T, factor_dim)- 1/2)
    init_sig = ((torch.ones(n_data, T, factor_dim) / (2 * n_class) * 0.1).exp() - 1).log()
    self.q_w_sig = nn.Parameter(init_sig)

    self.q_z_F_mu = nn.Parameter(torch.zeros(zF_dim))
    init_sig = (torch.ones(zF_dim).exp() - 1).log()
    self.q_z_F_sig = nn.Parameter(init_sig)

    if maxima_locs is not None:
        # take every (len // factor_dim)-th location, at most factor_dim of them
        self.q_F_loc_mu = nn.Parameter(torch.FloatTensor(maxima_locs[::len(maxima_locs)//factor_dim][:factor_dim]))
    else:
        # uniform in [-1/2, 1/2) per axis, scaled by the image dimensions
        self.q_F_loc_mu = nn.Parameter((torch.rand(factor_dim, 3) - 1/2) * torch.FloatTensor(image_dims))
    # NOTE(review): overrides the initialisation chosen above with two fixed
    # 3-D locations, whatever `factor_dim` is. In the collapsed source it is
    # ambiguous whether this line belongs to the `else` branch -- confirm.
    self.q_F_loc_mu.data = torch.FloatTensor([[5,5,5],[-5,-5,-5]])
    init_sig = ((torch.ones(factor_dim, 3) * torch.FloatTensor(image_dims) / (2 * factor_dim) * 0.02).exp() - 1).log()
    self.q_F_loc_sig = init_sig

    # Hard-coded for exactly two factors (scales 2 and 5.5 after softplus).
    init_sig = ((torch.FloatTensor([2, 5.5])).exp() - 1).log()
    self.q_F_scale_mu = nn.Parameter(init_sig)
    init_sig = ((self.softplus(self.q_F_scale_mu).data.mean() * 0.05).exp() - 1).log() #Edited
    self.q_F_scale_sig = init_sig

    self.use_cuda = use_cuda
    # if on gpu cuda-ize all pytorch (sub)modules
    if use_cuda:
        self.cuda()
def __init__(self, input_size, hidden_size_1, num_classes):
    """Create two linear layers and a Softplus activation.

    How the layers are chained is defined by ``forward``, which is not
    part of this block.

    Args:
        input_size: Input width of ``fc1``.
        hidden_size_1: Output width of ``fc1`` and input width of ``fc2``.
        num_classes: Output width of ``fc2``.
    """
    super(Net1, self).__init__()
    self.fc1 = nn.Linear(input_size, hidden_size_1)
    self.a = nn.Softplus()
    self.fc2 = nn.Linear(hidden_size_1, num_classes)
def __init__(self, outputs, inputs):
    """Assemble the Bayesian SqueezeNet-style stack layer by layer.

    Every layer is stored both as a named attribute (``conv1``,
    ``squeeze1`` ... ``fc1``) and, in the same order, in ``self.layers``.

    Args:
        outputs: Channel count of ``conv2`` and output width of ``fc1``.
        inputs: Input channel count of ``conv1``.
    """
    super(BBBSqueezeNet, self).__init__()

    ordered = []

    def register(attr_name, module):
        # Attribute assignment registers the sub-module; the list records
        # the execution order for self.layers.
        setattr(self, attr_name, module)
        ordered.append(module)

    def max_pool():
        return nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

    register('conv1', BBBConv2d(inputs, 64, kernel_size=3, stride=2))
    register('soft1', nn.Softplus())
    register('pool1', max_pool())

    # (input channels, squeeze channels, expand channels) of fire modules 1-8
    fire_specs = (
        (64, 16, 128),
        (128, 16, 128),
        (128, 32, 256),
        (256, 32, 256),
        (256, 48, 384),
        (384, 48, 384),
        (384, 64, 512),
        (512, 64, 512),
    )
    # pooling layers that directly follow a given fire module
    pool_after_fire = {2: 'pool2', 4: 'pool3'}

    for number, (c_in, c_squeeze, c_expand) in enumerate(fire_specs, 1):
        register('squeeze%d' % number,
                 BBBConv2d(c_in, c_squeeze, kernel_size=1))
        register('squeeze_activation%d' % number, nn.Softplus())
        register('expand3x3_%d' % number,
                 BBBConv2d(c_squeeze, c_expand, kernel_size=3, padding=1))
        register('expand3x3_activation%d' % number, nn.Softplus())
        if number in pool_after_fire:
            register(pool_after_fire[number], max_pool())

    register('drop1', nn.Dropout(p=0.5))
    register('conv2', BBBConv2d(512, outputs, kernel_size=1))
    register('soft2', nn.Softplus())

    # NOTE(review): the flattened size is hard-coded to 13 * 13 * 100 while
    # conv2 emits `outputs` channels -- confirm the intended input size and
    # class count.
    flat_features = 13 * 13 * 100
    register('flatten', FlattenLayer(flat_features))
    register('fc1', BBBLinearFactorial(flat_features, outputs))

    self.layers = nn.ModuleList(ordered)
def train(config):
    """Train the conditional GAN described by ``config``.

    The dataset is split 60/40 into train/test subsets. Each iteration
    updates the discriminator with a softplus (logistic) loss and the
    generator with the adversarial softplus loss plus a weighted L1 term.
    After each epoch the generator is evaluated with ``test`` and, every
    ``config.snapshot_interval`` epochs, a checkpoint is written.

    Args:
        config: Options object. Attributes read here: ``threads``,
            ``batchsize``, ``test_batchsize``, ``gen_model``, ``in_ch``,
            ``out_ch``, ``gpu_ids``, ``gen_init``, ``dis_init``, ``lr``,
            ``beta1``, ``cuda``, ``out_dir``, ``epoch``, ``minimax``,
            ``lamb`` and ``snapshot_interval``.

    Raises:
        ValueError: If ``config.gen_model`` is not ``'unet'``.
    """
    gpu_manage(config)

    ### DATASET LOAD ###
    print('===> Loading datasets')

    dataset = Dataset(config)
    print('dataset:', len(dataset))
    train_size = int(0.6 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size])
    print('train dataset:', len(train_dataset))
    print('test dataset:', len(test_dataset))
    training_data_loader = DataLoader(dataset=train_dataset,
                                      num_workers=config.threads,
                                      batch_size=config.batchsize,
                                      shuffle=True)
    test_data_loader = DataLoader(dataset=test_dataset,
                                  num_workers=config.threads,
                                  batch_size=config.test_batchsize,
                                  shuffle=False)

    ### MODELS LOAD ###
    print('===> Loading models')

    if config.gen_model == 'unet':
        gen = UNet(in_ch=config.in_ch,
                   out_ch=config.out_ch,
                   gpu_ids=config.gpu_ids)
    else:
        # Previously this branch only printed a message and execution went
        # on to fail with a NameError on `gen`; fail here with a clear error.
        raise ValueError(
            'The generator model does not exist: {!r}'.format(
                config.gen_model))

    if config.gen_init is not None:
        # Load onto the CPU first so that checkpoints written on a GPU also
        # work on CPU-only hosts; load_state_dict copies the values into the
        # model's own tensors whatever device they live on.
        param = torch.load(config.gen_init, map_location='cpu')
        gen.load_state_dict(param)
        print('load {} as pretrained model'.format(config.gen_init))

    dis = Discriminator(in_ch=config.in_ch,
                        out_ch=config.out_ch,
                        gpu_ids=config.gpu_ids)

    if config.dis_init is not None:
        param = torch.load(config.dis_init, map_location='cpu')
        dis.load_state_dict(param)
        print('load {} as pretrained model'.format(config.dis_init))

    # setup optimizer
    opt_gen = optim.Adam(gen.parameters(),
                         lr=config.lr,
                         betas=(config.beta1, 0.999),
                         weight_decay=0.00001)
    opt_dis = optim.Adam(dis.parameters(),
                         lr=config.lr,
                         betas=(config.beta1, 0.999),
                         weight_decay=0.00001)

    # Pre-allocated input buffers; resized and refilled for every batch.
    real_a = torch.FloatTensor(config.batchsize, config.in_ch, 256, 256)
    real_b = torch.FloatTensor(config.batchsize, config.out_ch, 256, 256)

    criterionL1 = nn.L1Loss()
    criterionMSE = nn.MSELoss()
    criterionSoftplus = nn.Softplus()

    if config.cuda:
        gen = gen.cuda(0)
        dis = dis.cuda(0)
        criterionL1 = criterionL1.cuda(0)
        criterionMSE = criterionMSE.cuda(0)
        criterionSoftplus = criterionSoftplus.cuda(0)
        real_a = real_a.cuda(0)
        real_b = real_b.cuda(0)

    real_a = Variable(real_a)
    real_b = Variable(real_b)

    logreport = LogReport(log_dir=config.out_dir)
    testreport = TestReport(log_dir=config.out_dir)

    print('===> begin')

    # main
    for epoch in range(1, config.epoch + 1):
        for iteration, batch in enumerate(training_data_loader, 1):
            real_a_cpu, real_b_cpu = batch[0], batch[1]
            real_a.data.resize_(real_a_cpu.size()).copy_(real_a_cpu)
            real_b.data.resize_(real_b_cpu.size()).copy_(real_b_cpu)
            # Modules are called directly (not via .forward) so that any
            # registered hooks run as well.
            fake_b = gen(real_a)

            ################
            ### Update D ###
            ################

            opt_dis.zero_grad()

            # train with fake
            fake_ab = torch.cat((real_a, fake_b), 1)
            # detach(): the discriminator loss must not reach the generator
            pred_fake = dis(fake_ab.detach())
            batchsize, _, w, h = pred_fake.size()

            loss_d_fake = torch.sum(
                criterionSoftplus(pred_fake)) / batchsize / w / h

            # train with real
            real_ab = torch.cat((real_a, real_b), 1)
            pred_real = dis(real_ab)
            loss_d_real = torch.sum(
                criterionSoftplus(-pred_real)) / batchsize / w / h

            # Combined loss
            loss_d = loss_d_fake + loss_d_real

            loss_d.backward()

            # The discriminator is only stepped in epochs that are a
            # multiple of config.minimax.
            if epoch % config.minimax == 0:
                opt_dis.step()

            ################
            ### Update G ###
            ################

            opt_gen.zero_grad()

            # First, G(A) should fake the discriminator
            fake_ab = torch.cat((real_a, fake_b), 1)
            pred_fake = dis(fake_ab)
            loss_g_gan = torch.sum(
                criterionSoftplus(-pred_fake)) / batchsize / w / h

            # Second, G(A) = B
            loss_g_l1 = criterionL1(fake_b, real_b) * config.lamb

            loss_g = loss_g_gan + loss_g_l1

            loss_g.backward()

            opt_gen.step()

            # log
            if iteration % 10 == 0:
                print(
                    "===> Epoch[{}]({}/{}): loss_d_fake: {:.4f} loss_d_real: {:.4f} loss_g_gan: {:.4f} loss_g_l1: {:.4f}"
                    .format(epoch, iteration, len(training_data_loader),
                            loss_d_fake.item(), loss_d_real.item(),
                            loss_g_gan.item(), loss_g_l1.item()))

                log = {}
                log['epoch'] = epoch
                log['iteration'] = len(training_data_loader) * (epoch - 1) + iteration
                log['gen/loss'] = loss_g.item()
                log['dis/loss'] = loss_d.item()

                logreport(log)

        print('epoch', epoch, 'finished')

        with torch.no_grad():
            log_test = test(config, test_data_loader, gen, criterionMSE, epoch)
            testreport(log_test)

        print('test finished')

        if epoch % config.snapshot_interval == 0:
            checkpoint(config, epoch, gen, dis)

        logreport.save_lossgraph()
        testreport.save_lossgraph()
def __init__(self, l2_reg=0.02, angle_bound=1., lambda_ang=2):
    """Store the loss hyper-parameters and create the Softplus module.

    Args:
        l2_reg: Stored as ``self.l2_reg``.
        angle_bound: Stored as ``self.angle_bound``.
        lambda_ang: Stored as ``self.lambda_ang``.
    """
    super(AngularLoss, self).__init__()
    (self.l2_reg,
     self.angle_bound,
     self.lambda_ang) = (l2_reg, angle_bound, lambda_ang)
    self.softplus = nn.Softplus()
def __init__(self, input_dim, hidden_dim):
    """Create the input and hidden projections of the cell.

    Both projections map to ``7 * hidden_dim`` features -- presumably one
    ``hidden_dim``-wide slice per gate/quantity of the cell; confirm
    against ``forward``.

    Args:
        input_dim: Input width of ``self.linear``.
        hidden_dim: Input width of ``self.linear_hidden``; one seventh of
            the output width of both projections.
    """
    super(ContinuousLSTMCell, self).__init__()
    self.linear = nn.Linear(input_dim, 7 * hidden_dim)
    self.linear_hidden = nn.Linear(hidden_dim, 7 * hidden_dim)
    #self.scaled_softplus = ScaledSoftplus(1)
    self.nonlinearity = nn.Softplus()
def __init__(self, input_dim, hidden_dim, output_dim, rnn_layer_num, conv_type='GCN', bias=True):
    """Create the feature maps, encoder, prior, recurrent cell and decoder.

    The feature maps (``phi_x``, ``phi_z``) and the prior networks are the
    same for every ``conv_type``; only the encoder convolutions and the
    recurrent cell depend on it.

    Args:
        input_dim: Input width of ``phi_x``.
        hidden_dim: Hidden width used throughout.
        output_dim: Latent width (input of ``phi_z``, output of the
            mean/std heads).
        rnn_layer_num: Layer count passed to the graph GRU.
        conv_type: 'GCN', 'SAGE' or 'GIN'.
        bias: Passed as ``bias`` to every layer created here.
    """
    super(VGRNN, self).__init__()
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.output_dim = output_dim
    self.rnn_layer_num = rnn_layer_num
    self.conv_type = conv_type
    self.bias = bias
    self.method_name = 'VGRNN'

    assert conv_type in ['GCN', 'SAGE', 'GIN']

    def linear(n_in, n_out):
        return nn.Linear(n_in, n_out, bias=bias)

    concat_dim = hidden_dim + hidden_dim

    # feature extractors for the input and the latent sample
    self.phi_x = nn.Sequential(linear(input_dim, hidden_dim), nn.ReLU())
    self.phi_z = nn.Sequential(linear(output_dim, hidden_dim), nn.ReLU())

    # encoder: graph convolutions producing the posterior mean and std
    if conv_type in ('GCN', 'SAGE'):
        conv_cls = GCNConv if conv_type == 'GCN' else SAGEConv
        self.enc = conv_cls(concat_dim, hidden_dim, bias=bias)
        self.enc_mean = conv_cls(hidden_dim, output_dim, act=lambda x: x, bias=bias)
        self.enc_std = conv_cls(hidden_dim, output_dim, act=F.softplus, bias=bias)
    else:  # 'GIN'
        self.enc = GINConv(
            nn.Sequential(linear(concat_dim, hidden_dim), nn.ReLU()))
        self.enc_mean = GINConv(
            nn.Sequential(linear(hidden_dim, output_dim)))
        self.enc_std = GINConv(
            nn.Sequential(linear(hidden_dim, output_dim), nn.Softplus()))

    # prior networks
    self.prior = nn.Sequential(linear(hidden_dim, hidden_dim), nn.ReLU())
    self.prior_mean = nn.Sequential(linear(hidden_dim, output_dim))
    self.prior_std = nn.Sequential(linear(hidden_dim, output_dim), nn.Softplus())

    # recurrent cell: the GIN variant uses the GCN-based GRU as well
    rnn_cls = graph_gru_sage if conv_type == 'SAGE' else graph_gru_gcn
    self.rnn = rnn_cls(concat_dim, hidden_dim, rnn_layer_num, bias=bias)

    self.dec = InnerProductDecoder(act=lambda x: x)
def __init__(self, config):
    """Build the encoder, context RNNs, decoder and latent-variable heads.

    Args:
        config: Options object; stored as ``self.config``. Sizes, RNN
            types, dropout and decoding options are all read from it.
    """
    super(VHCR, self).__init__()

    self.config = config
    cfg = config

    self.encoder = layers.EncoderRNN(cfg.vocab_size,
                                     cfg.embedding_size,
                                     cfg.encoder_hidden_size,
                                     cfg.rnn,
                                     cfg.num_layers,
                                     cfg.bidirectional,
                                     cfg.dropout)

    # width of one encoded utterance (all layers and directions)
    utterance_size = (cfg.num_layers
                      * cfg.encoder_hidden_size
                      * self.encoder.num_directions)

    # the context encoder sees the utterance encoding plus z_conv
    self.context_encoder = layers.ContextRNN(utterance_size + cfg.z_conv_size,
                                             cfg.context_size,
                                             cfg.rnn,
                                             cfg.num_layers,
                                             cfg.dropout)

    # learned vector with the width of one utterance encoding
    self.unk_sent = nn.Parameter(torch.randn(utterance_size))

    self.z_conv2context = layers.FeedForward(cfg.z_conv_size,
                                             cfg.num_layers * cfg.context_size,
                                             num_layers=1,
                                             activation=cfg.activation)

    # bidirectional context RNN over the plain utterance encodings
    self.context_inference = layers.ContextRNN(utterance_size,
                                               cfg.context_size,
                                               cfg.rnn,
                                               cfg.num_layers,
                                               cfg.dropout,
                                               bidirectional=True)

    self.decoder = layers.DecoderRNN(cfg.vocab_size,
                                     cfg.embedding_size,
                                     cfg.decoder_hidden_size,
                                     cfg.rnncell,
                                     cfg.num_layers,
                                     cfg.dropout,
                                     cfg.word_drop,
                                     cfg.max_unroll,
                                     cfg.sample,
                                     cfg.temperature,
                                     cfg.beam_size)

    self.context2decoder = layers.FeedForward(
        cfg.context_size + cfg.z_sent_size + cfg.z_conv_size,
        cfg.num_layers * cfg.decoder_hidden_size,
        num_layers=1,
        activation=cfg.activation)

    self.softplus = nn.Softplus()

    # keyword arguments shared by the two-layer feed-forward trunks
    two_layer_trunk = dict(num_layers=2,
                           hidden_size=cfg.context_size,
                           activation=cfg.activation)

    # conversation-level posterior
    self.conv_posterior_h = layers.FeedForward(
        cfg.num_layers * self.context_inference.num_directions * cfg.context_size,
        cfg.context_size,
        **two_layer_trunk)
    self.conv_posterior_mu = nn.Linear(cfg.context_size, cfg.z_conv_size)
    self.conv_posterior_var = nn.Linear(cfg.context_size, cfg.z_conv_size)

    # sentence-level prior
    self.sent_prior_h = layers.FeedForward(cfg.context_size + cfg.z_conv_size,
                                           cfg.context_size,
                                           num_layers=1,
                                           hidden_size=cfg.z_sent_size,
                                           activation=cfg.activation)
    self.sent_prior_mu = nn.Linear(cfg.context_size, cfg.z_sent_size)
    self.sent_prior_var = nn.Linear(cfg.context_size, cfg.z_sent_size)

    # sentence-level posterior
    self.sent_posterior_h = layers.FeedForward(
        cfg.z_conv_size + utterance_size + cfg.context_size,
        cfg.context_size,
        **two_layer_trunk)
    self.sent_posterior_mu = nn.Linear(cfg.context_size, cfg.z_sent_size)
    self.sent_posterior_var = nn.Linear(cfg.context_size, cfg.z_sent_size)

    if cfg.tie_embedding:
        # share one embedding module between encoder and decoder
        self.decoder.embedding = self.encoder.embedding
def __init__(self, config):
    """Build the encoder, context RNN, decoder and latent-variable heads.

    Args:
        config: Options object; stored as ``self.config``. Sizes, RNN
            types, dropout and decoding options are all read from it.
            ``config.bow`` additionally enables the bag-of-words head.
    """
    super(VHRED, self).__init__()

    self.config = config
    cfg = config

    self.encoder = layers.EncoderRNN(cfg.vocab_size,
                                     cfg.embedding_size,
                                     cfg.encoder_hidden_size,
                                     cfg.rnn,
                                     cfg.num_layers,
                                     cfg.bidirectional,
                                     cfg.dropout)

    # width of one encoded utterance (all layers and directions)
    utterance_size = (cfg.num_layers
                      * cfg.encoder_hidden_size
                      * self.encoder.num_directions)

    self.context_encoder = layers.ContextRNN(utterance_size,
                                             cfg.context_size,
                                             cfg.rnn,
                                             cfg.num_layers,
                                             cfg.dropout)

    self.decoder = layers.DecoderRNN(cfg.vocab_size,
                                     cfg.embedding_size,
                                     cfg.decoder_hidden_size,
                                     cfg.rnncell,
                                     cfg.num_layers,
                                     cfg.dropout,
                                     cfg.word_drop,
                                     cfg.max_unroll,
                                     cfg.sample,
                                     cfg.temperature,
                                     cfg.beam_size)

    self.context2decoder = layers.FeedForward(
        cfg.context_size + cfg.z_sent_size,
        cfg.num_layers * cfg.decoder_hidden_size,
        num_layers=1,
        activation=cfg.activation)

    self.softplus = nn.Softplus()

    # keyword arguments shared by the two-layer feed-forward trunks
    two_layer_trunk = dict(num_layers=2,
                           hidden_size=cfg.context_size,
                           activation=cfg.activation)

    # prior over the sentence latent
    self.prior_h = layers.FeedForward(cfg.context_size,
                                      cfg.context_size,
                                      **two_layer_trunk)
    self.prior_mu = nn.Linear(cfg.context_size, cfg.z_sent_size)
    self.prior_var = nn.Linear(cfg.context_size, cfg.z_sent_size)

    # posterior over the sentence latent
    self.posterior_h = layers.FeedForward(utterance_size + cfg.context_size,
                                          cfg.context_size,
                                          **two_layer_trunk)
    self.posterior_mu = nn.Linear(cfg.context_size, cfg.z_sent_size)
    self.posterior_var = nn.Linear(cfg.context_size, cfg.z_sent_size)

    if cfg.tie_embedding:
        # share one embedding module between encoder and decoder
        self.decoder.embedding = self.encoder.embedding

    if cfg.bow:
        # bag-of-words head on top of the sentence latent
        self.bow_h = layers.FeedForward(cfg.z_sent_size,
                                        cfg.decoder_hidden_size,
                                        num_layers=1,
                                        hidden_size=cfg.decoder_hidden_size,
                                        activation=cfg.activation)
        self.bow_predict = nn.Linear(cfg.decoder_hidden_size, cfg.vocab_size)
from typing import Union import torch from torch import nn Activation = Union[str, nn.Module] _str_to_activation = { 'relu': nn.ReLU(), 'tanh': nn.Tanh(), 'leaky_relu': nn.LeakyReLU(), 'sigmoid': nn.Sigmoid(), 'selu': nn.SELU(), 'softplus': nn.Softplus(), 'identity': nn.Identity(), } def build_mlp( input_size: int, output_size: int, n_layers: int, size: int, activation: Activation = 'tanh', output_activation: Activation = 'identity', ) -> nn.Module: """ Builds a feedforward neural network arguments:
def __init__(self, svhn_path, curlfrac=0.5, supfrac=0.5, k=1, shuffle=True,
             augment=False, use_cuda=False, dload_dataset=False):
    """Create the networks, SVHN datasets, index split and samplers.

    Args:
        svhn_path: Root directory handed to the SVHN dataset classes.
        curlfrac: Fraction of the training set used as unlabeled data.
        supfrac: Fraction of the training set used as labeled data.
        k: Stored as ``self.k`` and passed to ``ApproxContrastedData``.
        shuffle: Shuffle the training indices (``np.random``) before the
            split.
        augment: Use the augmenting transform for the supervised set.
        use_cuda: Use the GPU when one is available.
        dload_dataset: Passed as ``download`` to the dataset classes.
    """
    self.k = k
    self.softplus = nn.Softplus()
    self.bulk = Net_Bulk()
    self.head = Net_Head()

    # --- transforms ---------------------------------------------------
    def tensor_and_normalize():
        return [
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]

    def color_jitter():
        return transforms.ColorJitter(brightness=0.5, contrast=0.5,
                                      saturation=0.5, hue=0.5)

    def random_affine():
        return transforms.RandomAffine(20, scale=(0.9, 1.1), shear=20,
                                       resample=PIL.Image.BICUBIC,
                                       fillcolor=(100, 100, 100))

    normalize = transforms.Compose(tensor_and_normalize())
    # each augmentation is applied with probability 0.8
    augtrans = transforms.Compose(
        [transforms.RandomApply([color_jitter()], p=0.8),
         transforms.RandomApply([random_affine()], p=0.8)]
        + tensor_and_normalize())
    # both augmentations are always applied
    contrasttrans = transforms.Compose(
        [color_jitter(), random_affine()] + tensor_and_normalize())

    transform = augtrans if augment else normalize

    # --- datasets -----------------------------------------------------
    self.suptrainset = datasets.SVHN(svhn_path, split='train',
                                     transform=transform,
                                     target_transform=None,
                                     download=dload_dataset)
    self.testset = datasets.SVHN(svhn_path, split='test',
                                 transform=normalize,
                                 target_transform=None,
                                 download=dload_dataset)

    # --- labeled / unlabeled split --------------------------------------
    if curlfrac + supfrac > 1.0:
        print("CURL fraction plus SUP fraction cannot exceed 1")
        print("Setting to defaults")
        curlfrac, supfrac = 0.5, 0.5

    used_frac = curlfrac + supfrac
    trainset_size = len(self.suptrainset)
    indices = list(range(trainset_size))
    end = int(np.floor(used_frac * trainset_size))
    curlend = int(np.floor(curlfrac / used_frac * end))

    if shuffle:
        np.random.shuffle(indices)

    # first `curlend` indices are unlabeled, the rest up to `end` labeled
    curltrain_indices = indices[:curlend]
    suptrain_indices = indices[curlend:end]
    self.curltrain_indices = curltrain_indices

    print(f"Number of labeled images: {len(suptrain_indices)}")
    print(f"Number of unlabeled images: {len(curltrain_indices)}")

    self.suptrain_sampler = SubsetRandomSampler(suptrain_indices)
    self.curltrain_sampler = SubsetRandomSampler(curltrain_indices)

    self.curltrainset = ApproxContrastedData(
        svhn_path, split='train', contrast_transform=contrasttrans, k=k,
        transform=normalize, download=dload_dataset)

    # --- device ---------------------------------------------------------
    cuda_ready = use_cuda and torch.cuda.is_available()
    if use_cuda and not cuda_ready:
        print("CUDA not available")
    self.device = torch.device('cuda' if cuda_ready else 'cpu')

    # ten independent, initially empty lists
    self.approxclasses = [[] for _ in range(10)]

    self.bulk.to(self.device)
    self.head.to(self.device)
optim_dmfa.step() loss_value += loss_dmfa.item() acc = torch.sum(dmfa.q_c.argmax(dim=1)==classes).float()/n_data time_end = time.time() print('elapsed time (min) : %0.1f' % ((time_end-time_start)/60)) print('====> Epoch: %d ELBO_Loss : %0.4f Acc: %0.2f' % ((i + 1), loss_value / len(train_loader.dataset), acc)) torch.save(dmfa.state_dict(), PATH_DMFA) alphas[i] = dmfa.trans.alpha.item() betas[i] = dmfa.trans.beta.item() lins[i] = dmfa.trans.lin.item() facs_mu[i] = (dmfa.q_F_loc_mu - torch.FloatTensor([[7.5,7.5,7.5],[-7.5,-7.5,-7.5]])).pow(2).mean().sqrt().item() facs_sig[i] = (nn.Softplus()(dmfa.q_F_scale_mu) - torch.FloatTensor([3,4.5])).pow(2).mean().sqrt().item() if i % 50 == 0: lw = 1 fig = plt.figure() ax = fig.add_subplot(111) ax.set_title('parameter estimation', fontsize=16) ax.set_xlabel("epoch number", fontsize=16) ax.tick_params(axis="y", labelcolor="r") ax.plot(facs_mu[:500], "r-^", label='factors location RMSE', markevery = 50, linewidth = lw, markersize = 5) ax.plot(facs_sig[:500], "r->", label='factors scale RMSE', markevery = 50, linewidth = lw, markersize = 5) ax.legend(framealpha = 0, loc='upper left') ax.set_ylim(0,3) ax.margins(x=0.03) ax2 = ax.twinx()
def __init__(self, dim_in, dim_h, dim_out):
    """Create one hidden projection, two output heads and a Softplus module.

    Args:
        dim_in: Input width of ``lin1``.
        dim_h: Output width of ``lin1`` and input width of both heads.
        dim_out: Output width of the ``mu`` and ``sigma`` heads.
    """
    super().__init__()
    hidden_width = dim_h
    self.lin1 = nn.Linear(in_features=dim_in, out_features=hidden_width)
    # Two heads of identical shape, both fed by the hidden width.
    self.mu = nn.Linear(in_features=hidden_width, out_features=dim_out)
    self.sigma = nn.Linear(in_features=hidden_width, out_features=dim_out)
    self.softplus = nn.Softplus()
def __init__( self, n_keypoints               = 17,
              shape_basis_size          = 10,
              mult_shape_by_class_mask  = False,
              squared_reprojection_loss = False,
              n_fully_connected         = 1024,
              n_layers                  = 6,
              keypoint_rescale          = float(1),
              keypoint_norm_type        = 'to_mean',
              projection_type           = 'orthographic',
              z_augment                 = True,
              z_augment_rot_angle       = float(np.pi),
              z_equivariance            = False,
              z_equivariance_rot_angle  = float(np.pi)/4, # < 0 means same as z_augment_rot_angle
              compose_z_equivariant_rot = True, # TODO: remove this soon!
              camera_translation        = False,
              camera_xy_translation     = True,
              argmin_translation        = False,
              argmin_translation_test   = False,
              argmin_translation_min_depth = 3.,
              argmin_to_augmented       = False,
              camera_scale              = False,
              argmin_scale              = False,
              argmin_scale_test         = False,
              loss_normalization        = 'kp_total_count',
              independent_phi_for_aug   = False,
              shape_pred_wd             = 1.,
              connectivity_setup        = 'NONE',
              custom_param_groups       = False,
              use_huber                 = False,
              huber_scaling             = 0.1,
              alpha_bias                = True,
              canonicalization = {
                  'use': False,
                  'n_layers': 6,
                  'n_rand_samples': 4,
                  'rot_angle': float(np.pi),
                  'n_fully_connected': 1024,
              },
              linear_instead_of_conv       = False,
              perspective_depth_threshold  = 0.1,
              depth_offset                 = 0.,
              replace_keypoints_with_input = False,
              root_joint                   = 0,
              loss_weights = {
                  'l_reprojection': 1.,
                  'l_canonicalization': 1.
              },
              log_vars = [ \
                  'objective',
                  'dist_reprojection',
                  'l_reprojection',
                  'l_canonicalization' ],
              **kwargs ):
    """Construct the trunk network and the prediction layers.

    The constructor builds, in order: the trunk ``phi``, the shape
    coefficient layer, the 3D shape layer, the rotation layer, and --
    depending on ``camera_translation``, ``camera_scale`` and
    ``canonicalization['use']`` -- a translation layer, a scale layer and
    the canonicalization trunk ``psi`` with its own coefficient layer.

    Most arguments are not used directly here; they are read back as
    ``self.<name>`` after ``auto_init_args(self)``.

    NOTE(review): ``canonicalization``, ``loss_weights`` and ``log_vars``
    have mutable default values, so all instances constructed with the
    defaults share the same dict/list objects -- do not mutate them in place.
    """
    super(C3DPO, self).__init__()

    # autoassign constructor params to self
    # (presumably done by inspecting this frame's arguments -- keep this
    # call before any use of self.<param>; TODO confirm against the helper)
    auto_init_args(self)

    # factorization net
    self.phi = nn.Sequential( \
        *make_trunk( dim_in=self.n_keypoints * 3 , # 2 dim loc, 1 dim visibility
                     n_fully_connected=self.n_fully_connected,
                     n_layers=self.n_layers ) )

    # Select the layer factory used for every prediction layer created below.
    if linear_instead_of_conv:
        layer_init_fn = linear_layer
    else:
        layer_init_fn = conv1x1

    # shape coefficient predictor
    self.alpha_layer = layer_init_fn(self.n_fully_connected,
                                     self.shape_basis_size,
                                     init='normal0.01',
                                     cnv_args={
                                         'bias': self.alpha_bias,
                                         'kernel_size': 1
                                     })

    # 3D shape predictor
    self.shape_layer = layer_init_fn(self.shape_basis_size,
                                     3 * n_keypoints,
                                     init='normal0.01')

    # rotation predictor (predicts log-rotation)
    self.rot_layer = layer_init_fn(self.n_fully_connected,
                                   3,
                                   init='normal0.01')

    if self.camera_translation:
        # camera translation
        self.translation_layer = layer_init_fn(self.n_fully_connected,
                                               3,
                                               init='normal0.01')

    if self.camera_scale:
        # camera scale (non-negative predictions)
        # The trailing Softplus is what keeps the predicted scale positive.
        self.scale_layer = nn.Sequential( \
            layer_init_fn(self.n_fully_connected,1,init='normal0.01'),
            nn.Softplus() )

    if self.canonicalization['use']:
        # canonicalization net:
        self.psi = nn.Sequential( \
            *make_trunk( dim_in=self.n_keypoints*3 ,
                         n_fully_connected=self.canonicalization['n_fully_connected'],
                         n_layers=self.canonicalization['n_layers'] ) )
        # NOTE(review): this layer takes self.n_fully_connected inputs (not
        # canonicalization['n_fully_connected']) and always uses conv1x1,
        # ignoring linear_instead_of_conv -- confirm this is intended.
        self.alpha_layer_psi = conv1x1( \
            self.n_fully_connected,
            self.shape_basis_size,
            init='normal0.01')
def forward(self, x):
    """Return ``softplus(x)`` raised element-wise to the power 1.1.

    Softplus is evaluated with its default ``beta`` and ``threshold``.
    """
    activated = nn.functional.softplus(x)
    return activated ** 1.1
def __init__(self, args):
    """Build the prior, decoder and encoder networks of the model.

    Args:
        args: Configuration object. The attributes read here are
            ``image_dims`` (unpacked as ``C, H, W``), ``z_depth``,
            ``z_dim``, ``y_dim``, ``hidden_dim``, ``mixture_comps`` and
            ``device``.
    """
    super().__init__()
    C, H, W = args.image_dims
    x_dim = C * H * W

    # Both CNN encoders apply two 2x2/stride-2 max-pools and otherwise keep
    # the spatial size, so they emit 20 channels of (H // 4) x (W // 4).
    # (The former expression `20 * H // 4 * W // 4` parsed as
    # ((20 * H) // 4 * W) // 4, which only agrees when H and W are
    # multiples of 4.)
    cnn_feat_dim = 20 * (H // 4) * (W // 4)

    # --------------------
    # p model for z -- PixelCNN++
    # --------------------
    self.p_z = PixelCNN(nr_resnet=1,
                        nr_filters=10,
                        input_channels=args.z_depth,
                        nr_logistic_mix=args.mixture_comps)

    # --------------------
    # p model -- SSL paper generative semi supervised model M2
    # --------------------
    # Uniform categorical prior over the y_dim classes.
    self.p_y = D.OneHotCategorical(
        probs=1 / args.y_dim * torch.ones(1, args.y_dim, device=args.device))

    # parametrized data likelihood p(x|y,z)
    self.decoder = nn.Sequential(
        nn.Linear(args.z_depth * args.z_dim**2 + args.y_dim, args.hidden_dim),
        nn.BatchNorm1d(args.hidden_dim),
        nn.Softplus(),
        nn.Linear(args.hidden_dim, args.hidden_dim),
        nn.BatchNorm1d(args.hidden_dim),
        nn.Softplus(),
        nn.Linear(args.hidden_dim, x_dim),
        nn.Softplus())

    # Transposed-conv refinement stage (channels: C -> 10 -> 20 -> 20 -> C).
    # Batch normalization is placed before the activation, as suggested in
    # Ioffe and Szegedy 2015.
    self.decoder_tcnn = nn.Sequential(
        # (possibly add more channels here)
        nn.ConvTranspose2d(in_channels=args.image_dims[0], out_channels=10,
                           kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(10),
        nn.Softplus(),
        nn.Conv2d(in_channels=10, out_channels=20,
                  kernel_size=5, stride=1, padding=2),
        nn.Softplus(),
        nn.ConvTranspose2d(in_channels=20, out_channels=20,
                           kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(20),
        nn.Softplus(),
        nn.ConvTranspose2d(in_channels=20, out_channels=args.image_dims[0],
                           kernel_size=3, stride=1, padding=1),
    )

    # --------------------
    # q model -- SSL paper eq 4
    # --------------------
    # parametrized q(y|x) = Cat(y|pi_phi(x)) -- outputs parametrization of
    # categorical distribution
    self.encoder_y_cnn = nn.Sequential(
        nn.Conv2d(in_channels=args.image_dims[0], out_channels=10,
                  kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(10),
        nn.Softplus(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
        nn.Conv2d(in_channels=10, out_channels=20,
                  kernel_size=3, stride=1, padding=1),
        nn.Softplus(),
        nn.Conv2d(in_channels=20, out_channels=20,
                  kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(20),
        nn.Softplus(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0))

    self.encoder_y = nn.Sequential(
        nn.Linear(cnn_feat_dim, args.hidden_dim),  # CNN features instead of x_dim
        nn.Softplus(),
        nn.Linear(args.hidden_dim, args.hidden_dim),
        nn.Softplus(),
        nn.Linear(args.hidden_dim, args.y_dim))

    # parametrized q(z|x,y) = Normal(z|mu_phi(x,y), diag(sigma2_phi(x))) --
    # output parametrizations for mean and diagonal variance of a Normal
    # distribution
    self.encoder_z_cnn = nn.Sequential(
        nn.Conv2d(in_channels=args.image_dims[0], out_channels=10,
                  kernel_size=5, stride=1, padding=2),
        nn.BatchNorm2d(10),
        nn.Softplus(),
        nn.Conv2d(in_channels=10, out_channels=20,
                  kernel_size=5, stride=1, padding=2),
        nn.Softplus(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
        nn.Conv2d(in_channels=20, out_channels=20,
                  kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(20),
        nn.Softplus(),
        nn.MaxPool2d(kernel_size=2, stride=2, padding=0))

    self.encoder_z = nn.Sequential(
        nn.Linear(cnn_feat_dim + args.y_dim, args.hidden_dim),  # CNN features + y
        nn.Softplus(),
        nn.Linear(args.hidden_dim, args.hidden_dim),
        nn.Softplus(),
        # Twice the latent size: one half per Normal parameter.
        nn.Linear(args.hidden_dim, 2 * args.z_depth * args.z_dim**2))

    # initialize weights to N(0, 0.001) and biases to 0 (cf SSL section 4.4)
    #
    # Batch-norm parameters are left at their defaults: the scale (gamma)
    # is 1-D, so treating every 1-D parameter as a bias would zero it and
    # make each normalisation layer emit a constant at initialisation.
    norm_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)
    norm_param_ids = {
        id(p)
        for m in self.modules() if isinstance(m, norm_types)
        for p in m.parameters(recurse=False)
    }
    for p in self.parameters():
        if id(p) in norm_param_ids:
            continue
        if p.ndimension() == 1:
            p.data.fill_(0.)
        else:
            p.data.normal_(0, 0.001)
return self.f(x)  # tail of a method whose definition starts before this chunk


class Swish(nn.Module):
    """Swish activation ``x * sigmoid(beta * x)`` with a learnable ``beta``."""

    def __init__(self):
        super(Swish, self).__init__()
        # Scalar parameter, initialised to 1.0 and trained with the model.
        self.beta = nn.Parameter(torch.tensor(1.0))

    def forward(self, x):
        """Apply the activation element-wise to ``x``."""
        return x * torch.sigmoid(self.beta * x)


# Name -> activation module lookup table.
# Every value is a single instance created when this statement runs, so all
# users of a given key receive the same object; for "swish" this means the
# learnable `beta` is shared by every place that uses NONLINEARITIES["swish"].
NONLINEARITIES = {
    "tanh": nn.Tanh(),
    "relu": nn.ReLU(),
    "softplus": nn.Softplus(),
    "sigmoid": nn.Sigmoid(),
    "elu": nn.ELU(),
    "swish": Swish(),
    "square": Lambda(lambda x: x**2),
    "identity": Lambda(lambda x: x),
}


class MySpMM(torch.autograd.Function):
    """Custom autograd function computing ``torch.mm(sp_mat, dense_mat)``.

    NOTE(review): only ``forward`` is visible in this chunk; the matching
    ``backward`` is presumably defined further down -- confirm.
    """

    @staticmethod
    def forward(ctx, sp_mat, dense_mat):
        # Keep both operands so they are available to the backward pass.
        ctx.save_for_backward(sp_mat, dense_mat)

        return torch.mm(sp_mat, dense_mat)
def __init__(self, node_feature_len, edge_embedding_len,
             init_node_embedding_units, n_heads=4,
             attention_feature_lens=(), task="regression", n_class=2,
             activation=None, remember_func="residual", in_dropout=0,
             attention_dropout=0, readout_dropout=0,
             readout_hidden_units=None, device=None):
    """Build the node embedding MLP, the GNN layers and the readout head.

    Args:
        node_feature_len: Width of the node features; the node embedding is
            forced to end at this width.
        edge_embedding_len: Forwarded to every ``GNNLayer``.
        init_node_embedding_units: Layer widths of the node embedding MLP;
            the last entry is replaced by ``node_feature_len`` if different.
        n_heads: Head count per GNN layer. Either an iterable with one entry
            per element of ``attention_feature_lens`` or a single int, which
            is applied to every layer.
        attention_feature_lens: One attention width per GNN layer; its
            length determines the number of GNN layers.
        task: ``"classification"`` or ``"regression"``.
        n_class: Output width of the readout for classification (regression
            always uses 1).
        activation: Activation module; ``nn.Softplus()`` is used for the
            MLPs when ``None``.
        remember_func: Forwarded to every ``GNNLayer``.
        in_dropout: Dropout probability of ``input_dropout_layer``.
        attention_dropout: Forwarded to every ``GNNLayer``.
        readout_dropout: Dropout probability of ``readout_dropout_layer``.
        readout_hidden_units: Optional layer widths of a hidden readout MLP.
        device: Target device; defaults to CUDA when available, else CPU.

    Raises:
        AssertionError: If ``task`` is not one of the two supported values.
    """
    assert task == "classification" or task == "regression"
    super(GNNModel, self).__init__()
    self.task = task
    self.n_heads = n_heads
    self.readout_hidden_units = readout_hidden_units
    self.remember_func = remember_func

    # Resolve the device before building the GNN layers: they receive it as
    # a constructor argument, so it must already exist on `self`.
    self.device = device if device is not None else \
        torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    self.activation = nn.Softplus() if activation is None else activation
    self.input_dropout_layer = nn.Dropout(in_dropout)

    # force the node embedding to be of node_feature_len
    if init_node_embedding_units[-1] != node_feature_len:
        init_node_embedding_units = list(init_node_embedding_units[:-1]) + \
            [node_feature_len]
    self.node_embedding_layer = MLP(node_feature_len,
                                    init_node_embedding_units,
                                    activation=self.activation)

    # A scalar head count (including the default) applies to every layer.
    if isinstance(n_heads, int):
        heads_per_layer = [n_heads] * len(attention_feature_lens)
    else:
        heads_per_layer = n_heads

    # NOTE(review): the layers receive the raw `activation` argument (which
    # may be None) rather than self.activation -- confirm GNNLayer supplies
    # its own default.
    self.gnn_layers = nn.ModuleList([
        GNNLayer(node_embedding_len=node_feature_len,
                 edge_embedding_len=edge_embedding_len,
                 n_head=n_head,
                 activation=activation,
                 attention_len=attention_len,
                 attention_dropout=attention_dropout,
                 remember_func=remember_func,
                 device=self.device)
        for n_head, attention_len in zip(heads_per_layer,
                                         attention_feature_lens)
    ])

    # readout
    out_width = n_class if self.task == "classification" else 1
    if self.readout_hidden_units is not None:
        self.readout_hidden_layers = MLP(node_feature_len,
                                         self.readout_hidden_units,
                                         activation=self.activation)
        self.readout_layer = nn.Linear(self.readout_hidden_units[-1],
                                       out_width)
    else:
        self.readout_layer = nn.Linear(node_feature_len, out_width)
    self.readout_dropout_layer = nn.Dropout(readout_dropout)

    if self.task == "classification":
        self.logsoftmax = nn.LogSoftmax(dim=1)
def __init__(
    self,
    num_atoms,
    bond_feat_dim,
    num_targets,
    atom_embedding_size=64,
    num_graph_conv_layers=6,
    num_dist_layers=0,
    num_const_layers=0,
    fc_feat_size=128,
    dist_feat_dim=128,
    const_feat_dim=128,
    D_feat_dim=128,
    max_num_nbr=12,
    energy_mode="Harmonic",
    max_opt_steps=300,
    min_opt_steps=10,
    opt_step_size=0.3,
    momentum=0.8,
):
    """Create the embedding, graph convolutions and the two bond heads.

    ``num_atoms``, ``bond_feat_dim`` and ``num_targets`` are handed to the
    parent constructor. The optional hidden stacks ``layers_dist`` and
    ``layers_const`` are only created when the corresponding layer count
    exceeds 1, and then hold ``count - 1`` Linear/BatchNorm/Softplus groups.
    ``fc_feat_size`` and ``D_feat_dim`` are accepted but not used here.
    """
    super(DOGSS, self).__init__(num_atoms, bond_feat_dim, num_targets)

    def hidden_stack(width, n_groups):
        # n_groups repetitions of Linear -> BatchNorm1d -> Softplus at a
        # constant width, wrapped in a Sequential.
        modules = []
        for _ in range(n_groups):
            modules += [
                nn.Linear(width, width),
                nn.BatchNorm1d(width),
                nn.Softplus(),
            ]
        return nn.Sequential(*modules)

    # Plain hyper-parameters kept on the instance.
    self.max_num_nbr = max_num_nbr
    self.max_opt_steps = max_opt_steps
    self.min_opt_steps = min_opt_steps
    self.opt_step_size = opt_step_size
    self.momentum = momentum
    self.energy_mode = energy_mode

    self.embedding = nn.Linear(self.num_atoms, atom_embedding_size)

    self.convs = nn.ModuleList()
    for _ in range(num_graph_conv_layers):
        self.convs.append(
            DOGSSConv(node_dim=atom_embedding_size,
                      edge_dim=self.bond_feat_dim))

    # Both bond heads consume two atom embeddings plus the bond features.
    pair_width = 2 * atom_embedding_size + bond_feat_dim

    self.conv_to_bond_distance = nn.Linear(pair_width, dist_feat_dim)
    self.bond_distance_bn = nn.BatchNorm1d(dist_feat_dim)

    self.conv_to_bond_constant = nn.Linear(pair_width, const_feat_dim)
    self.bond_constant_bn = nn.BatchNorm1d(const_feat_dim)

    self.softplus = nn.Softplus()

    if num_dist_layers > 1:
        self.layers_dist = hidden_stack(dist_feat_dim, num_dist_layers - 1)
    self.bond_distance = nn.Linear(dist_feat_dim, 1)

    if num_const_layers > 1:
        self.layers_const = hidden_stack(const_feat_dim,
                                         num_const_layers - 1)
    self.bond_constant = nn.Linear(const_feat_dim, 1)
# Registry mapping a lowercase name to an activation module instance.
# Each value is a single module object created here, so every lookup of a
# key returns the same instance (relevant for parameterised entries such as
# 'prelu').
# NOTE(review): 'softmax', 'logsoftmax' and 'softmin' are constructed without
# an explicit `dim`; relying on PyTorch's implicit dimension choice is
# deprecated -- confirm the intended axis.
activation = nn.ModuleDict([
    ['relu', nn.ReLU()],
    ['hardtanh', nn.Hardtanh()],
    ['relu6', nn.ReLU6()],
    ['sigmoid', nn.Sigmoid()],
    ['tanh', nn.Tanh()],
    ['softmax', nn.Softmax()],
    ['softmax2d', nn.Softmax2d()],
    ['logsoftmax', nn.LogSoftmax()],
    ['elu', nn.ELU()],
    ['selu', nn.SELU()],
    # ['celu', nn.CELU()],
    ['hardshrink', nn.Hardshrink()],
    ['leakyrelu', nn.LeakyReLU()],
    ['logsigmoid', nn.LogSigmoid()],
    ['softplus', nn.Softplus()],
    ['softshrink', nn.Softshrink()],
    ['prelu', nn.PReLU()],
    ['softsign', nn.Softsign()],
    ['softmin', nn.Softmin()],
    ['tanhshrink', nn.Tanhshrink()],
    ['rrelu', nn.RReLU()],
    ['glu', nn.GLU()],
])

# Registry mapping a lowercase name to a loss module instance, all built
# with their default arguments. (The list continues beyond this chunk.)
loss = nn.ModuleDict([
    ['l1', nn.L1Loss()],
    ['nll', nn.NLLLoss()],
    ['kldiv', nn.KLDivLoss()],
    ['mse', nn.MSELoss()],
    ['bce', nn.BCELoss()],
def forward(self, x):
    """Run ``self.features`` and squash its first eight output columns.

    Columns 0-6 go through Softplus(beta=5, threshold=1) and column 7
    through a sigmoid. The tensor returned by ``self.features`` is modified
    in place and returned; any further columns are left untouched.
    """
    out = self.features(x)
    # Work on a copy of the slice so the activation does not read from the
    # very storage that is overwritten by the assignment.
    leading = out[:, :7].clone()
    out[:, :7] = nn.functional.softplus(leading, beta=5, threshold=1)
    out[:, 7] = torch.sigmoid(out[:, 7])
    return out
pixel_res=256, raw=True, cond=False, half_image_size=False, kloss_dataset=True, # true for mit push ) model_config = EKFEstimatorConfig( is_smooth=True, latent_dim=hy_config.latent_dim, ctrl_dim=6 if dataset_config.kloss_dataset else 8, # DEBUG: these are in flux dataset=dataset_config, dyn_hidden_units=64, dyn_layers=3, dyn_nonlinearity=nn.Softplus(beta=2, threshold=20), obs_hidden_units=64, obs_layers=3, obs_nonlinearity=nn.Softplus(beta=2, threshold=20), is_continuous=True, ramp_iters=200, burn_in=100, dkl_anneal_iter=1000, alpha=0.5, beta=1.0, atol=1e-9, # default: 1e-9 rtol=1e-7, # default: 1e-7 z_pred=True, ) exp_config = ExpConfig(
def __init__(self, dim):
    """Assemble ``self.main``: a 2 -> dim -> dim -> 1 MLP.

    The two hidden layers are followed by PReLU and the single output by
    Softplus.

    Args:
        dim: Width of both hidden layers.
    """
    super().__init__()
    stages = [
        nn.Linear(2, dim),
        nn.PReLU(),
        nn.Linear(dim, dim),
        nn.PReLU(),
        nn.Linear(dim, 1),
        nn.Softplus(),
    ]
    self.main = nn.Sequential(*stages)
def __init__(self):
    """Create the Tanh and Softplus modules and store them on the instance."""
    super().__init__()
    self.tanh, self.softplus = nn.Tanh(), nn.Softplus()
def __init__(self, outputs, inputs):
    """Create the five-convolution feature stack and the linear classifier.

    Args:
        outputs: Output width of the classifier.
        inputs: Number of input channels of the first convolution.

    The feature modules are additionally collected, in execution order, in
    ``self.layers``; the classifier is kept separately.
    """
    super(BBBAlexNet, self).__init__()

    q_init = 0.05
    p_init = math.log(0.05)
    self.q_logvar_init = q_init
    self.p_logvar_init = p_init

    def bayes_conv(c_in, c_out, **conv_kwargs):
        # Convolution sharing the two log-variance initialisers above.
        return BBBConv2d(q_init, p_init, c_in, c_out, **conv_kwargs)

    def halving_pool():
        return nn.MaxPool2d(kernel_size=2, stride=2)

    self.classifier = BBBLinearFactorial(q_init, p_init, 1 * 1 * 128,
                                         outputs)

    self.conv1 = bayes_conv(inputs, 64, kernel_size=11, stride=4, padding=5)
    self.soft1 = nn.Softplus()
    self.pool1 = halving_pool()

    self.conv2 = bayes_conv(64, 192, kernel_size=5, padding=2)
    self.soft2 = nn.Softplus()
    self.pool2 = halving_pool()

    self.conv3 = bayes_conv(192, 384, kernel_size=3, padding=1)
    self.soft3 = nn.Softplus()

    self.conv4 = bayes_conv(384, 256, kernel_size=3, padding=1)
    self.soft4 = nn.Softplus()

    self.conv5 = bayes_conv(256, 128, kernel_size=3, padding=1)
    self.soft5 = nn.Softplus()
    self.pool3 = halving_pool()

    stage_names = (
        "conv1", "soft1", "pool1",
        "conv2", "soft2", "pool2",
        "conv3", "soft3",
        "conv4", "soft4",
        "conv5", "soft5", "pool3",
    )
    self.layers = nn.ModuleList(
        [getattr(self, stage) for stage in stage_names])