def test_forget_mult_cuda():
    """Compare the CUDA forget-mult functions with ``forget_mult_CPU``.

    For every (batch_first, backward) combination the outputs and the
    gradients w.r.t. ``x`` and ``f`` (and, in the second half, the initial
    hidden state ``h``) are compared with ``torch.allclose``.
    """
    this_tests(ForgetMultGPU, BwdForgetMultGPU)
    x, f = torch.randn(5, 3, 20).cuda().chunk(2, dim=2)
    x, f = x.contiguous().requires_grad_(True), f.contiguous().requires_grad_(True)
    th_x, th_f = detach_and_clone(x), detach_and_clone(f)
    leaves = [x, f, th_x, th_f]

    def _assert_close(expected, actual):
        assert torch.allclose(expected, actual, rtol=1e-4, atol=1e-5)

    def _reset_grads():
        # Clear the gradient on the leaf tensors themselves. Clearing it on
        # ``p.detach()`` only touches a temporary alias and leaves the real
        # ``.grad`` accumulating across configurations.
        for p in leaves:
            p.grad = None

    for (bf, bw) in [(True, True), (False, True), (True, False), (False, False)]:
        forget_mult = BwdForgetMultGPU if bw else ForgetMultGPU

        # Without an initial hidden state.
        th_out = forget_mult_CPU(th_x, th_f, hidden_init=None, batch_first=bf, backward=bw)
        th_loss = th_out.pow(2).mean()
        th_loss.backward()
        out = forget_mult.apply(x, f, None, bf)
        loss = out.pow(2).mean()
        loss.backward()
        _assert_close(th_out, out)
        _assert_close(th_x.grad, x.grad)
        _assert_close(th_f.grad, f.grad)
        _reset_grads()

        # With an initial hidden state; its leading size follows batch_first.
        h = torch.randn((5 if bf else 3), 10).cuda().requires_grad_(True)
        th_h = detach_and_clone(h)
        th_out = forget_mult_CPU(th_x, th_f, hidden_init=th_h, batch_first=bf, backward=bw)
        th_loss = th_out.pow(2).mean()
        th_loss.backward()
        out = forget_mult.apply(x.contiguous(), f.contiguous(), h, bf)
        loss = out.pow(2).mean()
        loss.backward()
        _assert_close(th_out, out)
        _assert_close(th_x.grad, x.grad)
        _assert_close(th_f.grad, f.grad)
        _assert_close(th_h.grad, h.grad)
        _reset_grads()
def __init__(self,
             input_dim: int,
             forward_combination: str = "y-x",
             backward_combination: str = "y-x",
             num_width_embeddings: int = None,
             span_width_embedding_dim: int = None,
             bucket_widths: bool = False,
             use_sentinels: bool = True) -> None:
    """Store the extractor configuration and build its optional sub-modules.

    Parameters
    ----------
    input_dim : int
        Size of the embedded representation; must be divisible by 2.
    forward_combination, backward_combination : str
        Combination strings, stored unchanged.
    num_width_embeddings, span_width_embedding_dim : int, optional
        Either both given (a span-width ``Embedding`` is created) or both
        ``None`` (no width embedding).
    bucket_widths : bool
        Stored unchanged.
    use_sentinels : bool
        When true, two parameters of shape ``(1, 1, input_dim // 2)`` are
        created as start and end sentinels.

    Raises
    ------
    ConfigurationError
        If ``input_dim`` is odd, or only one of the two width-embedding
        arguments is supplied.
    """
    super().__init__()
    self._input_dim = input_dim
    self._forward_combination = forward_combination
    self._backward_combination = backward_combination
    self._num_width_embeddings = num_width_embeddings
    self._bucket_widths = bucket_widths

    if self._input_dim % 2 != 0:
        raise ConfigurationError("The input dimension is not divisible by 2, but the "
                                 "BidirectionalEndpointSpanExtractor assumes the embedded representation "
                                 "is bidirectional (and hence divisible by 2).")

    if num_width_embeddings is not None and span_width_embedding_dim is not None:
        self._span_width_embedding = Embedding(num_width_embeddings, span_width_embedding_dim)
    elif num_width_embeddings is not None or span_width_embedding_dim is not None:
        # Exactly one of the two was given. The message previously lacked a
        # space between its two halves and named a non-existent argument.
        raise ConfigurationError("To use a span width embedding representation, you must "
                                 "specify both num_width_embeddings and span_width_embedding_dim.")
    else:
        self._span_width_embedding = None

    self._use_sentinels = use_sentinels
    if use_sentinels:
        # input_dim is known to be even here, so floor division is exact.
        half_dim = input_dim // 2
        self._start_sentinel = Parameter(torch.randn([1, 1, half_dim]))
        self._end_sentinel = Parameter(torch.randn([1, 1, half_dim]))
def bisect_demo():
    """Bisect the LB/UB on specified columns.

    The key is to use scatter_() to convert per-row column indices into a
    one-hot mask, and then pick midpoints with torch.where().

    Returns:
        (newlb, newub): the left halves followed by the right halves,
        concatenated along dim 0, so each has twice as many rows as the input.
    """
    t1t2 = torch.stack((torch.randn(5, 4), torch.randn(5, 4)), dim=-1)
    lb, _ = torch.min(t1t2, dim=-1)
    ub, _ = torch.max(t1t2, dim=-1)
    print('LB:', lb)
    print('UB:', ub)

    # random idxs for testing
    idxs = torch.randn_like(lb)
    _, idxs = idxs.max(dim=-1)  # <Batch>
    print('Split idxs:', idxs)

    idxs = idxs.unsqueeze(dim=-1)  # Batch x 1
    idxs = torch.zeros_like(lb).byte().scatter_(-1, idxs, 1)  # convert into one-hot encoding
    print('Reorg idxs:', idxs)
    # torch.where() requires a boolean condition; a uint8 mask is rejected
    # by current PyTorch releases.
    split_mask = idxs.bool()

    mid = (lb + ub) / 2.0
    lefts_lb = lb
    lefts_ub = torch.where(split_mask, mid, ub)  # use the one-hot encoding to call torch.where()
    rights_lb = torch.where(split_mask, mid, lb)  # definitely faster than element-wise reassignment
    rights_ub = ub

    print('LEFT LB:', lefts_lb)
    print('LEFT UB:', lefts_ub)
    print('RIGHT LB:', rights_lb)
    print('RIGHT UB:', rights_ub)

    newlb = torch.cat((lefts_lb, rights_lb), dim=0)
    newub = torch.cat((lefts_ub, rights_ub), dim=0)
    return newlb, newub
def fit(self):
    """Alternate discriminator and generator updates for ``args.max_epochs`` epochs.

    Each batch first updates ``self.D`` on real and generated images (a
    real/fake loss plus a class loss for each), then updates ``self.G``.
    Every ``args.print_every`` epochs the last batch's losses are printed
    and ``self.save`` / ``self.sample`` are called.
    """
    cfg = self.args
    for epoch in range(cfg.max_epochs):
        self.G.train()
        self.D.train()

        for batch in self.train_loader:
            n = batch[0].size(0)
            images = batch[0].to(self.device)
            labels = batch[1].to(self.device)

            # targets used to distinguish real from fake
            real_labels = torch.ones(n, dtype=torch.int64).to(self.device)
            fake_labels = torch.zeros(n, dtype=torch.int64).to(self.device)

            # ---- discriminator step ----
            # real images
            D_real, D_real_cls = self.D(images)
            D_loss_real = self.loss_fn(D_real, real_labels)
            D_loss_real_cls = self.loss_fn(D_real_cls, labels)

            # noise vector
            z = torch.randn(n, cfg.z_dim).to(self.device)

            # one-hot encode the class labels
            y_onehot = torch.zeros((n, 10)).to(self.device)
            y_onehot.scatter_(1, labels.unsqueeze(1), 1)
            y_onehot.requires_grad_(False)

            # generated images
            G_fake = self.G(y_onehot, z)
            D_fake, D_fake_cls = self.D(G_fake)
            D_loss_fake = self.loss_fn(D_fake, fake_labels)
            D_loss_fake_cls = self.loss_fn(D_fake_cls, labels)

            D_loss = D_loss_real + D_loss_fake + \
                D_loss_real_cls + D_loss_fake_cls
            self.D.zero_grad()
            D_loss.backward()
            self.optim_D.step()

            # ---- generator step ----
            z = torch.randn(n, cfg.z_dim).to(self.device)
            G_fake = self.G(y_onehot, z)
            D_fake, D_fake_cls = self.D(G_fake)
            G_loss = self.loss_fn(D_fake, real_labels) + \
                self.loss_fn(D_fake_cls, labels)
            self.G.zero_grad()
            G_loss.backward()
            self.optim_G.step()

        finished = epoch + 1
        if finished % cfg.print_every == 0:
            print("Epoch [{}/{}] Loss_D: {:.3f}, Loss_G: {:.3f}".
                  format(finished, cfg.max_epochs, D_loss.item(), G_loss.item()))
            self.save(cfg.ckpt_dir, finished)
            self.sample(finished)
def test_backward_computes_backward_pass():
    """Check ``_EfficientConv2d`` forward/backward against ``F.conv2d`` autograd.

    The reference output, input gradient and weight gradient come from
    ``F.conv2d``; the same quantities from ``_EfficientConv2d`` must match
    under ``almost_equal``.
    """
    weight = torch.randn(4, 8, 3, 3).cuda()
    input = torch.randn(4, 8, 4, 4).cuda()
    conv_kwargs = dict(stride=1, padding=1, dilation=1, groups=1)

    input_var = Variable(input, requires_grad=True)
    weight_var = Parameter(weight)
    out_var = F.conv2d(
        input=input_var,
        weight=weight_var,
        bias=None,
        **conv_kwargs
    )
    # The upstream gradient must have the shape of the *output* (4 channels),
    # not of the input (8 channels); autograd rejects a mismatched shape.
    grad_output = out_var.data.clone().fill_(1)
    out_var.backward(gradient=grad_output)
    out = out_var.data
    input_grad = input_var.grad.data
    weight_grad = weight_var.grad.data

    func = _EfficientConv2d(**conv_kwargs)
    out_efficient = func.forward(weight, None, input)
    weight_grad_efficient, _, input_grad_efficient = func.backward(
        weight, None, input, grad_output.clone())

    assert(almost_equal(out, out_efficient))
    assert(almost_equal(input_grad, input_grad_efficient))
    assert(almost_equal(weight_grad, weight_grad_efficient))
def test_convtbc(self):
    """ConvTBC must agree with nn.Conv1d on outputs and on all gradients."""

    def tbc_to_bct(t):
        # swap dims (0, 1), then (1, 2)
        return t.transpose(0, 1).transpose(1, 2)

    # ksz, in_channels, out_channels
    conv_tbc = ConvTBC(4, 5, kernel_size=3, padding=1)
    # out_channels, in_channels, ksz
    conv1d = nn.Conv1d(4, 5, kernel_size=3, padding=1)

    # Give both layers the same weights and bias.
    conv_tbc.weight.data.copy_(conv1d.weight.data.transpose(0, 2))
    conv_tbc.bias.data.copy_(conv1d.bias.data)

    input_tbc = torch.randn(7, 2, 4, requires_grad=True)
    input1d = tbc_to_bct(input_tbc.data)
    input1d.requires_grad = True

    output_tbc = conv_tbc(input_tbc)
    output1d = conv1d(input1d)
    self.assertAlmostEqual(tbc_to_bct(output_tbc.data), output1d.data)

    # Back-propagate the same random gradient through both layers.
    grad_tbc = torch.randn(output_tbc.size())
    grad1d = tbc_to_bct(grad_tbc).contiguous()
    output_tbc.backward(grad_tbc)
    output1d.backward(grad1d)

    self.assertAlmostEqual(conv_tbc.weight.grad.data.transpose(0, 2), conv1d.weight.grad.data)
    self.assertAlmostEqual(conv_tbc.bias.grad.data, conv1d.bias.grad.data)
    self.assertAlmostEqual(tbc_to_bct(input_tbc.grad.data), input1d.grad.data)
def test_forward(self):
    """Run forward and backward BiMpmMatching on masked inputs and check output sizes."""
    batch = 16
    len1, len2 = 21, 24
    seq_len1 = torch.randint(low=len1 - 10, high=len1 + 1, size=(batch,)).long()
    seq_len2 = torch.randint(low=len2 - 10, high=len2 + 1, size=(batch,)).long()

    def _length_mask(lengths, max_len):
        # one row per sequence: ones up to its length, zeros after
        rows = [[1] * n.item() + [0] * (max_len - n.item()) for n in lengths]
        return torch.FloatTensor(rows)

    mask1 = _length_mask(seq_len1, len1)
    mask2 = _length_mask(seq_len2, len2)

    d = 200  # hidden dimension
    l = 20  # number of perspective
    test1 = torch.randn(batch, len1, d)
    test2 = torch.randn(batch, len2, d)
    # zero out the padded positions
    test1 = test1 * mask1.view(-1, len1, 1).expand(-1, len1, d)
    test2 = test2 * mask2.view(-1, len2, 1).expand(-1, len2, d)

    half = d // 2
    test1_fw, test1_bw = torch.split(test1, half, dim=-1)
    test2_fw, test2_bw = torch.split(test2, half, dim=-1)

    ml_fw = BiMpmMatching.from_params(Params({"is_forward": True, "num_perspectives": l}))
    ml_bw = BiMpmMatching.from_params(Params({"is_forward": False, "num_perspectives": l}))

    vecs_p_fw, vecs_h_fw = ml_fw(test1_fw, mask1, test2_fw, mask2)
    vecs_p_bw, vecs_h_bw = ml_bw(test1_bw, mask1, test2_bw, mask2)
    vecs_p = torch.cat(vecs_p_fw + vecs_p_bw, dim=2)
    vecs_h = torch.cat(vecs_h_fw + vecs_h_bw, dim=2)

    expected_width = 10 + 10 * l
    assert vecs_p.size() == torch.Size([batch, len1, expected_width])
    assert vecs_h.size() == torch.Size([batch, len2, expected_width])
    assert ml_fw.get_output_dim() == ml_bw.get_output_dim() == vecs_p.size(2) // 2 == vecs_h.size(2) // 2
def test_backward(self):
    """Trace ``y * 2 * x`` with two backward passes and lint the trace after each stage.

    The final expected-trace comparison is disabled; the test ends by
    calling ``skipTest``.
    """
    lhs = Variable(torch.randn(2, 2), requires_grad=True)
    rhs = Variable(torch.randn(2, 2), requires_grad=True)

    x = lhs
    y = lhs * rhs

    trace, inputs = torch._C._tracer_enter((x, y), 2)
    traced_x, traced_y = inputs
    z = traced_y * 2 * traced_x
    torch._C._tracer_exit((z,))
    torch._C._jit_pass_lint(trace)

    # Run first backward
    seed_grad = Variable(torch.ones(2, 2), requires_grad=True)
    grad, = torch.autograd.grad(z, x, seed_grad, create_graph=True)
    torch._C._jit_pass_lint(trace)

    # Run second backward
    grad.sum().backward(create_graph=True)
    torch._C._jit_pass_lint(trace)

    # Run dead code elimination to remove unused trace nodes
    torch._C._jit_pass_dce(trace)
    # This is nondeterministic, see:
    #   https://github.com/ezyang/pytorch/issues/227
    # self.assertExpectedTrace(trace)
    self.skipTest("output is nondeterministic on Travis/Python 3.5")
def test_inline_jit_compile_extension_multiple_sources_and_no_functions(self):
    """Build an inline extension from two C++ sources, the second providing its own bindings.

    No ``functions`` argument is passed to ``load_inline``; the second
    source declares ``sin_add`` and registers it through ``PYBIND11_MODULE``.
    """
    # Each preprocessor directive and declaration needs its own line: an
    # ``#include`` sharing a line with code would not compile.
    cpp_source1 = '''
    at::Tensor sin_add(at::Tensor x, at::Tensor y) {
      return x.sin() + y.sin();
    }
    '''

    cpp_source2 = '''
    #include <torch/torch.h>
    at::Tensor sin_add(at::Tensor x, at::Tensor y);
    PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
      m.def("sin_add", &sin_add, "sin(x) + sin(y)");
    }
    '''

    module = torch.utils.cpp_extension.load_inline(
        name='inline_jit_extension',
        cpp_sources=[cpp_source1, cpp_source2],
        verbose=True)

    x = torch.randn(4, 4)
    y = torch.randn(4, 4)

    z = module.sin_add(x, y)
    self.assertEqual(z, x.sin() + y.sin())
def _prepare_dummy_data(self, local_bs):
    """Create random inputs and targets sized for all ranks together.

    Returns a tuple ``(global_bs, input_cpu, target, loss)`` where the
    tensors have ``global_bs`` rows and ``loss`` is an ``nn.MSELoss``.
    """
    # global_bs for DDP should be divisible by WORLD_SIZE
    world = int(WORLD_SIZE)
    global_bs = world * local_bs
    input_cpu = torch.randn(global_bs, 2)
    target = torch.randn(global_bs, 4)
    return global_bs, input_cpu, target, nn.MSELoss()
def test_jit_compile_extension(self):
    """JIT-compile a two-file extension and exercise its functions and its class."""
    source_files = [
        'cpp_extensions/jit_extension.cpp',
        'cpp_extensions/jit_extension2.cpp',
    ]
    module = torch.utils.cpp_extension.load(
        name='jit_extension',
        sources=source_files,
        extra_include_paths=['cpp_extensions'],
        extra_cflags=['-g'],
        verbose=True)

    x = torch.randn(4, 4)
    y = torch.randn(4, 4)

    tanh_sum = module.tanh_add(x, y)
    self.assertEqual(tanh_sum, x.tanh() + y.tanh())

    # Checking we can call a method defined not in the main C++ file.
    exp_sum = module.exp_add(x, y)
    self.assertEqual(exp_sum, x.exp() + y.exp())

    # Checking we can use this JIT-compiled class.
    doubler = module.Doubler(2, 2)
    self.assertIsNone(doubler.get().grad)
    self.assertEqual(doubler.get().sum(), 4)
    self.assertEqual(doubler.forward().sum(), 8)
def main():
    """Fit a two-layer network to random data with autograd and the custom ``MyRelu``.

    Runs 500 plain gradient-descent steps on a sum-of-squares loss and
    prints the loss after every step.
    """
    dtype = torch.FloatTensor
    # d_in is the input dimension, d_out the output dimension,
    # H the number of hidden units.
    N, d_in, H, d_out = 64, 1000, 100, 10

    x = Variable(torch.randn(N, d_in).type(dtype), requires_grad=False)
    y = Variable(torch.randn(N, d_out).type(dtype), requires_grad=False)

    w1 = Variable(torch.randn(d_in, H).type(dtype), requires_grad=True)
    w2 = Variable(torch.randn(H, d_out).type(dtype), requires_grad=True)

    learning_rate = 1e-6
    for t in range(500):
        relu = MyRelu()
        y_pred = relu(x.mm(w1)).mm(w2)
        loss = (y_pred - y).pow(2).sum()
        loss.backward()

        # Update through .data so the step itself is not tracked by autograd.
        w1.data -= learning_rate * w1.grad.data
        w2.data -= learning_rate * w2.grad.data

        # Gradients accumulate across backward() calls; clear them.
        w1.grad.data.zero_()
        w2.grad.data.zero_()
        # The loss is a 0-dim tensor, which cannot be indexed with [0];
        # .item() extracts the Python number.
        print(loss.item())
def setUp(self, length=3, factor=10, count=1000000,
          seed=None, dtype=torch.float64, device=None):
    '''Create the fixtures: an affine map, an input Gaussian and Monte-Carlo output moments.

    Args:
        length: Size of the vector.
        factor: To multiply the mean and standard deviation.
        count: Number of samples for Monte-Carlo estimation.
        seed: Seed for the random number generator.
        dtype: The data type.
        device: In which device.
    '''
    if seed is not None:
        torch.manual_seed(seed)
    tensor_opts = dict(dtype=dtype, device=device)

    # variables
    self.A = torch.randn(length, length, **tensor_opts)
    self.b = torch.randn(length, **tensor_opts)

    # input mean and covariance
    self.mu = torch.randn(length, **tensor_opts) * factor
    self.cov = rand.definite(length, positive=True, semi=False,
                             norm=factor**2, **tensor_opts)

    # Monte-Carlo estimation of the output mean and variance
    input_dist = torch.distributions.MultivariateNormal(self.mu, self.cov)
    drawn = input_dist.sample((count,))
    out_samples = drawn.matmul(self.A.t()) + self.b
    self.mc_mu = torch.mean(out_samples, dim=0)
    self.mc_var = torch.var(out_samples, dim=0)
    self.mc_cov = cov(out_samples)
def test_trace_expire(self):
    """Check ``trace.is_expired`` / ``trace.is_complete`` as the traced output is used and released."""
    x = Variable(torch.randn(2, 2), requires_grad=True)
    y = Variable(torch.randn(2, 2), requires_grad=True)

    def _record(num_backwards):
        # trace z = y * 2 * x, allowing the given number of backward passes
        tr = torch._C._tracer_enter((x, y), num_backwards)
        out = y * 2 * x
        torch._C._tracer_exit((out,))
        return out, tr

    def _expect(tr, expired, complete):
        self.assertEqual(tr.is_expired, expired)
        self.assertEqual(tr.is_complete, complete)

    # No backward requested: complete immediately, never expires.
    z, trace = _record(0)
    _expect(trace, False, True)
    del z
    _expect(trace, False, True)

    # One backward requested but the output is dropped first: expires.
    z, trace = _record(1)
    _expect(trace, False, False)
    del z
    _expect(trace, True, False)

    # One backward requested and performed: completes and stays valid.
    z, trace = _record(1)
    _expect(trace, False, False)
    z.sum().backward()
    _expect(trace, False, True)
    del z
    _expect(trace, False, True)
def test_getitem_1d(self):
    """Indexing a TensorDataset built from two 1-D tensors returns the matching elements."""
    values = torch.randn(15)
    targets = torch.randn(15)
    source = TensorDataset(values, targets)
    for idx in range(15):
        self.assertEqual(values[idx], source[idx][0])
        self.assertEqual(targets[idx], source[idx][1])
def main():
    """Fit a two-layer ReLU network to random data with hand-written gradients.

    Runs 500 gradient-descent steps on a sum-of-squares loss, printing the
    type and value of the loss after every step.
    """
    dtype = torch.FloatTensor
    # d_in is the input dimension, d_out the output dimension,
    # H the number of hidden units.
    N, d_in, H, d_out = 64, 1000, 100, 10

    x = torch.randn(N, d_in).type(dtype)
    y = torch.randn(N, d_out).type(dtype)

    w1 = torch.randn(d_in, H).type(dtype)
    w2 = torch.randn(H, d_out).type(dtype)

    learning_rate = 1e-6
    for step in range(500):
        # forward pass
        hidden = x.mm(w1)
        hidden_relu = hidden.clamp(min=0)
        y_pred = hidden_relu.mm(w2)

        # loss
        residual = y_pred - y
        loss = residual.pow(2).sum()

        # backward pass, written out by hand
        grad_y_pred = 2.0 * residual
        grad_w2 = hidden_relu.t().mm(grad_y_pred)
        grad_hidden_relu = grad_y_pred.mm(w2.t())
        # ReLU passes no gradient where its input was negative
        grad_hidden = grad_hidden_relu.masked_fill(hidden < 0, 0)
        grad_w1 = x.t().mm(grad_hidden)

        w1 -= learning_rate * grad_w1
        w2 -= learning_rate * grad_w2
        print(type(loss))
        print(loss)
def test_DepthConcat(self):
    """DepthConcat must equal its branches written into zero-padded channel slices."""
    outputSize = torch.IntTensor((5, 6, 7, 8))
    total_channels = int(outputSize.sum())
    input = torch.randn(2, 3, 12, 12)
    gradOutput = torch.randn(2, total_channels, 12, 12)

    concat = nn.DepthConcat(1)
    concat.add(nn.SpatialConvolution(3, outputSize[0], 1, 1, 1, 1))  # > 2, 5, 12, 12
    concat.add(nn.SpatialConvolution(3, outputSize[1], 3, 3, 1, 1))  # > 2, 6, 10, 10
    concat.add(nn.SpatialConvolution(3, outputSize[2], 4, 4, 1, 1))  # > 2, 7, 9, 9
    concat.add(nn.SpatialConvolution(3, outputSize[3], 5, 5, 1, 1))  # > 2, 8, 8, 8
    concat.zeroGradParameters()

    # forward/backward
    outputConcat = concat.forward(input)
    gradInputConcat = concat.backward(input, gradOutput)

    # the spatial dims are the largest, the nFilters is the sum
    output = torch.Tensor(2, total_channels, 12, 12).zero_()  # zero for padding
    everything = slice(None)
    narrows = (
        (everything, slice(0, 5), everything, everything),
        (everything, slice(5, 11), slice(1, 11), slice(1, 11)),
        (everything, slice(11, 18), slice(1, 10), slice(1, 10)),
        (everything, slice(18, 26), slice(2, 10), slice(2, 10)),
    )
    gradInput = input.clone().zero_()

    for i, region in enumerate(narrows):
        conv = concat.get(i)
        # remember the gradient produced by the concat backward pass
        expected_gradWeight = conv.gradWeight.clone()
        conv.zeroGradParameters()
        output[region].copy_(conv.forward(input))
        gradInput.add_(conv.backward(input, gradOutput[region]))
        self.assertEqual(expected_gradWeight, conv.gradWeight)

    self.assertEqual(output, outputConcat)
    self.assertEqual(gradInput, gradInputConcat)

    # Check that these don't raise errors
    concat.__repr__()
    str(concat)
def compare_grid_sample():
    """Compare input gradients of ``F.grid_sample`` and ``RoICropFunction``.

    Both are run on the same random input, grid and upstream gradient; the
    grid passed to ``RoICropFunction`` has its last-dimension channels
    swapped.

    Returns:
        The summed difference between the two input gradients. The function
        previously stopped in ``pdb`` and discarded this value.
    """
    # do gradcheck
    N = random.randint(1, 8)
    C = 2  # random.randint(1, 8)
    H = 5  # random.randint(1, 8)
    W = 4  # random.randint(1, 8)

    feat = Variable(torch.randn(N, C, H, W).cuda(), requires_grad=True)
    feat_copy = feat.clone().data.contiguous()

    grid = Variable(torch.randn(N, H, W, 2).cuda(), requires_grad=True)
    grid_clone = grid.clone().contiguous()

    # Reference: autograd through F.grid_sample.
    out_offcial = F.grid_sample(feat, grid)
    grad_outputs = Variable(torch.rand(out_offcial.size()).cuda())
    grad_outputs_clone = grad_outputs.clone().contiguous()
    grad_inputs = torch.autograd.grad(out_offcial, (feat, grid), grad_outputs.contiguous())
    grad_input_off = grad_inputs[0]

    # Candidate: RoICropFunction, fed the grid with channels 1 and 0 swapped.
    crf = RoICropFunction()
    grid_yx = torch.stack([grid_clone.data[:, :, :, 1], grid_clone.data[:, :, :, 0]], 3).contiguous().cuda()
    # forward() is called for its side effects before backward(); its output is not compared
    crf.forward(feat_copy, grid_yx)
    grad_inputs = crf.backward(grad_outputs_clone.data)
    grad_input_stn = grad_inputs[0]

    delta = (grad_input_off.data - grad_input_stn).sum()
    return delta
def main():
    """Fit a two-layer ``nn.Sequential`` model to random data with Adam.

    Runs 500 optimisation steps on a summed squared-error loss and prints
    the loss after every step.
    """
    dtype = torch.FloatTensor
    # d_in is the input dimension, d_out the output dimension,
    # H the number of hidden units.
    N, d_in, H, d_out = 64, 1000, 100, 10

    x = Variable(torch.randn(N, d_in).type(dtype), requires_grad=False)
    y = Variable(torch.randn(N, d_out).type(dtype), requires_grad=False)

    model = torch.nn.Sequential(
        torch.nn.Linear(d_in, H),
        torch.nn.ReLU(),
        torch.nn.Linear(H, d_out),
    )

    # reduction="sum" is the current spelling of the deprecated
    # size_average=False.
    loss_fn = torch.nn.MSELoss(reduction="sum")

    learning_rate = 1e-4
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for t in range(500):
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        model.zero_grad()
        loss.backward()
        optimizer.step()
        # The loss is a 0-dim tensor, which cannot be indexed with [0];
        # .item() extracts the Python number.
        print(loss.item())
def rand_init_hidden(self):
    """Return two independent random tensors to use as the initial hidden variables.

    Each has shape ``(2 * rnn_layers, batch_size, hidden_dim // 2)``.
    """
    state_shape = (2 * self.rnn_layers, self.batch_size, self.hidden_dim // 2)
    first = autograd.Variable(torch.randn(*state_shape))
    second = autograd.Variable(torch.randn(*state_shape))
    return first, second
def test_non_stateful_states_are_sorted_correctly(self):
    """Initial states of a non-stateful encoder are sorted and can be restored to the original order."""
    encoder_base = _EncoderBase(stateful=False)
    initial_states = (torch.randn(6, 5, 7), torch.randn(6, 5, 7))

    # Check that we sort the state for non-stateful encoders. To test
    # we'll just use a "pass through" encoder, as we aren't actually testing
    # the functionality of the encoder here anyway.
    def pass_through(*args):
        return args

    _, states, restoration_indices = encoder_base.sort_and_run_forward(
        pass_through, self.tensor, self.mask, initial_states)

    # Our input tensor had 2 zero length sequences, so we need
    # to concat a tensor of shape
    # (num_layers * num_directions, batch_size - num_valid, hidden_dim),
    # to the output before unsorting it.
    zeros = torch.zeros([6, 2, 7])

    # sort_and_run_forward strips fully-padded instances from the batch;
    # in order to use the restoration_indices we need to add back the two
    # that got stripped. What we get back should match what we started with.
    checked_rows = [0, 1, 3]
    for state, original in zip(states, initial_states):
        assert list(state.size()) == [6, 3, 7]
        padded = torch.cat([state, zeros], 1)
        restored = padded.index_select(1, restoration_indices)
        for row in checked_rows:
            numpy.testing.assert_array_equal(restored[:, row, :].data.numpy(),
                                             original[:, row, :].data.numpy())
def test_stack_overwrite_failure(self):
    """Conditioning twice on the same site name must raise AssertionError when run."""
    inner_data = {"latent2": torch.randn(2)}
    outer_data = {"latent2": torch.randn(2)}
    once = poutine.condition(self.model, data=inner_data)
    cm = poutine.condition(once, data=outer_data)
    with pytest.raises(AssertionError):
        cm()
def get_batch(batch_size=32, randomize=False):
    """Builds a batch i.e. (x, f(x)) pair.

    When ``randomize`` is true, a single random offset drawn from
    ``torch.randn(1)`` is added in place to the targets.
    """
    draws = torch.randn(batch_size)
    features = make_features(draws)
    targets = f(features)
    if randomize:
        targets += torch.randn(1)
    return Variable(features), Variable(targets)
def __init__(self):
    """Create the chunking layer's parameters and sub-modules from module-level sizes."""
    super(Chunking, self).__init__()

    # LSTM input width: word embedding + postag features + twice the postag hidden size
    self.input_size = embedding_size + nb_postags + postag_hn_size * 2

    # Two learnable tensors of shape
    # (2 * chunking_nb_layers, max_sentence_size, chunking_hn_size).
    state_shape = (chunking_nb_layers * 2, max_sentence_size, chunking_hn_size)
    self.w = nn.Parameter(torch.randn(*state_shape))
    self.h = nn.Parameter(torch.randn(*state_shape))

    self.embedding = nn.Embedding(nb_postags, chunking_postag_emb_size)

    # indices 0 .. nb_postags - 1 as a long Variable
    tag_ids = torch.arange(0, nb_postags)
    self.aux_emb = tag_ids
    self.aux_emb = Variable(self.aux_emb).long()

    self.bi_lstm = nn.LSTM(self.input_size,
                           chunking_hn_size,
                           chunking_nb_layers,
                           bidirectional=True)

    self.fc = nn.Linear(chunking_hn_size * 2, nb_chunktags)
def main():
    """Fit a two-layer ReLU network to random data with hand-written gradients.

    Runs 500 gradient-descent steps on a sum-of-squares loss and prints the
    loss at every step. Tensors live on the GPU when CUDA is available and
    on the CPU otherwise.
    """
    # Fall back to CPU tensors so the script also runs without a GPU.
    dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    batch_size = 64
    input_dimension = 1000
    hidden_dimension = 100
    output_dimension = 10

    random_input = torch.randn(batch_size, input_dimension).type(dtype)
    true_output = torch.randn(batch_size, output_dimension).type(dtype)

    weights_1 = torch.randn(input_dimension, hidden_dimension).type(dtype)
    weights_2 = torch.randn(hidden_dimension, output_dimension).type(dtype)

    learning_rate = 1e-6
    for t in range(500):
        h = random_input.mm(weights_1)
        # Compute ReLU
        h_relu = h.clamp(min=0)
        predicted_output = h_relu.mm(weights_2)

        loss = (predicted_output - true_output).pow(2).sum()
        print("Loss: {}".format(loss))

        # Compute gradient
        grad_y_pred = 2.0 * (predicted_output - true_output)
        grad_w2 = h_relu.t().mm(grad_y_pred)
        grad_h_relu = grad_y_pred.mm(weights_2.t())
        grad_h = grad_h_relu.clone()
        # ReLU passes no gradient where its input was negative
        grad_h[h < 0] = 0
        grad_w1 = random_input.t().mm(grad_h)

        weights_1 -= learning_rate * grad_w1
        weights_2 -= learning_rate * grad_w2
def test_degenerate_GPyTorchPosterior(self, cuda=False):
    """Exercise GPyTorchPosterior with a singular covariance matrix.

    Checks basic properties, plain sampling (which must emit exactly one
    "not p.d." RuntimeWarning), sampling with base samples, and sampling
    from a batched posterior.
    """
    device = torch.device("cuda") if cuda else torch.device("cpu")

    def _assert_single_pd_warning(caught):
        self.assertEqual(len(caught), 1)
        self.assertTrue(issubclass(caught[-1].category, RuntimeWarning))
        self.assertTrue("not p.d." in str(caught[-1].message))

    for dtype in (torch.float, torch.double):
        tensor_opts = dict(device=device, dtype=dtype)
        shape4 = torch.Size([4])
        shape42 = torch.Size([4, 2])

        # singular covariance matrix
        degenerate_covar = torch.tensor(
            [[1, 1, 0], [1, 1, 0], [0, 0, 2]], dtype=dtype, device=device
        )
        mean = torch.rand(3, dtype=dtype, device=device)
        mvn = MultivariateNormal(mean, lazify(degenerate_covar))
        posterior = GPyTorchPosterior(mvn=mvn)

        # basics
        self.assertEqual(posterior.device.type, device.type)
        self.assertTrue(posterior.dtype == dtype)
        self.assertEqual(posterior.event_shape, torch.Size([3, 1]))
        self.assertTrue(torch.equal(posterior.mean, mean.unsqueeze(-1)))
        variance_exp = degenerate_covar.diag().unsqueeze(-1)
        self.assertTrue(torch.equal(posterior.variance, variance_exp))

        # rsample
        with warnings.catch_warnings(record=True) as w:
            # we check that the p.d. warning is emitted - this only
            # happens once per posterior, so we need to check only once
            samples = posterior.rsample(sample_shape=shape4)
            _assert_single_pd_warning(w)
        self.assertEqual(samples.shape, torch.Size([4, 3, 1]))
        samples2 = posterior.rsample(sample_shape=shape42)
        self.assertEqual(samples2.shape, torch.Size([4, 2, 3, 1]))

        # rsample w/ base samples: identical base samples give identical draws
        base_samples = torch.randn(4, 3, 1, **tensor_opts)
        samples_b1 = posterior.rsample(
            sample_shape=shape4, base_samples=base_samples
        )
        samples_b2 = posterior.rsample(
            sample_shape=shape4, base_samples=base_samples
        )
        self.assertTrue(torch.allclose(samples_b1, samples_b2))

        base_samples2 = torch.randn(4, 2, 3, 1, **tensor_opts)
        samples2_b1 = posterior.rsample(
            sample_shape=shape42, base_samples=base_samples2
        )
        samples2_b2 = posterior.rsample(
            sample_shape=shape42, base_samples=base_samples2
        )
        self.assertTrue(torch.allclose(samples2_b1, samples2_b2))

        # collapse_batch_dims
        b_mean = torch.rand(2, 3, dtype=dtype, device=device)
        b_degenerate_covar = degenerate_covar.expand(2, *degenerate_covar.shape)
        b_mvn = MultivariateNormal(b_mean, lazify(b_degenerate_covar))
        b_posterior = GPyTorchPosterior(mvn=b_mvn)
        b_base_samples = torch.randn(4, 2, 3, 1, **tensor_opts)
        with warnings.catch_warnings(record=True) as w:
            b_samples = b_posterior.rsample(
                sample_shape=shape4, base_samples=b_base_samples
            )
            _assert_single_pd_warning(w)
        self.assertEqual(b_samples.shape, torch.Size([4, 2, 3, 1]))
def trySigJoin():
    """Run SigJoin on random inputs, back-propagate a random gradient and print the results."""
    inp1 = Variable(torch.randn(12, 6), requires_grad=True)
    inp2 = Variable(torch.randn(12, 2), requires_grad=True)

    result = SigJoin(inp1, inp2, 2)
    print(result.data)

    upstream = torch.randn(result.size())
    result.backward(upstream)
    for leaf in (inp1, inp2):
        print(leaf.grad)
def dummy_inputs(cls, params, init_case):
    """Build random ``hidden``, ``attn`` and ``src_map`` tensors.

    Shapes, read from ``params`` and ``init_case``:
      hidden:  (batch_size * tgt_max_len, input_size)
      attn:    (batch_size * tgt_max_len, max_seq_len)
      src_map: (max_seq_len, batch_size, n_extra_words)
    """
    batch_size = params["batch_size"]
    src_len = params["max_seq_len"]
    flat_tgt = batch_size * params["tgt_max_len"]

    hidden = torch.randn((flat_tgt, init_case["input_size"]))
    attn = torch.randn((flat_tgt, src_len))
    src_map = torch.randn((src_len, batch_size, params["n_extra_words"]))
    return hidden, attn, src_map
def set_z(self, var=None, volatile=False):
    """Set ``self.normal_z``.

    With ``var`` left as ``None`` the attribute is set to ``None``. With any
    other value, a standard-normal tensor of shape
    ``(opt.batch_size, encoder.k)`` is sampled, moved to the GPU when
    ``self.gpu_ids`` is truthy, and stored.
    """
    # NOTE(review): the value of ``var`` is never used beyond the None check,
    # and the None case stores None rather than sampling. Confirm this is the
    # intended polarity.
    if var is None:
        self.normal_z = var
        return

    noise = torch.randn((self.opt.batch_size, self.encoder.k))
    if self.gpu_ids:
        noise = noise.cuda()
    self.normal_z = Variable(noise, volatile=volatile)
def initialize_cuda_context_rng():
    """Touch every CUDA device once with ``torch.randn`` so later memory tests start warm.

    The work is done on the first call only; a module-level flag records it.
    """
    global __cuda_ctx_rng_initialized
    assert TEST_CUDA, 'CUDA must be available when calling initialize_cuda_context_rng'
    if __cuda_ctx_rng_initialized:
        return
    # initialize cuda context and rng for memory tests
    device_total = torch.cuda.device_count()
    for ordinal in range(device_total):
        torch.randn(1, device="cuda:{}".format(ordinal))
    __cuda_ctx_rng_initialized = True
def forward(self, x):
    """Return ``x`` plus zero-mean Gaussian noise scaled by ``self.stddev``.

    The noise is sampled with the shape of ``x`` and moved to the device
    ``x`` lives on, so the layer works for CPU inputs and for inputs on any
    CUDA device, not only the current one.
    """
    # if self.training:
    # noinspection PyArgumentList
    noise = th.autograd.Variable(th.randn(x.size()) * self.stddev)
    return x + noise.to(x.device)
return x, low_level_feat  # tail of a definition that starts above this view

def _init_weight(self):
    # Initialise every Conv2d / BatchNorm2d sub-module in place.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            # zero-mean normal with std sqrt(2 / n), n = kernel area * out_channels
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            # scale 1, shift 0
            m.weight.data.fill_(1)
            m.bias.data.zero_()

def _load_pretrained_model(self):
    # Download the checkpoint at the URL below and copy over only the
    # entries whose keys also exist in this model's state dict.
    pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/resnet101-5d3b4d8f.pth')
    model_dict = {}
    state_dict = self.state_dict()
    for k, v in pretrain_dict.items():
        if k in state_dict:
            model_dict[k] = v
    state_dict.update(model_dict)
    self.load_state_dict(state_dict)

if __name__ == '__main__':
    # Print the network to check that it matches the official one
    # model = torchvision.models.resnet101()
    model = ResNet101(10)
    print(model)
    # smoke test: one random 3x224x224 image through the model
    input = torch.randn(1, 3, 224, 224)
    out = model(input)
    print(out.shape)
def test_init_with_custom_matrix(self):
    """Transform3d built from an explicit matrix keeps its device, dtype and values."""
    candidates = (torch.randn(10, 4, 4), torch.randn(4, 4))
    for given in candidates:
        transform = Transform3d(matrix=given)
        stored = transform._matrix
        self.assertTrue(transform.device == given.device)
        self.assertTrue(stored.dtype == given.dtype)
        # compare after viewing the input in the stored matrix's shape
        self.assertTrue(torch.allclose(stored, given.view(stored.shape)))
def test_transform_points_fail(self):
    """transform_points must raise ValueError for a 1-D tensor."""
    scale = Scale(0.1, 0.1, 0.1)
    P = 7
    flat_points = torch.randn(P)
    with self.assertRaises(ValueError):
        scale.transform_points(flat_points)
def test_get_item(self, batch_size=5):
    """Index a batched Transform3d with every supported index kind and a few invalid ones."""
    device = torch.device("cuda:0")
    matrices = torch.randn(
        size=[batch_size, 4, 4], device=device, dtype=torch.float32
    )

    # init the Transforms3D class
    t3d = Transform3d(matrix=matrices)

    def _assert_empty(picked):
        self.assertEqual(len(picked), 0)
        self.assertEqual(picked.get_matrix().nelement(), 0)

    # int index
    idx = 1
    picked = t3d[idx]
    self.assertEqual(len(picked), 1)
    self._check_indexed_transforms(t3d, picked, [(0, 1)])

    # negative int index
    idx = -1
    picked = t3d[idx]
    self.assertEqual(len(picked), 1)
    self._check_indexed_transforms(t3d, picked, [(0, -1)])

    # list index
    idx = [1, 2]
    picked = t3d[idx]
    self.assertEqual(len(picked), len(idx))
    self._check_indexed_transforms(t3d, picked, enumerate(idx))

    # empty list index
    _assert_empty(t3d[[]])

    # slice index
    idx = slice(0, 2, 1)
    picked = t3d[idx]
    self.assertEqual(len(picked), 2)
    self._check_indexed_transforms(t3d, picked, [(0, 0), (1, 1)])

    # empty slice index
    _assert_empty(t3d[slice(0, 0, 1)])

    # bool tensor
    idx = (torch.rand(batch_size) > 0.5).to(device)
    idx[:2] = True  # make sure smth is selected
    picked = t3d[idx]
    self.assertEqual(len(picked), idx.sum())
    self._check_indexed_transforms(
        t3d,
        picked,
        zip(
            torch.arange(idx.sum()),
            torch.nonzero(idx, as_tuple=False).squeeze(),
        ),
    )

    # all false bool tensor
    _assert_empty(t3d[torch.zeros(batch_size).bool()])

    # int tensor
    idx = torch.tensor([1, 2], dtype=torch.int64, device=device)
    picked = t3d[idx]
    self.assertEqual(len(picked), idx.numel())
    self._check_indexed_transforms(t3d, picked, enumerate(idx.tolist()))

    # negative int tensor
    idx = -(torch.tensor([1, 2], dtype=torch.int64, device=device))
    picked = t3d[idx]
    self.assertEqual(len(picked), idx.numel())
    self._check_indexed_transforms(t3d, picked, enumerate(idx.tolist()))

    # invalid index
    bad_indices = (
        torch.tensor([1, 0, 1], dtype=torch.float32, device=device),  # float tensor
        1.2,  # float index
        torch.tensor(
            [[1, 0, 1], [1, 0, 1]], dtype=torch.int32, device=device
        ),  # multidimensional tensor
    )
    for bad in bad_indices:
        with self.assertRaises(IndexError):
            picked = t3d[bad]
x = self.stage4(x_list) # Upsampling x0_h, x0_w = x[0].size(2), x[0].size(3) x1 = F.interpolate( x[1], size=(x0_h, x0_w), mode='bilinear', align_corners=False) x2 = F.interpolate( x[2], size=(x0_h, x0_w), mode='bilinear', align_corners=False) x3 = F.interpolate( x[3], size=(x0_h, x0_w), mode='bilinear', align_corners=False) x = torch.cat([x[0], x1, x2, x3], 1) x = self.last_layer(x) return x def hrnetv2(pretrained=False, **kwargs): model = HRNetV2(n_class=2, **kwargs) if pretrained: model.load_state_dict(load_url(model_urls['hrnetv2']), strict=False) return model if __name__ == '__main__': model = hrnetv2(pretrained=True) inpu = torch.randn(2, 3, 512, 512) output = model(inpu) print(output.size())
# Discriminator update for every batch of every epoch: score a real batch and
# a generated batch, sum the two losses and take one optimiser step.
for epoch in range(epoch_num):
    for batch_idx, data in enumerate(dataloader):
        batch_size = data[0].size(0)
        # used below as the second dimension of the noise tensor
        pic_size = 64
        # load some real images
        real_images = data[0].to(device)
        # feed real images into discriminate net
        preds = discriminate_net(real_images).view(-1)
        # real images are labelled 1
        labels = torch.ones(batch_size).to(device)
        dloss_real = criterion(preds, labels)
        # mean sigmoid of the scores on real images
        dmean_real = preds.sigmoid().mean()
        # generate some fake images
        noises = torch.randn(batch_size,pic_size,1,1).to(device)
        fake_images = generate_net(noises)
        # grad info not go backward to generate_net
        # accelerate the training speed
        fake = fake_images.detach()
        # feed fake images into discriminate net
        preds = discriminate_net(fake).view(-1)
        # torch uses view(-1) while numpy uses reshape(1,-1)
        # fake images are labelled 0
        labels = torch.zeros(batch_size).to(device)
        dloss_fake = criterion(preds, labels)
        # mean sigmoid of the scores on fake images
        dmean_fake = preds.sigmoid().mean()
        # train the discriminate-net
        dloss = dloss_real + dloss_fake
        discriminate_optimizer.zero_grad()
        dloss.backward()
        discriminate_optimizer.step()
def _create_mat(self): mat = torch.randn(2, 3, 4, 4) mat = mat @ mat.transpose(-1, -2) mat.div_(5).add_(torch.eye(4).unsqueeze_(0)) return mat
def train():
    """Train an encoder/generator autoencoder together with a critic.

    Each iteration runs three phases in order:

    1. Autoencoder step: ``netE`` encodes a real batch, ``netG`` decodes it,
       and the MSE reconstruction loss updates both (critic parameters have
       ``requires_grad`` switched off during this phase).
    2. Five critic steps: real and generated batches plus the penalty returned
       by ``ops.calc_gradient_penalty``.
    3. One generator step against the critic.

    Costs are plotted every iteration. Dev-set critic cost and generated
    samples are produced when ``iteration % 100 == 99``, and G/D checkpoints
    are written when ``iteration % 100 == 0``.

    NOTE(review): ``volatile=True`` is the pre-0.4 PyTorch way of disabling
    graph construction. Newer releases ignore it with a warning, so those
    forward passes build autograd graphs there - confirm the targeted version.
    """
    args = load_args()
    train_gen = utils.dataset_iterator(args)
    dev_gen = utils.dataset_iterator(args)
    torch.manual_seed(1)
    netG, netD, netE = load_models(args)

    # With spectral norm only the parameters that require grad are optimised,
    # and a different beta1 is used.
    if args.use_spectral_norm:
        optimizerD = optim.Adam(filter(lambda p: p.requires_grad,
            netD.parameters()), lr=2e-4, betas=(0.0,0.9))
    else:
        optimizerD = optim.Adam(netD.parameters(), lr=2e-4, betas=(0.5, 0.9))
    optimizerG = optim.Adam(netG.parameters(), lr=2e-4, betas=(0.5, 0.9))
    optimizerE = optim.Adam(netE.parameters(), lr=2e-4, betas=(0.5, 0.9))

    schedulerD = optim.lr_scheduler.ExponentialLR(optimizerD, gamma=0.99)
    schedulerG = optim.lr_scheduler.ExponentialLR(optimizerG, gamma=0.99)
    schedulerE = optim.lr_scheduler.ExponentialLR(optimizerE, gamma=0.99)

    ae_criterion = nn.MSELoss()
    # +1 / -1 gradient seeds: backward(mone) ascends the quantity it is called
    # on, backward(one) descends it.
    one = torch.FloatTensor([1]).cuda()
    mone = (one * -1).cuda()
    gen = utils.inf_train_gen(train_gen)

    # NOTE: despite the name, args.epochs counts training iterations here.
    for iteration in range(args.epochs):
        start_time = time.time()
        """ Update AutoEncoder """
        for p in netD.parameters():
            p.requires_grad = False
        netG.zero_grad()
        netE.zero_grad()
        _data = next(gen)
        # real_data = stack_data(args, _data)
        real_data = _data
        real_data_v = autograd.Variable(real_data).cuda()
        encoding = netE(real_data_v)
        fake = netG(encoding)
        ae_loss = ae_criterion(fake, real_data_v)
        ae_loss.backward(one)
        optimizerE.step()
        optimizerG.step()

        """ Update D network """
        for p in netD.parameters():
            p.requires_grad = True
        for i in range(5):
            _data = next(gen)
            # real_data = stack_data(args, _data)
            real_data = _data
            real_data_v = autograd.Variable(real_data).cuda()
            # train with real data
            netD.zero_grad()
            D_real = netD(real_data_v)
            D_real = D_real.mean()
            D_real.backward(mone)
            # train with fake data
            noise = torch.randn(args.batch_size, args.dim).cuda()
            noisev = autograd.Variable(noise, volatile=True)
            # .data drops the generator graph so only the critic gets gradients
            fake = autograd.Variable(netG(noisev).data)
            inputv = fake
            D_fake = netD(inputv)
            D_fake = D_fake.mean()
            D_fake.backward(one)

            # train with gradient penalty
            gradient_penalty = ops.calc_gradient_penalty(args, netD,
                    real_data_v.data, fake.data)
            gradient_penalty.backward()

            D_cost = D_fake - D_real + gradient_penalty
            Wasserstein_D = D_real - D_fake
            optimizerD.step()

        # Update generator network (GAN)
        # NOTE(review): netG.zero_grad() is only called at the top of the
        # autoencoder phase, so the gradients from ae_loss are still stored on
        # netG when G.backward() accumulates below - confirm this is intended.
        noise = torch.randn(args.batch_size, args.dim).cuda()
        noisev = autograd.Variable(noise)
        fake = netG(noisev)
        G = netD(fake)
        G = G.mean()
        G.backward(mone)
        G_cost = -G
        optimizerG.step()

        schedulerD.step()
        schedulerG.step()
        schedulerE.step()
        # Write logs and save samples
        save_dir = './plots/'+args.dataset
        plot.plot(save_dir, '/disc cost', D_cost.cpu().data.numpy())
        plot.plot(save_dir, '/gen cost', G_cost.cpu().data.numpy())
        plot.plot(save_dir, '/w1 distance', Wasserstein_D.cpu().data.numpy())
        plot.plot(save_dir, '/ae cost', ae_loss.data.cpu().numpy())

        # Calculate dev loss and generate samples every 100 iters
        if iteration % 100 == 99:
            dev_disc_costs = []
            for i, (images, _) in enumerate(dev_gen):
                # imgs = stack_data(args, images)
                imgs = images
                imgs_v = autograd.Variable(imgs, volatile=True).cuda()
                D = netD(imgs_v)
                _dev_disc_cost = -D.mean().cpu().data.numpy()
                dev_disc_costs.append(_dev_disc_cost)
            plot.plot(save_dir ,'/dev disc cost', np.mean(dev_disc_costs))
            utils.generate_image(iteration, netG, save_dir, args)
            # utils.generate_ae_image(iteration, netE, netG, save_dir, args, real_data_v)

        # Save logs every 100 iters
        if (iteration < 5) or (iteration % 100 == 99):
            plot.flush()
        plot.tick()
        # Checkpoint G and D only; netE is not saved by this function.
        if iteration % 100 == 0:
            utils.save_model(netG, optimizerG, iteration,
                    'models/{}/G_{}'.format(args.dataset, iteration))
            utils.save_model(netD, optimizerD, iteration,
                    'models/{}/D_{}'.format(args.dataset, iteration))
def normalized_columns_initializer(weights, std=1.0):
    """Return a random tensor shaped like ``weights`` with scaled slices.

    Values are drawn from a standard normal, then rescaled so that the L2 norm
    taken over dim 1 equals ``std``. Only the size of ``weights`` is used.
    """
    noise = torch.randn(weights.size())
    norm_over_dim1 = torch.sqrt((noise ** 2).sum(1, keepdim=True))
    return noise * (std / norm_over_dim1)
def rand_t(*sz):
    """Return a standard-normal tensor of shape ``sz`` divided by ``sqrt(sz[0])``."""
    scale = math.sqrt(sz[0])
    sample = torch.randn(sz)
    return sample / scale
def sample_noise(self):
    """Refill ``epsilon_out`` and then ``epsilon_in`` in place with standard-normal samples."""
    for buffer in (self.epsilon_out, self.epsilon_in):
        torch.randn(buffer.shape, out=buffer)
class MyReLU(torch.autograd.Function):
    """Hand-written ReLU expressed as a custom autograd ``Function``."""

    @staticmethod
    def forward(ctx, input):
        """Save ``input`` for the backward pass and return it clamped at 0 from below."""
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        """Pass ``grad_output`` through, zeroed wherever the saved input was negative."""
        input, = ctx.saved_tensors
        # clone first so the incoming gradient tensor is not modified in place
        grad_input = grad_output.clone()
        grad_input[input < 0] = 0
        return grad_input


# Fit a TwoLayer model to random targets. MyReLU is not referenced directly in
# this script; whether TwoLayer uses it is not visible here.
x = torch.randn(300, 30)
y = torch.randn(300, 30)
model = TwoLayer(30, 1000, 30)
loss_fn = torch.nn.MSELoss(reduction='sum')
optim = torch.optim.Adam(model.parameters(), lr=0.001)
for i in range(1000):
    pred = model(x)
    loss = loss_fn(pred, y)
    optim.zero_grad()
    loss.backward()
    optim.step()
    # print predictions next to the targets every 200 iterations
    if (i % 200 == 0):
        print(pred, y)
# NOTE(review): the nesting of this final print could not be recovered from
# the collapsed source; it is shown after the loop - confirm.
print(y)
def initHidden(self, batchsize = 1):
    """Return a standard-normal hidden state of shape ``(batchsize, self.hidden_size)``."""
    shape = (batchsize, self.hidden_size)
    hidden = torch.randn(*shape)
    return Variable(hidden)
def train(self, loaders):
    """Run the latent-guided training loop.

    Per iteration: one discriminator step on the latent-code loss, then one
    step for the generator, mapping network and style encoder on the
    latent-code generator loss. Losses are logged every ``args.print_every``
    iterations, a sample image grid is written every ``args.sample_every``,
    checkpoints every ``args.save_every`` and metrics every
    ``args.eval_every``.

    The reference-guided passes, the moving-average update and the original
    debug-image dump are disabled: they are wrapped in bare string literals
    below and therefore never execute.

    Args:
        loaders: object exposing ``src``, ``ref`` and ``val`` data loaders.
    """
    args = self.args
    nets = self.nets
    nets_ema = self.nets_ema
    optims = self.optims

    # fetch random validation images for debugging
    fetcher = InputFetcher(loaders.src, loaders.ref, args.latent_dim, 'train')
    fetcher_val = InputFetcher(loaders.val, None, args.latent_dim, 'val')
    x_fixed = next(fetcher_val)
    # keep only the source images; they are reused for every sample grid
    x_fixed = x_fixed.x_src

    # resume training if necessary
    if args.resume_iter > 0:
        self._load_checkpoint(args.resume_iter)

    # remember the initial value of ds weight
    initial_lambda_ds = args.lambda_ds

    print('Start training...')
    start_time = time.time()
    for i in range(args.resume_iter, args.total_iters):
        # fetch images and labels
        inputs = next(fetcher)
        x_real, y_org = inputs.x_src, inputs.y_src
        # x_ref / x_ref2 are only consumed by the disabled reference-based passes
        x_ref, x_ref2, y_trg = inputs.x_ref, inputs.x_ref2, inputs.y_ref
        z_trg, z_trg2 = inputs.z_trg, inputs.z_trg2

        masks = nets.fan.get_heatmap(x_real) if args.w_hpf > 0 else None

        # train the discriminator
        d_loss, d_losses_latent = compute_d_loss(
            nets, args, x_real, y_org, y_trg, z_trg=z_trg, masks=masks)
        self._reset_grad()
        d_loss.backward()
        optims.discriminator.step()

        """
        Removing Reference based training
        d_loss, d_losses_ref = compute_d_loss(
            nets, args, x_real, y_org, y_trg, x_ref=x_ref, masks=masks)
        self._reset_grad()
        d_loss.backward()
        optims.discriminator.step()
        """

        # train the generator
        g_loss, g_losses_latent = compute_g_loss(
            nets, args, x_real, y_org, y_trg, z_trgs=[z_trg, z_trg2], masks=masks)
        self._reset_grad()
        g_loss.backward()
        optims.generator.step()
        optims.mapping_network.step()
        optims.style_encoder.step()

        """
        Removing reference based training
        g_loss, g_losses_ref = compute_g_loss(
            nets, args, x_real, y_org, y_trg, x_refs=[x_ref, x_ref2], masks=masks)
        self._reset_grad()
        g_loss.backward()
        optims.generator.step()
        """

        # compute moving average of network parameters
        # NOTE(review): the update below is disabled, so nothing in this loop
        # writes to nets_ema, yet calculate_metrics at the bottom still
        # receives nets_ema - confirm that is intended.
        """
        moving_average(nets.generator, nets_ema.generator, beta=0.999)
        moving_average(nets.mapping_network, nets_ema.mapping_network, beta=0.999)
        moving_average(nets.style_encoder, nets_ema.style_encoder, beta=0.999)
        """

        # decay weight for diversity sensitive loss
        if args.lambda_ds > 0:
            args.lambda_ds -= (initial_lambda_ds / args.ds_iter)

        # print out log info
        if (i+1) % args.print_every == 0:
            elapsed = time.time() - start_time
            # [:-7] strips the fractional-seconds suffix of the timedelta string
            elapsed = str(datetime.timedelta(seconds=elapsed))[:-7]
            log = "Elapsed time [%s], Iteration [%i/%i], " % (elapsed, i+1, args.total_iters)
            all_losses = dict()
            for loss, prefix in zip([d_losses_latent, g_losses_latent],
                                    ['D/latent_', 'G/latent_']):
                for key, value in loss.items():
                    all_losses[prefix + key] = value
            all_losses['G/lambda_ds'] = args.lambda_ds
            log += ' '.join(['%s: [%.4f]' % (key, value) for key, value in all_losses.items()])
            print(log)

        """
        # generate images for debugging
        if (i+1) % args.sample_every == 0:
            os.makedirs(args.sample_dir, exist_ok=True)
            utils.debug_image(nets_ema, args, inputs=inputs_val, step=i+1)
        """
        # Sample grid: translate the fixed validation images into every domain
        # with a fresh random latent code, using the live (non-EMA) networks.
        if (i+1) % args.sample_every == 0:
            with torch.no_grad():
                x_fake_list = [x_fixed]
                for j in range(args.num_domains):
                    # label tensor filled with the domain index j
                    label = torch.ones((x_fixed.size(0),),dtype=torch.long).to(self.device)
                    label = label*j
                    z = torch.randn((x_fixed.size(0),args.latent_dim)).to(self.device)
                    style = self.nets.mapping_network(z,label)
                    x_fake_list.append(self.nets.generator(x_fixed, style))
                x_concat = torch.cat(x_fake_list, dim=3)
                # NOTE(review): 'samples' is not created here; this assumes
                # the directory already exists - confirm.
                sample_path = os.path.join('samples', '{}-images.jpg'.format(i+1))
                save_image(self.denorm(x_concat.data.cpu()), sample_path, nrow=1, padding=0)
                print('Saved real and fake images into {}...'.format(sample_path))

        # save model checkpoints
        if (i+1) % args.save_every == 0:
            self._save_checkpoint(step=i+1)

        # compute FID and LPIPS if necessary
        if (i+1) % args.eval_every == 0:
            calculate_metrics(nets_ema, args, i+1, mode='latent')
            calculate_metrics(nets_ema, args, i+1, mode='reference')
def norm_col_init(weights, std=1.0):
    """Sample a normal tensor sized like ``weights``, rescaled to norm ``std`` over dim 1.

    Only ``weights.size()`` is read; the values of ``weights`` are ignored.
    """
    sample = torch.randn(weights.size())
    squared_sum = sample.pow(2).sum(1, keepdim=True)
    scale = std / torch.sqrt(squared_sum)
    return sample * scale
def _create_mat(self): mat = torch.randn(4, 4) mat = mat @ mat.transpose(-1, -2) mat.div_(5).add_(torch.eye(4)) return mat
def __init__(self,in_size, out_size):
    """Create the layer parameters.

    ``weights`` is an ``(in_size, out_size)`` standard-normal parameter and
    ``bias`` is an ``(out_size,)`` zero parameter.
    """
    super().__init__()
    initial_weights = torch.randn(in_size, out_size)
    initial_bias = torch.zeros(out_size)
    self.weights = nn.Parameter(initial_weights)
    self.bias = nn.Parameter(initial_bias)
def evaluate(experiment_directory, checkpoint_path):
    """Fit a linear map that moves a fixed point cloud onto the decoder's zero set.

    A frozen decoder is loaded from ``checkpoint_path``. The learnable
    quantities are a 3x3 matrix ``transform_inpt``, three per-axis scales
    (assembled into a diagonal ``scale_matrix``) and a 3-vector ``bias``.
    Each mini-batch of points is mapped as
    ``xyz @ (transform_inpt @ scale_matrix) + bias`` and the loss is::

        L1(decoder(mapped_points), 0) + reg * ||diag(scale_matrix) - 1||_2

    Parameters are updated by hand-written SGD after every mini-batch. When
    training finishes the loss curve, the final transform matrix, the bias and
    the transformed points from ``test_model.pts`` are written to the working
    directory, and ``plot_heatmap`` is called.

    Args:
        experiment_directory: directory whose experiment specification is
            loaded through ``ws.load_experiment_specifications``.
        checkpoint_path: file holding a dict with a ``'model_state_dict'`` entry.

    NOTE(review): the decoder is never put in ``eval()`` mode (that call is
    commented out), so any train-only layers stay active - confirm.
    NOTE(review): an unconditional ``pdb.set_trace()`` remains before the
    plotting step, so the function blocks waiting for a debugger.
    """
    chamfer_results = []

    specs = ws.load_experiment_specifications(experiment_directory)

    logging.info("Experiment description: \n" + specs["Description"])

    data_source = specs["DataSource"]
    test_split_file = specs["TestSplit"]

    num_samp_per_scene = specs["SamplesPerScene"]
    scene_per_batch = specs["ScenesPerBatch"]
    clamp_dist = specs["ClampingDistance"]
    minT = -clamp_dist
    maxT = clamp_dist
    enforce_minmax = False
    num_data_loader_threads =1
    batch_split = 1
    scene_per_subbatch = scene_per_batch // batch_split

    checkpoints = list(
        range(
            specs["SnapshotFrequency"],
            specs["NumEpochs"] + 1,
            specs["SnapshotFrequency"],
        )
    )

    # Defined but not registered with the signal module inside this function.
    def signal_handler(sig, frame):
        logging.info("Stopping early...")
        sys.exit(0)

    with open(test_split_file,"r") as f:
        test_split = json.load(f)

    sdf_dataset = dt_vtk.SDFVTKSamples(
        data_source, test_split, num_samp_per_scene
    )

    # The loader is constructed but the optimisation below reads its points
    # straight from a .pts file instead.
    sdf_loader = data_utils.DataLoader(
        sdf_dataset,
        batch_size=scene_per_subbatch,
        shuffle=True,
        num_workers=num_data_loader_threads,
        drop_last=True,
    )

    decoder_eval = decoder.Decoder(0, **specs["NetworkSpecs"]).cuda()

    checkpoint = torch.load(checkpoint_path)
    decoder_eval.load_state_dict(checkpoint['model_state_dict'])
    decoder_eval = decoder_eval.float()
    # decoder_eval.eval()
    # Freeze the decoder: only the transform parameters are optimised.
    for param in decoder_eval.parameters():
        param.requires_grad = False

    loss_l1 = torch.nn.L1Loss()
    loss_l2 = torch.nn.MSELoss()
    loss_log =[]

    # An earlier variant parameterised the map with per-axis rotation angles
    # (theta_x/y/z) composed with the scale matrix; it was already disabled
    # and has been dropped here in favour of the unconstrained 3x3 matrix.
    scale_one = torch.randn(1, requires_grad=True, dtype=torch.float)
    scale_two = torch.randn(1, requires_grad=True, dtype=torch.float)
    scale_three = torch.randn(1, requires_grad=True, dtype=torch.float)
    scale_one.retain_grad()
    scale_two.retain_grad()
    scale_three.retain_grad()

    transform_inpt = torch.randn(3,3).float().cuda()
    transform_inpt.requires_grad_(True)
    transform_inpt.retain_grad()

    bias = torch.zeros(3).float().cuda()
    bias.requires_grad_(True)
    bias.retain_grad()

    test_model = np.array(pd.read_csv("../chairs_segdata/points/1a8bbf2994788e2743e99e0cae970928.pts", header=None,sep=" ").values)
    num_epochs = 500
    learning_rate = 1e-3
    test_pts = torch.from_numpy(test_model).float()
    # Fixed: the original assigned to the misspelled attribute `requies_grad`,
    # which only created a stray attribute on the tensor.
    test_pts.requires_grad = False
    bt_size = 32
    # trailing points that do not fill a whole batch are never visited
    num_batches = int(test_model.shape[0]//bt_size)
    sub = torch.Tensor([1]).cuda()
    reg = 1

    with torch.enable_grad():
        for j in range(num_epochs):
            batch_loss=0
            for i in range(num_batches):
                test_torch = test_pts[i*bt_size:(i+1)*bt_size,:]

                # Rebuild diag(scale_one, scale_two, scale_three) every batch
                # so the graph runs back to the three leaf scalars.
                scale_matrix = torch.cat([torch.cat([scale_one,torch.Tensor([0]),torch.Tensor([0])]),
                                          torch.cat([torch.Tensor([0]),scale_two,torch.Tensor([0])]),
                                          torch.cat([torch.Tensor([0]),torch.Tensor([0]),scale_three])]).view(3,3).float().cuda()
                scale_matrix.retain_grad()
                scale_matrix.requires_grad_(True)

                transform_matrix = torch.matmul(transform_inpt, scale_matrix)
                transform_matrix.requires_grad_(True)
                transform_matrix.retain_grad()

                xyz = test_torch.cuda()
                xyz_transform = torch.matmul(xyz, transform_matrix)
                xyz_transform.requires_grad_(True)
                xyz_transform.retain_grad()

                transform_bias = torch.add(xyz_transform, bias).float()
                transform_bias.retain_grad()

                pred_sdf = decoder_eval(transform_bias)
                # the target is all zeros, i.e. every mapped point should
                # receive a decoder output of 0
                target = torch.zeros(pred_sdf.shape[0],pred_sdf.shape[1]).float().cuda()

                # regulariser: L2 distance of the scale diagonal from all-ones
                diag_sum = torch.norm(torch.sub(torch.diag(scale_matrix),sub),2)
                diag_sum.retain_grad()
                diag_sum.requires_grad_(True)

                loss1 = loss_l1(pred_sdf,target)
                loss2 = reg *diag_sum
                loss = torch.add(loss1,loss2)
                loss.backward(retain_graph=True)
                batch_loss+= loss.item()
                print('Batch Loss {:6.4f}'.format(loss.item()))

                # Manual SGD step followed by zeroing every retained gradient.
                with torch.no_grad():
                    bias.data.sub_(bias.grad.data*learning_rate)
                    scale_one.data.sub_(scale_one.grad.data*learning_rate)
                    scale_two.data.sub_(scale_two.grad.data*learning_rate)
                    scale_three.data.sub_(scale_three.grad.data*learning_rate)
                    transform_inpt.data.sub_(transform_inpt.grad.data*learning_rate)

                    bias.grad.data.zero_()
                    scale_one.grad.data.zero_()
                    scale_three.grad.data.zero_()
                    scale_two.grad.data.zero_()
                    scale_matrix.grad.data.zero_()
                    transform_bias.grad.data.zero_()
                    xyz_transform.grad.data.zero_()
                    transform_matrix.grad.data.zero_()
                    transform_inpt.grad.data.zero_()
                    diag_sum.grad.data.zero_()

            actual_loss = (batch_loss*bt_size)/(test_model.shape[0])
            print("Loss after {} epoch is {:6.4f}".format(j,actual_loss))
            loss_log.append(actual_loss)

    pdb.set_trace()
    fig,ax = plt.subplots()
    ax.plot(np.arange(num_epochs),loss_log)
    ax.set(xlabel='iterations',ylabel='transformationloss')
    plt.savefig('Transformation_loss_new.png')
    torch.save(transform_matrix,'transform_matrix_new.pt')
    torch.save(bias,'bias_new.pt')
    # Apply the learned map to a second point file; pandas yields float64,
    # hence the .double() casts.
    test_pts = torch.from_numpy(pd.read_csv('test_model.pts',header=None, sep=' ').values).cuda()
    transform_pts = torch.matmul(test_pts, transform_matrix.double())
    transform_pts = torch.add(transform_pts, bias.double()).cpu().detach().numpy()
    np.savetxt('transform_points_new.pts',transform_pts)
    plot_heatmap(experiment_directory, checkpoint_path)
def test():
    """Smoke test: run one random 1x3x32x32 input through ``ResNet18`` and print the output size."""
    model = ResNet18()
    sample = torch.randn(1,3,32,32)
    result = model(sample)
    print(result.size())
def train(avg_tensor = None, coefs=0):
    """Train the encoder ``E`` to invert a fixed mapping + synthesis generator.

    Each step samples latents, produces ``w1`` and images ``imgs1`` with the
    pretrained ``Gm``/``Gs`` under ``no_grad``, encodes ``imgs1`` back to
    ``(const2, w2)`` with ``E``, and re-synthesises ``imgs2`` from ``w2``.
    Two optimizer steps follow: one on the image-space loss ``loss_1`` and one
    on the latent/constant-space loss ``loss_4``.

    Args:
        avg_tensor: stored on the mapping network as ``Gm.buffer1``.
        coefs: forwarded to ``Gm`` as ``coefs_m``.

    Side effects: prints losses every step. Every 100 steps it saves an image
    grid under ``resultPath1_1`` and appends the losses to
    ``resultPath/Loss.txt``. Every 5000 steps it saves ``E`` under
    ``resultPath1_2``.

    NOTE(review): ``softmax`` is called without ``dim`` throughout, which
    relies on PyTorch's deprecated implicit-dimension choice.
    NOTE(review): ``loss_4.backward`` runs on a retained graph after
    ``E_optimizer.step()`` has already updated ``E`` in place; recent PyTorch
    versions may reject this - confirm the targeted version.
    NOTE(review): ``loss_cosine_i`` is a similarity (larger means more alike)
    yet it is added to a minimised loss - confirm the intended sign.
    """
    Gs = Generator(startf=64, maxf=512, layer_count=7, latent_size=512, channels=3) # 32->512 layer_count=8 / 64->256 layer_count=7
    Gs.load_state_dict(torch.load('./pre-model/cat/cat256_Gs_dict.pth'))

    Gm = Mapping(num_layers=14, mapping_layers=8, latent_size=512, dlatent_size=512, mapping_fmaps=512) #num_layers: 14->256 / 16->512 / 18->1024
    Gm.load_state_dict(torch.load('./pre-model/cat/cat256_Gm_dict.pth'))

    Gm.buffer1 = avg_tensor
    E = BE.BE(startf=64, maxf=512, layer_count=7, latent_size=512, channels=3)
    # Gm stays on the CPU; its output is moved to CUDA after the forward pass.
    Gs.cuda()
    E.cuda()
    const_ = Gs.const

    E_optimizer = LREQAdam([{'params': E.parameters()},], lr=0.0015, betas=(0.0, 0.99), weight_decay=0)
    loss_all=0
    loss_mse = torch.nn.MSELoss()
    loss_lpips = lpips.LPIPS(net='vgg').to('cuda')
    loss_kl = torch.nn.KLDivLoss()

    batch_size = 5
    # the generator's constant input, repeated along dim 0 once per sample
    const1 = const_.repeat(batch_size,1,1,1)
    for epoch in range(0,250001):
        # the seed cycles with period 30000, so the latents repeat after that
        set_seed(epoch%30000)
        latents = torch.randn(batch_size, 512)
        with torch.no_grad(): # the reference latents and images are generated here
            w1 = Gm(latents,coefs_m=coefs).to('cuda')
            imgs1 = Gs.forward(w1,6) # 7->512 / 6->256

        const2,w2 = E(imgs1.cuda())

        imgs2=Gs.forward(w2,6)

        E_optimizer.zero_grad()

        # loss1: image-space reconstruction (MSE + LPIPS + KL of softmaxed images)
        loss_img_mse = loss_mse(imgs1,imgs2)
        loss_img_lpips = loss_lpips(imgs1,imgs2).mean()

        y1_imgs, y2_imgs = torch.nn.functional.softmax(imgs1),torch.nn.functional.softmax(imgs2)
        loss_kl_img = loss_kl(torch.log(y2_imgs),y1_imgs) #D_kl(True=y1_imgs||Fake=y2_imgs)
        # replace NaN by 0 and inf by 1 so a degenerate KL cannot poison the loss
        loss_kl_img = torch.where(torch.isnan(loss_kl_img),torch.full_like(loss_kl_img,0), loss_kl_img)
        loss_kl_img = torch.where(torch.isinf(loss_kl_img),torch.full_like(loss_kl_img,1), loss_kl_img)

        loss_1 = 17*loss_img_mse + 5*loss_img_lpips + loss_kl_img
        loss_1.backward(retain_graph=True)
        E_optimizer.step()

        # Earlier experiments with extra losses on centre crops of the images
        # (loss_2 "medium region", loss_3 "smallest region") were already
        # disabled and are omitted here.

        # loss3_v2: cosine similarity of the flattened images
        i1 = imgs1.view(-1)
        i2 = imgs2.view(-1)
        loss_cosine_i = i1.dot(i2)/(torch.sqrt(i1.dot(i1))*torch.sqrt(i2.dot(i2)))

        # loss4: constant-input and latent-space terms
        # Author's note: without the const term the gradient does not descend
        # quickly at the start and training may fail to converge; scaling this
        # penalty down improved results considerably.
        loss_c = loss_mse(const1,const2)
        loss_c_m = loss_mse(const1.mean(),const2.mean())
        loss_c_s = loss_mse(const1.std(),const2.std())

        loss_w = loss_mse(w1,w2)
        loss_w_m = loss_mse(w1.mean(),w2.mean()) # early in training this swings between about 10 and 0.0001
        loss_w_s = loss_mse(w1.std(),w2.std()) # late in training this also swings between large and small

        y1, y2 = torch.nn.functional.softmax(const1),torch.nn.functional.softmax(const2)
        loss_kl_c = loss_kl(torch.log(y2),y1)
        loss_kl_c = torch.where(torch.isnan(loss_kl_c),torch.full_like(loss_kl_c,0), loss_kl_c)
        loss_kl_c = torch.where(torch.isinf(loss_kl_c),torch.full_like(loss_kl_c,1), loss_kl_c)

        w1_kl, w2_kl = torch.nn.functional.softmax(w1),torch.nn.functional.softmax(w2)
        loss_kl_w = loss_kl(torch.log(w2_kl),w1_kl) #D_kl(True=w1_kl||Fake=w2_kl)
        loss_kl_w = torch.where(torch.isnan(loss_kl_w),torch.full_like(loss_kl_w,0), loss_kl_w)
        loss_kl_w = torch.where(torch.isinf(loss_kl_w),torch.full_like(loss_kl_w,1), loss_kl_w)

        w1_cos = w1.view(-1)
        w2_cos = w2.view(-1)
        # Fixed: the denominator previously multiplied |w1| by |w1|; a cosine
        # similarity needs |w1| * |w2|, as in loss_cosine_i above. This value
        # is only logged, it is not part of loss_4.
        loss_cosine_w = w1_cos.dot(w2_cos)/(torch.sqrt(w1_cos.dot(w1_cos))*torch.sqrt(w2_cos.dot(w2_cos)))

        loss_4 = 0.02*loss_c+0.03*loss_c_m+0.03*loss_c_s+0.02*loss_w+0.03*loss_w_m+0.03*loss_w_s+ loss_kl_w + loss_kl_c+loss_cosine_i
        loss_4.backward(retain_graph=True)
        E_optimizer.step()

        # reporting only; loss_cosine_i is counted here in addition to loss_4
        loss_all = loss_1 + loss_4 + loss_cosine_i
        print('i_'+str(epoch)+'--loss_all__:'+str(loss_all.item())+'--loss_mse:'+str(loss_img_mse.item())+'--loss_lpips:'+str(loss_img_lpips.item())+'--loss_kl_img:'+str(loss_kl_img.item())+'--loss_cosine_i:'+str(loss_cosine_i.item()))
        print('loss_w:'+str(loss_w.item())+'--loss_w_m:'+str(loss_w_m.item())+'--loss_w_s:'+str(loss_w_s.item())+'--loss_kl_w:'+str(loss_kl_w.item())+'--loss_c:'+str(loss_c.item())+'--loss_c_m:'+str(loss_c_m.item())+'--loss_c_s:'+str(loss_c_s.item())+'--loss_kl_c:'+str(loss_kl_c.item())+'--loss_cosine_w:'+str(loss_cosine_w.item()))
        print('-')

        if epoch % 100 == 0:
            n_row = batch_size
            # originals followed by reconstructions, rescaled by *0.5+0.5
            test_img = torch.cat((imgs1[:n_row],imgs2[:n_row]))*0.5+0.5
            torchvision.utils.save_image(test_img, resultPath1_1+'/ep%d.jpg'%(epoch),nrow=n_row)
            with open(resultPath+'/Loss.txt', 'a+') as f:
                print('i_'+str(epoch)+'--loss_all__:'+str(loss_all.item())+'--loss_mse:'+str(loss_img_mse.item())+'--loss_lpips:'+str(loss_img_lpips.item())+'--loss_kl_img:'+str(loss_kl_img.item())+'--loss_cosine_i:'+str(loss_cosine_i.item()),file=f)
                print('loss_w:'+str(loss_w.item())+'--loss_w_m:'+str(loss_w_m.item())+'--loss_w_s:'+str(loss_w_s.item())+'--loss_kl_w:'+str(loss_kl_w.item())+'--loss_c:'+str(loss_c.item())+'--loss_c_m:'+str(loss_c_m.item())+'--loss_c_s:'+str(loss_c_s.item())+'--loss_kl_c:'+str(loss_kl_c.item())+'--loss_cosine_w:'+str(loss_cosine_w.item()),file=f)
            if epoch % 5000 == 0:
                torch.save(E.state_dict(), resultPath1_2+'/E_model_ep%d.pth'%epoch)
import torch
from torch import nn


class LSTM(nn.Module):
    """Sequence classifier: a single-layer LSTM followed by dropout and a linear head.

    Args:
        input_size: number of features per time step after flattening
            (default ``3 * 32 * 32``).
        hidden_size: LSTM hidden width (default 8192).
        num_classes: number of output logits (default 8).
        dropout: dropout probability applied before the linear head
            (default 0.5).

    The defaults reproduce the original hard-coded configuration. Note that
    it allocates a very large recurrent layer.
    """

    def __init__(self, input_size=3 * 32 * 32, hidden_size=8192,
                 num_classes=8, dropout=0.5):
        super(LSTM, self).__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, x):
        """Classify a batch of sequences.

        Args:
            x: tensor whose dims from index 2 onward are flattened into the
                feature axis, e.g. ``(batch, time, C, H, W)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` computed from the LSTM
            output at the last time step.
        """
        x = torch.flatten(x, 2)
        x, _ = self.rnn(x)
        # only the final time step feeds the classifier
        x = self.classifier(x[:, -1, :])
        return x


if __name__ == '__main__':
    model = LSTM()
    inputs = torch.randn(8, 10, 3, 32, 32)
    outputs = model(inputs)
    print(outputs.shape)
def test_compose_fail(self):
    """Composing a transform with a plain tensor must raise ``ValueError``."""
    # Only composing Transform3d objects is possible
    scale = Scale(0.1, 0.1, 0.1)
    not_a_transform = torch.randn(100)
    self.assertRaises(ValueError, scale.compose, not_a_transform)
# NOTE(review): this excerpt opens inside a method whose `def` line (and the
# enclosing class header) is not visible; the first statements are its tail.
        # flatten everything except the batch dimension before the linear layers
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        size = x.size()[1:]  # all dimensions but batch
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


# One forward/backward pass on random data.
net = Net()
params = list(net.parameters())
input = torch.randn(1, 1, 40, 40)
output = net(input)
target = torch.randn(10)
# reshape the target to (1, 10) for the loss
# NOTE(review): assumes `output` is (1, 10) - confirm.
target = target.view(1, -1)
criterion = nn.MSELoss()
loss = criterion(output, target)
net.zero_grad()  # zeroes the gradient buffers of all parameters
loss.backward()
# prints a fresh random tensor, not the `input` used above
print(torch.randn(1, 1, 40, 40))
def test_init_with_custom_matrix_errors(self):
    """``Transform3d`` must reject every listed ``matrix`` shape with ``ValueError``."""
    bad_shapes = [[10, 5, 4], [3, 4], [10, 4, 4, 1], [10, 4, 4, 2], [4, 4, 4, 3]]
    for shape in bad_shapes:
        candidate = torch.randn(*shape).float()
        with self.assertRaises(ValueError):
            Transform3d(matrix=candidate)
# NOTE(review): this excerpt opens in the middle of a call expression; the
# enclosing function header is not visible.
            kernel_size=settings["kernel_size"],
            padding=settings["padding"],
            bias=settings["bias"]))


def convlayer2(conv_cls, settings):
    """Return an ``extend``-wrapped ``conv_cls`` layer built from ``settings``.

    ``in_channels`` is taken from ``settings["in_features"][0]``; the other
    constructor arguments come from the same-named settings entries.
    """
    return extend(
        conv_cls(in_channels=settings["in_features"][0],
                 out_channels=settings["out_channels"],
                 kernel_size=settings["kernel_size"],
                 padding=settings["padding"],
                 bias=settings["bias"]))


# Shared random input batch of shape (batch, *in_features).
input_size = (TEST_SETTINGS["batch"], ) + TEST_SETTINGS["in_features"]
X = torch.randn(size=input_size)


def convearlayer(settings):
    """Return an ``extend``-wrapped linear layer with a single output feature.

    ``in_features`` is the product of every ``settings["in_features"]`` entry
    reduced by ``settings["padding"][0]``, times ``settings["out_channels"]``.
    """
    return extend(
        torch.nn.Linear(in_features=np.prod(
            [f - settings["padding"][0]
             for f in settings["in_features"]]) * settings["out_channels"],
                        out_features=1))


def make_regression_problem(conv_cls, act_cls):
    """Assemble a conv -> activation -> flatten -> linear model and random targets.

    NOTE(review): no ``return`` is visible in this excerpt; the function
    probably continues past the end of the shown text.
    """
    model = torch.nn.Sequential(convlayer(conv_cls, TEST_SETTINGS), act_cls(),
                                Flatten(), convearlayer(TEST_SETTINGS))

    # one random target value per sample in the model output
    Y = torch.randn(size=(model(X).shape[0], 1))
# NOTE(review): this excerpt opens inside a constructor call within a method
# whose header (and class header) is not visible.
            nn.Dropout(),
            nn.Linear(LinearSize1, LinearSize2),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(LinearSize2, 1),
        )

        # Freeze the feature extractor when requested.
        # NOTE(review): the inner check repeats the outer one, so the `else`
        # branch can never run as written.
        if isFreeze:
            for param in self.features.parameters():
                if isFreeze:
                    param.requires_grad = False
                else:
                    param.requires_grad = True

    def forward(self, x):
        """Run ``features``, flatten per sample, then apply ``classifier``."""
        x = self.features(x)
        # keep dim 0 and flatten the remaining dimensions
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def get_name(self):
        """Return the class name of this instance."""
        return self.__class__.__name__


if __name__ == '__main__':
    # Smoke test: one random 1x3x224x224 input through the network.
    test_input = torch.randn([1, 3, 224, 224])
    compNet = FullVggCompositionNet()
    test_input = Variable(test_input)
    output = compNet(test_input)
    print("DEBUG")
# interval3 = Interval(8,10) # interval4 = Interval(15,18) nums = [2, -4, 5] so = Solution() # print(so.search(nums, 3)) letter = ["abbcbda", "cbdaaa", "b", "dadaaad", "dccbbbc", "dccadd", "ccbdbc", "bbca", "bacbcdd", "a", "bacb", "cbc", "adc", "c", "cbdbcad", "cdbab", "db", "abbcdbd", "bcb", "bbdab", "aa", "bcadb", "bacbcb", "ca", "dbdabdb", "ccd", "acbb", "bdc", "acbccd", "d", "cccdcda", "dcbd", "cbccacd", "ac", "cca", "aaddc", "dccac", "ccdc", "bbbbcda", "ba", "adbcadb", "dca", "abd", "bdbb", "ddadbad", "badb", "ab", "aaaaa", "acba", "abbb"] # print(so.divide(10, 4)) # print(so.getSum(-1, 1)) a = "acaa" print(a[0:4]) print(so.wordBreak("acaaaaabbbdbcccdcdaadcdccacbcccabbbbcdaaaaaadb", letter)) import torch x = torch.randn(3, requires_grad=True) print(x) y = x * 2 while y.data.norm() < 1000: print(y.data) print() print(y.data.norm()) y = y * 2 print(y)
def encode(self, images):
    """Encode ``images`` and draw matching Gaussian noise.

    Returns:
        ``(hiddens, noise)`` where ``hiddens = self.enc(images)`` and
        ``noise`` is a standard-normal ``Variable`` of the same size, moved to
        the CUDA device that ``hiddens`` lives on.
    """
    hiddens = self.enc(images)
    cpu_noise = torch.randn(hiddens.size())
    device_index = hiddens.data.get_device()
    noise = Variable(cpu_noise.cuda(device_index))
    return hiddens, noise
def sample_noise(self):
    """Overwrite ``epsilon_w`` and then ``epsilon_b`` in place with standard-normal draws."""
    weight_noise = self.epsilon_w
    torch.randn(weight_noise.shape, out=weight_noise)
    bias_noise = self.epsilon_b
    torch.randn(bias_noise.shape, out=bias_noise)