Ejemplo n.º 1
0
 def test_MultivariateNormalQMCEngineDegenerate(self, cuda=False):
     """Check QMC draws from a degenerate three-dimensional normal.

     The random vector is (X, Y, Z) with X, Y iid standard normal and
     Z = X + Y, which makes the covariance matrix singular. The test
     verifies dtype/device, first and second moments, per-column
     normality (Shapiro-Wilk) and that X + Y = Z holds for every draw.
     """
     device = torch.device("cuda" if cuda else "cpu")
     expected_stds = (1, 1, math.sqrt(2))
     for dtype in (torch.float, torch.double):
         zero_mean = torch.zeros(3, device=device, dtype=dtype)
         singular_cov = torch.tensor(
             [[1, 0, 1], [0, 1, 1], [1, 1, 2]], device=device, dtype=dtype
         )
         qmc_engine = MultivariateNormalQMCEngine(
             mean=zero_mean, cov=singular_cov, seed=12345
         )
         draws = qmc_engine.draw(n=2000)

         # Output must live where, and in the precision, the inputs did.
         self.assertEqual(draws.dtype, dtype)
         self.assertEqual(draws.device.type, device.type)

         # Marginal moments.
         column_means = draws.mean(dim=0)
         self.assertTrue(torch.all(torch.abs(column_means) < 1e-2))
         for col, expected_std in enumerate(expected_stds):
             std_error = torch.abs(torch.std(draws[:, col]) - expected_std)
             self.assertTrue(std_error < 1e-2)

         # Each marginal should look normal.
         for col in range(3):
             _, pval = shapiro(draws[:, col].cpu().numpy())
             self.assertGreater(pval, 0.9)

         # Empirical covariance entries.
         empirical_cov = np.cov(draws.cpu().numpy().transpose())
         self.assertLess(np.abs(empirical_cov[0, 1]), 1e-2)
         self.assertLess(np.abs(empirical_cov[0, 2] - 1), 1e-2)

         # check to see if X + Y = Z almost exactly
         residual = draws[:, 0] + draws[:, 1] - draws[:, 2]
         self.assertTrue(torch.all(torch.abs(residual) < 1e-5))
Ejemplo n.º 2
0
def MVNError(output, gt):
    """Return the RMSE between mean/variance-normalised ``output`` and ``gt``.

    Each tensor is standardised over all of its elements (global mean
    subtracted, divided by the global ``torch.std``) before the root mean
    squared difference of the two is taken.
    """
    normalised_output = (output - output.mean()) / output.std()
    normalised_gt = (gt - gt.mean()) / gt.std()
    residual = normalised_output - normalised_gt
    return (residual * residual).mean().sqrt()
Ejemplo n.º 3
0
    def apply_global_reward(self, rewards: torch.Tensor, next_iteration: int):
        """Accumulate a reward-weighted gradient on the parents and step.

        Rewards are standardised (mean subtracted, divided by their std).
        Every individual's tensors are then turned, in place, into
        ``(individual - parent) * normalised_reward /
        (-population_size * mutation_power)`` and summed into the matching
        ``parent.grad`` before ``self.optimizer.step()`` is called. When the
        reward std is not above 1e-6 the update is skipped entirely. The
        next generation is populated in every case.

        Args:
            rewards: one reward per entry of ``self.population_tensors``.
            next_iteration: forwarded to ``self.populate_children``.
        """
        reward_spread = torch.std(rewards)
        if torch.abs(reward_spread) > 1e-6:
            scaled_rewards = (rewards - torch.mean(rewards)) / reward_spread

            # Start from clean gradients on every parent tensor.
            for parent in self.parent_tensors.values():
                parent.grad.zero_()

            for idx, member in enumerate(self.population_tensors):
                for key, parent in self.parent_tensors.items():
                    delta = member[key]
                    # Perturbation relative to the parent ...
                    delta.sub_(parent)
                    # ... weighted by how well this individual did ...
                    delta.mul_(scaled_rewards[idx])
                    # ... and normalised (the -1 flips the sign of the
                    # accumulated gradient).
                    denominator = (
                        self.es_params.population_size
                        * self.es_params.mutation_power
                        * -1
                    )
                    delta.div_(denominator)
                    parent.grad += delta

            self.optimizer.step()

        self.populate_children(next_iteration)
def prepare_model():
    """Print per-channel mean and std statistics of the training images.

    Iterates once over ``dataloaders['train']``, sums the per-image channel
    means and per-image channel standard deviations, and prints both sums
    divided by ``dataset_sizes['train']``. Finally prints the elapsed time.
    Returns ``None``.
    """
    start_time = time.time()

    num_epochs = 1
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Only the training split is scanned.
        for phase in ['train']:
            channel_mean_sum = torch.zeros(3)
            channel_std_sum = torch.zeros(3)

            for inputs, labels in dataloaders[phase]:
                batch, channels, height, width = inputs.shape
                # Mean over width, then height, leaves (batch, channels).
                per_image_mean = inputs.mean(dim=3).mean(dim=2)
                channel_mean_sum += per_image_mean.sum(dim=0)
                # Std over all pixels of each image plane.
                flat_pixels = inputs.view(batch, channels, height * width)
                channel_std_sum += flat_pixels.std(dim=2).sum(dim=0)

            print(channel_mean_sum / dataset_sizes['train'])
            print(channel_std_sum / dataset_sizes['train'])

    elapsed = time.time() - start_time
    print('Training complete in {:.0f}m {:.0f}s'.format(
        elapsed // 60, elapsed % 60))
    return
        def get_image_std_dev(img_tensor):
            """Return the std of every flattened (height x width) plane.

            Dimensions 1 and 2 of ``img_tensor`` are taken as height and
            width; the tensor is viewed as rows of ``height * width`` values
            and the standard deviation of each row is returned.
            """
            rows = img_tensor.shape[1]
            cols = img_tensor.shape[2]
            return img_tensor.view(-1, rows * cols).std(dim=1)
Ejemplo n.º 6
0
    def forward(self, z):
        """Layer-normalise ``z`` along its last dimension.

        Inputs whose dimension 1 has size 1 are returned untouched.
        Otherwise each slice along the last dimension is centred, divided by
        ``std + self.eps`` and then scaled by ``self.a_2`` and shifted by
        ``self.b_2``.
        """
        if z.size(1) == 1:
            return z

        mean = z.mean(dim=-1, keepdim=True)
        std = z.std(dim=-1, keepdim=True)
        normalised = (z - mean.expand_as(z)) / (std.expand_as(z) + self.eps)

        gain = self.a_2.expand_as(normalised)
        bias = self.b_2.expand_as(normalised)
        return normalised * gain + bias
Ejemplo n.º 7
0
 def test_importance_prior(self):
     """Importance sampling with no guide should recover the prior moments.

     Draws 1000 samples from the marginal of an ``Importance`` posterior
     and compares their mean and standard deviation with ``self.mu_mean``
     and ``self.mu_stddev``.
     """
     posterior = pyro.infer.Importance(self.model, guide=None, num_samples=10000)
     marginal = pyro.infer.Marginal(posterior)
     draws = torch.cat([marginal() for _ in range(1000)])

     posterior_mean = torch.mean(draws)
     posterior_stddev = torch.std(draws, 0)

     mean_error = torch.norm(posterior_mean - self.mu_mean).data[0]
     self.assertEqual(0, mean_error, prec=0.01)
     stddev_error = torch.norm(posterior_stddev - self.mu_stddev).data[0]
     self.assertEqual(0, stddev_error, prec=0.1)
Ejemplo n.º 8
0
 def forward(self, z):
     """Normalise ``z`` along dimension 1, then apply gain and bias.

     Inputs whose dimension 1 has size 1 are returned unchanged. Otherwise
     every slice along dimension 1 is centred, divided by
     ``std + self.eps``, multiplied by ``self.a_2`` and shifted by
     ``self.b_2`` (both expanded to the shape of ``z``).

     Args:
         z: tensor with at least two dimensions.

     Returns:
         Tensor of the same shape as ``z``.
     """
     if z.size(1) == 1:
         return z
     # keepdim=True leaves the reduced axis in place with size 1, so the
     # statistics expand against ``z`` for any rank and regardless of the
     # PyTorch version's default reduction behaviour.
     mu = torch.mean(z, dim=1, keepdim=True)
     sigma = torch.std(z, dim=1, keepdim=True)
     ln_out = (z - mu.expand_as(z)) / (sigma.expand_as(z) + self.eps)
     ln_out = ln_out.mul(self.a_2.expand_as(ln_out)) \
              + self.b_2.expand_as(ln_out)
     return ln_out
Ejemplo n.º 9
0
def reinforce_baseline(surrogate, x, logits, mixtureweights, k=1, get_grad=False):
    """Score-function (REINFORCE) loss with a learned surrogate baseline.

    For each of ``k`` iterations a component is sampled from
    ``Categorical(softmax(logits))``, the learning signal
    ``f = log p(x, z) - log q(z) - 1`` is formed, and the encoder loss
    ``-mean((f - surrogate.net(x)) * log q)`` is computed with the signal
    and the baseline detached.

    Args:
        surrogate: object exposing ``net``; ``surrogate.net(x)`` is used as
            the baseline prediction.
        x: data batch, passed to ``logprob_undercomponent`` and
            ``surrogate.net``.
        logits: unnormalised component scores; ``logits.shape[0]`` is the
            batch size ``B`` and softmax is taken over dim 1.
        mixtureweights: indexed by the sampled component to get ``p(z)``.
        k: number of sampling iterations.
        get_grad: when true, also collect the gradient of the encoder loss
            with respect to ``logits`` at every iteration.

    Returns:
        dict with keys ``'logq'``, ``'logpx_given_z'``, ``'logpz'``,
        ``'f'``, ``'net_loss'`` and ``'surr_loss'`` holding the values of
        the LAST iteration, plus ``'grad_avg'`` / ``'grad_std'`` when
        ``get_grad`` is true.

    NOTE(review): with ``k == 0`` ``surr_loss`` is never bound and the
    function raises at the end; with ``k == 1`` the unbiased std over a
    single stacked gradient is presumably NaN -- confirm callers use k > 1
    when ``get_grad`` is set.
    """
    B = logits.shape[0]
    probs = torch.softmax(logits, dim=1)
    outputs = {}

    cat = Categorical(probs=probs)

    grads =[]
    # net_loss = 0
    for jj in range(k):

        # Sample one component per batch element and score it.
        cluster_H = cat.sample()
        outputs['logq'] = logq = cat.log_prob(cluster_H).view(B,1)
        outputs['logpx_given_z'] = logpx_given_z = logprob_undercomponent(x, component=cluster_H)
        outputs['logpz'] = logpz = torch.log(mixtureweights[cluster_H]).view(B,1)
        logpxz = logpx_given_z + logpz #[B,1]

        # Baseline prediction from the surrogate network.
        surr_pred = surrogate.net(x)

        outputs['f'] = f = logpxz - logq - 1.
        # Alternative without a baseline:
        # outputs['net_loss'] = net_loss = net_loss - torch.mean((f.detach() ) * logq)
        # Both the signal and the baseline are detached, so only logq
        # carries gradient into the encoder loss.
        outputs['net_loss'] = net_loss = - torch.mean((f.detach() - surr_pred.detach()) * logq)
        # net_loss += - torch.mean( -logq.detach()*logq)

        # Alternative surrogate objective (absolute error):
        # surr_loss = torch.mean(torch.abs(f.detach() - surr_pred))

        # Surrogate loss: squared (baseline-corrected signal x score
        # gradient); create_graph=True keeps it differentiable.
        grad_logq =  torch.autograd.grad([torch.mean(logq)], [logits], create_graph=True, retain_graph=True)[0]
        surr_loss = torch.mean(((f.detach() - surr_pred) * grad_logq )**2)

        if get_grad:
            grad = torch.autograd.grad([net_loss], [logits], create_graph=True, retain_graph=True)[0]
            grads.append(grad)

    # net_loss = net_loss/ k

    if get_grad:
        # Stacked gradients have the iteration index as dim 0.
        grads = torch.stack(grads)
        # print (grads.shape)
        outputs['grad_avg'] = torch.mean(torch.mean(grads, dim=0),dim=0)
        # Std across iterations; only the first row of the result is kept.
        outputs['grad_std'] = torch.std(grads, dim=0)[0]

    outputs['surr_loss'] = surr_loss
    # return net_loss, f, logpx_given_z, logpz, logq
    return outputs
    def test_reinforce(self):
        """Compare REINFORCE gradients against the full-loss gradient.

        First the gradient of ``get_pm_loss`` (alpha = 0, topk = 8, no
        baseline) must equal the gradient of the full loss. Then the mean of
        10000 sampled gradients must lie within three standard errors of
        that reference gradient.
        """
        phi0, optimizer, bern_experiment = self.set_params()

        # Reference: gradient of the full loss.
        full_loss = bern_experiment.get_full_loss()
        full_loss.backward()
        true_grad = deepcopy(bern_experiment.var_params['phi'].grad)
        print('true_grad', true_grad.numpy())

        # Analytically integrated REINFORCE gradient, from a fresh copy of
        # the initial parameters and zeroed gradients.
        bern_experiment.set_var_params(deepcopy(phi0))
        optimizer.zero_grad()
        pm_loss = bern_experiment.get_pm_loss(alpha=0.0, topk=8,
                                              use_baseline=False)
        pm_loss.backward()
        analytic_grad = deepcopy(bern_experiment.var_params['phi'].grad)
        print('reinforce_analytic_grad', analytic_grad.numpy())

        assert analytic_grad == true_grad

        # Sampling error of the Monte Carlo estimator.
        n_samples = 10000
        sampled_grads = bern_lib.sample_bern_gradient(
            phi0, bern_experiment,
            topk=0,
            alpha=0.,
            use_baseline=True,
            n_samples=n_samples)

        sample_mean = torch.mean(sampled_grads).numpy()
        standard_error = torch.std(sampled_grads).numpy() / np.sqrt(n_samples)

        print('mean_reinforce_grad, ', sample_mean)
        print('tol ', 3 * standard_error)

        deviation = np.abs(true_grad.numpy() - sample_mean)
        assert deviation < (3 * standard_error)
def standardize(data):
    '''
    Standardize, then min-max normalize, the input data of the network.

    Every channel is first shifted and scaled to zero mean and unit
    (unbiased) standard deviation over all batches and pixels, and is then
    rescaled so that its values span [0, 1].

    :param data: tensor to be standardized
        (size nb_batches x WIDTH x HEIGHT x number of channels); it does not
        need to be contiguous in memory.

    returns data standardized size nb_batches x WIDTH x HEIGHT x number of channels

    '''
    WIDTH = data.shape[1]
    HEIGHT = data.shape[2]
    channels = data.shape[3]
    n_rows = len(data) * WIDTH * HEIGHT

    # reshape (unlike view) also accepts non-contiguous input, e.g. a
    # channels-first tensor that was permuted to channels-last.
    flat = data.reshape(n_rows, channels)
    mean_t = torch.mean(flat, 0)
    std_t = torch.std(flat, 0)
    data = (data - mean_t) / std_t

    # For normalization: per-channel min and max of the standardized data.
    flat = data.reshape(n_rows, channels)
    min_t = torch.min(flat, 0)[0]
    max_t = torch.max(flat, 0)[0]
    data = (data - min_t) / (max_t - min_t)

    return data
Ejemplo n.º 12
0
 def FeatureStd(self,input):
     """Return the per-sample, per-channel std over all spatial positions.

     ``input`` must be four-dimensional (b, c, h, w); the result has shape
     (b, c).
     """
     batch, channels, height, width = input.size()
     # Collapse the two spatial axes into one: b x c x (h*w)
     flattened = input.view(batch, channels, height * width)
     return flattened.std(dim=2)
Ejemplo n.º 13
0
    def pixel_fit_image(self,
                        im3d,
                        sS=3.,
                        ss=1.5,
                        th_brightness=5,
                        plt_val=False):
        """Detect bright local maxima in a 3D image on the GPU.

        The image is smoothed with a large and a small gaussian kernel; the
        difference of the logs of the two results is thresholded at
        ``th_brightness`` times its standard deviation and intersected with
        the 3x3x3 local maxima of the small-kernel result.

        Args:
            im3d: 3D image; wrapped as ``[[im3d]]`` to form a 5-D tensor.
                Also passed to ``np.max(..., axis=0)`` when plotting.
            sS: sigma of the large gaussian, used for all three axes.
            ss: sigma of the small gaussian for the last two axes (the
                first axis uses sigma 1).
            th_brightness: multiple of ``std(g_dif)`` used as threshold.
            plt_val: when true, show the detections over a max projection.

        Returns:
            numpy array with one row ``[z, x, y, h]`` per detection (``h``
            is the value of the log-difference at that voxel); shape (0, 4)
            when nothing is found.

        Side effects: stores ``ss``, ``sS`` and ``th_brightness`` on
        ``self``. Requires CUDA (every tensor/module is moved with
        ``.cuda()``).
        """
        self.ss, self.sS = ss, sS
        self.th_brightness = th_brightness
        g_cutoff = 2.5
        # Add batch and channel axes: 1 x 1 x Z x X x Y.
        input_ = torch.tensor([[im3d]]).cuda()
        ### compute the big gaussian filter ##########
        gaussian_kernel_ = gaussian_kernel(sxyz=[sS, sS, sS], cut_off=g_cutoff)
        ksz = len(gaussian_kernel_)
        gaussian_kernel_ = torch.FloatTensor(gaussian_kernel_).cuda().view(
            1, 1, ksz, ksz, ksz)
        #gaussian_kernel_ = gaussian_kernel_.repeat(channels, 1, 1, 1)
        # Fixed-weight convolution: the kernel is assigned directly and
        # excluded from gradient tracking.
        gfilt_big = DataParallel(
            nn.Conv3d(1, 1, ksz, stride=1, padding=0, bias=False)).cuda()
        gfilt_big.module.weight.data = gaussian_kernel_
        gfilt_big.module.weight.requires_grad = False
        # Replication padding by half the kernel size stands in for the
        # conv's own padding (which is 0).
        # NOTE(review): `pd` is defined outside this block -- presumably a
        # module exposing ReplicationPad3d; confirm.
        gfit_big_ = gfilt_big(pd.ReplicationPad3d(int(ksz / 2.))(input_))

        ### compute the small gaussian filter ##########
        # NOTE(review): the cut-off is the literal 2.5 here rather than
        # g_cutoff; same value today.
        gaussian_kernel_ = gaussian_kernel(sxyz=[1, ss, ss], cut_off=2.5)
        ksz = len(gaussian_kernel_)
        gaussian_kernel_ = torch.FloatTensor(gaussian_kernel_).cuda().view(
            1, 1, ksz, ksz, ksz)
        #gaussian_kernel_ = gaussian_kernel_.repeat(channels, 1, 1, 1)
        gfilt_sm = DataParallel(
            nn.Conv3d(1, 1, ksz, stride=1, padding=0, bias=False)).cuda()
        gfilt_sm.module.weight.data = gaussian_kernel_
        gfilt_sm.module.weight.requires_grad = False
        gfilt_sm_ = gfilt_sm(pd.ReplicationPad3d(int(ksz / 2.))(input_))

        ### compute the maximum filter ##########
        ksize_max = 3  #local maximum in 3x3x3 range
        max_filt = DataParallel(
            nn.MaxPool3d(ksize_max,
                         stride=1,
                         padding=int(ksize_max / 2),
                         return_indices=False)).cuda()
        # A voxel is a local maximum when max-pooling leaves it unchanged.
        local_max = max_filt(gfilt_sm_) == gfilt_sm_

        # Log-ratio of small-scale to large-scale smoothing.
        # NOTE(review): non-positive filtered values would give NaN/-inf
        # here -- assumes im3d is strictly positive; confirm.
        g_dif = torch.log(gfilt_sm_) - torch.log(gfit_big_)
        std_ = torch.std(g_dif)
        # Indices of voxels that are both bright enough and local maxima.
        inds = torch.nonzero((g_dif > std_ * th_brightness) * local_max)
        # Empty defaults so the plotting / return paths work with no hits.
        zxyhf = np.array([[], [], [], []]).T
        zf, xf, yf, hf = zxyhf.T
        if len(inds):
            brightness = g_dif[inds[:, 0], inds[:, 1], inds[:, 2], inds[:, 3],
                               inds[:, 4]]
            # bring back to CPU
            torch.cuda.empty_cache()
            # The last three index columns are the spatial coordinates.
            zf, xf, yf = inds[:, -3:].cpu().numpy().T
            hf = brightness.cpu().numpy()
            zxyhf = np.array([zf, xf, yf, hf]).T

        if plt_val:
            plt.figure()
            plt.scatter(yf, xf, s=150, facecolor='none', edgecolor='r')
            plt.imshow(np.max(im3d, axis=0), vmax=2)
            plt.show()
        return zxyhf
Ejemplo n.º 14
0
def relax(step, surrogate, x, logits, mixtureweights, k=1, get_grad=False):
    """RELAX-style encoder loss with a surrogate control variate.

    For each of ``k`` iterations ``sample_relax`` provides a discrete
    sample ``b``, its log-probability ``logq`` and the surrogate's
    predictions ``cz`` / ``cz_tilde``; the learning signal is
    ``f = log p(x, b) - log q(b)``.

    Args:
        step: not used by the active code (only by a commented-out warm-up).
        surrogate: passed through to ``sample_relax``.
        x: data batch, passed to ``sample_relax`` and
            ``logprob_undercomponent``.
        logits: ``logits.shape[0]`` is the batch size ``B``; gradients are
            taken with respect to this tensor.
        mixtureweights: indexed by ``b`` to get the prior ``p(z)``.
        k: number of sampling iterations.
        get_grad: when true, collect d(net_loss)/d(logits) per iteration
            and report its mean/std; when false, compute the surrogate
            loss and gradient diagnostics instead.

    Returns:
        dict with ``'net_loss'``, ``'f'``, ``'logpx_given_z'``, ``'logpz'``
        and ``'logq'`` from the LAST iteration, plus either
        ``'grad_avg'``, ``'grad_std'``, ``'surr_dif'``, ``'surr_dif2'``
        (``get_grad`` true) or ``'surr_loss'``, ``'surr_dif'``,
        ``'surr_dif2'``, ``'grad_logq'``, ``'grad_surr_z'``,
        ``'grad_surr_z_tilde'``, ``'grad_path'``, ``'grad_score'``.

    NOTE(review): the bare name ``fasdfas`` below is undefined, so reaching
    it raises NameError -- it acts as a deliberate crash after the NaN
    diagnostics are printed. With ``k == 0`` the names used after the loop
    are unbound.
    """




    outputs = {}
    B = logits.shape[0]
    C = logits.shape[1]

    grads =[]
    for jj in range(k):

        b, logq, cz, cz_tilde, z, z_tilde, gumbels, u = sample_relax(x, logits, surrogate)
        # print (b)
        # print (b.shape)
        surr_pred_z = cz
        surr_pred_z_tilde = cz_tilde

        logpx_given_z = logprob_undercomponent(x, component=b)
        logpz = torch.log(mixtureweights[b]).view(B,1)
        logpxz = logpx_given_z + logpz #[B,1]
        # print(logpxz.shape, logpz.shape)





        #Encoder loss
        # warmup = np.minimum( (step+1) / 50000., 1.)
        # warmup = .0001
        warmup = 1.

        # Earlier variant of the signal/loss:
        # f = logpxz - logq - 1.
        # net_loss = - torch.mean(   warmup*((f.detach() - surr_pred_z_tilde.detach()) * logq)  +  surr_pred_z - surr_pred_z_tilde )

        f = logpxz - logq
        # Score-function term with the surrogate as baseline, the -logq
        # term, and the surrogate difference cz - cz_tilde.
        net_loss = - torch.mean( (f.detach() - surr_pred_z_tilde.detach()) * logq - logq +  surr_pred_z - surr_pred_z_tilde )

        # x != x is true only for NaN: dump diagnostics, then crash.
        if (net_loss != net_loss).any():
            print ('net_loss', net_loss)
            print ('f', f)
            print ('logpxz', logpxz)
            print ('logq', logq)
            print ('surr_pred_z_tilde', surr_pred_z_tilde)
            print ('surr_pred_z', surr_pred_z)
            print ('logits', logits)
            print ((logits != logits).any())
            print ((1/logits != 1/logits).any())
            print ('gumbels', gumbels)
            print ((gumbels != gumbels).any())
            print ((1/gumbels != 1/gumbels).any())
            print ('z', z)
            print ((z != z).any())
            print ((1./z != 1./z).any())
            print ('u', u)

            # print (z)
            # print (probs)
            # print (gumbels)
            # Undefined name: raises NameError to abort the run.
            fasdfas

        if get_grad:
            grad = torch.autograd.grad([net_loss], [logits], create_graph=True, retain_graph=True)[0]
            grads.append(grad)
            # Mean absolute and mean signed gap between signal and surrogate.
            surr_dif = torch.mean(torch.abs(f.detach() - surr_pred_z_tilde))
            surr_dif2 = torch.mean(f.detach() - surr_pred_z_tilde)


        else:
            # #Surrogate loss
            # grad_logq =  torch.mean( torch.autograd.grad([torch.mean(logq)], [logits], create_graph=True, retain_graph=True)[0], dim=1, keepdim=True)
            # grad_surr_z = torch.mean( torch.autograd.grad([torch.mean(surr_pred_z)], [logits], create_graph=True, retain_graph=True)[0], dim=1, keepdim=True)
            # grad_surr_z_tilde = torch.mean( torch.autograd.grad([torch.mean(surr_pred_z_tilde)], [logits], create_graph=True, retain_graph=True)[0], dim=1, keepdim=True)

            # print (f.shape, surr_pred_z_tilde.shape, grad_logq.shape, grad_surr_z.shape, grad_surr_z_tilde.shape)
            # print (grad_surr_z_tilde)

            # Gradients with respect to logits of each term, kept for the
            # diagnostics returned below.
            grad_logq =  torch.autograd.grad([torch.mean(logq)], [logits], create_graph=True, retain_graph=True)[0]
            grad_surr_z =  torch.autograd.grad([torch.mean(surr_pred_z)], [logits], create_graph=True, retain_graph=True)[0]
            grad_surr_z_tilde = torch.autograd.grad([torch.mean(surr_pred_z_tilde)], [logits], create_graph=True, retain_graph=True)[0]
            grad_path = torch.autograd.grad([torch.mean(surr_pred_z - surr_pred_z_tilde)], [logits], create_graph=True, retain_graph=True)[0]
            # Alternative surrogate objectives (squared gradient estimators):
            # surr_loss = torch.mean(((f.detach() - surr_pred_z_tilde) * grad_logq - grad_logq  + grad_surr_z - grad_surr_z_tilde)**2)

            # net_loss2 = torch.mean( (f.detach() - surr_pred_z_tilde) * logq - logq +  surr_pred_z - surr_pred_z_tilde )
            # grad = torch.autograd.grad([net_loss2], [logits], create_graph=True, retain_graph=True)[0]
            # surr_loss = torch.mean(grad**2)

            # Active surrogate objective: mean absolute error to the signal.
            surr_loss = torch.mean(torch.abs(f.detach() - surr_pred_z_tilde))

            surr_dif = torch.mean(torch.abs(f.detach() - surr_pred_z_tilde))
            surr_dif2 = torch.mean(f.detach() - surr_pred_z_tilde)
            # surr_loss = surr_dif

            # NaN check on the surrogate loss: dump diagnostics, then crash.
            if (surr_loss != surr_loss).any():
                # print ('net_loss', net_loss)
                # print ('surr_loss', surr_loss)
                # print ('f', f)
                # print ('logpxz', logpxz)
                # print ('logq', logq)
                # print ('surr_pred_z_tilde', surr_pred_z_tilde)
                # print ('surr_pred_z', surr_pred_z)

                # print (z)
                # print (probs)
                # print (gumbels)
                print ('grad_logq', grad_logq)
                print ((grad_logq != grad_logq).any())
                print ('grad_surr_z', grad_surr_z)
                print ((grad_surr_z != grad_surr_z).any())
                print ('grad_surr_z_tilde', grad_surr_z_tilde)
                print ((grad_surr_z_tilde != grad_surr_z_tilde).any())
                print (logits)
                print (z_tilde)
                print ((z_tilde != z_tilde).any())
                print (torch.max(z_tilde))
                print (torch.min(z_tilde))
                aaa = torch.autograd.grad([torch.mean(z_tilde)], [logits], create_graph=True, retain_graph=True)[0]
                print ((aaa != aaa).any())
                print (torch.min(torch.exp(logits)))
                print (torch.max(torch.exp(logits)))


                # Undefined name: raises NameError to abort the run.
                fasdfas




    # Values from the last loop iteration.
    outputs['net_loss'] = net_loss
    outputs['f'] = f
    outputs['logpx_given_z'] = logpx_given_z
    outputs['logpz'] = logpz
    outputs['logq'] = logq


    if get_grad:
        # Stacked gradients have the iteration index as dim 0.
        grads = torch.stack(grads)
        # print (grads.shape)
        outputs['grad_avg'] = torch.mean(grads, dim=0)
        # Std across iterations; only the first row of the result is kept.
        outputs['grad_std'] = torch.std(grads, dim=0)[0]
        outputs['surr_dif'] = surr_dif
        outputs['surr_dif2'] = surr_dif2
    else:
        outputs['surr_loss'] = surr_loss
        outputs['surr_dif'] = surr_dif
        outputs['surr_dif2'] = surr_dif2
        # Magnitudes of the individual gradient terms.
        outputs['grad_logq'] = torch.abs(grad_logq)
        outputs['grad_surr_z'] = torch.abs(grad_surr_z  )
        outputs['grad_surr_z_tilde'] = torch.abs(grad_surr_z_tilde )
        outputs['grad_path'] = torch.abs(grad_path )
        outputs['grad_score'] = torch.abs(grad_logq*(f.detach() - surr_pred_z_tilde.detach()))

    # return net_loss, f, logpx_given_z, logpz, logq, surr_loss, surr_dif
    return outputs
Ejemplo n.º 15
0
    if i + 10 > len(graph_samples):
        train_ind = train_ind + list(range(i, len(graph_samples)))
    else:
        train_ind = train_ind + list(range(i, i + 8))
        validation_ind.append(i + 8)
        test_ind.append(i + 9)

# Read one float target per line, keeping at most N_SAMPLES of them.
with open(TARGET_FILE, "r") as t:
    target = torch.as_tensor([torch.tensor([float(v)])
                              for v in t.readlines()][:N_SAMPLES])

# Compute STD and MEAN only on training data
# (identity defaults, used when normalization is switched off).
target_mean, target_std = 0, 1
if NORMALIZE_DATA:
    # Statistics come from the training indices only, then are applied to
    # every target.
    training_target = torch.tensor([target[i] for i in train_ind])
    target_std = torch.std(training_target, dim=0)
    target_mean = torch.mean(training_target, dim=0)
    target = ((target - target_mean) / target_std).reshape(shape=(len(target),
                                                                  1))

    # Transpose graph_samples into per-position columns, normalize each
    # column with `normalize` (defined elsewhere; it also receives the
    # training indices), then transpose back to per-sample lists.
    columns = [[samples[i] for samples in graph_samples]
               for i in range(len(graph_samples[0]))]
    normalized_columns = [normalize(column, train_ind) for column in columns]
    graph_samples = [[column[i] for column in normalized_columns]
                     for i in range(len(graph_samples))]

dataset = []
for i, samples in enumerate(graph_samples):
    dataset.append([
        Data(x=sample[0],
             edge_index=sample[1],
Ejemplo n.º 16
0
 def compute_mean_std(tensor):
     """Return ``{"mean": ..., "std": ...}`` computed over all elements.

     The input is converted to double precision first because the mean of
     an integral tensor cannot be computed directly.
     """
     as_double = tensor.to(torch.double)
     return {"mean": as_double.mean(), "std": as_double.std()}
Ejemplo n.º 17
0
def test_single_dendrite_single_input_single_output_single_trial():
    """Check a one-dendrite, one-input, one-output ``FeedForwardCell``.

    Compares the model's conductances and potentials against a
    hand-computed solution, then checks the statistics of sampled outputs,
    the target energy and the target loss.
    """

    params = {
        "seed": SEED,
        "in_features": [1],
        "out_features": 1,
    }

    # Seed both RNGs so the sampling checks below are reproducible.
    np.random.seed(params["seed"])
    torch.manual_seed(params["seed"])

    # Excitatory and inhibitory weights for the single dendrite.
    wE = torch.Tensor([[[1.2]]])
    wI = torch.Tensor([[[0.7]]])

    model = FeedForwardCell(params["in_features"], params["out_features"])
    # Input potential 10 above the model's EL.
    u_in = torch.Tensor([[model.EL + 10.0]])

    # hand-crafted solution
    r_in = model.f(u_in)
    # Dendritic conductance: leak plus both weighted inputs.
    gffd_target = model.gL0 + torch.mm(r_in, wE[0]) + torch.mm(r_in, wI[0])
    # Dendritic potential: conductance-weighted average of EL, EE and EI.
    uffd_target = (model.gL0 * model.EL + torch.mm(r_in, wE[0]) * model.EE +
                   torch.mm(r_in, wI[0]) * model.EI) / gffd_target
    # Output conductance/potential, combining the leak with the dendrite
    # through the term gc * gffd / (gffd + gc).
    g0_target = model.gL0 + model.gc * gffd_target / (gffd_target + model.gc)
    u0_target = (model.gL0 * model.EL + model.gc * gffd_target /
                 (gffd_target + model.gc) * uffd_target) / g0_target

    # model solution
    model.set_weightsE(0, wE[0])
    model.set_weightsI(0, wI[0])
    gffd, uffd = model.compute_gffd_and_uffd(u_in)
    g0, u0 = model(u_in)

    # Dendritic quantities carry an extra trailing axis with one entry per
    # in_features element.
    assert gffd.shape == (1, params["out_features"],
                          len(params["in_features"]))
    assert uffd.shape == (1, params["out_features"],
                          len(params["in_features"]))
    assert gffd_target.shape == (1, params["out_features"])
    assert uffd_target.shape == (1, params["out_features"])
    assert gffd[0, 0, 0].tolist() == pytest.approx(gffd_target[0, 0].tolist())
    assert uffd[0, 0, 0].tolist() == pytest.approx(uffd_target[0, 0].tolist())

    assert g0.shape == (1, params["out_features"])
    assert u0.shape == (1, params["out_features"])
    assert g0.shape == g0_target.shape
    assert u0.shape == u0_target.shape
    assert g0[0, 0].tolist() == pytest.approx(g0_target[0, 0].tolist())
    assert u0[0, 0].tolist() == pytest.approx(u0_target[0, 0].tolist())

    # test sampling
    lambda_e = 1.67
    model.lambda_e = lambda_e
    n_samples = 5000
    u0_sample = torch.empty(n_samples, model.out_features)
    for i in range(n_samples):
        u0_sample[i] = model.sample(g0, u0)

    # Sample mean should match u0 and sample std sqrt(lambda_e / g0).
    assert torch.mean(u0_sample).item() == pytest.approx(u0_target.item(),
                                                         rel=0.0001)
    assert torch.std(u0_sample).item() == pytest.approx(torch.sqrt(
        lambda_e / g0_target).item(),
                                                        rel=0.01)

    # test energy
    # Use a target offset by 5 from the model output; the energy is checked
    # against -log of a gaussian density with mean u0 and variance
    # lambda_e / g0.
    u0_target = u0.clone() + 5.0
    g0, u0 = model(u_in)
    p_expected = torch.sqrt(g0 / (2 * math.pi * model.lambda_e)) * torch.exp(
        -g0 / (2. * model.lambda_e) * (u0_target - u0)**2)
    assert model.energy_target(
        u0_target, g0,
        u0).item() == pytest.approx(-torch.log(p_expected).item())

    # test loss
    # Expected loss is half the squared difference to the target.
    assert model.loss_target(u0_target, g0, u0).item() == pytest.approx(
        0.5 * (u0_target - u0).item()**2)
Ejemplo n.º 18
0
# Load the pre-split depth patches from a MATLAB file.
data = loadmat('patches_train_test_val_64.mat')
#data = loadmat('patches_448_576.mat')
train = data['patches_train']
val = data['patches_val']
test = data['patches_test']

train = torch.from_numpy(train)
val = torch.from_numpy(val)
test = torch.from_numpy(test)

# Move everything to `device` as float32; only the training tensor is
# flagged as requiring gradients.
train_gt_depths = train.float().to(device).requires_grad_(True)
val_gt_depths = val.float().to(device).requires_grad_(False)
test_gt_depths = test.float().to(device).requires_grad_(False)

# Normalization statistics come from the training split only.
# NOTE(review): train_gt_depths requires grad, so these statistics (and
# everything normalized with them) are presumably attached to the autograd
# graph -- confirm that is intended.
train_gt_depths_mean = torch.mean(train_gt_depths)
train_gt_depths_std = torch.std(train_gt_depths)

# Apply the training mean/std to all three splits.
train_normalized_gt_depths = (train_gt_depths -
                              train_gt_depths_mean) / train_gt_depths_std
val_normalized_gt_depths = (val_gt_depths -
                            train_gt_depths_mean) / train_gt_depths_std
test_normalized_gt_depths = (test_gt_depths -
                             train_gt_depths_mean) / train_gt_depths_std

print("DATA IMPORTED")

with torch.autograd.detect_anomaly():
    iteration = 0
    increased = 0
    patience = 50
    train_batch_size = 32
Ejemplo n.º 19
0
    def training_step(self, batch, batch_nb):
        """Run one training step and log the loss terms.

        The model is called on ``(imgs, coords)`` and the prediction is
        compared with ``rgb_vals`` through ``self.siren_loss_fn``. Two
        optional regularisers are added to a running ``loss``:

        * when ``self.encoder_cfg["loss_weight"]`` is set, the weighted mean
          squared embedding value;
        * when ``self.hyper_cfg["loss_weight"]`` is set, for every predicted
          weight tensor the squared error between its std and an expected
          std, plus its squared mean.

        Args:
            batch: tuple ``(coords, rgb_vals, imgs)``.
            batch_nb: batch index (unused).

        Returns:
            The reconstruction loss ``siren_loss``.
        """
        coords, rgb_vals, imgs = batch

        embedding, siren_weights, siren_biases, pred = self(imgs, coords)
        self._log_loss("train", pred, rgb_vals)

        self.last_logits = pred

        loss = 0

        siren_loss = self.siren_loss_fn(pred, rgb_vals)
        loss += siren_loss

        # Regularization encourages a gaussian prior on embedding from context encoder
        if self.encoder_cfg.get("loss_weight"):
            embedding_reg = (
                self.encoder_cfg["loss_weight"] * (embedding * embedding).mean()
            )
            loss += embedding_reg

        # Regularization encourages a lower frequency representation of the image
        # Not sure I believe that, but it's what the paper says.
        # if self.hyper_cfg.get("loss_weight"):
        #    n_params = sum([w.shape[-1] * w.shape[-2] for w in siren_weights])
        #    cum_mag = sum([torch.sum(w * w, dim=(-1, -2)) for w in siren_weights])
        #    hyper_reg = self.hyper_cfg["loss_weight"] * (cum_mag / n_params).mean()
        #    loss += hyper_reg

        # The variance of each predicted layer should be approximately equal
        # to its initialization for well behaved training and to avoid
        # vanishing gradients.
        # First Layer:    np.sqrt(6 / num_input) / self.frequency,
        #     This would be similar to:
        #              = sqrt(2/3) / (self.frequency * sqrt(num_input))
        # Rest:           m.weight.uniform_(-1 / num_input, 1 / num_input)

        if self.hyper_cfg.get("loss_weight"):
            hyper_reg = 0
            w = siren_weights[0]
            fan_in = w.shape[-1]
            # Empirically, the trained network had just under twice this std
            expected_std_first = torch.tensor(1 / (3 * fan_in)).to(w.device)
            actual_std_first = torch.std(w)
            actual_mean_first = torch.mean(w)

            hyper_loss_std_layer_0 = F.mse_loss(expected_std_first, actual_std_first)
            hyper_reg += hyper_loss_std_layer_0
            hyper_loss_mean_layer_0 = (
                actual_mean_first * actual_mean_first
            )  # Maybe these should be weighted.
            hyper_reg += hyper_loss_mean_layer_0

            self.log("hyper_loss_std_layer_0", hyper_loss_std_layer_0)
            self.log("hyper_loss_mean_layer_0", hyper_loss_mean_layer_0)

            # Numbering starts at 1 so the log keys of the remaining layers
            # do not collide with the first layer's "..._layer_0" keys.
            for i, w in enumerate(siren_weights[1:], start=1):
                fan_in = w.shape[-1]
                # Assumes the 30 w0 frequency
                # TODO: maybe multiply this std by 2. Empirically, trained networks had twice the std
                expected_std = torch.tensor(sqrt(6) / 3 / (30 * sqrt(fan_in))).to(
                    w.device
                )
                actual_std = torch.std(w)
                actual_mean = torch.mean(w)

                hyper_reg_loss_std = F.mse_loss(expected_std, actual_std)
                hyper_reg_loss_mean = (
                    actual_mean * actual_mean
                )  # Maybe these should be weighted.
                self.log(f"hyper_loss_std_layer_{i}", hyper_reg_loss_std)
                self.log(f"hyper_loss_mean_layer_{i}", hyper_reg_loss_mean)

                hyper_reg += hyper_reg_loss_std
                hyper_reg += hyper_reg_loss_mean
            self.log("hyper_reg", hyper_reg)
            loss += hyper_reg

        self._log_common("train", pred, rgb_vals, loss)

        # NOTE(review): the regularised ``loss`` is only logged; the value
        # returned is the plain reconstruction loss, so the regularisers
        # presumably do not influence optimisation -- confirm whether
        # ``loss`` should be returned instead.
        return siren_loss
Ejemplo n.º 20
0
def global_std_pool2d(x):
    """2D global standard-deviation pooling.

    Flattens everything after the first two dimensions and returns the
    standard deviation over it, with shape ``(x.size(0), x.size(1), 1, 1)``.
    """
    batch, channels = x.size()[0], x.size()[1]
    flattened = x.view(batch, channels, -1, 1)
    return flattened.std(dim=2, keepdim=True)
 def __call__(self, spec):
     """Return the log of ``spec`` standardised along dimension 1.

     Values are clamped to at least 1e-18 before the log; the result is
     centred on its dim-1 mean and divided by its dim-1 std plus 1e-5.
     """
     floored = torch.clamp(spec, min=1e-18)
     log_mel = floored.log()
     centre = log_mel.mean(dim=1, keepdim=True)
     scale = log_mel.std(dim=1, keepdim=True) + 1e-5
     return (log_mel - centre) / scale
Ejemplo n.º 22
0
# Normalisation layers under comparison: LayerNorm over the last two and
# the last three dimensions, and InstanceNorm3d with one feature channel.
ln_3 = nn.LayerNorm([2,2])
ln_4 = nn.LayerNorm([2,2,2])
ln_5 = nn.InstanceNorm3d(1)
# ln = nn.LayerNorm([2])
# ln = nn.LayerNorm(2)
# nn.init.constant_(ln_1.weight,1)
# nn.init.constant_(ln_1.bias,0)
# a = torch.Tensor([1,2,3,4]).reshape(1,1,2,2)
# The same eight values 1..8 arranged as 4-D, 3-D and 5-D inputs.
a = torch.Tensor([1,2,3,4,5,6,7,8]).reshape(1,2,2,2)
b = torch.Tensor([1,2,3,4,5,6,7,8]).reshape(2,2,2)
c = torch.Tensor([1,2,3,4,5,6,7,8]).reshape(1,2,2,2)
d = torch.Tensor([1,2,3,4,5,6,7,8]).reshape(1,1,2,2,2)
x = np.array([1.,2,3])
# Reference statistics over all elements; unbiased=False selects the
# population std (divide by N).
m1 = torch.mean(a)
m2 = torch.mean(b)
a1 = torch.std(a,unbiased=False)
b1 = torch.std(b,unbiased=False)


# NOTE(review): ln_1 and ln_2 are not defined in this excerpt --
# presumably created just above it; confirm.
ln1 = ln_1(a)
ln2 = ln_2(b)
ln3= ln_3(c)
ln4 = ln_4(c)
ln5 = ln_5(d)
# Print the outputs one after another for visual comparison.
print(ln1)
print("1*******")
print(ln2)
print("2*******")
print(ln3)
print("3*******")
print(ln4)
Ejemplo n.º 23
0
def _get_meanstd(dataset):
    """Return per-channel mean and standard deviation of a 3-channel dataset.

    Args:
        dataset: Indexable collection supporting ``len()`` whose items are
            sequences with the data tensor first (e.g. ``(image, label)``).
            Each tensor must be reshapeable to ``(3, -1)``.

    Returns:
        ``(data_mean, data_std)``: two lists of three floats, computed over
        all values of every item in ``dataset`` for each channel.
    """
    # Use the `dataset` argument; the previous body read a global `trainset`
    # and silently ignored whatever the caller passed in.
    pixels = torch.cat(
        [dataset[i][0].reshape(3, -1) for i in range(len(dataset))], dim=1
    )
    data_mean = torch.mean(pixels, dim=1).tolist()
    data_std = torch.std(pixels, dim=1).tolist()
    return data_mean, data_std
Ejemplo n.º 24
0
 def minibatch_std(self, x):
     """Append one constant "minibatch statistic" channel to ``x``.

     The standard deviation of ``x`` is taken across dimension 0, averaged
     to a single scalar, tiled to the shape
     ``(x.shape[0], 1, x.shape[2], x.shape[3])`` and concatenated to ``x``
     along dimension 1.
     """
     std_across_batch = torch.std(x, dim=0)
     scalar_stat = std_across_batch.mean()
     stat_channel = scalar_stat.repeat(x.shape[0], 1, x.shape[2], x.shape[3])
     return torch.cat([x, stat_channel], dim=1)
Ejemplo n.º 25
0
    def __init__(self,
                 dataset,
                 obs_dim,
                 act_dim,
                 gamma,
                 horizon,
                 policy_net,
                 hidden_layers,
                 activation,
                 output_transform,
                 norm='std',
                 input_mode='sa',
                 seed=1,
                 action_encoding_scheme='continuous',
                 keep_terminal_states=True,
                 use_separate_target_net=False):
        """Build tensors, normalisation and networks from an offline dataset.

        Args:
            dataset: Mapping read with the keys ``'obs'``, ``'next_obs'``,
                ``'init_obs'``, ``'term_obs'``, ``'acts'``, ``'rews'``,
                ``'info'``, ``'target_prob_obs'``, ``'target_prob_next_obs'``,
                ``'target_prob_init_obs'`` and ``'target_prob_term_obs'``.
            obs_dim: Observation size; used for the network input widths.
            act_dim: Number of actions; used for the action encoding grid and
                the ``q_net`` output width.
            gamma: Stored on the instance; not otherwise used here.
            horizon: Number of dataset rows per episode assumed when slicing.
            policy_net: Must be ``None``; anything else raises
                ``NotImplementedError``.
            hidden_layers: Forwarded to ``Simple_MLP``.
            activation: Forwarded to ``Simple_MLP``.
            output_transform: Forwarded to the ``w_net`` only; the Q networks
                are built with ``output_transform=None``.
            norm: Stored on the instance.  The value ``'std'`` (which is the
                default) raises ``NotImplementedError``.
            input_mode: Only ``'sa'`` is implemented.
            seed: Not used inside this constructor.
            action_encoding_scheme: Only ``'continuous'`` is implemented.
            keep_terminal_states: If true every row is kept; otherwise only
                rows whose ``dataset['info'][:, 0]`` is ``False`` are kept.
            use_separate_target_net: If true, a frozen copy of ``q_net`` is
                created as ``q_net_target``.

        Raises:
            NotImplementedError: For any unsupported option listed above.

        NOTE(review): ``dtype`` is read from an enclosing/module scope that is
        not visible in this block.
        """
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.gamma = gamma
        self.horizon = horizon
        self.norm = norm
        self.policy_net = policy_net
        self.input_mode = input_mode
        self.use_separate_target_net = use_separate_target_net

        # self.n_samples = dataset['obs'].shape[0]
        self.n_episode = dataset['init_obs'].shape[0]
        # self.non_terminal_idx = (dataset['info']==False)[:,0]
        # self.n_samples_non_terminal = int(self.non_terminal_idx.sum())

        # Rows whose first 'info' column is False are treated as non-absorbing.
        self.non_absorbing_state = (dataset['info'] == False)[:, 0]
        self.n_samples_non_absorbing = int(self.non_absorbing_state.sum())
        if keep_terminal_states:
            # Keep every row; episode ends fall on every `horizon`-th row.
            self.included_idx = torch.arange(dataset['obs'].shape[0])
            self.end_idx = np.arange(self.horizon - 1, dataset['obs'].shape[0],
                                     self.horizon)
            self.absorbing_idx = np.where(dataset['info'][:, 0] == True)[0]
        else:
            # Keep only non-absorbing rows and recompute, per episode, the
            # index of its last kept row in the compacted ordering.
            self.included_idx = torch.tensor(
                self.non_absorbing_state.nonzero()[0])
            end_idx = []
            absorbing_idx = []
            accumulated_eps_length = 0
            for eps_id in range(self.n_episode):
                # Count of non-absorbing rows inside this episode's slice.
                real_episode_duration = int(
                    self.non_absorbing_state[eps_id *
                                             self.horizon:(eps_id + 1) *
                                             self.horizon].sum())
                accumulated_eps_length += real_episode_duration
                end_idx.append(accumulated_eps_length - 1)
                # An episode shorter than `horizon` has its last kept row
                # recorded as absorbing.
                if real_episode_duration < self.horizon:
                    absorbing_idx.append(accumulated_eps_length - 1)
            assert accumulated_eps_length == self.n_samples_non_absorbing
            self.end_idx = np.array(end_idx)
            self.absorbing_idx = np.array(absorbing_idx)
        self.n_samples = self.included_idx.shape[0]
        # Boolean mask over kept rows: False at the absorbing indices.
        self.non_absorbing_mask = torch.ones(self.n_samples, dtype=torch.bool)
        self.non_absorbing_mask[self.absorbing_idx] = False

        self.data_acts = torch.tensor(dataset['acts'],
                                      dtype=torch.long)[self.included_idx]
        self.rews = torch.tensor(dataset['rews'],
                                 dtype=dtype)[self.included_idx]

        # self.data_acts = torch.tensor(dataset['acts'][self.non_terminal_idx], dtype=torch.long)
        # self.rews = torch.tensor(dataset['rews'][self.non_terminal_idx], dtype=dtype)

        if self.policy_net is not None:
            raise NotImplementedError
        else:
            # Target-policy probabilities are read pre-computed from the
            # dataset; the init/term entries are not row-filtered.
            self.pi_current = torch.tensor(dataset['target_prob_obs'],
                                           dtype=dtype)[self.included_idx]
            self.pi_next = torch.tensor(dataset['target_prob_next_obs'],
                                        dtype=dtype)[self.included_idx]
            self.pi_init = torch.tensor(dataset['target_prob_init_obs'],
                                        dtype=dtype)
            self.pi_term = torch.tensor(dataset['target_prob_term_obs'],
                                        dtype=dtype)
        # NOTE(review): 'std' is the signature default for `norm`, so calling
        # with defaults raises here -- confirm callers always pass another value.
        if self.norm == 'std':
            raise NotImplementedError
        else:
            obs = torch.tensor(dataset['obs'], dtype=dtype)[self.included_idx]
            next_obs = torch.tensor(dataset['next_obs'],
                                    dtype=dtype)[self.included_idx]
            init_obs = torch.tensor(dataset['init_obs'], dtype=dtype)
            term_obs = torch.tensor(dataset['term_obs'], dtype=dtype)

            # obs = torch.tensor(dataset['obs'][self.non_terminal_idx], dtype = dtype)
            # next_obs = torch.tensor(dataset['next_obs'][self.non_terminal_idx], dtype=dtype)
            # init_obs = torch.tensor(dataset['init_obs'], dtype=dtype)
            # term_obs = torch.tensor(dataset['term_obs'], dtype=dtype)
        #* re-whiten the (possibly) non-terminal data frames
        #* should have no effect if all indices are included and if the data is already whitened
        # All four observation sets are standardised with the statistics of
        # `obs` only.  No epsilon is added to the divisor.
        obs_mean = torch.mean(obs, dim=0, keepdims=True)
        obs_std = torch.std(obs, dim=0, keepdims=True)
        self.obs = (obs - obs_mean) / obs_std
        self.next_obs = (next_obs - obs_mean) / obs_std
        self.init_obs = (init_obs - obs_mean) / obs_std
        self.term_obs = (term_obs - obs_mean) / obs_std

        # #* whiten the non-terminal data frames
        # obs_mean = torch.mean(obs, dim=0, keepdims= True)
        # obs_std = torch.std(obs, dim=0, keepdims= True)
        # self.obs = (obs - obs_mean) / obs_std
        # self.next_obs = (next_obs - obs_mean) / obs_std
        # self.init_obs = (init_obs - obs_mean) / obs_std
        # self.term_obs = (term_obs - obs_mean) / obs_std

        if action_encoding_scheme == 'continuous':
            # Map action index k to an evenly spaced value in [-1, 1], then
            # standardise that grid with the mean/std of the actions actually
            # taken in the data.
            encoded_actions = np.linspace(-1, 1, self.act_dim)
            mean_action = np.mean(encoded_actions[self.data_acts])
            std_action = np.std(encoded_actions[self.data_acts])
            self.encoded_actions = (encoded_actions - mean_action) / std_action
            self.act_input = self.encoded_actions[self.data_acts]
        else:
            raise NotImplementedError

        #* set-up networks
        if self.input_mode == 'sa':
            assert action_encoding_scheme == 'continuous'
            # w_net takes the observation plus one scalar (obs_dim + 1 inputs);
            # q_net maps an observation to one output per action.
            self.w_net = Simple_MLP(input_dim = self.obs_dim+1, output_dim = 1, hidden_layers = hidden_layers,\
                activation= activation, output_transform = output_transform)
            self.q_net = Simple_MLP(input_dim = self.obs_dim, output_dim = self.act_dim, hidden_layers = hidden_layers,\
                activation= activation, output_transform = None)
            if use_separate_target_net:
                # Target network: same architecture, gradients disabled,
                # initialised with q_net's current weights.
                self.q_net_target = Simple_MLP(input_dim = self.obs_dim, output_dim = self.act_dim, hidden_layers = hidden_layers,\
                    activation= activation, output_transform = None)
                for param in self.q_net_target.model.parameters():
                    param.requires_grad = False
                self.q_net_target.model.load_state_dict(
                    self.q_net.model.state_dict())
        else:
            raise NotImplementedError
Ejemplo n.º 26
0
def main():
    """Run the similar-patch search benchmark once per noise setting.

    For every entry returned by ``create_noise_level_grid(cfg)`` the function

    1. creates an output directory under ``output/benchmark_noise_types``,
    2. loads one sample of the dataset,
    3. grid-searches a rotation angle and an integer roll (along dimension
       -2) that best align frame 1 of the burst with frame 0,
    4. runs ``compute_similar_bursts_analysis`` with frame 0 as the query,
    5. writes diagnostic images, a patch grid, a distance plot and a CSV of
       PSNR values into the output directory.

    The configuration is hard-coded at the top of the function and a CUDA
    device is selected with ``torch.cuda.set_device``.
    """

    # -- init --
    cfg = get_main_config()
    cfg.gpuid = 0
    cfg.batch_size = 1
    cfg.N = 2
    cfg.num_workers = 0
    cfg.dynamic.frames = cfg.N
    cfg.rot = edict()
    cfg.rot.skip = 0  # big gap between 2 and 3.

    # -- dynamics --
    cfg.dataset.name = "rots"
    cfg.dataset.load_residual = True
    cfg.dynamic.frame_size = 256
    cfg.frame_size = cfg.dynamic.frame_size
    cfg.dynamic.ppf = 0
    cfg.dynamic.total_pixels = cfg.N * cfg.dynamic.ppf
    torch.cuda.set_device(cfg.gpuid)

    # -- sim params --
    K = 10
    patchsize = 9
    db_level = "frame"
    search_method = "l2"
    # database_str = f"burstAll"
    database_idx = 1
    database_str = "burst{}".format(database_idx)

    # -- grab grids for experiments --
    noise_settings = create_noise_level_grid(cfg)
    # sim_settings = create_sim_grid(cfg)
    # motion_settings = create_motion_grid(cfg)

    for ns in noise_settings:

        # -=-=-=-=-=-=-=-=-=-=-
        #     loop params
        # -=-=-=-=-=-=-=-=-=-=-
        noise_level = 0.
        noise_type = ns.ntype
        noise_str = set_noise_setting(cfg, ns)

        # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
        #    create path for results
        # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
        path_args = (K, patchsize, cfg.batch_size, cfg.N, noise_str,
                     database_str, db_level, search_method)
        base = Path(f"output/benchmark_noise_types/{cfg.dataset.name}")
        root = Path(base /
                    "k{}_ps{}_b{}_n{}_{}_db-{}_sim-{}-{}".format(*path_args))
        print(f"Writing to {root}")
        if root.exists(): print("Running Experiment Again.")
        else: root.mkdir(parents=True)

        # -=-=-=-=-=-=-
        #   dataset
        # -=-=-=-=-=-=-
        data, loader = load_dataset(cfg, 'dynamic')
        if cfg.dataset.name == "voc":
            sample = next(iter(loader.tr))
        else:
            sample = data.tr[0]

        # -- load sample --
        burst, raw_img, res = sample['burst'], sample['clean'] - 0.5, sample[
            'res']
        kindex_ds = kIndexPermLMDB(cfg.batch_size, cfg.N)
        N, B, C, H, W = burst.shape
        if 'clean_burst' in sample: clean = sample['clean_burst'] - 0.5
        else: clean = burst - res
        # NOTE(review): the return value is discarded, so `clean` is left
        # unchanged here.  Kept as-is to preserve outputs; confirm whether
        # `clean = tvF.rgb_to_grayscale(clean, 3)` was intended.
        if noise_type in ["qis", "pn"]: tvF.rgb_to_grayscale(clean, 3)
        # burst = tvF.rgb_to_grayscale(burst,3)
        # raw_img = tvF.rgb_to_grayscale(raw_img,3)
        # clean = tvF.rgb_to_grayscale(clean,3)

        # -- temp (delete me soon) --
        # Grid-search the rotation of frame 1 that minimises the MSE between
        # frame 0 and the 50/50 blend of frame 0 with the rotated frame 1.
        search_rot_grid = np.linspace(.3, .32, 100)
        losses = np.zeros_like(search_rot_grid)
        for idx, angle in enumerate(search_rot_grid):
            save_alpha_burst = 0.5 * burst[0] + 0.5 * tvF.rotate(
                burst[1], angle)
            losses[idx] = F.mse_loss(save_alpha_burst, burst[0]).item()
        min_arg = np.argmin(losses)
        angle = search_rot_grid[min_arg]

        # Same search over an integer roll along dimension -2 of the rotated
        # frame.  The builtin `int`/`float` replace the `np.int`/`np.float`
        # aliases, which no longer exist in current NumPy releases.
        ref_img = tvF.rotate(burst[1], angle)
        shift_grid = np.linspace(-20, 20, 40 - 1).astype(int)
        losses = np.zeros_like(shift_grid).astype(float)
        for idx, shift in enumerate(shift_grid):
            save_alpha_burst = 0.5 * burst[0] + 0.5 * torch.roll(
                ref_img, shift, -2)
            losses[idx] = F.mse_loss(save_alpha_burst, burst[0]).item()
        min_arg = np.argmin(losses)
        shift = shift_grid[min_arg]

        # -- run search --
        kindex = kindex_ds[0]
        if database_str == "burstAll":
            database = burst
            clean_db = clean
        else:
            # Search only inside the single frame selected by database_idx.
            database = burst[[database_idx]]
            clean_db = clean[[database_idx]]
        query = burst[[0]]
        sim_outputs = compute_similar_bursts_analysis(
            cfg,
            query,
            database,
            clean_db,
            K,
            patchsize=patchsize,
            shuffle_k=False,
            kindex=kindex,
            only_middle=cfg.sim_only_middle,
            db_level=db_level,
            search_method=search_method,
            noise_level=noise_level / 255.)
        sims, csims, wsims, b_dist, b_indx = sim_outputs

        # -- save images --
        fs = cfg.frame_size
        save_K = 1
        save_sims = rearrange(sims[:, :, :save_K],
                              'n b k1 c h w -> (n b k1) c h w')
        save_csims = rearrange(csims[:, :, :save_K],
                               'n b k1 c h w -> (n b k1) c h w')
        save_cdelta = clean[0] - save_csims[0]
        save_alpha_burst = 0.5 * burst[0] + 0.5 * torch.roll(
            tvF.rotate(burst[1], angle), shift, -2)

        save_burst = rearrange(burst, 'n b c h w -> (b n) c h w')
        save_clean = rearrange(clean, 'n b c h w -> (b n) c h w')
        save_b_dist = rearrange(b_dist[:, :, :save_K],
                                'n b k1 h w -> (n b k1) 1 h w')
        save_b_indx = rearrange(b_indx[:, :, :save_K],
                                'n b k1 h w -> (n b k1) 1 h w')
        # Visualise the match index as its absolute offset from each pixel's
        # own flat index, normalised so the image sums to (almost) one.
        save_b_indx = torch.abs(
            torch.arange(fs * fs).reshape(fs, fs) - save_b_indx).float()
        save_b_indx /= (torch.sum(save_b_indx) + 1e-16)
        # NOTE(review): newer torchvision releases name this keyword
        # `value_range`; `range` is kept for the version this file targets.
        tv_utils.save_image(save_sims,
                            root / 'sims.png',
                            nrow=B,
                            normalize=True,
                            range=(-0.5, 0.5))
        tv_utils.save_image(save_csims,
                            root / 'csims.png',
                            nrow=B,
                            normalize=True,
                            range=(-0.5, 0.5))
        tv_utils.save_image(save_cdelta,
                            root / 'cdelta.png',
                            nrow=B,
                            normalize=True,
                            range=(-0.5, 0.5))
        tv_utils.save_image(save_clean,
                            root / 'clean.png',
                            nrow=N,
                            normalize=True,
                            range=(-0.5, 0.5))
        tv_utils.save_image(save_burst,
                            root / 'burst.png',
                            nrow=N,
                            normalize=True,
                            range=(-0.5, 0.5))
        tv_utils.save_image(save_b_dist,
                            root / 'b_dist.png',
                            nrow=B,
                            normalize=True)
        tv_utils.save_image(raw_img, root / 'raw.png', nrow=B, normalize=True)
        tv_utils.save_image(save_b_indx,
                            root / 'b_indx.png',
                            nrow=B,
                            normalize=True)
        tv_utils.save_image(save_alpha_burst,
                            root / 'alpha_burst.png',
                            nrow=B,
                            normalize=True)

        # -- save top K patches at location --
        b = 0
        ref_img = clean[0, b]
        ps, fs = patchsize, cfg.frame_size
        xx, yy = np.mgrid[32:48, 48:64]
        xx, yy = xx.ravel(), yy.ravel()
        # Reflect-pad the database frame by half a patch on every side so
        # patches centred on border pixels can be cropped.
        clean_pad = F.pad(clean[database_idx, [b]],
                          (ps // 2, ps // 2, ps // 2, ps // 2),
                          mode='reflect')[0]
        patches = []
        for x, y in zip(xx, yy):
            gt_patch = tvF.crop(ref_img, x - ps // 2, y - ps // 2, ps, ps)
            patches_xy = [gt_patch]
            for k in range(save_K):
                # Decode the flat match index into (row, col) in the padded
                # frame, then back to the patch's top-left corner.
                indx = b_indx[0, 0, k, x, y]
                xp, yp = (indx // fs) + ps // 2, (indx % fs) + ps // 2
                t, l = xp - ps // 2, yp - ps // 2
                clean_patch = tvF.crop(clean_pad, t, l, ps, ps)
                patches_xy.append(clean_patch)
                # Centre-pixel error, broadcast to a constant patch for display.
                pix_diff = F.mse_loss(gt_patch[:, ps // 2, ps // 2],
                                      clean_patch[:, ps // 2, ps // 2]).item()
                pix_diff_img = pix_diff * torch.ones_like(clean_patch)
                patches_xy.append(pix_diff_img)
            patches_xy = torch.stack(patches_xy, dim=0)
            patches.append(patches_xy)
        patches = torch.stack(patches, dim=0)
        R = patches.shape[1]
        patches = rearrange(patches, 'l k c h w -> (l k) c h w')
        fn = f"patches_{b}.png"
        tv_utils.save_image(patches, root / fn, nrow=R, normalize=True)

        # -- stats about distance --
        mean_along_k = reduce(b_dist, 'n b k1 h w -> k1', 'mean')
        std_along_k = torch.std(b_dist, dim=(0, 1, 3, 4))
        fig, ax = plt.subplots(figsize=(8, 8))
        R = mean_along_k.shape[0]
        ax.errorbar(np.arange(R), mean_along_k, yerr=std_along_k)
        plt.savefig(root / "distance_stats.png", dpi=300)
        plt.clf()
        plt.close("all")

        # -- psnr between 1st neighbor and clean --
        # Rows are collected in a list and turned into a DataFrame once;
        # `DataFrame.append` is not available in current pandas releases.
        psnr_columns = ["b", "k", "psnr", "crop200_psnr"]
        psnr_rows = []
        for b in range(B):
            for k in range(K):

                # -- psnr --
                crop_raw = clean[0, b]
                crop_cmp = csims[0, b, k]
                rc_mse = F.mse_loss(crop_raw, crop_cmp,
                                    reduction='none').reshape(1, -1)
                rc_mse = torch.mean(rc_mse, 1).numpy() + 1e-16
                psnr_bk = np.mean(mse_to_psnr(rc_mse))
                print(psnr_bk)

                # -- crop psnr --
                crop_raw = tvF.center_crop(clean[0, b], 200)
                crop_cmp = tvF.center_crop(csims[0, b, k], 200)
                rc_mse = F.mse_loss(crop_raw, crop_cmp,
                                    reduction='none').reshape(1, -1)
                rc_mse = torch.mean(rc_mse, 1).numpy() + 1e-16
                crop_psnr = np.mean(mse_to_psnr(rc_mse))
                # if np.isinf(psnr_bk): psnr_bk = 50.
                psnr_rows.append({
                    'b': b,
                    'k': k,
                    'psnr': psnr_bk,
                    'crop200_psnr': crop_psnr
                })
        # psnr_ave = np.mean(psnrs)
        # psnr_std = np.std(psnrs)
        # print( "PSNR: %2.2f +/- %2.2f" % (psnr_ave,psnr_std) )
        psnrs = pd.DataFrame(psnr_rows, columns=psnr_columns)
        psnrs = psnrs.astype({
            'b': int,
            'k': int,
            'psnr': float,
            'crop200_psnr': float
        })
        psnrs.to_csv(root / "psnrs.csv", sep=",", index=False)
Ejemplo n.º 27
0
    def eval_fn(params, horizon, tflogger, step):
        """Evaluate the current variational parameters and log diagnostics.

        Args:
            params: Sequence of tensors.  When ``FLAGS.init_std > 0`` the
                first half holds the means and the second half the
                (pre-softplus) standard deviations; otherwise all entries are
                means and the standard deviations are zero.
            horizon: Number of time points between 0 and ``FLAGS.tmax``.
            tflogger: Logger exposing ``log_scalar`` and ``log_images``.
            step: Step index attached to every logged value.

        Returns:
            dict with the keys ``'nll'``, ``'test_elbo'``,
            ``'log_prob_true_params'`` and ``'mean_param_distance'``.

        ``FLAGS``, ``true_theta``, ``x_true_nll``, ``prior_weight`` and the
        helper functions are taken from the enclosing scope.
        """

        def _to_numpy(tensor):
            # Detached host copy of a tensor as a NumPy array.
            return tensor.data.cpu().numpy()

        half = len(params) // 2
        if FLAGS.init_std > 0.0:
            means = torch.cat(params[:half])
            stds = F.softplus(torch.cat(params[half:]))
        else:
            means = torch.cat(params)
            stds = torch.zeros_like(means)
        means = F.softplus(means)
        ts = np.linspace(0, FLAGS.tmax, horizon)
        # Draw eval_batch_size parameter vectors: means + eps * stds, then
        # take the absolute value of every entry.
        noise = _cuda(
            torch.FloatTensor(
                np.random.normal(size=[FLAGS.eval_batch_size, len(means)])))
        sample_params = means.view([1, -1]) + noise * stds.view([1, -1])
        sample_params = torch.abs(sample_params)

        log_params('means', means, tflogger, step)
        log_params('stds', stds, tflogger, step)
        log_params('sample_mean', torch.mean(sample_params, dim=0), tflogger,
                   step)
        log_params('sample_std', torch.std(sample_params, dim=0), tflogger,
                   step)
        log_params('true', true_theta, tflogger, step)
        # Columns 0-1 are the initial state, columns 2-5 the model parameters.
        lv = LoktaVolterra(sample_params[:, 2], sample_params[:, 3],
                           sample_params[:, 4], sample_params[:, 5])
        x0 = sample_params[:, :2]
        xs = torch.stack(RK4.integrate(lv.dx, x0, ts))
        nll_val = nll(xs, x_true_nll)
        p_z_term = param_log_prob(sample_params)
        h_q_z_term = posterior_entropy(means, stds)
        kl_term = h_q_z_term - p_z_term
        xs = xs.data.cpu().numpy()
        xbatch = np.swapaxes(xs, 0, 1)
        nll_np = _to_numpy(nll_val)
        kl_np = _to_numpy(kl_term)
        tflogger.log_scalar('min_x', np.min(xbatch), step)
        tflogger.log_scalar('nll', nll_np, step)
        tflogger.log_scalar('p(z), z~q', _to_numpy(p_z_term), step)
        tflogger.log_scalar('H(q(z))', _to_numpy(h_q_z_term), step)
        tflogger.log_scalar('kld', kl_np, step)
        tflogger.log_images(
            'Rabbits and Foxes',
            [draw_plots(xbatch, title='Estimated Rabbits and Foxes')], step)
        mean_param_distance = _to_numpy(torch.mean((true_theta - means)**2))
        if FLAGS.noise_std <= 0.0 or FLAGS.init_std <= 0.0:
            log_prob_true_params = -mean_param_distance
        else:
            # Build the posterior only where it is used.  In the other branch
            # `stds` can be all zeros, which is not a valid Normal scale and
            # makes the constructor fail when argument validation is enabled.
            posterior = D.Normal(loc=means, scale=stds)
            log_prob_true_params = _to_numpy(
                torch.sum(posterior.log_prob(true_theta)))
        return {
            'nll': nll_np,
            'test_elbo': -nll_np - prior_weight * kl_np,
            'log_prob_true_params': log_prob_true_params,
            'mean_param_distance': mean_param_distance
        }
Ejemplo n.º 28
0
 def test_std(self):
     """Check ONNX export of ``torch.std`` reduced over two dimensions.

     Uses a random float tensor of shape (2, 3, 4) and reduces dimensions
     0 and 1 with the unbiased estimator, keeping the reduced dimensions.
     """
     sample = torch.randn(2, 3, 4).float()

     def std_over_first_two_dims(x):
         return torch.std(x, dim=(0, 1), unbiased=True, keepdim=True)

     self.assertONNX(std_over_first_two_dims, sample)
Ejemplo n.º 29
0
        # One pass over the training batches.  For each batch the combined
        # loss and its two components are back-propagated separately so the
        # gradient each one puts on `M_model.m` can be printed.
        # NOTE(review): `float_out_star`, `lamda`, `self_loss`, `optimizer`
        # and `device` come from code outside this fragment.
        for j, (data) in enumerate(train_loader):
            # inputs: Nx1xKxT
            inputs, targets, input_percentages, target_sizes = data
            # Scale the per-sample fractions by the size of dimension 3 of
            # `inputs`; note that `mul_` modifies `input_percentages` in place.
            input_sizes = input_percentages.mul_(int(inputs.size(3))).int()
            inputs = inputs.to(device)

            out, output_sizes = M_model(inputs, input_sizes)
            out = out.transpose(0, 1)  # TxNxH
            float_out = out.float()  # ensure float32 for loss

            loss, loss_1, loss_2 = self_loss(float_out, float_out_star,
                                             torch.abs(M_model.m), lamda)
            loss = loss / inputs.size(0)  # average the loss by minibatch
            loss_value = loss.item()
            # Summary statistics of the parameter `m` for this batch.
            m_mean = torch.mean(M_model.m).item()
            m_std = torch.std(M_model.m).item()

            optimizer.zero_grad()
            # compute gradient
            #loss.backward(retain_graph=True)  # save middle variables
            # Back-propagate each component on its own (keeping the graph for
            # the next call), print the gradient on `m`, then zero it.
            loss_1.backward(retain_graph=True)
            print('loss_1 grad m is:', M_model.m.grad)
            M_model.m.grad.data.zero_()  # avoid gradient accumulation

            loss_2.backward(retain_graph=True)
            print('loss_2 grad m is:', M_model.m.grad)
            M_model.m.grad.data.zero_()

            # Final backward pass on the batch-averaged total loss.
            # NOTE(review): only `m.grad` was zeroed above; other parameters
            # still hold the gradients from loss_1 and loss_2 -- confirm this
            # accumulation is intended.
            loss.backward()
            print('loss grad m is:', M_model.m.grad)
Ejemplo n.º 30
0
        annee.append(float(i))
        temp.append(temperature[1][i][j])  #enlever les clés

# Flatten the humidity readings: `humidite[0]` is iterated by key and
# `humidite[1]` supplies the value stored under the same key and position.
humidite_continu = []  # not filled in this fragment
annee = []  # reset here, so it only holds the keys seen in the loop below
humi = []
for i in humidite[0].keys():
    for j in range(len(humidite[0][i])):
        annee.append(float(i))
        humi.append(humidite[1][i][j])

# Mean and standard deviation of each series, used below for standardisation.
# `temp` is filled by code above this fragment.
annee = torch.tensor(annee)
temp = torch.tensor(temp)
humi = torch.tensor(humi)
ma = float(torch.mean(annee))
sta = float(torch.std(annee))  # standard deviation
mt = float(torch.mean(temp))
stt = float(torch.std(temp))
mh = float(torch.mean(humi))
sth = float(torch.std(humi))
# Build one 7-value row per temperature reading: standardised key, the two
# values returned by each converter, standardised temperature and humidity.
# NOTE(review): `humidite[1][i][j]` is indexed with the temperature keys and
# positions -- this assumes both series share the same layout; confirm.
for i in temperature[0].keys():
    for j in range(len(temperature[0][i])):
        cdt, sdt = convertisseur_date(temperature[0][i][j])
        ch, sh = convertisseur_heure(temperature[0][i][j])
        temperature_continu.append([(float(i) - ma) / sta, cdt, sdt, ch, sh,
                                    (temperature[1][i][j] - mt) / stt,
                                    (humidite[1][i][j] - mh) / sth])

temperature_continu = torch.tensor(temperature_continu).double()

# 8 * 7 * 3 = 168; presumably the length of the training window -- confirm.
longueur_apprentissage = 8 * 7 * 3
Ejemplo n.º 31
0
def relax(step, surrogate, x, logits, mixtureweights, k=1, get_grad=False):
    """Compute a surrogate-based gradient-estimator loss for a categorical choice.

    For each of ``k`` repetitions a category ``b`` is sampled per row from
    ``logits`` using Gumbel noise, the surrogate network is evaluated on the
    relaxed sample and on a second relaxed sample conditioned on ``b``, and
    ``net_loss`` is formed from the score-function term and the two surrogate
    predictions.

    Args:
        step: Not used by the live code (only by a commented-out warm-up).
        surrogate: Object whose ``net`` is called on ``[z, x, logits]``
            concatenated along dimension 1.
        x: Data batch, passed to ``logprob_undercomponent`` and the surrogate.
        logits: 2-D tensor; its shape gives ``B`` and ``C``.  Gradients are
            taken with respect to it.
        mixtureweights: Tensor indexed by the sampled category.
        k: Number of repetitions.
        get_grad: If true, collect the gradient of ``net_loss`` w.r.t.
            ``logits`` for each repetition instead of the surrogate loss.

    Returns:
        dict.  Always contains ``'net_loss'``, ``'f'``, ``'logpx_given_z'``,
        ``'logpz'``, ``'logq'`` and ``'surr_dif'`` from the LAST repetition.
        With ``get_grad`` it adds ``'grad_avg'`` and ``'grad_std'``; otherwise
        ``'surr_loss'`` and the absolute gradient diagnostics.

    Notes:
        * Random draws are moved to the GPU with ``.cuda()``.
        * With ``k == 0`` the loop body never runs and the names read after
          the loop are unbound.
        * The bare name ``fasdfas`` is not defined in this function; reaching
          it raises ``NameError`` -- presumably a deliberate crash after the
          NaN debug prints.
    """

    def sample_relax(logits, surrogate):
        # Sample b via argmax(logits + Gumbel noise) and evaluate the surrogate
        # on the relaxed sample z and on a sample z_tilde conditioned on b.
        cat = Categorical(logits=logits)
        u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda()
        gumbels = -torch.log(-torch.log(u))
        z = logits + gumbels
        b = torch.argmax(z, dim=1) #.view(B,1)
        logprob = cat.log_prob(b).view(B,1)


        # czs = []
        # for j in range(1):
        #     z = sample_relax_z(logits)
        #     surr_input = torch.cat([z, x, logits.detach()], dim=1)
        #     cz = surrogate.net(surr_input)
        #     czs.append(cz)
        # czs = torch.stack(czs)
        # cz = torch.mean(czs, dim=0)#.view(1,1)
        surr_input = torch.cat([z, x, logits.detach()], dim=1)
        cz = surrogate.net(surr_input)


        # Average over conditional samples; the loop currently runs once.
        cz_tildes = []
        for j in range(1):
            z_tilde = sample_relax_given_b(logits, b)
            surr_input = torch.cat([z_tilde, x, logits.detach()], dim=1)
            cz_tilde = surrogate.net(surr_input)
            cz_tildes.append(cz_tilde)
        cz_tildes = torch.stack(cz_tildes)
        cz_tilde = torch.mean(cz_tildes, dim=0) #.view(B,1)

        return b, logprob, cz, cz_tilde


    def sample_relax_z(logits):
        # Unconditional relaxed sample; only referenced from commented-out code.

        u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda()
        gumbels = -torch.log(-torch.log(u))
        z = logits + gumbels
        return z


    def sample_relax_given_b(logits, b):
        # Relaxed sample conditioned on the chosen category b: column b gets a
        # fresh Gumbel draw, the other columns use the expression below.

        u_b = torch.rand(B,1).clamp(1e-10, 1.-1e-10).cuda()
        z_tilde_b = -torch.log(-torch.log(u_b))

        u = torch.rand(B,C).clamp(1e-10, 1.-1e-10).cuda()
        z_tilde = -torch.log((- torch.log(u) / torch.softmax(logits,dim=1)) - torch.log(u_b))
        # Overwrite the entry of the selected category in place.
        z_tilde.scatter_(dim=1, index=b.view(B,1), src=z_tilde_b)

        return z_tilde




    outputs = {}
    # B and C are read by the nested helpers above through the closure.
    B = logits.shape[0]
    C = logits.shape[1]

    grads =[]
    for jj in range(k):

        b, logq, cz, cz_tilde = sample_relax(logits, surrogate)

        logpx_given_z = logprob_undercomponent(x, component=b)
        logpz = torch.log(mixtureweights[b]).view(B,1)
        logpxz = logpx_given_z + logpz #[B,1]
        # print(logpxz.shape, logpz.shape)
        # fsdf
        f = logpxz - logq - 1.

        surr_pred_z = cz
        surr_pred_z_tilde = cz_tilde

        #Encoder loss
        # warmup = np.minimum( (step+1) / 50000., 1.)
        # warmup = .0001
        warmup = 1.

        # Score-function term with the surrogate as control variate (both
        # detached), plus the difference of the two surrogate predictions.
        net_loss = - torch.mean(   warmup*((f.detach() - surr_pred_z_tilde.detach()) * logq)  +  surr_pred_z - surr_pred_z_tilde )

        # NaN check (NaN != NaN): dump the intermediates, then crash.
        if (net_loss != net_loss).any():
            print (net_loss)
            print (f)
            print (logpxz)
            print (logq)
            print (surr_pred_z_tilde)
            print (surr_pred_z)
            # print (z)
            # print (probs)
            # print (gumbels)
            fasdfas

        if get_grad:
            # Keep this repetition's gradient w.r.t. logits for the statistics
            # computed after the loop.
            grad = torch.autograd.grad([net_loss], [logits], create_graph=True, retain_graph=True)[0]
            grads.append(grad)
            surr_dif = torch.mean(torch.abs(f.detach() - surr_pred_z_tilde))

        else:
            # #Surrogate loss
            # grad_logq =  torch.mean( torch.autograd.grad([torch.mean(logq)], [logits], create_graph=True, retain_graph=True)[0], dim=1, keepdim=True)
            # grad_surr_z = torch.mean( torch.autograd.grad([torch.mean(surr_pred_z)], [logits], create_graph=True, retain_graph=True)[0], dim=1, keepdim=True)
            # grad_surr_z_tilde = torch.mean( torch.autograd.grad([torch.mean(surr_pred_z_tilde)], [logits], create_graph=True, retain_graph=True)[0], dim=1, keepdim=True)

            # print (f.shape, surr_pred_z_tilde.shape, grad_logq.shape, grad_surr_z.shape, grad_surr_z_tilde.shape)
            # fasdfdas
            # print (grad_surr_z_tilde)
            # fsfa

            # Gradients of each piece w.r.t. logits; create_graph=True keeps
            # them differentiable so surr_loss can be back-propagated.
            grad_logq =  torch.autograd.grad([torch.mean(logq)], [logits], create_graph=True, retain_graph=True)[0]
            grad_surr_z =  torch.autograd.grad([torch.mean(surr_pred_z)], [logits], create_graph=True, retain_graph=True)[0]
            grad_surr_z_tilde = torch.autograd.grad([torch.mean(surr_pred_z_tilde)], [logits], create_graph=True, retain_graph=True)[0]
            grad_path = torch.autograd.grad([torch.mean(surr_pred_z - surr_pred_z_tilde)], [logits], create_graph=True, retain_graph=True)[0]
            # Surrogate objective: mean squared value of the assembled
            # gradient estimate.
            surr_loss = torch.mean(((f.detach() - surr_pred_z_tilde) * grad_logq + grad_surr_z - grad_surr_z_tilde)**2)

            # surr_loss = torch.mean(torch.abs(f.detach() - surr_pred_z_tilde))

            # Same NaN check and crash as above, for the surrogate loss.
            if (surr_loss != surr_loss).any():
                print ('net_loss', net_loss)
                print ('surr_loss', surr_loss)
                print ('f', f)
                print ('logpxz', logpxz)
                print ('logq', logq)
                print ('surr_pred_z_tilde', surr_pred_z_tilde)
                print ('surr_pred_z', surr_pred_z)
                # print (z)
                # print (probs)
                # print (gumbels)
                print ('grad_logq', grad_logq)
                print ('grad_surr_z', grad_surr_z)
                print ('grad_surr_z_tilde', grad_surr_z_tilde)
                fasdfas

            surr_dif = torch.mean(torch.abs(f.detach() - surr_pred_z_tilde))
            # surr_loss = surr_dif

    
    # Everything below reports values from the last loop iteration only.
    outputs['net_loss'] = net_loss
    outputs['f'] = f
    outputs['logpx_given_z'] = logpx_given_z
    outputs['logpz'] = logpz
    outputs['logq'] = logq


    if get_grad:
        grads = torch.stack(grads)
        # print (grads.shape)
        # Mean over repetitions and then over rows; the std is taken over
        # repetitions and only the first row of it is kept.
        outputs['grad_avg'] = torch.mean(torch.mean(grads, dim=0),dim=0)
        outputs['grad_std'] = torch.std(grads, dim=0)[0]
        outputs['surr_dif'] = surr_dif   
    else:
        outputs['surr_loss'] = surr_loss
        outputs['surr_dif'] = surr_dif   
        outputs['grad_logq'] = torch.abs(grad_logq)  
        outputs['grad_surr_z'] = torch.abs(grad_surr_z  ) 
        outputs['grad_surr_z_tilde'] = torch.abs(grad_surr_z_tilde )  
        outputs['grad_path'] = torch.abs(grad_path )  
        outputs['grad_score'] = torch.abs(grad_logq*(f.detach() - surr_pred_z_tilde.detach()))  

    # return net_loss, f, logpx_given_z, logpz, logq, surr_loss, surr_dif
    return outputs
Ejemplo n.º 32
0
def train(ds,
          gen,
          disc,
          init,
          nc_z,
          device,
          n_frames,
          epochs=1,
          batch_size=4,
          g_lr=0.0001,
          d_lr=0.0001,
          n_disc_trains=3,
          gen_path=None,
          disc_path=None,
          tqdm_disable=False,
          n_slices=1):
    """Alternately train a video generator and discriminator on ``ds``.

    For every batch the first ``n_frames`` frames are average-pooled from
    256x256 to 16x16, ``init`` turns random noise into an initial feature
    pyramid, and the clip is generated in ``n_slices`` consecutive slices of
    ``n_frames // n_slices`` frames. The generator is updated on batches where
    ``idx % (n_disc_trains + 1) == 0``; the discriminator is updated on all
    other batches. Scalars are reported through ``log_value``.

    Args:
        ds: dataset handed to ``DataLoader``; each batch is unpacked as
            ``(label, all_frames)``.
        gen: generator, called as ``gen(pyramid, n_frames=...)`` and expected
            to return ``(frames, pyramid)``.
        disc: discriminator, called on generated and real frames.
        init: module mapping the noise tensor to the initial pyramid.
        nc_z: number of channels of the noise tensor.
        device: device the modules and batches are moved to.
        n_frames: number of leading frames used from every clip.
        epochs: number of passes over ``ds``.
        batch_size: batch size for the loader and the noise tensor.
        g_lr: Adam learning rate for the generator.
        d_lr: Adam learning rate for the discriminator.
        n_disc_trains: discriminator batches per generator batch.
        gen_path: if not None, where the generator state dict is saved.
        disc_path: if not None, where the discriminator state dict is saved.
        tqdm_disable: forwarded to ``tqdm(disable=...)``.
        n_slices: number of slices each clip is split into.

    Returns:
        None. The modules are trained in place.
    """
    # CHANGED LEARNING RATE
    gen.to(device)
    disc.to(device)
    init.to(device)
    gen.train()
    disc.train()
    init.train()
    # CHANGING GENERATOR AND DISCRIMINATOR STRUCTURE
    downsample = nn.AvgPool2d(16, stride=16, padding=0)
    # NOTE(review): only gen and disc get an optimizer here; init is put in
    # train mode but its parameters are never stepped in this function.
    opt_gen = torch.optim.Adam(gen.parameters(), lr=g_lr, betas=(0.99, 0.999))
    opt_disc = torch.optim.Adam(disc.parameters(),
                                lr=d_lr,
                                betas=(0.99, 0.999))
    # Running loss buffers; they are only used for the progress-bar text and
    # are reset every time the description is refreshed.
    gen_losses = []
    disc_losses = []
    for e_idx in range(epochs):
        dl = DataLoader(ds,
                        batch_size=batch_size,
                        shuffle=False,
                        num_workers=5)
        bar = tqdm(disable=tqdm_disable, total=len(dl) * n_slices)
        for idx, batch in enumerate(dl):
            batch = [t.to(device) for t in batch]
            label, all_frames = batch  # ignore label for now
            all_frames = all_frames[:, :n_frames].float()
            # NOTE(review): the view below uses batch_size rather than the
            # actual batch length, so a smaller final batch would not fit --
            # confirm len(ds) is a multiple of batch_size.
            all_frames = downsample(all_frames.view(-1, 3, 256, 256)).view(
                batch_size, n_frames, 3, 16,
                16)  # AvgPool2d(16, stride=16) reduces each 256x256 frame to 16x16
            rand_input = torch.randn(batch_size, nc_z, 16, 16).to(device)

            pyramid = init(
                rand_input
            )  # initialise the first-frame feature pyramid to begin generating

            # Small constant added inside the log terms below (10e-5 == 1e-4).
            epsilon = 10e-5

            d_slice = n_frames // n_slices
            for i in range(n_slices):
                bar.update(1)
                real_frames = all_frames[:, i * d_slice:i * d_slice + d_slice]

                # The logging index does not include the slice index i, so all
                # slices of one batch are logged under the same step.
                log_idx = e_idx * len(bar) + idx
                # load batch of examples
                # we use vanilla GAN objective

                gen_frames_tanh, pyramid = gen(pyramid, n_frames=d_slice)
                fake_scores = disc(gen_frames_tanh)
                log_value('fake_score',
                          torch.mean(fake_scores).item(), log_idx)
                log_value('gen_frame_mean',
                          torch.mean(gen_frames_tanh).item(), log_idx)
                log_value('gen_frame_std',
                          torch.std(gen_frames_tanh).item(), log_idx)
                # train generator less than discriminator
                if idx % (n_disc_trains + 1) == 0:
                    #gen_loss = torch.mean((1-fake_scores)** 2)
                    # real_frames_tanh = (real_frames - 1 / 2) * 2
                    # gen_loss = torch.mean((gen_frames_tanh - real_frames_tanh) ** 2)
                    # log(1 + exp(-score) + epsilon): low when fake scores are high.
                    # NOTE(review): .item() below requires a single-element
                    # tensor, so disc presumably returns one score -- confirm.
                    gen_loss = (1 + (-fake_scores).exp() + epsilon).log()
                    item_loss = gen_loss.item()
                    log_value('gen_loss', item_loss, log_idx)
                    gen_losses.append(item_loss)
                    opt_gen.zero_grad()
                    gen_loss.backward()
                    opt_gen.step()
                else:
                    # frames are assumed to be in range (0, 1); map to (-1, +1)
                    # (1 / 2 binds first, so this is (real_frames - 0.5) * 2)
                    real_frames_tanh = (real_frames - 1 /
                                        2) * 2  # convert from sigmoid to tanh
                    log_value('real_frame_mean',
                              torch.mean(real_frames_tanh).item(), log_idx)
                    log_value('real_frame_std',
                              torch.std(real_frames_tanh).item(), log_idx)
                    real_scores = disc(real_frames_tanh)
                    log_value('real_score',
                              torch.mean(real_scores).item(), log_idx)
                    # The least-squares GAN objective is kept for reference only:
                    #disc_loss = .5 * torch.mean(fake_scores ** 2) + .5 * torch.mean((1-real_scores) ** 2)
                    # Active loss: log(1 + exp(-real) + eps) + log(1 + exp(fake) + eps).
                    # NOTE(review): fake_scores is not detached, so this backward
                    # pass also reaches the generator graph; the generator
                    # gradients are cleared by opt_gen.zero_grad() before its
                    # next step.
                    disc_loss = (1 + (-real_scores).exp() + epsilon).log() + (
                        1 + fake_scores.exp() + epsilon).log()
                    item_loss = disc_loss.item()
                    log_value('disc_loss', item_loss, log_idx)
                    disc_losses.append(item_loss)
                    opt_disc.zero_grad()
                    disc_loss.backward()
                    opt_disc.step()

                pyramid = [layer.detach() for layer in pyramid
                           ]  # we don't want gradients flowing backward

                if log_idx % 10 == 9:
                    if len(gen_losses) > 0 and len(disc_losses) > 0:
                        bar.set_description(
                            'gen, disc losses: %s,%s' %
                            (np.mean(gen_losses), np.mean(disc_losses)))
                        gen_losses = []
                        disc_losses = []

                if log_idx % 100 == 99:
                    # periodically save models
                    if gen_path is not None:
                        torch.save(gen.state_dict(), gen_path)
                    if disc_path is not None:
                        torch.save(disc.state_dict(), disc_path)
    # gradcz_tilde = torch.autograd.grad(outputs=cz_tilde, inputs=(logits), retain_graph=True)[0]

    grad = (reward-cz).detach() *gradlogprob + gradcz

    if (grad != grad).any():
        print ('nan')
        fsfsa

    grads_simplax.append(grad)


# Summarise the gradient samples collected in grads_simplax: reshape them to
# (N, C), then report the per-column mean and standard deviation.
print ()
grads_simplax = torch.stack(grads_simplax).view(N,C)

grad_mean_simplax = torch.mean(grads_simplax,dim=0)
grad_std_simplax = torch.std(grads_simplax,dim=0)


print ('SIMPLAX')
print ('mean:', grad_mean_simplax)
print ('std:', grad_std_simplax)
print ()



# Compare the estimated mean with the reference held in `true`.
# NOTE(review): the printed reference string is hard-coded; presumably it
# mirrors the contents of `true` -- confirm they stay in sync.
print ('True')
print ('[-.5478, .1122, .4422]')
print ('dif:', np.abs(grad_mean_simplax.numpy() - true ))
print ()

Ejemplo n.º 34
0
    def eval(self,
             epoch,
             save_score=False,
             loader_name=['test'],
             wrong_file=None,
             result_file=None):
        """Evaluate ``self.model`` on the given loaders and dump the scores.

        For every loader name the model outputs, losses and per-batch
        accuracies are collected, the top-1 accuracy is computed through the
        dataset's ``top_k`` helper, and the ``sample_name -> score`` mapping is
        pickled under ``./work_dir/<Experiment_name>/eval_results/``. When the
        accuracy beats ``self.best_acc`` it is also stored as ``best_acc.pkl``.

        Args:
            epoch: zero-based epoch index (logged as ``epoch + 1``).
            save_score: unused; kept for interface compatibility.
            loader_name: iterable of keys into ``self.data_loader``.
            wrong_file: optional path; misclassified samples are written as
                ``index,predicted,true`` lines.
            result_file: optional path; every prediction is written as
                ``predicted,true`` lines.

        Returns:
            Tuple ``(avg_loss, med_loss, avg_acc, med_acc, std_acc,
            med_abs_dev)`` computed from the per-batch values of the LAST
            loader in ``loader_name``.
        """
        f_w = None
        f_r = None
        try:
            # Open inside the try block so both handles are closed even when
            # the second open() or the evaluation itself raises.
            if wrong_file is not None:
                f_w = open(wrong_file, 'w')
            if result_file is not None:
                f_r = open(result_file, 'w')
            self.model.eval()
            self.print_log('Eval epoch: {}'.format(epoch + 1))
            for ln in loader_name:
                loss_value = []
                acc_value = []
                score_frag = []
                process = tqdm(self.data_loader[ln])
                for data, label, index in process:
                    # NOTE(review): `volatile` has no effect since PyTorch 0.4;
                    # the Variable wrappers are kept as in the original.
                    data = Variable(data.float().cuda(self.output_device),
                                    requires_grad=False,
                                    volatile=True)
                    label = Variable(label.long().cuda(self.output_device),
                                     requires_grad=False,
                                     volatile=True)

                    with torch.no_grad():
                        output = self.model(data)

                    loss = self.loss(output, label)
                    score_frag.append(output.data.cpu().numpy())
                    loss_value.append(loss.data.cpu().numpy())

                    _, predict_label = torch.max(output.data, 1)
                    acc = torch.mean((predict_label == label.data).float())
                    acc_value.append(acc)

                    if wrong_file is not None or result_file is not None:
                        predict = list(predict_label.cpu().numpy())
                        true = list(label.data.cpu().numpy())
                        for i, x in enumerate(predict):
                            if result_file is not None:
                                f_r.write(str(x) + ',' + str(true[i]) + '\n')
                            if x != true[i] and wrong_file is not None:
                                f_w.write(
                                    str(index[i]) + ',' + str(x) + ',' +
                                    str(true[i]) + '\n')
                score = np.concatenate(score_frag)

                accuracy = self.data_loader[ln].dataset.top_k(score, 1)
                if accuracy > self.best_acc:
                    self.best_acc = accuracy
                    score_dict = dict(
                        zip(self.data_loader[ln].dataset.sample_name, score))

                    # NOTE(review): `arg` is a module-level name here while the
                    # rest of the method uses `self.arg` -- confirm intended.
                    with open(
                            './work_dir/' + arg.Experiment_name +
                            '/eval_results/best_acc' + '.pkl', 'wb') as f:
                        pickle.dump(score_dict, f)

                print('Eval Accuracy: ', accuracy, ' model: ',
                      self.arg.model_saved_name)

                score_dict = dict(
                    zip(self.data_loader[ln].dataset.sample_name, score))
                self.print_log('\tMean {} loss of {} batches: {}.'.format(
                    ln, len(self.data_loader[ln]), np.mean(loss_value)))
                for k in self.arg.show_topk:
                    self.print_log('\tTop{}: {:.2f}%'.format(
                        k, 100 * self.data_loader[ln].dataset.top_k(score, k)))

                with open(
                        './work_dir/' + arg.Experiment_name +
                        '/eval_results/epoch_' + str(epoch) + '_' +
                        str(accuracy) + '.pkl', 'wb') as f:
                    pickle.dump(score_dict, f)

                # Per-batch statistics; overwritten for every loader, so the
                # values of the last loader are the ones returned.
                std_acc = torch.std(torch.stack(acc_value))
                med_abs_dev = self.calc_mad(acc_value)
                avg_loss = np.mean(loss_value)
                avg_acc = torch.mean(torch.stack(acc_value))
                med_loss = np.median(loss_value)
                med_acc = torch.median(torch.stack(acc_value))

            return avg_loss, med_loss, avg_acc, med_acc, std_acc, med_abs_dev
        finally:
            for handle in (f_w, f_r):
                if handle is not None:
                    handle.close()
Ejemplo n.º 35
0
 def input_norm(self, x):
     """Standardise every sample of ``x`` with its own mean and std.

     The statistics are taken over all non-batch elements of each sample and
     are detached, so no gradient flows through them. ``1e-7`` is added to the
     standard deviation to avoid a division by zero.
     """
     per_sample = x.view(x.size(0), -1)
     center = torch.mean(per_sample, dim=1).detach()
     scale = torch.std(per_sample, dim=1).detach() + 1e-7
     # Turn the (batch,) statistics into (batch, 1, 1, 1) before expanding.
     center = center[:, None, None, None].expand_as(x)
     scale = scale[:, None, None, None].expand_as(x)
     return (x - center) / scale
Ejemplo n.º 36
0
        if not os.path.exists('./layer_record'):
            os.makedirs('./layer_record')
        if os.path.exists('./layer_record/trace_command.sh'):
            os.remove('./layer_record/trace_command.sh')

        delta_std = np.array([])
        delta_mean = np.array([])
        w_std = np.array([])
        w_mean = np.array([])
        oldWeight = {}
        k = 0

        for name, param in list(model.named_parameters()):
            oldWeight[k] = param.data + param.grad.data
            k = k + 1
            delta_std = np.append(delta_std, (torch.std(
                param.grad.data)).cpu().data.numpy())
            delta_mean = np.append(delta_mean, (torch.mean(
                param.grad.data)).cpu().data.numpy())
            w_std = np.append(w_std,
                              (torch.std(param.data)).cpu().data.numpy())
            w_mean = np.append(w_mean,
                               (torch.mean(param.data)).cpu().data.numpy())

        delta_mean = np.append(delta_mean, delta_std, axis=0)
        np.savetxt(delta_distribution, [delta_mean], delimiter=",", fmt='%f')
        w_mean = np.append(w_mean, w_std, axis=0)
        np.savetxt(weight_distribution, [w_mean], delimiter=",", fmt='%f')

        print("weight distribution")
        print(w_mean)
        print("delta distribution")
Ejemplo n.º 37
0
def reduce_std(x, axis=None, keepdim=False):
    """Return the standard deviation of ``x`` over the given axes.

    Args:
        x: input tensor.
        axis: ``None`` (or an empty sequence) to reduce over every dimension,
            a single int, or an iterable of ints.
        keepdim: keep the reduced dimensions with size 1.

    Returns:
        Tensor holding the (unbiased, ``torch.std`` default) standard
        deviation computed jointly over all selected axes. A 0-dim input is
        returned unchanged.
    """
    if axis is None:
        dims = tuple(range(x.dim()))
    elif isinstance(axis, int):
        # `axis=0` is a valid single axis, not a request for "all axes".
        dims = (axis,)
    else:
        dims = tuple(axis) or tuple(range(x.dim()))
    if not dims:
        # Nothing to reduce over (0-dim tensor).
        return x
    # Reduce in one call: chaining per-axis std would give a std of stds,
    # not the standard deviation of the selected elements.
    return torch.std(x, dim=dims, keepdim=keepdim)
Ejemplo n.º 38
0
 def _check_var(module):
     """Assert that the std of ``module.weight`` matches ``config.init_std`` to 2 places."""
     observed_std = torch.std(module.weight).item()
     expected_std = config.init_std
     self.assertAlmostEqual(observed_std, expected_std, places=2)
0
    def prepare_data(self, data, maxlags, normalize, split_timeseries=False, lstm=False):
        """
        Prepares multivariate time series data such that it can be used by a NAVAR model

        Args:
            data: ndarray
                T (time points) x N (variables) input data
            maxlags: int
                Maximum number of time lags
            normalize: bool
                Indicates whether we should normalize every variable
            split_timeseries: int
                If the original time series consists of multiple shorter time series, this argument should indicate the
                original time series length. Otherwise should be zero.
            lstm: bool
                Indicates whether we should prepare the data for a LSTM model (or MLP).
        Returns:
            X: Tensor
                Input for the NAVAR model. For the MLP case its shape is
                (T - maxlags) x N x maxlags (fewer rows if ``split_timeseries``
                removes boundary samples). For the LSTM case it is
                (number of series) x N x (series length - 1).
            Y: Tensor
                Target variables. (T - maxlags) x N for the MLP case (same row
                filtering as X); for the LSTM case the series shifted one step
                ahead, with the same shape as X.
        """
        # T is the total number of time steps, N is the number of variables
        T, N = data.shape
        data = torch.from_numpy(data)

        # normalize every variable to have 0 mean and standard deviation 1
        if normalize:
            data = data / torch.std(data, dim=0)
            data = data - data.mean(dim=0)

        if not lstm:
            # Every time step that has `maxlags` predecessors yields one sample.
            n_samples = T - maxlags
            X = torch.zeros((n_samples, maxlags, N))
            Y = torch.zeros((n_samples, N))

            # X consists of the past K values of Y. Fill every row: stopping
            # one short would leave the last sample as an all-zero input/target.
            for i in range(n_samples):
                X[i, :, :] = data[i:i + maxlags, :]
                Y[i, :] = data[i + maxlags, :]

            # if the data originated from multiple smaller time series, we make sure not to predict over the boundaries.
            if split_timeseries:
                rows_to_be_kept = [
                    row for row in range(X.shape[0])
                    if not any((row + maxlags - lag) % split_timeseries == 0
                               for lag in range(maxlags))
                ]
                # An explicit integer index keeps an empty selection valid.
                keep_index = torch.tensor(rows_to_be_kept, dtype=torch.long)
                X = X[keep_index]
                Y = Y[keep_index]

            X = X.permute(0, 2, 1)

        else:
            if split_timeseries:
                # One row per complete sub-series of length split_timeseries.
                n_series = T // split_timeseries
                X = torch.zeros((n_series, split_timeseries, N))

                # Copy every sub-series, including the last one.
                for i in range(n_series):
                    X[i, :, :] = data[i * split_timeseries:(i + 1) * split_timeseries, :]
            else:
                # initialize our input and target variables
                X = torch.zeros((T, maxlags + 1, N))

                # Row i holds the maxlags + 1 observations preceding time i;
                # positions before the start of the series stay zero.
                for i in range(T):
                    for counter, j in enumerate(range(maxlags + 1, 0, -1)):
                        if i - j >= 0:
                            X[i, counter, :] = data[i - j, :]

            X = X.permute(0, 2, 1)
            # Targets are the inputs shifted one step ahead.
            Y = X[:, :, 1:]
            X = X[:, :, :-1]

        return X, Y
# REINFORCE (score-function) gradient estimate with respect to `logits`:
# draw N samples from Categorical(logits) and collect f(sample) * d log p(sample) / d logits.
grads = []
for i in range (N):

    dist = Categorical(logits=logits)
    samp = dist.sample()
    logprob = dist.log_prob(samp)
    reward = f(samp) 
    # retain_graph=True because `logits` is differentiated again on every iteration
    gradlogprob = torch.autograd.grad(outputs=logprob, inputs=(logits), retain_graph=True)[0]
    grads.append(reward*gradlogprob)
    
print ()
# Stack the N per-sample estimates into an (N, C) tensor and summarise per column.
grads = torch.stack(grads).view(N,C)
# print (grads.shape)
grad_mean_reinforce = torch.mean(grads,dim=0)
grad_std_reinforce = torch.std(grads,dim=0)

print ('REINFORCE')
print ('mean:', grad_mean_reinforce)
print ('std:', grad_std_reinforce)
print ()
# print ('True')
# print ('[-.5478, .1122, .4422]')
# print ('dif:', np.abs(grad_mean_reinforce.numpy() -  true))
# print ()





Ejemplo n.º 41
0
                continue

            traj_states = [t[0].state for t in trajectory]
            traj_actions = [t[0].action for t in trajectory]
            traj_states_v = torch.FloatTensor(traj_states).to(device)
            traj_actions_v = torch.FloatTensor(traj_actions).to(device)
            traj_adv_v, traj_ref_v = calc_adv_ref(trajectory,
                                                  net_crt,
                                                  traj_states_v,
                                                  device=device)
            mu_v = net_act(traj_states_v)
            old_logprob_v = calc_logprob(mu_v, net_act.logstd, traj_actions_v)

            # normalize advantages
            traj_adv_v = (traj_adv_v -
                          torch.mean(traj_adv_v)) / torch.std(traj_adv_v)

            # drop last entry from the trajectory, an our adv and ref value calculated without it
            trajectory = trajectory[:-1]
            old_logprob_v = old_logprob_v[:-1].detach()

            sum_loss_value = 0.0
            sum_loss_policy = 0.0
            count_steps = 0

            for epoch in range(PPO_EPOCHES):
                for batch_ofs in range(0, len(trajectory), PPO_BATCH_SIZE):
                    states_v = traj_states_v[batch_ofs:batch_ofs +
                                             PPO_BATCH_SIZE]
                    actions_v = traj_actions_v[batch_ofs:batch_ofs +
                                               PPO_BATCH_SIZE]
Ejemplo n.º 42
0
def standard_deviation_measurement(measurements):
    """Map every key of ``measurements`` to the std of its tensor as a Python float."""
    return {
        name: torch.std(values).item()
        for name, values in measurements.items()
    }
Ejemplo n.º 43
0
train_y = torch.from_numpy(y_train[index]).cuda()
# from_numpy converts an np.array into a tensor; Tensor() converts a list into a tensor
test_x = torch.from_numpy(X_test).cuda()
test_y = torch.from_numpy(y_test).cuda()

# Weights of a two-layer network, drawn from a normal distribution and scaled
# by 0.01. Because of the `.double() * 0.01` applied after creation, w1 and w2
# are results of operations rather than leaf tensors.
w1 = torch.randn(
    X_train.shape[1], hiden_size1, device=device,
    requires_grad=True).double() * 0.01
w2 = torch.randn(hiden_size1, hiden_size2, device=device,
                 requires_grad=True).double() * 0.01

w1 = w1.cuda()
w2 = w2.cuda()

lr = 0.1
# Standardise with a single scalar mean/std taken over the whole tensor.
# NOTE(review): test_x is scaled with its own statistics, not the training ones -- confirm intended.
train_x = (train_x - torch.mean(train_x)) / torch.std(train_x)
test_x = (test_x - torch.mean(test_x)) / torch.std(test_x)
for i in range(100):
    # Forward pass: linear -> ReLU (clamp at 0) -> linear -> exp.
    res1 = train_x.mm(w1)
    out1 = res1.clamp(min=0)
    res2 = out1.mm(w2)
    out2 = torch.exp(res2)
    # Mean negative log of the normalised exp-score of the true class
    # (a hand-written softmax cross-entropy).
    # NOTE(review): exp() is applied without subtracting the row maximum, so
    # large activations may overflow -- confirm the value range.
    loss = -torch.log(
        out2[range(nums), train_y] / torch.sum(out2, dim=1)).sum() / nums
    print(i, loss)
    # w1/w2 are non-leaf tensors, so .grad is only populated after retain_grad().
    w1.retain_grad()
    w2.retain_grad()
    loss.backward(retain_graph=True)

    # Plain gradient-descent step on the second-layer weights.
    with torch.no_grad():
        w2.data = w2.data - lr * w2.grad.data
Ejemplo n.º 44
0
def run_exp(
    debug,
    subject_id,
    constant_memory,
    data_zero_init,
    set_distribution_to_empirical,
    ot_on_class_dims,
    max_epochs,
    independent_class_dists,
    n_sensors,
    clf_loss,
    final_hz,
    start_ms,
    stop_ms,
    half_before,
    final_fft,
    model_name,
):
    """Train an invertible feature model with a class distribution and log per-epoch results.

    Args:
        debug: if true, no ``car`` is requested from ``load_train_test``, only
            the given sensors are loaded, and the run is shortened to 21 epochs.
        subject_id, n_sensors, start_ms, stop_ms, half_before: forwarded to
            ``load_train_test``.
        constant_memory: forwarded to ``smaller_model`` / ``larger_model``.
        data_zero_init: if true, ``feature_model.data_init`` is called on the
            concatenated inputs of both classes.
        set_distribution_to_empirical: if true, each class distribution is set
            to the mean/std of the model outputs for that class.
        ot_on_class_dims: the OT trainer only receives ``loss_on_outs=True``
            when this is true and the warm-up epochs have passed.
        max_epochs: number of epochs; ``max_epochs + 1`` epochs are run.
        independent_class_dists: selects ``TwoClassIndependentDist`` instead
            of ``TwoClassDist``.
        clf_loss: if not None, a ``SubspaceClassifier`` is trained alongside
            and this value is passed to ``CLFTrainer`` as ``outs_loss``.
        final_hz: must be 64 or 256; selects the model family.
        final_fft: forwarded to ``smaller_model`` / ``larger_model``.
        model_name: ``'old_invertible'`` or ``'deep_invertible'``; only read
            when ``final_hz == 256``.

    Returns:
        Tuple ``(df, feature_model, class_dist)`` where ``df`` is a pandas
        DataFrame with one row of results per epoch.
    """
    assert final_hz in [64, 256]

    car = not debug
    train_inputs, test_inputs = load_train_test(
        subject_id,
        car,
        n_sensors,
        final_hz,
        start_ms,
        stop_ms,
        half_before,
        only_load_given_sensors=debug,
    )

    cuda = True
    if cuda:
        train_inputs = [i.cuda() for i in train_inputs]
        test_inputs = [i.cuda() for i in test_inputs]

    from reversible2.graph import Node
    from reversible2.branching import CatChans, ChunkChans, Select
    from reversible2.constantmemory import graph_to_constant_memory

    from copy import deepcopy
    from reversible2.distribution import TwoClassDist
    from reversible2.wrap_invertible import WrapInvertible
    from reversible2.blocks import dense_add_no_switch, conv_add_3x3_no_switch
    from reversible2.rfft import RFFT, Interleave
    from reversible2.util import set_random_seeds
    import torch as th
    from reversible2.splitter import SubsampleSplitter

    set_random_seeds(2019011641, cuda)
    n_chans = train_inputs[0].shape[1]
    n_time = train_inputs[0].shape[2]
    if final_hz == 64:
        feature_model = smaller_model(n_chans, n_time, final_fft,
                                      constant_memory)
    else:
        assert final_hz == 256
        if model_name == 'old_invertible':
            feature_model = larger_model(n_chans, n_time, final_fft,
                                         constant_memory)
        elif model_name == 'deep_invertible':
            n_chan_pad = 0
            filter_length_time = 11
            feature_model = deep_invertible(n_chans, n_time, n_chan_pad,
                                            filter_length_time)
            from reversible2.view_as import ViewAs
            feature_model.add_module('flatten',
                                     ViewAs((-1, 176, 32), (-1, 5632)))
            feature_model = Node(None, feature_model)
        else:
            assert False

    if cuda:
        feature_model.cuda()
    feature_model.eval()

    from reversible2.constantmemory import clear_ctx_dicts

    if data_zero_init:
        feature_model.data_init(
            th.cat((train_inputs[0], train_inputs[1]), dim=0))

    # Check that forward + inverse is really identical
    t_out = feature_model(train_inputs[0][:2])
    inverted = feature_model.invert(t_out)
    clear_ctx_dicts(feature_model)
    assert th.allclose(train_inputs[0][:2], inverted, rtol=1e-3, atol=1e-4)
    from reversible2.ot_exact import ot_euclidean_loss_for_samples

    if independent_class_dists:
        class_dist = TwoClassIndependentDist(
            np.prod(train_inputs[0].size()[1:]))
    else:
        class_dist = TwoClassDist(2,
                                  np.prod(train_inputs[0].size()[1:]) - 2,
                                  [0, 1])
    class_dist.cuda()

    if set_distribution_to_empirical:
        for i_class in range(2):
            with th.no_grad():
                this_outs = feature_model(train_inputs[i_class])
                mean = th.mean(this_outs, dim=0)
                std = th.std(this_outs, dim=0)
                class_dist.set_mean_std(i_class, mean, std)
                # Just check
                setted_mean, setted_std = class_dist.get_mean_std(i_class)
                assert th.allclose(mean, setted_mean)
                assert th.allclose(std, setted_std)
        clear_ctx_dicts(feature_model)

    optim_model = th.optim.Adam(feature_model.parameters(),
                                lr=1e-3,
                                betas=(0.9, 0.999))
    optim_dist = th.optim.Adam(class_dist.parameters(),
                               lr=1e-2,
                               betas=(0.9, 0.999))

    if clf_loss is not None:
        clf = SubspaceClassifier(2, 10, np.prod(train_inputs[0].shape[1:]))
        clf.cuda()

        optim_clf = th.optim.Adam(clf.parameters(), lr=1e-3)
        clf_trainer = CLFTrainer(
            feature_model,
            clf,
            class_dist,
            optim_model,
            optim_clf,
            optim_dist,
            outs_loss=clf_loss,
        )

    import pandas as pd

    # Rows are collected in a list and turned into a DataFrame once at the
    # end; DataFrame.append was removed in pandas 2.0.
    epoch_rows = []

    from reversible2.training import OTTrainer

    trainer = OTTrainer(feature_model, class_dist, optim_model, optim_dist)

    from reversible2.timer import Timer

    i_start_epoch_out = int(np.round(max_epochs * 0.4)) + 1
    n_epochs = max_epochs + 1  # +1 for historical reasons.
    if debug:
        n_epochs = 21
        i_start_epoch_out = 5
    # Roughly 20 reporting epochs per run; never 0, so the modulo below is
    # also defined for runs shorter than 20 epochs.
    report_every = max(1, n_epochs // 20)
    for i_epoch in range(n_epochs):
        epoch_row = {}
        with Timer(name="EpochLoop", verbose=False) as loop_time:
            loss_on_outs = i_epoch >= i_start_epoch_out
            result = trainer.train(train_inputs,
                                   loss_on_outs=(loss_on_outs
                                                 and ot_on_class_dims))
            if clf_loss is not None:
                result_clf = clf_trainer.train(train_inputs,
                                               loss_on_outs=loss_on_outs)
                epoch_row.update(result_clf)

        epoch_row.update(result)
        epoch_row["runtime"] = loop_time.elapsed_secs * 1000
        acc_results = compute_accs(feature_model, train_inputs, test_inputs,
                                   class_dist)
        epoch_row.update(acc_results)
        if clf_loss is not None:
            clf_accs = compute_clf_accs(clf, feature_model, train_inputs,
                                        test_inputs)
            epoch_row.update(clf_accs)
        if i_epoch % report_every != 0:
            epoch_rows.append(epoch_row)
            # otherwise add ot loss in
        else:
            for i_class in range(len(train_inputs)):
                with th.no_grad():
                    class_ins = train_inputs[i_class]
                    samples = class_dist.get_samples(
                        i_class,
                        len(train_inputs[i_class]) * 4)
                    inverted = feature_model.invert(samples)
                    clear_ctx_dicts(feature_model)
                    ot_loss_in = ot_euclidean_loss_for_samples(
                        class_ins.view(class_ins.shape[0], -1),
                        inverted.view(inverted.shape[0],
                                      -1)[:(len(class_ins))],
                    )
                    epoch_row["ot_loss_in_{:d}".format(
                        i_class)] = ot_loss_in.item()
            epoch_rows.append(epoch_row)
            print("Epoch {:d} of {:d}".format(i_epoch, n_epochs))
            print("Loop Time: {:.0f} ms".format(loop_time.elapsed_secs * 1000))

    df = pd.DataFrame(epoch_rows)
    return df, feature_model, class_dist
    
    MUAPs = G_DC(z)
    MUAPs = torch.matmul(MUAPs, coeff_matrix) # 对每个MUAPs进行100Hz的高通滤波
    MUAPs_logits = D_DC(MUAPs)

    MUAPs = torch.squeeze(MUAPs)
    if GEN_SEARCH_NUM == 1:
        MUAPs = torch.unsqueeze(MUAPs, 0)

    if USE_ABS:
        reconstruct_EMG = torch.matmul(A, MUAPs) # torch.abs    
    else:
        reconstruct_EMG = torch.matmul(A, MUAPs)

    if batch_size > 1:
        penalty_A = torch.mean(torch.std(A, dim=0)) - torch.mean(torch.abs(A)) # 第一项希望A尽可能相同,第二项希望A的值不要是零
        #loss = LAMBDA * mseloss(reconstruct_EMG, EMG_mvc) - LAMBDA_1 * torch.mean(MUAPs_logits) + LAMBDA_2 * penalty_A
        loss = LAMBDA * l1loss(reconstruct_EMG, EMG_mvc) - LAMBDA_1 * torch.mean(MUAPs_logits) + LAMBDA_2 * penalty_A
    else:
        penalty_A = - torch.mean(torch.abs(A)) # 希望A的值越大越好
        #loss = LAMBDA * mseloss(reconstruct_EMG, EMG_mvc) - LAMBDA_1 * torch.mean(MUAPs_logits) + LAMBDA_2 * penalty_A
        loss = LAMBDA * l1loss(reconstruct_EMG, EMG_mvc) - LAMBDA_1 * torch.mean(MUAPs_logits) + LAMBDA_2 * penalty_A
    
    optim_A.zero_grad()
    optim_z.zero_grad()
    loss.backward()

    if epoch % UPDATE_z_EVERY == 0:
        optim_z.step()
        if not GAUSSIAN_NOISE:
            with torch.no_grad():
            curr_state_feat = np.array(state_features)
            diff = curr_state_feat - prev_state_feat
            print(np.sum(diff))
            prev_state_feat = curr_state_feat
            #todo save as torch dataset, better for loading and shuffling
            preprocessed_data_input.append(state_features)
            preprocessed_data_output.append(
                [distance])  #yes it needs to be a nested list/array
            #end with

            if (raw_data_idx + 1) % 1000 == 0:
                print("saved data of size = ", raw_data_idx + 1)
                data_input = torch.tensor(preprocessed_data_input,
                                          dtype=torch.float)
                input_mean = torch.mean(data_input, 0)
                input_std = torch.std(data_input, 0) + EPSILON
                data_input = (data_input - input_mean) / input_std
                print("printout of 10 data points")
                for i in range(1, 10):
                    " ".join([str(x) for x in preprocessed_data_input[i]])
                data_output = torch.tensor(preprocessed_data_output,
                                           dtype=torch.float)
                output_mean = 0.0  # we prefer keeping the original distance values, scaling hurts the heuristic computation
                output_std = 1
                preprocessed_torch_dataset = TensorDataset(
                    data_input, data_output)
                # --now we have the training data in the right format, save with the mean and std dev for later inference
                with open(preprocessed_data_save_file, "wb") as dest:
                    pickle.dump(preprocessed_torch_dataset, dest)
                    pickle.dump(input_mean, dest)
                    pickle.dump(input_std, dest)
                    best_reward = rewards

            trajectory.append(exp)
            if len(trajectory) < TRAJECTORY_SIZE:
                continue

            traj_states = [t[0].state for t in trajectory]
            traj_actions = [t[0].action for t in trajectory]
            traj_states_v = torch.FloatTensor(traj_states).to(device)
            traj_actions_v = torch.FloatTensor(traj_actions).to(device)
            traj_adv_v, traj_ref_v = calc_adv_ref(trajectory, net_crt, traj_states_v, device=device)
            mu_v = net_act(traj_states_v)
            old_logprob_v = calc_logprob(mu_v, net_act.logstd, traj_actions_v)

            # normalize advantages
            traj_adv_v = (traj_adv_v - torch.mean(traj_adv_v)) / torch.std(traj_adv_v)

            # drop last entry from the trajectory, an our adv and ref value calculated without it
            trajectory = trajectory[:-1]
            old_logprob_v = old_logprob_v[:-1].detach()
            traj_states_v = traj_states_v[:-1]
            traj_actions_v = traj_actions_v[:-1]
            sum_loss_value = 0.0
            sum_loss_policy = 0.0
            count_steps = 0

            # critic step
            opt_crt.zero_grad()
            value_v = net_crt(traj_states_v)
            loss_value_v = F.mse_loss(value_v.squeeze(-1), traj_ref_v)
            loss_value_v.backward()