def log_uniform_candidate_sampler(self, targets, choice_func=_choice):
        """Sample candidate ids and compute expected counts for sampled softmax.

        Mirrors the TensorFlow range sampler, see:
        https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/range_sampler.h
        https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/range_sampler.cc

        Args:
            targets: ids of the true classes, shape ``(batch_size,)``.
            choice_func: callable invoked as
                ``choice_func(self._num_words, self._num_samples)`` that returns
                ``(numpy array of sampled ids, number of tries)``.

        Returns:
            Tuple ``(sampled_ids, target_expected_count, sampled_expected_count)``
            with shapes ``(n_samples,)``, ``(batch_size,)`` and ``(n_samples,)``.
        """
        np_sampled_ids, num_tries = choice_func(self._num_words, self._num_samples)

        sampled_ids = torch.from_numpy(np_sampled_ids).to(targets.device)

        def _expected_count(ids):
            # P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)
            probs = torch.log((ids.float() + 2.0) / (ids.float() + 1.0)) / self._log_num_words_p1
            # Expected count = 1 - (1 - p)^num_tries = -expm1(num_tries * log1p(-p)).
            # expm1 avoids the cancellation that exp(x) - 1 suffers when p is tiny.
            return -torch.expm1(num_tries * torch.log1p(-probs))

        target_expected_count = _expected_count(targets)
        sampled_expected_count = _expected_count(sampled_ids)

        sampled_ids.requires_grad_(False)
        target_expected_count.requires_grad_(False)
        sampled_expected_count.requires_grad_(False)

        return sampled_ids, target_expected_count, sampled_expected_count
Esempio n. 2
0
def _kl_uniform_gumbel(p, q):
    common_term = q.scale / (p.high - p.low)
    high_loc_diff = (p.high - q.loc) / q.scale
    low_loc_diff = (p.low - q.loc) / q.scale
    t1 = common_term.log() + 0.5 * (high_loc_diff + low_loc_diff)
    t2 = common_term * (torch.exp(-high_loc_diff) - torch.exp(-low_loc_diff))
    return t1 - t2
Esempio n. 3
0
def bbox_transform_inv(boxes, deltas):
  """Apply regression deltas to boxes and return the predicted boxes.

  ``boxes`` is indexed as ``[:, 0..3]`` = (x1, y1, x2, y2); ``deltas`` holds
  (dx, dy, dw, dh) groups every four columns. Both inputs should be on the
  same device. An empty ``boxes`` yields a zeroed, detached copy of ``deltas``.
  """
  num_boxes = len(boxes)
  if num_boxes == 0:
    return deltas.detach() * 0

  # Box sizes and centres, kept as column vectors so they broadcast over classes.
  box_w = (boxes[:, 2] - boxes[:, 0] + 1.0).unsqueeze(1)
  box_h = (boxes[:, 3] - boxes[:, 1] + 1.0).unsqueeze(1)
  center_x = boxes[:, 0].unsqueeze(1) + 0.5 * box_w
  center_y = boxes[:, 1].unsqueeze(1) + 0.5 * box_h

  new_center_x = deltas[:, 0::4] * box_w + center_x
  new_center_y = deltas[:, 1::4] * box_h + center_y
  new_w = torch.exp(deltas[:, 2::4]) * box_w
  new_h = torch.exp(deltas[:, 3::4]) * box_h

  corners = [
    new_center_x - 0.5 * new_w,
    new_center_y - 0.5 * new_h,
    new_center_x + 0.5 * new_w,
    new_center_y + 0.5 * new_h,
  ]
  # Stack on a trailing axis, then flatten so each group of four columns is one box.
  return torch.stack(corners, 2).view(num_boxes, -1)
Esempio n. 4
0
 def model(num_particles):
     """Register params "q3"/"q4" and sample sites "z" and "y" inside the "particles" iarange."""
     with pyro.iarange("particles", num_particles):
         q3_init = torch.tensor(pi3, requires_grad=True)
         q3 = pyro.param("q3", q3_init)
         q4_init = torch.tensor(0.5 * (pi1 + pi2), requires_grad=True)
         q4 = pyro.param("q4", q4_init)

         latent_dist = dist.Normal(q3, 1.0).expand_by([num_particles])
         z = pyro.sample("z", latent_dist)

         # Logistic squashing of z, written out explicitly.
         squashed = torch.exp(z) / (1.0 + torch.exp(z))
         pyro.sample("y", dist.Bernoulli(q4 * squashed))
Esempio n. 5
0
    def forward(self, feat, right, wrong, batch_wrong, fake=None, fake_diff_mask=None):
        """Compute the ranking loss of the right answer against negatives.

        Args:
            feat: features, viewed as ``(-1, self.ninp, 1)``.
            right: embedding of the correct answer, viewed as ``(-1, 1, self.ninp)``.
            wrong: negative embeddings, batch-multiplied with ``feat``.
            batch_wrong: additional negatives, batch-multiplied with ``feat``.
            fake: optional generated-answer embeddings; ``None`` disables the
                fake margin term.
            fake_diff_mask: mask selecting which fake scores contribute
                (required when ``fake`` is given).

        Returns:
            ``loss`` when ``fake`` is ``None``; otherwise the tuple
            ``(loss, fake_loss_per_example)`` where the second item is a float.
        """
        batch_size = feat.size(0)
        # `if fake:` is ambiguous (and raises) for multi-element tensors, so
        # presence is tested explicitly.
        has_fake = fake is not None

        feat = feat.view(-1, self.ninp, 1)
        right_dis = torch.bmm(right.view(-1, 1, self.ninp), feat)
        wrong_dis = torch.bmm(wrong, feat)
        batch_wrong_dis = torch.bmm(batch_wrong, feat)

        wrong_score = torch.sum(torch.exp(wrong_dis - right_dis.expand_as(wrong_dis)), 1) \
                + torch.sum(torch.exp(batch_wrong_dis - right_dis.expand_as(batch_wrong_dis)), 1)

        loss_dis = torch.sum(torch.log(wrong_score + 1))
        loss_norm = right.norm() + feat.norm() + wrong.norm() + batch_wrong.norm()

        loss_fake = None
        if has_fake:
            fake_dis = torch.bmm(fake.view(-1, 1, self.ninp), feat)
            fake_score = torch.masked_select(torch.exp(fake_dis - right_dis), fake_diff_mask)

            # Only penalise fakes whose log-score exceeds the margin.
            margin_score = F.relu(torch.log(fake_score + 1) - self.margin)
            loss_fake = torch.sum(margin_score)
            loss_dis = loss_dis + loss_fake
            loss_norm = loss_norm + fake.norm()

        loss = (loss_dis + 0.1 * loss_norm) / batch_size
        if has_fake:
            # .item() works for 0-dim tensors, where the old `.data[0]` fails.
            return loss, loss_fake.item() / batch_size
        return loss
Esempio n. 6
0
        def guide():
            """Guide with two dependent Normal sites, "mu_latent" and "mu_latent_prime".

            Registers five parameters with pyro.param, samples "mu_latent", then
            samples "mu_latent_prime" whose mean depends on that draw. Returns the
            "mu_latent" sample. Relies on names from the enclosing scope
            (self, repa1, repa2, use_decaying_avg_baseline, mu_prime_baseline).
            """
            # Initial values are offsets from self.analytic_mu_n / self.analytic_log_sig_n.
            mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.334 * torch.ones(2),
                                               requires_grad=True))
            log_sig_q = pyro.param("log_sig_q", Variable(
                                   self.analytic_log_sig_n.data - 0.29 * torch.ones(2),
                                   requires_grad=True))
            mu_q_prime = pyro.param("mu_q_prime", Variable(torch.Tensor([-0.34, 0.52]),
                                    requires_grad=True))
            kappa_q = pyro.param("kappa_q", Variable(torch.Tensor([0.74]),
                                 requires_grad=True))
            log_sig_q_prime = pyro.param("log_sig_q_prime",
                                         Variable(-0.5 * torch.log(1.2 * self.lam0.data),
                                                  requires_grad=True))
            # Scales are stored in log space and exponentiated before use.
            sig_q, sig_q_prime = torch.exp(log_sig_q), torch.exp(log_sig_q_prime)
            mu_latent_dist = dist.Normal(mu_q, sig_q, reparameterized=repa2)
            mu_latent = pyro.sample("mu_latent", mu_latent_dist,
                                    baseline=dict(use_decaying_avg_baseline=use_decaying_avg_baseline))
            # Mean of the second site is an affine function of the first draw.
            mu_latent_prime_dist = dist.Normal(kappa_q.expand_as(mu_latent) * mu_latent + mu_q_prime,
                                               sig_q_prime,
                                               reparameterized=repa1)
            # The baseline for this site receives mu_latent as its input.
            pyro.sample("mu_latent_prime",
                        mu_latent_prime_dist,
                        baseline=dict(nn_baseline=mu_prime_baseline,
                                      nn_baseline_input=mu_latent,
                                      use_decaying_avg_baseline=use_decaying_avg_baseline))

            return mu_latent
Esempio n. 7
0
 def guide():
     """Register ``pt_guide`` as "mymodule" and sample "mu_latent" from a Normal."""
     pyro.module("mymodule", pt_guide)

     # Both quantities are stored in log space on pt_guide.
     mu_q = torch.exp(pt_guide.mu_q_log)
     tau_q = torch.exp(pt_guide.tau_q_log)

     # The Normal scale is tau_q ** -0.5.
     sigma = torch.pow(tau_q, -0.5)
     latent_dist = dist.Normal(mu_q, sigma, reparameterized=reparameterized)
     pyro.sample("mu_latent",
                 latent_dist,
                 baseline={"use_decaying_avg_baseline": True})
Esempio n. 8
0
File: gan.py Progetto: rdevon/cortex
def get_positive_expectation(p_samples, measure, average=True):
    """Compute the positive-sample term for the given divergence ``measure``.

    Args:
        p_samples: tensor of scores for positive samples.
        measure: one of 'GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1';
            anything else is handed to ``raise_measure_error``.
        average: when true return the mean, otherwise the element-wise tensor.
    """
    log_2 = math.log(2.)

    # One lazily-evaluated formula per measure; only the selected one is computed.
    formulas = {
        'GAN': lambda: - F.softplus(-p_samples),
        'JSD': lambda: log_2 - F.softplus(- p_samples),
        'X2': lambda: p_samples ** 2,
        'KL': lambda: p_samples + 1.,
        'RKL': lambda: -torch.exp(-p_samples),
        'DV': lambda: p_samples,
        'H2': lambda: 1. - torch.exp(-p_samples),
        'W1': lambda: p_samples,
    }

    compute = formulas.get(measure)
    if compute is None:
        raise_measure_error(measure)
    Ep = compute()

    return Ep.mean() if average else Ep
    def predictive_elbo(self, x, k, s):
        """Return the mean ELBO over ``s`` weight samples, ignoring the pW/qW terms.

        Args:
            x: input batch; its first dimension is stored in ``self.B``.
            k: number of z samples (particles P) per weight sample.
            s: number of W samples.

        Returns:
            Scalar tensor: the mean of the stacked per-weight-sample ELBOs.
        """
        # No pW or qW
        self.B = x.size()[0]  # batch size

        elbo1s = []
        for _ in range(s):
            # The log-probabilities of W are returned as well but not used here.
            Ws, _, _ = self.sample_W()

            mu, logvar = self.encode(x)  # [B,Z]
            z, logpz, logqz = self.sample_z(mu, logvar, k=k)  # [P,B,Z], [P,B]

            x_hat = self.decode(Ws, z)  # [P,B,X]
            logpx = log_bernoulli(x_hat, x)  # [P,B]

            elbo = logpx + logpz - logqz  # [P,B]
            if k > 1:
                # Log-mean-exp over particles, shifted by the max for stability.
                max_ = torch.max(elbo, 0)[0]  # [B]
                elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]
            elbo1s.append(elbo)

        elbo1s = torch.stack(elbo1s)  # [S,B]

        # NOTE(review): the original also computed a log-mean-exp over the S
        # dimension when s > 1 but never used the result; the returned value has
        # always been the plain mean below. Confirm whether importance weighting
        # over W samples was intended.
        return torch.mean(elbo1s)  # [1]
Esempio n. 10
0
 def guide(num_particles):
     """Register params "q1"/"q2" and sample sites "z" and "y" inside the "particles" iarange."""
     q1_init = torch.tensor(pi1, requires_grad=True)
     q1 = pyro.param("q1", q1_init)
     q2_init = torch.tensor(pi2, requires_grad=True)
     q2 = pyro.param("q2", q2_init)

     with pyro.iarange("particles", num_particles):
         latent_dist = dist.Normal(q2, 1.0).expand_by([num_particles])
         z = pyro.sample("z", latent_dist)

         # Logistic squashing of z, written out explicitly.
         squashed = torch.exp(z) / (1.0 + torch.exp(z))
         pyro.sample("y", dist.Bernoulli(q1 * squashed))
Esempio n. 11
0
File: gan.py Progetto: rdevon/cortex
def get_negative_expectation(q_samples, measure, average=True):
    """Compute the negative-sample term for the given divergence ``measure``.

    Args:
        q_samples: tensor of scores for negative samples.
        measure: one of 'GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1';
            anything else is handed to ``raise_measure_error``.
        average: when true return the mean, otherwise the element-wise tensor.
    """
    log_2 = math.log(2.)

    # One lazily-evaluated formula per measure; only the selected one is computed.
    formulas = {
        'GAN': lambda: F.softplus(-q_samples) + q_samples,
        'JSD': lambda: F.softplus(-q_samples) + q_samples - log_2,
        'X2': lambda: -0.5 * ((torch.sqrt(q_samples ** 2) + 1.) ** 2),
        'KL': lambda: torch.exp(q_samples),
        'RKL': lambda: q_samples - 1.,
        'DV': lambda: log_sum_exp(q_samples, 0) - math.log(q_samples.size(0)),
        'H2': lambda: torch.exp(q_samples) - 1.,
        'W1': lambda: q_samples,
    }

    compute = formulas.get(measure)
    if compute is None:
        raise_measure_error(measure)
    Eq = compute()

    return Eq.mean() if average else Eq
Esempio n. 12
0
    def encode_and_logprob(self, x):
        """Encode ``x``, draw one reparameterised sample and return it with ``logq + logp``.

        All but the last entry of ``self.first_half_weights`` are applied with
        ``self.act_func``; the last entry produces the mean and ``self.q_logvar``
        the log-variance from the same hidden activation.
        """
        hidden = x
        n_hidden_layers = len(self.first_half_weights) - 1
        for layer_idx in range(n_hidden_layers):
            layer = self.first_half_weights[layer_idx]
            hidden = self.act_func(layer(hidden))

        mean = self.first_half_weights[-1](hidden)
        logvar = self.q_logvar(hidden)

        # A single noise row of shape (1, z_size) is broadcast over the batch.
        noise = Variable(torch.randn(1, self.z_size))
        sample = (torch.exp(.5 * logvar) * noise) + mean

        squared_residual = (sample - mean).pow(2)
        logq = -torch.mean(logvar.sum(1) + (squared_residual / torch.exp(logvar)).sum(1))
        logp = torch.mean(sample.pow(2).sum(1))

        return sample, logq + logp
Esempio n. 13
0
    def sample(self, mu, logvar, k):
        """Draw ``k`` reparameterised samples and score them under prior and posterior.

        Returns ``(z, logpz, logqz)``; per the shape comments, z is [P,B,Z] and
        the log-probabilities are [P,B]. When CUDA is available the noise and
        prior parameters are moved to the GPU and ``logqz`` is evaluated on
        detached copies of ``mu``/``logvar``; on CPU it uses them directly.
        """
        on_gpu = torch.cuda.is_available()

        eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_())  # [P,B,Z]
        prior_mean = Variable(torch.zeros(self.B, self.z_size))
        prior_logvar = Variable(torch.zeros(self.B, self.z_size))
        if on_gpu:
            eps = eps.cuda()
            prior_mean = prior_mean.cuda()
            prior_logvar = prior_logvar.cuda()

        z = eps.mul(torch.exp(.5 * logvar)) + mu  # [P,B,Z]
        logpz = lognormal(z, prior_mean, prior_logvar)  # [P,B]

        if on_gpu:
            # Posterior parameters are re-wrapped from .data in the GPU path only.
            logqz = lognormal(z, Variable(mu.data), Variable(logvar.data))
        else:
            logqz = lognormal(z, mu, logvar)
        return z, logpz, logqz
Esempio n. 14
0
    def forward(self, true_binary, rule_masks, raw_logits):
        """Compute the loss selected by ``cmd_args.loss_type``.

        Args:
            true_binary: target indicators, same shape as ``raw_logits``.
            rule_masks: mask multiplied into the exponentiated logits.
            raw_logits: unnormalised scores; normalisation is over dimension 2.

        Returns:
            The loss for 'binary', 'perplexity' or 'vanilla'.

        Raises:
            NotImplementedError: for any other ``cmd_args.loss_type``.
        """
        # torch.* is called directly; the original reached it through `F.torch`,
        # which only works because torch.nn.functional happens to import torch.
        if cmd_args.loss_type == 'binary':
            exp_pred = torch.exp(raw_logits) * rule_masks

            norm = torch.sum(exp_pred, 2, keepdim=True)
            prob = torch.div(exp_pred, norm)

            return F.binary_cross_entropy(prob, true_binary) * cmd_args.max_decode_steps

        if cmd_args.loss_type == 'perplexity':
            return my_perp_loss(true_binary, rule_masks, raw_logits)

        if cmd_args.loss_type == 'vanilla':
            # The small constant keeps the normaliser non-zero when every rule is masked.
            exp_pred = torch.exp(raw_logits) * rule_masks + 1e-30
            norm = torch.sum(exp_pred, 2, keepdim=True)
            prob = torch.div(exp_pred, norm)

            ll = torch.abs(torch.sum(true_binary * prob, 2))
            # Positions whose last mask column is set contribute nothing.
            mask = 1 - rule_masks[:, :, -1]
            logll = mask * torch.log(ll)

            loss = -torch.sum(logll) / true_binary.size()[1]

            return loss
        print('unknown loss type %s' % cmd_args.loss_type)
        raise NotImplementedError
Esempio n. 15
0
 def guide():
     """Register "alpha_q_log"/"beta_q_log", sample "p_latent" and declare the "aaa" data map."""
     alpha_init = Variable(self.log_alpha_n.data + 0.17, requires_grad=True)
     alpha_q_log = pyro.param("alpha_q_log", alpha_init)
     beta_init = Variable(self.log_beta_n.data - 0.143, requires_grad=True)
     beta_q_log = pyro.param("beta_q_log", beta_init)

     # Parameters live in log space; exponentiate before handing them to the distribution.
     alpha_q = torch.exp(alpha_q_log)
     beta_q = torch.exp(beta_q_log)
     pyro.sample("p_latent", dist.beta, alpha_q, beta_q)

     # The mapped function does nothing.
     pyro.map_data("aaa", self.data, lambda i, x: None, batch_size=self.batch_size)
Esempio n. 16
0
def mmd(Mxx, Mxy, Myy, sigma):
    """Return the square root of the kernel MMD estimate built from three distance matrices.

    Each matrix is turned into kernel values via
    ``exp(-M / (scale * 2 * sigma**2))`` where ``scale`` is the mean of ``Mxx``.

    Args:
        Mxx, Mxy, Myy: tensors of pairwise distances (within x, across, within y).
        sigma: kernel bandwidth multiplier.

    Returns:
        A Python float. The estimate ``mean(Kxx) + mean(Kyy) - 2 * mean(Kxy)``
        can come out negative; it is clamped to zero before the square root so
        that ``math.sqrt`` does not raise ``ValueError``.
    """
    scale = Mxx.mean()
    bandwidth = scale * 2 * sigma * sigma
    Kxx = torch.exp(-Mxx / bandwidth)
    Kxy = torch.exp(-Mxy / bandwidth)
    Kyy = torch.exp(-Myy / bandwidth)

    squared = float(Kxx.mean() + Kyy.mean() - 2 * Kxy.mean())
    return math.sqrt(max(squared, 0.0))
Esempio n. 17
0
def optimize_cnt(worm_img, skel_prev, skel_width, segment_length,  n_epochs = 1000):
    """Refine skeleton coordinates by gradient descent against an image.

    Starting from ``skel_prev``, Adam (lr=0.1) minimises a weighted sum of an
    image-agreement term, a segment-length penalty, and the norms of the first
    and second differences of the skeleton points.

    Args:
        worm_img: image tensor multiplied element-wise with the rendered map.
        skel_prev: initial skeleton; its ``.data`` seeds the optimised parameter.
        skel_width: passed through to ``get_skel_map``.
        segment_length: reference length for consecutive-point distances.
        n_epochs: number of optimisation steps.

    Returns:
        ``(skel_r, skel_map)``: the optimised parameter and the map rendered in
        the last iteration (``None`` when ``n_epochs`` is 0).
    """
    # This is the variable that is going to be modified.
    skel_r = torch.nn.Parameter(skel_prev.data)

    optimizer = optim.Adam([skel_r], lr=0.1)
    # Defined up front so the return below is valid even with zero epochs.
    skel_map = None
    for ii in range(n_epochs):
        skel_map = get_skel_map(skel_r, skel_width)

        p_w = (skel_map*worm_img)

        # 1 - map and 1 - image, built on fresh tensors so the inputs are untouched.
        skel_map_inv = (-skel_map).add_(1)
        worm_img_inv = (-worm_img).add_(1)
        p_bng = (skel_map_inv*worm_img_inv)

        # The 1e-3 offset keeps the logarithms finite at zero.
        c_loss = -(p_bng*torch.log(p_w + 1.e-3) + p_w*torch.log(p_bng + 1.e-3)).mean()

        ds = skel_r[1:] - skel_r[:-1]
        dds = ds[1:] - ds[:-1]

        cont_loss = ds.norm(p=2)
        curv_loss = dds.norm(p=2)

        # Exponential walls penalise segments shorter than 0.9x or longer than
        # 1.5x the reference length.
        seg_sizes = ((ds).pow(2)).sum(1).sqrt()
        d1 = seg_sizes-segment_length*0.9
        d2 = seg_sizes-segment_length*1.5
        seg_loss = (torch.exp(-d1) + torch.exp(d2)).mean()

        loss = 100*c_loss + 50*seg_loss + cont_loss +  curv_loss
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

        if ii % 250 == 0:
            # .item() reads 0-dim tensors, where the old `.data[0]` fails.
            print(ii,
                  loss.item(),
                  c_loss.item(),
                  seg_loss.item(),
                  cont_loss.item(),
                  curv_loss.item()
                  )
    return skel_r, skel_map
Esempio n. 18
0
 def guide():
     """Register "alpha_q_log"/"beta_q_log" and sample "lambda_latent" from ``dist.gamma``."""
     alpha_init = Variable(self.log_alpha_n.data + 0.17, requires_grad=True)
     alpha_q_log = pyro.param("alpha_q_log", alpha_init)
     beta_init = Variable(self.log_beta_n.data - 0.143, requires_grad=True)
     beta_q_log = pyro.param("beta_q_log", beta_init)

     # Parameters live in log space; exponentiate before sampling.
     alpha_q = torch.exp(alpha_q_log)
     beta_q = torch.exp(beta_q_log)
     pyro.sample("lambda_latent", dist.gamma, alpha_q, beta_q)
Esempio n. 19
0
 def guide():
     """Register "alpha_q_log"/"beta_q_log", sample "p_latent" with a decaying-average baseline and return it."""
     alpha_init = Variable(self.log_alpha_n.data + 0.17, requires_grad=True)
     alpha_q_log = pyro.param("alpha_q_log", alpha_init)
     beta_init = Variable(self.log_beta_n.data - 0.143, requires_grad=True)
     beta_q_log = pyro.param("beta_q_log", beta_init)

     # Parameters live in log space; exponentiate before sampling.
     alpha_q = torch.exp(alpha_q_log)
     beta_q = torch.exp(beta_q_log)
     return pyro.sample("p_latent", dist.beta, alpha_q, beta_q,
                        baseline={"use_decaying_avg_baseline": True})
Esempio n. 20
0
 def guide():
     """Register "alpha_q_log"/"beta_q_log" (tagged "guide") and sample "lambda_latent"."""
     # Clones keep the stored initial values untouched by optimisation.
     alpha_init = Variable(self.alpha_q_log_0.clone(), requires_grad=True)
     alpha_q_log = pyro.param("alpha_q_log", alpha_init, tags="guide")
     beta_init = Variable(self.beta_q_log_0.clone(), requires_grad=True)
     beta_q_log = pyro.param("beta_q_log", beta_init, tags="guide")

     alpha_q = torch.exp(alpha_q_log)
     beta_q = torch.exp(beta_q_log)
     pyro.sample("lambda_latent", dist.gamma, alpha_q, beta_q)
    def sample(self, fc_feats, att_feats, opt={}):
        """Decode a sequence step by step, greedily or by multinomial sampling.

        Options read from ``opt``: 'sample_max' (default 1), 'beam_size'
        (default 1) and 'temperature' (default 1.0). A beam size above 1
        delegates to ``self.sample_beam``.

        Returns a pair of tensors built by concatenating, along dimension 1,
        the chosen token ids and their log-probabilities for every step.
        """
        # NOTE(review): `opt={}` is a shared mutable default; it is only read
        # here but is also handed to sample_beam — confirm nothing mutates it.
        sample_max = opt.get('sample_max', 1)
        beam_size = opt.get('beam_size', 1)
        temperature = opt.get('temperature', 1.0)
        if beam_size > 1:
            return self.sample_beam(fc_feats, att_feats, opt)

        batch_size = fc_feats.size(0)
        state = self.init_hidden(batch_size)

        # embed fc and att feats
        fc_feats = self.fc_embed(fc_feats)
        _att_feats = self.att_embed(att_feats.view(-1, self.att_feat_size))
        att_feats = _att_feats.view(*(att_feats.size()[:-1] + (self.rnn_size,)))

        # Project the attention feats first to reduce memory and computation consumption.
        p_att_feats = self.ctx2att(att_feats.view(-1, self.rnn_size))
        p_att_feats = p_att_feats.view(*(att_feats.size()[:-1] + (self.att_hid_size,)))

        seq = []
        seqLogprobs = []
        for t in range(self.seq_length + 1):
            # `logprobs` used below comes from the previous iteration's core step.
            if t == 0: # input <bos>
                it = fc_feats.data.new(batch_size).long().zero_()
            elif sample_max:
                # Greedy choice: most likely token and its log-probability.
                sampleLogprobs, it = torch.max(logprobs.data, 1)
                it = it.view(-1).long()
            else:
                if temperature == 1.0:
                    prob_prev = torch.exp(logprobs.data).cpu() # fetch prev distribution: shape Nx(M+1)
                else:
                    # scale logprobs by temperature
                    prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
                # NOTE(review): .cuda() is hard-coded, so this branch needs a GPU.
                it = torch.multinomial(prob_prev, 1).cuda()
                sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False)) # gather the logprobs at sampled positions
                it = it.view(-1).long() # and flatten indices for downstream processing

            xt = self.embed(Variable(it, requires_grad=False))

            if t >= 1:
                # stop when all finished
                # A sequence counts as finished once token id 0 has been produced.
                if t == 1:
                    unfinished = it > 0
                else:
                    unfinished = unfinished * (it > 0)
                if unfinished.sum() == 0:
                    break
                # Zero out tokens of sequences that already finished.
                it = it * unfinished.type_as(it)
                seq.append(it) #seq[t] the input of t+2 time step

                seqLogprobs.append(sampleLogprobs.view(-1))

            output, state = self.core(xt, fc_feats, att_feats, p_att_feats, state)
            # NOTE(review): log_softmax is called without an explicit `dim`.
            logprobs = F.log_softmax(self.logit(output))

        return torch.cat([_.unsqueeze(1) for _ in seq], 1), torch.cat([_.unsqueeze(1) for _ in seqLogprobs], 1)
Esempio n. 22
0
    def _gaussian_kl_divergence(self, p, q):
        """Sum a Gaussian KL-style expression built from the first rows of ``p`` and ``q``.

        Each first row is split at ``Z_DIM`` into a mean part and a log-scale part.
        NOTE(review): the "var" quantities are computed as sqrt(exp(log-scale))
        while the formula has the form of a variance-based KL — confirm what the
        second half of each row actually encodes.
        """
        p_row = p[0]
        q_row = q[0]

        p_mean, p_logstd = p_row[:Z_DIM], p_row[Z_DIM:]
        q_mean, q_logstd = q_row[:Z_DIM], q_row[Z_DIM:]

        p_var = T.sqrt(T.exp(p_logstd))
        q_var = T.sqrt(T.exp(q_logstd))

        mean_gap = p_mean - q_mean
        kl = (T.log(q_var / p_var) + (p_var + mean_gap * mean_gap) / q_var - 1) * 0.5
        return T.sum(kl)
Esempio n. 23
0
 def model():
     """Register "alpha_p_log"/"beta_p_log" (tagged "model"), sample "lambda_latent", observe "obs" and return the sample."""
     # Clones keep the stored initial values untouched by optimisation.
     alpha_init = Variable(self.alpha_p_log_0.clone(), requires_grad=True)
     alpha_p_log = pyro.param("alpha_p_log", alpha_init, tags="model")
     beta_init = Variable(self.beta_p_log_0.clone(), requires_grad=True)
     beta_p_log = pyro.param("beta_p_log", beta_init, tags="model")

     alpha_p = torch.exp(alpha_p_log)
     beta_p = torch.exp(beta_p_log)
     lambda_latent = pyro.sample("lambda_latent", dist.gamma, alpha_p, beta_p)

     # The sampled value parameterises the observation distribution.
     pyro.observe("obs", dist.poisson, self.data, lambda_latent)
     return lambda_latent
Esempio n. 24
0
 def guide():
     """Register "mu_q_log"/"tau_q_log" and sample "mu_latent" from ``dist.normal``."""
     mu_init = Variable(self.log_mu_n.data + 0.17, requires_grad=True)
     mu_q_log = pyro.param("mu_q_log", mu_init)
     tau_init = Variable(self.log_tau_n.data - 0.143, requires_grad=True)
     tau_q_log = pyro.param("tau_q_log", tau_init)

     mu_q = torch.exp(mu_q_log)
     tau_q = torch.exp(tau_q_log)

     # The scale passed to the normal is tau_q ** -0.5.
     sigma_q = torch.pow(tau_q, -0.5)
     pyro.sample("mu_latent", dist.normal, mu_q, sigma_q)
Esempio n. 25
0
 def mean_kl(self, new_dist_info, old_dist_info):
     """Return the batch mean of a per-sample Gaussian KL term between old and new parameters.

     Index 1 of each ``dist_info`` is read as the mean and index 2 as the log
     standard deviation; per-sample values are summed over dimension 1.
     """
     old_mean, old_log_std = old_dist_info[1], old_dist_info[2]
     new_mean, new_log_std = new_dist_info[1], new_dist_info[2]

     old_std = torch.exp(old_log_std)
     new_std = torch.exp(new_log_std)

     numerator = (old_mean - new_mean) ** 2 + old_std ** 2 - new_std ** 2
     # The small constant guards against a zero denominator.
     denominator = 2 * new_std ** 2 + 1e-8

     per_sample = torch.sum(numerator / denominator + new_log_std - old_log_std, dim=1)
     return torch.mean(per_sample)
Esempio n. 26
0
def plot_dist2(n_components, mixture_weights, true_mixture_weights, exp_dir, name=''):
    """Plot the current and true mixture densities and save the figure as a PNG.

    Component ``c`` is a Normal with mean ``c * 10`` and scale 5, weighted by
    the given mixture weights. The image is written to
    ``exp_dir + 'gmm_plot_dist' + name + '.png'``.
    """
    rows = 1
    cols = 1
    plt.figure(figsize=(10+cols, 4+rows), facecolor='white')
    ax = plt.subplot2grid((rows, cols), (0, 0), frameon=False, colspan=1, rowspan=1)

    xs = np.linspace(-10, n_components*10 + 5, 300)

    def draw_mixture(weights, component_color, total_color, total_label):
        # Draw every weighted component, then the sum of all of them.
        total = np.zeros(len(xs))
        for c in range(n_components):
            m = Normal(torch.tensor([c*10.]).float(), torch.tensor([5.0]).float())
            ys = []
            for x in xs:
                weighted = torch.exp(m.log_prob(x)) * weights[c]
                ys.append(weighted.detach().cpu().numpy())
            ys = np.reshape(np.array(ys), [-1])
            total += ys
            ax.plot(xs, ys, label='', c=component_color)
        ax.plot(xs, total, label=total_label, c=total_color)

    draw_mixture(mixture_weights, 'orange', 'r', 'current')
    draw_mixture(true_mixture_weights, 'c', 'b', 'true')

    ax.legend()

    # Both weight vectors are shown verbatim in the title.
    ax.set_title(str(mixture_weights) + '\n' + str(true_mixture_weights), size=8, family='serif')

    plt_path = exp_dir + 'gmm_plot_dist' + name + '.png'
    plt.savefig(plt_path)
    print ('saved training plot', plt_path)
    plt.close()
Esempio n. 27
0
    def bbox_transform(self, boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0), clip_value=4.135166556742356):
        """Map proposal boxes to predicted boxes using bounding-box regression deltas.

        Args:
            boxes: tensor indexed as ``[:, 0..3]`` = (x1, y1, x2, y2).
            deltas: tensor holding (dx, dy, dw, dh) groups every four columns.
            weights: divisors ``(wx, wy, ww, wh)`` applied to the deltas.
            clip_value: upper bound applied to dw and dh before exponentiation.

        Returns:
            ``None`` when ``boxes`` is empty; otherwise the tensor obtained by
            concatenating x1, y1, x2 and y2 along dimension 1.
        """
        if boxes.size(0) == 0:
            return None

        # get boxes dimensions and centers
        widths = boxes[:, 2] - boxes[:, 0] + 1.0
        heights = boxes[:, 3] - boxes[:, 1] + 1.0
        ctr_x = boxes[:, 0] + 0.5 * widths
        ctr_y = boxes[:, 1] + 0.5 * heights

        wx, wy, ww, wh = weights
        dx = deltas[:, 0::4] / wx
        dy = deltas[:, 1::4] / wy

        # Prevent sending too large values into exp(). Clamping with a scalar
        # keeps the dtype and device of the deltas, whereas the original built
        # a float32 tensor on the default CUDA device.
        dw = torch.clamp(deltas[:, 2::4] / ww, max=clip_value)
        dh = torch.clamp(deltas[:, 3::4] / wh, max=clip_value)

        pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1)
        pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1)
        pred_w = torch.exp(dw) * widths.unsqueeze(1)
        pred_h = torch.exp(dh) * heights.unsqueeze(1)

        pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
        pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
        # x2 / y2 (note: "- 1" is correct; don't be fooled by the asymmetry)
        pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w - 1
        pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h - 1

        # NOTE(review): with more than one delta group per row this groups all
        # x1 columns first, then y1, etc., rather than interleaving per box —
        # confirm callers expect that layout.
        pred_boxes = torch.cat((pred_boxes_x1,
                                pred_boxes_y1,
                                pred_boxes_x2,
                                pred_boxes_y2), 1)

        return pred_boxes
Esempio n. 28
0
 def sample(self, mu, logvar, k):
     """Draw ``k`` reparameterised samples and score them under prior and posterior.

     Returns ``(z, logpz, logqz)``; per the shape comments, z is [P,B,Z] and
     logpz is [P,B]. Noise and prior parameters are moved to the GPU whenever
     CUDA is available.
     """
     on_gpu = torch.cuda.is_available()

     eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_())  # [P,B,Z]
     prior_mean = Variable(torch.zeros(self.B, self.z_size))
     prior_logvar = Variable(torch.zeros(self.B, self.z_size))
     if on_gpu:
         eps = eps.cuda()
         prior_mean = prior_mean.cuda()
         prior_logvar = prior_logvar.cuda()

     z = eps.mul(torch.exp(.5 * logvar)) + mu  # [P,B,Z]
     logpz = lognormal(z, prior_mean, prior_logvar)  # [P,B]
     logqz = lognormal(z, mu, logvar)
     return z, logpz, logqz
    def forward(self, x, k, s):
        """Return the mean ELBO over ``s`` weight samples plus the mean of each log-probability term.

        Args:
            x: input batch; its first dimension is stored in ``self.B``.
            k: number of z samples (particles P) per weight sample.
            s: number of W samples.

        Returns:
            Tuple ``(elbo, logpx, logpz, logqz, logpW, logqW)`` of scalar tensors,
            each averaged over all weight samples.
        """
        self.B = x.size()[0]  # batch size

        elbo1s = []
        # One list per tracked term, in the order logpx, logpz, logqz, logpW, logqW.
        logprobs = [[] for _ in range(5)]
        for _ in range(s):
            Ws, logpW, logqW = self.sample_W()  # _ , [1], [1]

            mu, logvar = self.encode(x)  # [B,Z]
            z, logpz, logqz = self.sample_z(mu, logvar, k=k)  # [P,B,Z], [P,B]

            x_hat = self.decode(Ws, z)  # [P,B,X]
            logpx = log_bernoulli(x_hat, x)  # [P,B]

            elbo = logpx + logpz - logqz  # [P,B]
            # NOTE(review): the original computed a log-mean-exp over particles
            # (k > 1) and over weight samples (s > 1) into `elbo1`, but never
            # used either result, so the objective has always been the plain
            # mean taken below. Confirm whether importance weighting was intended.
            elbo = elbo + (logpW*.000001) - (logqW*self.qW_weight)
            elbo1s.append(elbo)

            for bucket, term in zip(logprobs, (logpx, logpz, logqz, logpW, logqW)):
                bucket.append(torch.mean(term))

        elbo1s = torch.stack(elbo1s)
        elbo = torch.mean(elbo1s)  # [1]

        logprobs2 = [torch.mean(torch.stack(aa)) for aa in logprobs]

        return elbo, logprobs2[0], logprobs2[1], logprobs2[2], logprobs2[3], logprobs2[4]
Esempio n. 30
0
 def rsample(self, sample_shape=torch.Size()):
     """Draw samples by parallel batched accept-reject.

     Proposals come from ``self.propose`` and are accepted with probability
     ``exp(self.log_prob_accept(x))`` clamped to [0, 1]. Entries still
     rejected are overwritten by fresh proposals until every entry is accepted.
     """
     def draw_proposal():
         # An empty sample_shape means propose() is called without arguments.
         if sample_shape:
             return self.propose(sample_shape)
         return self.propose()

     def draw_acceptance(candidate):
         accept_prob = torch.exp(self.log_prob_accept(candidate)).clamp_(0.0, 1.0)
         return torch.bernoulli(accept_prob).byte()

     x = draw_proposal()
     done = draw_acceptance(x)
     while not done.all():
         candidate = draw_proposal()
         # Only entries that are not finished yet may take a new value.
         accept = draw_acceptance(candidate) & ~done
         if accept.any():
             x[accept] = candidate[accept]
             done |= accept
     return x
Esempio n. 31
0
def conditional_lognormal_loss(model,
                               x,
                               t,
                               e,
                               pdf_u,
                               pdf_c,
                               hr_loss=False,
                               imbalance_loss=False,
                               elbo=True,
                               risk=1):
    """Negative log-likelihood of a mixture of log-normal survival distributions.

    Args:
        model: object whose ``forward(x)`` returns ``(shape, scale, logits)``
            and that exposes ``k``, ``discount`` and ``gamma``.
        x: model inputs.
        t: event/censoring times (their logarithm is taken).
        e: event indicators; entries equal to ``risk`` count as uncensored.
        pdf_u, pdf_c: per-time-bin weights used when ``imbalance_loss`` is set.
        hr_loss: add the partial-log-likelihood term scaled by ``model.gamma``.
        imbalance_loss: re-weight the losses by ``exp(1 - pdf[time bin])``.
        elbo: combine components with softmax weights (True) or with
            log-softmax plus logsumexp (False).
        risk: event code treated as the uncensored outcome.

    Returns:
        The mean negative log-likelihood, plus the hazard term when ``hr_loss``
        is set and ``e`` contains at least one positive entry.
    """
    shape, scale, logits = model.forward(x)

    lossf = []
    losss = []

    k_ = shape
    b_ = scale
    loss_neg = 0
    for g in range(model.k):

        mu = k_[:, g]
        sigma = b_[:, g]

        # Log-density term; `sigma` is used as a log standard deviation here.
        f = -sigma - 0.5 * np.log(2 * np.pi)
        f = f - torch.div((torch.log(t) - mu)**2, 2. * torch.exp(2 * sigma))
        # Log of 0.5 - 0.5 * erf(standardised log-time).
        s = torch.div(torch.log(t) - mu, torch.exp(sigma) * np.sqrt(2))
        s = 0.5 - 0.5 * torch.erf(s)
        s = torch.log(s)

        lossf.append(f)
        losss.append(s)

        # negative partial log likelihood
        hr = f - s
        loss_neg += PartialLogLikelihood()(hr, e)

    losss = torch.stack(losss, dim=1)
    lossf = torch.stack(lossf, dim=1)

    if elbo:
        lossg = nn.Softmax(dim=1)(logits)
        losss = lossg * losss
        lossf = lossg * lossf

        losss = losss.sum(dim=1)
        lossf = lossf.sum(dim=1)
    else:
        lossg = nn.LogSoftmax(dim=1)(logits)
        losss = lossg + losss
        lossf = lossg + lossf

        losss = torch.logsumexp(losss, dim=1)
        lossf = torch.logsumexp(lossf, dim=1)

    if imbalance_loss:
        # Best-effort re-weighting: a failure leaves the losses as they are,
        # but is now reported instead of being swallowed silently, and
        # KeyboardInterrupt/SystemExit are no longer caught.
        try:
            idx_time = t.int().cpu().detach().numpy()
            # Times of 10 or more share the last bin (index 9).
            idx_time[idx_time >= 10] = 9
            # Follow the loss tensors' device instead of hard-coding .cuda().
            pdf_u_ = torch.tensor(pdf_u).to(lossf.device)
            pdf_c_ = torch.tensor(pdf_c).to(losss.device)
            lossf = lossf * ((1 - pdf_u_[idx_time]).exp())
            losss = losss * ((1 - pdf_c_[idx_time]).exp())
        except Exception as err:
            import warnings
            warnings.warn("imbalance_loss weighting skipped: %r" % (err,))

    events = e.cpu().data.numpy()
    uncens = np.where(events == int(risk))[0]
    cens = np.where(events != int(risk))[0]
    ll = lossf[uncens].sum() + model.discount * losss[cens].sum()

    n_samples = float(len(uncens) + len(cens))
    if hr_loss and e.sum() > 0:
        return -ll / n_samples + loss_neg * model.gamma
    return -ll / n_samples
Esempio n. 32
0
def logisticloss(D):
    """Compute the k-way logistic loss ``log2(1 + sum(exp(D)))``.

    ``exp(D)`` has a trailing singleton axis squeezed away (when present) and
    is then summed over its last axis before the base-2 logarithm is taken.
    """
    exponentials = torch.exp(D)
    # squeeze(-1) is a no-op unless the last axis has length one.
    summed = exponentials.squeeze(-1).sum(-1)
    return torch.log2(1 + summed)
Esempio n. 33
0
    def forward(
            self,
            x: torch.LongTensor,
            x_lengths: torch.LongTensor,
            y_lengths: torch.LongTensor,
            y: torch.FloatTensor = None,
            dr: torch.IntTensor = None,
            pitch: torch.FloatTensor = None,
            aux_input: Dict = {
                "d_vectors": None,
                "speaker_ids": None
            },  # pylint: disable=unused-argument
    ) -> Dict:
        """Model's forward pass.

        Runs the encoder, the duration predictor, optionally the aligner and
        the pitch predictor, and finally the decoder, then bundles every
        intermediate result into a dictionary.

        Args:
            x (torch.LongTensor): Input character sequences.
            x_lengths (torch.LongTensor): Input sequence lengths.
            y_lengths (torch.LongTensor): Output sequence lengths.
            y (torch.FloatTensor): Spectrogram frames. Only used when the alignment network is on. Defaults to None.
            dr (torch.IntTensor): Character durations over the spectrogram frames. Only used when the alignment network is off. Defaults to None.
            pitch (torch.FloatTensor): Pitch values for each spectrogram frame. Only used when the pitch predictor is on. Defaults to None.
            aux_input (Dict): Auxiliary model inputs for multi-speaker training. Defaults to `{"d_vectors": None, "speaker_ids": None}`.
                NOTE(review): the default is a single dict object shared by every call; it must not be mutated.

        Returns:
            Dict: with the keys ``model_outputs``, ``durations_log``, ``durations``,
            ``attn_durations``, ``pitch_avg``, ``pitch_avg_gt``, ``alignments``,
            ``alignment_soft``, ``alignment_mas``, ``o_alignment_dur``,
            ``alignment_logprob``, ``x_mask`` and ``y_mask``. The aligner and pitch
            entries are ``None`` when the corresponding sub-network is disabled.

        Shapes:
            - x: :math:`[B, T_max]`
            - x_lengths: :math:`[B]`
            - y_lengths: :math:`[B]`
            - y: :math:`[B, T_max2]`
            - dr: :math:`[B, T_max]`
            - g: :math:`[B, C]`
            - pitch: :math:`[B, 1, T]`
        """
        g = self._set_speaker_input(aux_input)
        # compute sequence masks
        y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).float()
        x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.shape[1]),
                                 1).float()
        # encoder pass
        o_en, x_mask, g, x_emb = self._forward_encoder(x, x_mask, g)
        # duration predictor pass
        # (optionally on a detached encoder output so its loss does not reach the encoder)
        if self.args.detach_duration_predictor:
            o_dr_log = self.duration_predictor(o_en.detach(), x_mask)
        else:
            o_dr_log = self.duration_predictor(o_en, x_mask)
        # invert the log(1 + d) parameterisation and clip to [0, max_duration]
        o_dr = torch.clamp(torch.exp(o_dr_log) - 1, 0, self.max_duration)
        # generate attn mask from predicted durations
        o_attn = self.generate_attn(o_dr.squeeze(1), x_mask)
        # aligner
        o_alignment_dur = None
        alignment_soft = None
        alignment_logprob = None
        alignment_mas = None
        if self.use_aligner:
            o_alignment_dur, alignment_soft, alignment_logprob, alignment_mas = self._forward_aligner(
                x_emb, y, x_mask, y_mask)
            alignment_soft = alignment_soft.transpose(1, 2)
            alignment_mas = alignment_mas.transpose(1, 2)
            # durations found by the aligner replace any caller-supplied `dr`
            dr = o_alignment_dur
        # pitch predictor pass
        o_pitch = None
        avg_pitch = None
        if self.args.use_pitch:
            o_pitch_emb, o_pitch, avg_pitch = self._forward_pitch_predictor(
                o_en, x_mask, pitch, dr)
            # the pitch embedding is added onto the encoder output
            o_en = o_en + o_pitch_emb
        # decoder pass
        o_de, attn = self._forward_decoder(
            o_en, dr, x_mask, y_lengths,
            g=None)  # TODO: maybe pass speaker embedding (g) too
        outputs = {
            "model_outputs": o_de,  # [B, T, C]
            "durations_log": o_dr_log.squeeze(1),  # [B, T]
            "durations": o_dr.squeeze(1),  # [B, T]
            "attn_durations": o_attn,  # for visualization [B, T_en, T_de']
            "pitch_avg": o_pitch,
            "pitch_avg_gt": avg_pitch,
            "alignments": attn,  # [B, T_de, T_en]
            "alignment_soft": alignment_soft,
            "alignment_mas": alignment_mas,
            "o_alignment_dur": o_alignment_dur,
            "alignment_logprob": alignment_logprob,
            "x_mask": x_mask,
            "y_mask": y_mask,
        }
        return outputs
Esempio n. 34
0
def log_sum_exp_batch(vecs):
    """Row-wise, numerically stable ``log(sum(exp(vecs)))`` for a 2-D tensor.

    The maximum of every row is subtracted before exponentiating and added
    back afterwards, so large entries do not overflow.
    """
    row_max, _ = torch.max(vecs, 1)
    # Spread each row's maximum across that row's columns.
    row_max_wide = row_max[:, None].expand(-1, vecs.shape[1])
    shifted_exp = torch.exp(vecs - row_max_wide)
    return row_max + torch.log(shifted_exp.sum(1))
Esempio n. 35
0
def conditional_distributions_loss(model,
                                   x,
                                   t,
                                   e,
                                   pdf_u,
                                   pdf_c,
                                   hr_loss=False,
                                   imbalance_loss=False,
                                   elbo=True,
                                   risk='1'):
    """Mean negative log-likelihood of a log-normal / Weibull mixture.

    ``model.forward(x)`` must return seven tensors, in this order: Weibull
    shape, Weibull scale, Weibull gate logits, log-normal location,
    log-normal log-scale, log-normal gate logits and the attention logits
    used to blend the two families.  For every family the ``model.k``
    per-component log-density (``f``) and log-survival (``s``) terms are
    pooled with that family's gates, and the two pooled values are then
    mixed with ``softmax(attention_weights)``.

    Args:
        model: Object exposing ``forward``, ``k``, ``discount`` and ``gamma``.
        x: Covariates handed to ``model.forward``.
        t: Times; ``torch.log(t)`` is taken, so they must be positive.
        e: Event indicators.  Rows equal to ``int(risk)`` contribute the
            log-density term, every other row the discounted log-survival term.
        pdf_u: Lookup table indexed by ``t`` truncated to an integer; scales the
            log-density term by ``1 - pdf_u[t]`` when ``imbalance_loss`` is set.
        pdf_c: Same as ``pdf_u`` but for the log-survival term.
        hr_loss: Add ``model.gamma * PartialLogLikelihood()(hr, e)`` to the
            result when ``e.sum() > 0``.
        imbalance_loss: Enable the ``pdf_u`` / ``pdf_c`` re-weighting.  It is
            best effort: if it cannot be applied the loss is left unweighted.
        elbo: If true, pool the components with a softmax-weighted sum of the
            log terms; otherwise use ``logsumexp`` of log-gate plus log term.
        risk: Event code (anything accepted by ``int``) treated as uncensored.

    Returns:
        Scalar tensor: the negative log-likelihood divided by the number of
        rows, plus the optional partial-likelihood penalty.
    """
    (shape_weibull, scale_weibull, gates_weibull, shape_lognormal,
     scale_lognormal, logits_lognormal, attention_weights) = model.forward(x)

    # ---- Log-normal components -------------------------------------------
    lossf_lognormal = []
    losss_lognormal = []
    for g in range(model.k):

        mu = shape_lognormal[:, g]
        sigma = scale_lognormal[:, g]  # log of the standard deviation

        # log-density of log(t) under N(mu, exp(sigma)^2)
        f = -sigma - 0.5 * np.log(2 * np.pi)
        f = f - torch.div((torch.log(t) - mu)**2, 2. * torch.exp(2 * sigma))
        # log-survival: log(1 - Phi(z)) written with erf
        s = torch.div(torch.log(t) - mu, torch.exp(sigma) * np.sqrt(2))
        s = 0.5 - 0.5 * torch.erf(s)
        s = torch.log(s)

        lossf_lognormal.append(f)
        losss_lognormal.append(s)

    losss_lognormal = torch.stack(losss_lognormal, dim=1)
    lossf_lognormal = torch.stack(lossf_lognormal, dim=1)

    if elbo:
        lossg_lognormal = nn.Softmax(dim=1)(logits_lognormal)
        losss_lognormal = (lossg_lognormal * losss_lognormal).sum(dim=1)
        lossf_lognormal = (lossg_lognormal * lossf_lognormal).sum(dim=1)
    else:
        lossg_lognormal = nn.LogSoftmax(dim=1)(logits_lognormal)
        losss_lognormal = torch.logsumexp(lossg_lognormal + losss_lognormal,
                                          dim=1)
        lossf_lognormal = torch.logsumexp(lossg_lognormal + lossf_lognormal,
                                          dim=1)

    # ---- Weibull components ----------------------------------------------
    # The network emits log(shape) and -log(scale).
    shapes_weibull = shape_weibull.exp()
    scales_weibull = (-scale_weibull).exp()
    lossf_weibull, losss_weibull = [], []
    for idx in range(model.k):

        eta = shapes_weibull[:, idx]
        beta = scales_weibull[:, idx]

        log_s_weibull = -(torch.pow(t / beta, eta))
        log_f_weibull = torch.log(eta) - torch.log(beta) + (
            (eta - 1) * (-torch.log(beta) + torch.log(t)))
        log_f_weibull = log_f_weibull + log_s_weibull

        lossf_weibull.append(log_f_weibull)
        losss_weibull.append(log_s_weibull)

    losss_weibull = torch.stack(losss_weibull, dim=1)
    lossf_weibull = torch.stack(lossf_weibull, dim=1)

    if elbo:
        lossg_weibull = nn.Softmax(dim=1)(gates_weibull)
        losss_weibull = (lossg_weibull * losss_weibull).sum(dim=1)
        lossf_weibull = (lossg_weibull * lossf_weibull).sum(dim=1)
    else:
        lossg_weibull = nn.LogSoftmax(dim=1)(gates_weibull)
        losss_weibull = torch.logsumexp(lossg_weibull + losss_weibull, dim=1)
        lossf_weibull = torch.logsumexp(lossg_weibull + lossf_weibull, dim=1)

    # ---- Combine the two families ----------------------------------------
    lossf = torch.stack([lossf_lognormal, lossf_weibull], dim=1)
    losss = torch.stack([losss_lognormal, losss_weibull], dim=1)
    weights = nn.Softmax(dim=1)(attention_weights)

    # log-hazard = log-density - log-survival, blended across families
    hr = torch.stack(
        [lossf_lognormal - losss_lognormal, lossf_weibull - losss_weibull],
        dim=1)
    hr = (hr * weights).sum(dim=1)
    loss_neg = PartialLogLikelihood()(hr, e)

    lossf = (lossf * weights).sum(dim=1)
    losss = (losss * weights).sum(dim=1)

    if imbalance_loss:
        try:
            idx_time = t.int().cpu().detach().numpy()
            # Follow the loss tensors' device instead of assuming CUDA, so the
            # re-weighting is not silently skipped on CPU-only runs.
            pdf_u_ = torch.tensor(pdf_u).to(lossf.device)
            pdf_c_ = torch.tensor(pdf_c).to(losss.device)
            weighted_f = lossf * (1 - pdf_u_[idx_time])
            weighted_s = losss * (1 - pdf_c_[idx_time])
        except Exception:
            # Best effort (e.g. a time index outside the tables): keep the
            # unweighted terms rather than failing the training step.
            pass
        else:
            # Commit both terms together so they are never weighted unevenly.
            lossf, losss = weighted_f, weighted_s

    events = e.cpu().data.numpy()
    uncens = np.where(events == int(risk))[0]
    cens = np.where(events != int(risk))[0]
    ll = lossf[uncens].sum() + model.discount * losss[cens].sum()

    nll = -ll / float(len(uncens) + len(cens))
    if hr_loss and e.sum() > 0:
        return nll + loss_neg * model.gamma
    return nll
Esempio n. 36
0
 def e(self, s):
     """Return ``exp`` of ``s`` after softly limiting it with a scaled arctangent.

     The exponent is ``clamp * 0.636 * atan(s / clamp)``; 0.636 is roughly
     ``2 / pi``, so the exponent stays within about ``+/- clamp``.
     """
     gain = self.clamp * 0.636
     squashed = torch.atan(s / self.clamp)
     return torch.exp(gain * squashed)
Esempio n. 37
0
    def mutal_info(self, factors = ['shape', 'scale', 'rotation', 'x', 'y']):
        """Print and bar-plot a per-latent-dimension score for every factor.

        For each entry of ``self.latent_sizes`` up to 100 dataset items are
        drawn per factor class, encoded with ``self.enc``, and the resulting
        private and shared latent densities are combined into one value per
        latent dimension (``mi_zi_y``).  That vector is printed and drawn as one
        subplot per factor; the figure is shown at the end.

        Args:
            factors: Subplot titles, indexed by factor id.  The default list is
                only read, never mutated.
        """
        nsamps_per_factor = 100
        # NOTE(review): per_class_cnt is filled below but never read.
        per_class_cnt = {}
        n_factors = len(self.latent_sizes)

        fig = plt.figure(figsize=(5, 2*n_factors))
        # fig.tight_layout()
        plt.subplots_adjust(hspace=.5)

        for fac_id in range(n_factors):
            n_fac_classes = self.latent_sizes[fac_id]
            for i in range(n_fac_classes):
                per_class_cnt.update({i: 0})

            # The loader is only used as a handle on the dataset (dl.dataset).
            dl = DataLoader(
                self.data_loader.dataset, batch_size=100,
                shuffle=True, pin_memory=True)

            # randomly select images (with 100 different samples per class for the fixed factor)
            fixed_XA = []
            for fac_class in range(n_fac_classes):
                indices = np.where(self.latent_classes[:, fac_id] == fac_class)[0]
                np.random.shuffle(indices)
                per_class_idx = indices[:nsamps_per_factor]
                for i in per_class_idx:
                    img, _ = dl.dataset.__getitem__(i)
                    if self.cuda:
                        img = img.cuda()
                    # img = img.squeeze(0)
                    fixed_XA.append(img)

            fixed_XA = torch.stack(fixed_XA, dim=0)
            q = self.enc(fixed_XA, num_samples=1)
            batch_dim = 1

            # for my model
            batch_size = q[self.latents['private']].value.shape[1]
            # Evaluate every sample's latent value under every sample's posterior.
            # assumes `.value` is (num_samples, batch, dim) -- TODO confirm
            z_private = q[self.latents['private']].value.unsqueeze(batch_dim + 1).transpose(batch_dim, 0)
            z_shared = q[self.latents['shared']].value.unsqueeze(batch_dim + 1).transpose(batch_dim, 0)
            q_ziCx_private = torch.exp(q[self.latents['private']].dist.log_prob(z_private).transpose(1, batch_dim + 1).squeeze(2))
            q_ziCx_shared = torch.exp(q[self.latents['shared']].dist.log_prob(z_shared).transpose(1, batch_dim + 1).squeeze(2))
            # private and shared dimensions are concatenated along the last axis
            q_ziCx = torch.cat((q_ziCx_private, q_ziCx_shared), dim=2)

            latent_dim = q_ziCx.shape[-1]
            # NOTE(review): accumulator is created on the CPU; confirm this works when self.cuda is set.
            mi_zi_y = torch.tensor([.0] * latent_dim)
            for k in range(n_fac_classes):
                # block of samples belonging to class k (rows and columns)
                # NOTE(review): assumes every class contributed exactly nsamps_per_factor samples
                q_ziCxk = q_ziCx[k * nsamps_per_factor:(k + 1) * nsamps_per_factor,
                          k * nsamps_per_factor:(k + 1) * nsamps_per_factor, :]
                marg_q_ziCxk = q_ziCxk.sum(1)
                mi_zi_y += (marg_q_ziCxk * (np.log(batch_size / nsamps_per_factor) + torch.log(marg_q_ziCxk)
                                            - torch.log(
                    q_ziCx[k * nsamps_per_factor:(k + 1) * nsamps_per_factor, :, :].sum(1)))).mean(0)
            mi_zi_y = mi_zi_y / batch_size
            print(mi_zi_y)


            # tick labels z1 .. z<latent_dim>
            my_xticks = []
            for i in range(latent_dim):
                my_xticks.append('z' + str(i+1))

            ax = fig.add_subplot(n_factors, 1, fac_id + 1)
            ax.bar(range(latent_dim), mi_zi_y.detach().cpu().numpy())
            ax.set_title(factors[fac_id])
            plt.xticks(range(latent_dim), my_xticks)

        plt.show()
Esempio n. 38
0
 def _distribution(self, obs):
     """Build the Gaussian for ``obs``.

     The mean comes from ``self.mu_net(obs)``; the standard deviation is the
     exponential of ``self.log_std``.
     """
     mean = self.mu_net(obs)
     scale = torch.exp(self.log_std)
     return Normal(mean, scale)
Esempio n. 39
0
def train_smovement(train_loader, glow, nn_theta, loss_fn, optimizer,
                    scheduler, epoch):
    """Train for one epoch on random two-frame patches of every sequence.

    For each batch a two-frame window is cut from every sequence, both frames
    are pushed through ``flow_forward`` with ``glow``, and ``nn_theta``
    predicts Gaussian parameters for the three latent levels of the second
    frame from the first frame's latents.  ``loss_fn`` scores those Gaussians
    against the second frame's latents.

    Side effects: advances the module-level ``global_step``, saves a
    checkpoint with ``save_model``, logs the mean loss and learning rate to
    ``writer`` and finally calls ``flow_inverse_smovement`` on one batch.

    Args:
        train_loader: Loader yielding sequence tensors.
        glow: Iterable of flow networks (each is switched to train mode).
        nn_theta: Iterable of prior networks that also exposes ``l1``, ``l2``
            and ``l3``.
        loss_fn: Callable ``loss_fn(g1, g2, g3, z=..., sldj=..., input_dim=...)``.
        optimizer: Optimiser updating the trained parameters.
        scheduler: Optional scheduler stepped with ``global_step``; may be None.
        epoch: Epoch number used for checkpointing and logging.
    """
    print(
        "ID: exp12_1 testing lr 1e-4 and only one step movement, no glow loss with random patch"
    )
    global global_step
    loss_meter = AverageMeter()
    # loss_fn_glow = GlowLoss()
    for net in glow:
        net.train()
    for net in nn_theta:
        net.train()

    with tqdm(total=len(train_loader.dataset)) as progress_bar:
        for itr, sequence in enumerate(train_loader):
            sequence = sequence.to(device)
            b_s = sequence.size(0)

            # Independent start frame (0 or 1) for every sequence in the batch.
            random_patch = []
            for n in range(b_s):
                start_index = torch.LongTensor(1).random_(0, 2)
                random_patch.append(sequence[n, start_index:start_index +
                                             2, :, :, :])
            random_patch = torch.stack(random_patch, dim=0)

            # Latents of the first frame condition the prior of the second.
            t0_zi, _, sldj_0 = flow_forward(random_patch[:, 0, :, :, :], glow)
            # z_glow = recover_z_shape(t0_zi)
            # loss_glow = loss_fn_glow(z_glow, sldj_0)

            t1_zi_out, t1_zi_h, sldj_1 = flow_forward(
                random_patch[:, 1, :, :, :], glow)
            h12 = t1_zi_h.l3

            mu_l3, logsigma_l3 = nn_theta.l3(t0_zi.l3, h12)
            g3 = Normal(loc=mu_l3, scale=torch.exp(logsigma_l3))

            h1 = t1_zi_h.l2
            mu_l2, logsigma_l2 = nn_theta.l2(t0_zi.l2, h1)
            g2 = Normal(loc=mu_l2, scale=torch.exp(logsigma_l2))

            mu_l1, logsigma_l1 = nn_theta.l1(t0_zi.l1)
            g1 = Normal(loc=mu_l1, scale=torch.exp(logsigma_l1))

            total_loss = loss_fn(g1,
                                 g2,
                                 g3,
                                 z=t1_zi_out,
                                 sldj=sldj_1,
                                 input_dim=random_patch[:, 1, :, :, :].size())

            # total_loss = loss #+ loss_glow
            total_loss.backward()

            clip_grad_value(optimizer)

            optimizer.step()
            optimizer.zero_grad()
            if scheduler is not None:
                scheduler.step(global_step)

            loss_meter.update(total_loss.item(), b_s)
            progress_bar.set_postfix(nll=loss_meter.avg,
                                     bpd=bits_per_dim(
                                         random_patch[:, 1, :, :, :],
                                         loss_meter.avg),
                                     lr=optimizer.param_groups[0]['lr'])
            progress_bar.update(b_s)
            global_step += 1

    print("global step:", global_step)

    torch.cuda.empty_cache()
    #save_model(glow, nn_theta, optimizer, scheduler, epoch, PATH)
    save_model(glow, nn_theta, optimizer, epoch, PATH)
    writer.add_scalar('data/train_loss', loss_meter.avg, epoch)
    writer.add_scalar('data/lr', get_lr(optimizer), epoch)

    # Use the same device as the training batches instead of hard-coding CUDA,
    # so a CPU-only run does not crash after the epoch has been trained.
    context = next(iter(train_loader)).to(device)
    flow_inverse_smovement(context, glow, nn_theta, epoch)
Esempio n. 40
0
def evaluate(val_dataset, model, nll_crit, mse_crit, opt):
  """Evaluate ``model`` on every item of ``val_dataset`` with teacher forcing.

  Args:
    val_dataset: Indexable dataset; each item is a dict with the keys read
      below (``qid``, ``house``, ``id``, ``type``, ``path_ix``, ``ego_feats``,
      ``phrase_emb``, ``action_inputs``, ``action_outputs``, ``action_masks``
      and, depending on ``opt``, ``next_feats`` / ``res_feats``).
    model: Network called as ``model(ego_feats, phrase_embs, action_inputs)``.
    nll_crit: Masked criterion for the action log-probabilities.
    mse_crit: Masked criterion for the predicted features.
    opt: Options dict; ``use_next`` and ``use_residual`` select the feature
      target (the residual target wins when both are set).

  Returns:
    ``(predictions, overall_nll, overall_teacher_forcing_acc, overall_mse,
    Nav_nll, Nav_teacher_forcing_acc)`` where the two ``Nav_*`` dicts hold
    per-type averages for ``'object'`` and ``'room'``.  With an empty dataset
    every average is 0 instead of raising ``ZeroDivisionError``.
  """
  # set mode
  model.eval()

  # predict
  predictions = []
  overall_nll = 0
  overall_teacher_forcing_acc, overall_teacher_forcing_cnt = 0, 0
  overall_mse = 0
  Nav_nll = {'object': 0, 'room': 0}
  Nav_cnt = {'object': 0, 'room': 0}
  Nav_teacher_forcing_acc = {'object': 0, 'room': 0}
  Nav_teacher_forcing_cnt = {'object': 0, 'room': 0}
  num_samples = len(val_dataset)
  # Pure inference: no autograd graph is needed for any of the metrics.
  with torch.no_grad():
    for ix in range(num_samples):
      # data = {qid, path_ix, house, id, type, phrase, phrase_emb, ego_feats, next_feats, res_feats,
      #  action_inputs, action_outputs, action_masks, ego_imgs}
      data = val_dataset[ix]
      ego_feats = torch.from_numpy(data['ego_feats']).cuda().unsqueeze(0)  # (1, L, 3200)
      phrase_embs = torch.from_numpy(data['phrase_emb']).cuda().unsqueeze(0)  # (1, 300)
      action_inputs = torch.from_numpy(data['action_inputs']).cuda().unsqueeze(0)   # (1, L)
      action_outputs = torch.from_numpy(data['action_outputs']).cuda().unsqueeze(0) # (1, L)
      action_masks = torch.from_numpy(data['action_masks']).cuda().unsqueeze(0)  # (1, L)
      # forward
      logprobs, _, pred_feats, _ = model(ego_feats, phrase_embs, action_inputs)  # (1, L, #actions), (1, L, 3200)
      nll_loss = nll_crit(logprobs, action_outputs, action_masks).item()
      mse_loss = 0
      if opt['use_next']:
        next_feats = torch.from_numpy(data['next_feats']).cuda().unsqueeze(0)  # (1, L, 3200)
        mse_loss = mse_crit(pred_feats, next_feats, action_masks).item()
      if opt['use_residual']:
        res_feats = torch.from_numpy(data['res_feats']).cuda().unsqueeze(0)  # (1, L, 3200)
        mse_loss = mse_crit(pred_feats, res_feats, action_masks).item()
      pred_acts = logprobs[0].argmax(1)  # (L, )
      # entry
      entry = {
        'qid': data['qid'],
        'house': data['house'],
        'id': data['id'],
        'type': data['type'],
        'path_ix': data['path_ix'],
        'pred_acts': pred_acts.tolist(),                   # list of L actions
        'pred_acts_probs': torch.exp(logprobs[0]).tolist(),  # (L, #actions)
        'gd_acts': action_outputs[0].tolist(),             # list of L actions
        'nll_loss': nll_loss,
        'mse_loss': mse_loss,
      }
      # accumulate
      predictions.append(entry)
      Nav_nll[data['type']] += nll_loss
      Nav_cnt[data['type']] += 1
      # Compare step by step; ground-truth action 3 ends the comparison
      # (presumably the stop action -- confirm against the action vocabulary).
      acc, cnt = 0, 0
      for pa, ga in zip(entry['pred_acts'], entry['gd_acts']):
        if pa == ga:
          acc += 1
        cnt += 1
        if ga == 3:
          break
      Nav_teacher_forcing_acc[data['type']] += acc
      Nav_teacher_forcing_cnt[data['type']] += cnt
      overall_nll += nll_loss
      overall_mse += mse_loss
      overall_teacher_forcing_acc += acc
      overall_teacher_forcing_cnt += cnt
      # print
      if ix % 10 == 0:
        print('(%s/%s)qid[%s], id[%s], type[%s], nll_loss=%.3f, mse_loss=%.3f' % \
          (ix+1, len(val_dataset), entry['qid'], entry['id'], entry['type'], nll_loss, mse_loss))

  # summarize (max(..., 1) only matters when nothing was accumulated)
  overall_nll /= max(num_samples, 1)
  overall_mse /= max(num_samples, 1)
  overall_teacher_forcing_acc /= max(overall_teacher_forcing_cnt, 1)
  for _type in ['object', 'room']:
    Nav_nll[_type] /= (Nav_cnt[_type]+1e-5)
    Nav_teacher_forcing_acc[_type] /= (Nav_teacher_forcing_cnt[_type]+1e-5)

  # return
  return predictions, overall_nll, overall_teacher_forcing_acc, overall_mse, Nav_nll, Nav_teacher_forcing_acc
Esempio n. 41
0
    def load_explicit_dH(self):
        """Return the negated, hand-written third-derivative tensor.

        The parameter vector ``self.beta`` is laid out as
        ``[theta_tilde (J entries), mu, tau']`` with ``tau = exp(tau')``.
        The result has shape ``(dim, dim, dim)``; slice ``out[i]`` holds the
        second derivatives associated with parameter ``i``.  Only the entries
        written below are non-zero.

        Scalars are extracted with ``Tensor.item()``; the previously used
        ``numpy.asscalar`` has been removed from NumPy.
        """
        # write down explicit 3 rd derivatives
        out = torch.zeros(self.dim, self.dim, self.dim)
        tau = torch.exp(self.beta[self.J + 1]).item()
        mu = self.beta[self.J].item()
        theta_tilde = self.beta[:(self.J)].data
        # NOTE(review): theta is computed but not used below.
        theta = theta_tilde * tau + mu
        sigma = self.sigma.data
        y = self.y.data

        # case 1
        # dH_i i = (0,..,self.J-1]
        #dtheta_tilde dtheta_tilde dtau'
        out[:self.J, :self.J,
            self.J + 1] = torch.diag(-2 * tau * tau / (sigma * sigma))
        # dtheta_tilde dmu dtau'
        out[:self.J, self.J, self.J + 1] = -tau / (sigma * sigma)
        # dtheta_tilde dtau' dtau'
        out[:self.J, self.J + 1, self.J +
            1] = tau * (y - mu - 4 * theta_tilde * tau) / (sigma * sigma)
        # fill in rest (mirror the entries across the last two axes)
        out[:self.J, self.J + 1, :self.J].copy_(out[:self.J, :self.J,
                                                    self.J + 1])
        out[:self.J, self.J + 1, self.J].copy_(out[:self.J, self.J,
                                                   self.J + 1])

        # case 2
        # dH_i i = self.J , beta_i = mu
        # dmu dtheta_tilde dtau'
        out[self.J, :self.J, self.J + 1] = -tau / (sigma * sigma)
        # dmu dtau' dtau'
        out[self.J, self.J + 1,
            self.J + 1] = -(theta_tilde / (sigma * sigma)).sum() * tau
        # fill in
        out[self.J, self.J + 1, :self.J].copy_(out[self.J, :self.J,
                                                   self.J + 1])

        # case 3
        # dH_i i = self.J+1 , beta_i = tau'

        case_3matrix = torch.zeros(self.dim, self.dim)
        # dtau' dtheta_tilde dtheta_tilde
        case_3matrix[:self.J, :self.J].copy_(
            torch.diag(-2 * tau * tau / (sigma * sigma)))
        # dtau' dtheta_tilde dmu
        case_3matrix[:self.J, self.J] = (-tau) / (sigma * sigma)
        # dtau' dtheta_tilde dtau'
        case_3matrix[:self.J, self.J +
                     1] = tau * (y - mu - 4 * tau * theta_tilde) / (sigma *
                                                                    sigma)
        # dtau' dmu dtau'
        case_3matrix[self.J,
                     self.J + 1] = -(theta_tilde / (sigma * sigma)).sum() * tau
        # dtau'dtau'dtau'
        case_3matrix[self.J + 1, self.J + 1] = 200*tau*tau*(tau*tau-25)/((tau*tau+25)**3) \
                                               + (theta_tilde*tau*(y-mu-4*tau*theta_tilde)/(sigma*sigma)).sum()

        # fill in (make the slice symmetric)
        case_3matrix[self.J, :self.J].copy_(case_3matrix[:self.J, self.J])
        case_3matrix[self.J + 1, :self.J].copy_(case_3matrix[:self.J,
                                                             self.J + 1])
        case_3matrix[self.J + 1, self.J] = case_3matrix[self.J, self.J + 1]

        out[self.J + 1, :, :].copy_(case_3matrix)
        # the caller expects the negated derivatives
        out = -out
        return (out)
Esempio n. 42
0
 def forward(self, x):
     """Evaluate the unnormalised Gaussian bump ``exp(-(x - mu)^2 / (2 sigma^2))``."""
     squared_dev = torch.pow(x - self.mu, 2)
     return torch.exp(-squared_dev / (2 * self.sigma**2))
    def train(self, data_dir, epochs, learning_rate):
        """Train ``self.model.classifier`` and validate after every epoch.

        Args:
            data_dir: Passed to ``self.load_data``, which must return
                ``(image_datasets, dataloaders, class_to_idx)`` where
                ``dataloaders`` has the keys ``'train'`` and ``'test'``.
            epochs: Number of passes over the training loader.  ``0`` is
                accepted: nothing is trained and ``final_epoch`` is 0.
            learning_rate: Learning rate for the Adam optimiser.

        Side effects: prints one summary line per epoch and stores
        ``final_epoch``, ``train_losses``, ``test_losses`` and
        ``class_to_idx`` on ``self``.
        """
        _, dataloaders, class_to_idx = self.load_data(data_dir)
        # The model is expected to output log-probabilities (NLLLoss).
        criterion = nn.NLLLoss()
        optimizer = optim.Adam(self.model.classifier.parameters(), lr=learning_rate)

        # gpu or cpu
        self.model.to(self.device)

        # start training
        train_losses = []
        test_losses = []
        final_epoch = 0  # stays 0 when the loop body never runs
        for epoch in range(epochs):
            running_train_loss = 0
            self.model.train()
            for images, labels in dataloaders['train']:
                images, labels = images.to(self.device), labels.to(self.device)

                optimizer.zero_grad()

                # get log probs
                log_ps = self.model.forward(images)

                # get loss
                loss = criterion(log_ps, labels)
                running_train_loss += loss.item()

                # back propagation
                loss.backward()

                # adjust weights
                optimizer.step()

            # validation pass (no gradients, dropout etc. disabled)
            self.model.eval()
            running_test_loss = 0
            accuracy = 0
            with torch.no_grad():
                for images, labels in dataloaders['test']:
                    images, labels = images.to(self.device), labels.to(self.device)

                    # get log probs
                    log_ps = self.model.forward(images)

                    # get loss
                    test_loss = criterion(log_ps, labels)
                    running_test_loss += test_loss.item()

                    # turn log probs into real probs
                    ps = torch.exp(log_ps)

                    # calc accuracy (mean of per-batch top-1 accuracy)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

            n_test_batches = len(dataloaders['test'])
            n_train_batches = len(dataloaders['train'])

            epoch_train_loss = running_train_loss / n_train_batches
            epoch_test_loss  = running_test_loss / n_test_batches

            train_losses.append(epoch_train_loss)
            test_losses.append(epoch_test_loss)

            print(f'Epoch: {epoch+1}/{epochs}',
                  f'Training Loss {epoch_train_loss:{0}.{4}}',
                  f'Validation Loss {epoch_test_loss:{0}.{4}}',
                  f'Accuracy {(accuracy / n_test_batches):{0}.{4}}'
                 )
            final_epoch = epoch + 1

        self.final_epoch = final_epoch
        self.train_losses = train_losses
        self.test_losses = test_losses
        self.class_to_idx = class_to_idx
Esempio n. 44
0
def log_sum_exp(vec):
    """Stable ``log(sum(exp(vec)))`` over all entries of a ``(1, N)`` tensor.

    The entry selected by ``argmax(vec)`` (expected to be the largest one) is
    subtracted before exponentiating and added back afterwards.
    """
    peak = vec[0, argmax(vec)]
    peak_row = peak.view(1, -1).expand(1, vec.size()[1])
    residual = torch.exp(vec - peak_row)
    return peak + torch.log(torch.sum(residual))
Esempio n. 45
0
 def sample(self, deterministic=False):
     """Draw from the distribution, or return its mean when ``deterministic``.

     The stochastic draw is ``noise * exp(self.log_var) + self.mean`` with
     standard-normal ``noise`` of the mean's shape.
     NOTE(review): the noise is scaled by exp(log_var), not exp(0.5 * log_var);
     confirm whether ``log_var`` really holds a log-variance.
     """
     if deterministic:
         return self.mean
     noise = Variable(torch.randn(self.mean.size()))
     spread = torch.exp(self.log_var)
     return noise * spread + self.mean
 def sampling(self, mu, logvar):
     """Reparameterised draw ``mu + eps * exp(0.5 * logvar)`` with ``eps ~ N(0, I)``."""
     sigma = torch.exp(0.5 * logvar)
     noise = torch.randn_like(sigma)
     draw = noise.mul(sigma)
     # In-place add on the freshly created product; the inputs are untouched.
     draw.add_(mu)
     return draw
Esempio n. 47
0
    def ppo_step(self, weights):
        """Run PPO updates on a copy of the policy initialised from ``weights``.

        A deep copy of ``self.policy`` is loaded with ``weights`` and optimised
        with Adam for ``self.n_seq`` rollouts, each followed by
        ``self.n_updates`` passes over shuffled mini-batches using the clipped
        surrogate objective, an MSE value loss and an entropy bonus.
        ``self.policy`` itself is not modified.

        Args:
            weights: One tensor per parameter of the policy, in
                ``parameters()`` order.

        Returns:
            Tuple ``(rewards, new_weights)``: the result of a final
            ``self.env_function`` call with ``stochastic=False`` and
            ``reward_only=True``, and the list of the copy's parameters.
        """
        cloned_policy = copy.deepcopy(self.policy)
        for i, weight in enumerate(cloned_policy.parameters()):
            # NOTE(review): the bare except hides any failure of the first copy
            # and falls back to copying from `.data`.
            try:
                weight.data.copy_(weights[i])
            except:
                weight.data.copy_(weights[i].data)
        optimizer = optim.Adam(cloned_policy.parameters(),
                               lr=self.ppo_learning_rate)

        for _ in range(self.n_seq):
            # s_t, a_t, b(s_t) = v(s_t), \pi_{\theta_{\text{old}}}(a_t|s_t), R_t(\tau)
            states, actions, rewards, values, logprobs, returns = self.env_function(
                cloned_policy, max_steps=self.max_steps,
                gamma=self.gamma)  #, stochastic=False)
            # \hat{A_t}(\tau) = R_t(\tau) - b(s_t)
            advantages = returns - values
            # standardise the advantages
            # NOTE(review): a zero standard deviation would produce NaN/inf here.
            advantages = (advantages - advantages.mean()) / advantages.std()
            for update in range(self.n_updates):
                # fresh random partition of the rollout into mini-batches
                sampler = BatchSampler(SubsetRandomSampler(
                    list(range(advantages.shape[0]))),
                                       batch_size=self.batch_size,
                                       drop_last=False)
                for i, index in enumerate(sampler):
                    sampled_states = utils.to_var(states[index])
                    sampled_actions = utils.to_var(actions[index])
                    sampled_logprobs = utils.to_var(logprobs[index])
                    sampled_returns = utils.to_var(returns[index])
                    sampled_advs = utils.to_var(advantages[index])
                    # v(s_t), \pi_\theta(a_t|s_t), H(\pi(a_t, |a_t))
                    new_values, new_logprobs, dist_entropy = cloned_policy.evaluate(
                        sampled_states, sampled_actions)

                    # probability ratio between the current and the rollout policy
                    ratio = torch.exp(new_logprobs - sampled_logprobs)
                    # print(ratio.sum())
                    sampled_advs = sampled_advs.view(-1, 1)
                    # clipped surrogate: take the smaller of the raw and clipped terms
                    surrogate1 = ratio * sampled_advs
                    surrogate2 = torch.clamp(ratio, 1 - self.clip,
                                             1 + self.clip) * sampled_advs
                    policy_loss = -torch.min(surrogate1, surrogate2).mean()

                    # # \dfrac{\pi_\theta(a_t|s_t)}{\pi_{\theta_{\text{old}}}(a_t|s_t)}
                    # ratio1 = torch.exp(new_logprobs - sampled_logprobs)
                    # # [\dfrac{\pi_\theta(a_t|s_t)}{\pi_{\theta_{\text{old}}}(a_t|s_t)}]_{\text{clip}}
                    # ratio2 = ratio1.clamp(1-self.clip, 1+self.clip)
                    # # \min\{.,[.]_{\text{clip}}\}
                    # ratio = torch.min(ratio1, ratio2)
                    # # \min\{. \,[.]_{\text{clip}}\}
                    # policy_loss = -sampled_advs.detach() * ratio
                    sampled_returns = sampled_returns.view(-1, 1)
                    new_values = new_values.view(-1, 1)
                    # \frac{1}{2}(v(s_t) - R_t(\tau))^2
                    value_loss = F.mse_loss(new_values, sampled_returns)
                    # total loss = policy loss + value loss - entropy bonus
                    loss = policy_loss.mean() + value_loss.mean(
                    ) - self.ent_coeff * dist_entropy.mean()
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
        # final evaluation of the updated copy; only the rewards are kept
        rewards = self.env_function(cloned_policy,
                                    stochastic=False,
                                    render=False,
                                    reward_only=True)
        new_weights = list(cloned_policy.parameters())
        return rewards, new_weights
    def test_ntxent_loss(self):
        """Check NTXentLoss against a hand-computed reference.

        Five 2-D embeddings at fixed angles are scored twice: with the default
        distance and with ``LpDistance``.  For every positive pair the
        reference is ``-log(pos / (pos + sum of that anchor's negatives))``,
        averaged over the positive pairs.
        """
        temperature = 0.1
        loss_funcA = NTXentLoss(temperature=temperature)
        loss_funcB = NTXentLoss(temperature=temperature, distance=LpDistance())

        label_ids = [0, 0, 1, 1, 2]
        point_ids = range(len(label_ids))
        pos_pairs = [(0, 1), (1, 0), (2, 3), (3, 2)]
        # every ordered pair of points that carry different labels
        neg_pairs = [(a, b) for a in point_ids for b in point_ids
                     if label_ids[a] != label_ids[b]]

        def dot_term(u, v):
            # exp(dot-product similarity / temperature)
            return torch.exp(torch.matmul(u, v) / temperature)

        def l2_term(u, v):
            # exp(-euclidean distance / temperature)
            return torch.exp(-torch.sqrt(torch.sum((u - v)**2)) / temperature)

        for dtype in TEST_DTYPES:
            embedding_angles = [0, 20, 40, 60, 80]
            embeddings = torch.tensor(
                [c_f.angle_to_coord(a) for a in embedding_angles],
                requires_grad=True,
                dtype=dtype,
            ).to(self.device)  # 2D embeddings

            labels = torch.LongTensor(label_ids)

            lossA = loss_funcA(embeddings, labels)
            lossB = loss_funcB(embeddings, labels)

            total_lossA, total_lossB = 0, 0
            for a1, p in pos_pairs:
                anchor, positive = embeddings[a1], embeddings[p]
                numeratorA = dot_term(anchor, positive)
                numeratorB = l2_term(anchor, positive)
                denominatorA = numeratorA.clone()
                denominatorB = numeratorB.clone()
                for a2, n in neg_pairs:
                    # only negatives that share the current anchor count
                    if a2 != a1:
                        continue
                    negative = embeddings[n]
                    denominatorA += dot_term(anchor, negative)
                    denominatorB += l2_term(anchor, negative)
                total_lossA += -torch.log(numeratorA / denominatorA)
                total_lossB += -torch.log(numeratorB / denominatorB)

            total_lossA /= len(pos_pairs)
            total_lossB /= len(pos_pairs)
            # half precision needs a looser tolerance
            rtol = 1e-2 if dtype == torch.float16 else 1e-5
            self.assertTrue(torch.isclose(lossA, total_lossA, rtol=rtol))
            self.assertTrue(torch.isclose(lossB, total_lossB, rtol=rtol))
Esempio n. 49
0
 def erf_approx(self, x):
     """Closed-form approximation of ``erf(x)``.

     Computes ``sign(x) * sqrt(1 - exp(-x^2 (4/pi + a x^2) / (1 + a x^2)))``
     with ``a = self.a_for_erf``.
     """
     a_x2 = self.a_for_erf * x * x
     exponent = -x * x * (4 / math.pi + a_x2) / (1 + a_x2)
     magnitude = torch.sqrt(1 - torch.exp(exponent))
     return torch.sign(x) * magnitude
Esempio n. 50
0
 def log_likelihood_ratio(self, x, new_dist):
     """Return ``p_new(x) / p_self(x)``.

     Despite the name the result is the plain likelihood ratio: the
     exponential of the difference of the two log-likelihoods.
     """
     new_ll = new_dist.log_likelihood(x)
     old_ll = self.log_likelihood(x)
     log_ratio = new_ll - old_ll
     return torch.exp(log_ratio)
Esempio n. 51
0
 def forward(self, input):
     """Apply the element-wise exponential to ``input``."""
     exponentiated = torch.exp(input)
     return exponentiated
Esempio n. 52
0
 def reparameterize(mu, logvar):
     """Sample ``mu + eps * std`` with ``std = exp(0.5 * logvar)`` and ``eps ~ N(0, I)``."""
     sigma = torch.exp(0.5 * logvar)
     noise = torch.randn_like(sigma)
     perturbation = noise * sigma
     return mu + perturbation
def train_model(args):
    """Train and evaluate a ``MultiSpectrogramModel`` on IEMOCAP mel features.

    The model is built from the CNN / pooling hyper-parameters carried by
    ``args``, wrapped in ``DataParallel`` over GPUs 0-3 and trained with Adam
    for a fixed number of epochs. After every epoch it is evaluated on the
    ``train=False`` split; per-epoch statistics are printed and a summary is
    appended to the statistics file once the last epoch finishes.

    Args:
        args: namespace providing ``batch_size``, ``out_channels1/2``,
            ``kernel_size_cnn1/2``, ``stride_size_cnn1/2``,
            ``kernel_size_pool1/2``, ``stride_size_pool1/2`` and ``weight``.

    Returns:
        None. Results are printed and appended to the statistics file.
    """
    stats_file = "/scratch/speech/models/classification/spec_multi_joint_stats_weight.txt"
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    device_ids = [0, 1, 2, 3]
    batch_size = args.batch_size
    input_channels = 1
    out_channels = [args.out_channels1, args.out_channels2]
    kernel_size_cnn = [[args.kernel_size_cnn1, args.kernel_size_cnn2],
                       [args.kernel_size_cnn2, args.kernel_size_cnn1]]
    stride_size_cnn = [[args.stride_size_cnn1, args.stride_size_cnn2],
                       [args.stride_size_cnn2, args.stride_size_cnn1]]
    kernel_size_pool = [[args.kernel_size_pool1, args.kernel_size_pool2],
                        [args.kernel_size_pool2, args.kernel_size_pool1]]
    stride_size_pool = [[args.stride_size_pool1, args.stride_size_pool2],
                        [args.stride_size_pool2, args.stride_size_pool1]]
    hidden_dim = 200
    num_layers = 2
    dropout = 0
    num_labels = 4
    hidden_dim_lstm = 200
    epoch_num = 50
    num_layers_lstm = 2
    nfft = [512, 1024]
    weight = args.weight
    model = MultiSpectrogramModel(input_channels, out_channels, kernel_size_cnn, stride_size_cnn, kernel_size_pool,
                                  stride_size_pool, hidden_dim, num_layers, dropout, num_labels, batch_size,
                                  hidden_dim_lstm, num_layers_lstm, device, nfft, weight, False)

    # Count the trainable parameters once; the figure is both printed and logged.
    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("============================ Number of parameters ====================================")
    print(str(n_params))

    path = "batch_size:{};out_channels:{};kernel_size_cnn:{};stride_size_cnn:{};kernel_size_pool:{};stride_size_pool:{}; weight:{}".format(
        args.batch_size, out_channels, kernel_size_cnn, stride_size_cnn, kernel_size_pool, stride_size_pool, weight)
    with open(stats_file, "a+") as f:
        f.write("\n" + "============ model starts ===========")
        f.write("\n" + "model_parameters: " + str(n_params) + "\n" + path + "\n")
    model.cuda()
    model = DataParallel(model, device_ids=device_ids)
    model.train()

    # Adam with learning rate 0.001 drives the training.
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    # NOTE(review): the SGD optimizer and both schedulers below are created
    # but never stepped in this function; kept so optimizer state is set up
    # exactly as before.
    optimizer2 = optim.SGD(model.parameters(), lr=0.1)
    scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=2, threshold=1e-3)
    scheduler3 = MultiStepLR(optimizer, [5, 10, 15], gamma=0.1)

    # Load the training and evaluation data.
    training_data = IEMOCAP(name='mel', nfft=nfft, train=True)
    train_loader = DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True,
                              collate_fn=my_collate, num_workers=0, drop_last=True)
    testing_data = IEMOCAP(name='mel', nfft=nfft, train=False)
    test_loader = DataLoader(dataset=testing_data, batch_size=batch_size, shuffle=True,
                             collate_fn=my_collate, num_workers=0, drop_last=True)

    test_acc = []
    train_acc = []
    test_loss = []
    train_loss = []
    for epoch in range(epoch_num):
        losses = 0
        correct = 0
        model.train()
        for j, (input_lstm, input1, input2, target, seq_length) in enumerate(train_loader):
            model.zero_grad()
            # The wrapped model returns one loss / correct-count entry per
            # replica; reduce them over dim 0.
            losses_batch, correct_batch = model(input_lstm, input1, input2, target, seq_length)
            loss = torch.mean(losses_batch, dim=0)
            correct_batch = torch.sum(correct_batch, dim=0)
            losses += loss.item() * batch_size
            loss.backward()
            # Current mixing weight squashed through a sigmoid with slope 10.
            # torch.sigmoid is used instead of exp/(1+exp) so large weights do
            # not overflow, and .item() is applied to the whole expression so
            # the result is a plain float.
            weight = model.module.state_dict()["weight"]
            weight = torch.sigmoid(10 * weight).item()
            optimizer.step()
            correct += correct_batch.item()
        # drop_last=True makes every batch exactly batch_size samples.
        accuracy = correct * 1.0 / ((j + 1) * batch_size)
        losses = losses / ((j + 1) * batch_size)

        losses_test = 0
        correct_test = 0
        model.eval()
        with torch.no_grad():
            for j, (input_lstm, input1, input2, target, seq_length) in enumerate(test_loader):
                losses_batch, correct_batch = model(input_lstm, input1, input2, target, seq_length)
                loss = torch.mean(losses_batch, dim=0)
                correct_batch = torch.sum(correct_batch, dim=0)
                losses_test += loss.item() * batch_size
                correct_test += correct_batch.item()

        accuracy_test = correct_test * 1.0 / ((j + 1) * batch_size)
        losses_test = losses_test / ((j + 1) * batch_size)

        # data gathering
        test_acc.append(accuracy_test)
        train_acc.append(accuracy)
        test_loss.append(losses_test)
        train_loss.append(losses)
        print("Epoch: {}-----------Training Loss: {} -------- Testing Loss: {} -------- Training Acc: {} -------- Testing Acc: {}".format(epoch+1,losses,losses_test, accuracy, accuracy_test)+"\n")
        # The summary is only written once, after the final epoch.
        if epoch == epoch_num - 1:
            with open(stats_file, "a+") as f:
                f.write("Best Accuracy:{:06.5f}".format(max(test_acc)) + "\n")
                f.write("Average Top 10 Accuracy:{:06.5f}".format(np.mean(np.sort(np.array(test_acc))[-10:])) + "\n")
                f.write("=============== model ends ===================" + "\n")
    print("success:{}, Best Accuracy:{}".format(path, max(test_acc)))
Esempio n. 54
0
def sigmoid(z):
    """Return the element-wise logistic function ``1 / (1 + exp(-z))`` of tensor ``z``."""
    denominator = torch.exp(-z) + 1
    return 1 / denominator
def k2(kesi,f_x,f_y,mean_logk,lamda):
    """Return ``exp(mean_logk + sum_1(sqrt(lamda) * f_x * f_y * kesi))``.

    The element-wise product ``sqrt(lamda) * f_x * f_y * kesi`` is summed
    over dimension 1, shifted by ``mean_logk`` and exponentiated.
    """
    weighted_terms = torch.sqrt(lamda) * f_x * f_y * kesi
    log_field = mean_logk + weighted_terms.sum(1)
    return torch.exp(log_field)
Esempio n. 56
0
 def reparameterize(self, mu, logvar):
     """Sample ``mu + sigma * u`` with ``u ~ N(0, I)`` and ``sigma = exp(0.5 * logvar)``.

     Args:
         mu: tensor of means.
         logvar: tensor of log-variances.

     Returns:
         A random sample with the broadcast shape of ``mu`` and ``logvar``.
     """
     sigma = torch.exp(logvar * 0.5)
     noise = torch.randn_like(sigma)
     return mu + noise * sigma
Esempio n. 57
0
def evaluate_meta_parameters(model, test_loader, args, train=False, name=None, n_recons = 8, n_steps = 100):
    """Plot how each latent dimension acts on parameters, spectra and descriptors.

    For every latent dimension ``l`` the function sweeps that dimension from
    -4 to 4 (all others fixed at 0) and fills one row of a figure with:
    the regressed parameters along the sweep, ``n_recons`` decoded
    reconstructions, and the descriptors computed on the decoded sweep.
    Optionally it also synthesizes audio for the sweep and for a few test
    examples whose ``l``-th latent is overwritten by the sweep.

    Args:
        model: object exposing ``ae_model`` (with ``latent_dims`` and
            ``decode``), ``regression_model`` and, when
            ``args.semantic_dim > -1``, ``disentangling``.
        test_loader: loader yielding ``(data, params, meta, audio)`` batches;
            its ``dataset`` provides ``final_params``, ``vars`` and ``means``.
        args: configuration namespace (``device``, ``semantic_dim``, ``loss``,
            ``n_classes``, ``latent_dims``, ``synthesize``, ``engine``,
            ``generator``, ``param_defaults``, ``rev_idx``, ``base_audio``,
            ``base_img``, ``data`` and optionally ``z_vars``).
        train: when True, audio synthesis and the default figure save are skipped.
        name: optional file prefix; the figure is saved to
            ``name + '_meta_parameters.pdf'`` when given.
        n_recons: number of reconstructions (and sweep points for audio) per latent.
        n_steps: number of sweep points used for the parameter / descriptor plots.

    Returns:
        None. Output is the saved figure and, optionally, audio files.
    """
    print('  - Evaluate meta parameters.')
    latent_dims = model.ae_model.latent_dims
    fig = plt.figure(figsize=(10, 20))
    outer = gridspec.GridSpec(latent_dims + 1, 3, wspace=0.2, hspace=0.4)
    # Select 32 random points (with replacement) from the first test batch
    fixed_data, fixed_params, fixed_meta, fixed_audio = next(iter(test_loader))
    in_data = fixed_data[np.random.randint(0, fixed_data.shape[0], size=(32))].to(args.device)
    # Find corresponding latent codes
    _, in_data, _ = model.ae_model(in_data)
    if (args.semantic_dim > -1):
        in_data, _ = model.disentangling(in_data)
    z_var = 0
    for l in range(latent_dims):
        # Sweep latent l over [-4, 4] while keeping the others at zero
        var_z = torch.linspace(-4, 4, n_steps)
        fake_batch = torch.zeros(n_steps, latent_dims)
        fake_batch[:, l] = var_z
        fake_batch = fake_batch.to(args.device)
        # Generate VAE outputs
        x_tilde_full = model.ae_model.decode(fake_batch)
        # Perform regression
        out = model.regression_model(fake_batch)
        if (args.loss in ['multinomial']):
            # Arg-max class index rescaled to [0, 1]
            tmp = out.view(out.shape[0], -1, latent_dims).max(dim=1)[1]
            out = tmp.float() / (args.n_classes - 1.)
        if (args.loss in ['multi_mse']):
            out = out.view(out.shape[0], -1, latent_dims)
            out = out[:, -1, :]
        # Select parameters
        var_param = out.std(dim=0)
        idx = torch.argsort(var_param, descending=True)
        # To keep coloring consistent we blank out all parameters above 5 most varying
        out[:, idx[5:]] = torch.zeros(out.shape[0], len(idx[5:])).to(out.device)
        ax = plt.Subplot(fig, outer[l*3])
        ax.plot(out.detach().cpu().numpy())
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if (hasattr(args, 'z_vars')):
            z_var = args.z_vars[l].item()
        ax.set_title('$z_{' + str(l) + '}$ - %.2f - %.3f'%((z_var), (var_param[idx[:5]].mean().item())))
        fig.add_subplot(ax)
        # Reconstruct a handful of points
        fake_batch = torch.zeros(n_recons, latent_dims)
        fake_batch[:, l] = torch.linspace(-4, 4, n_recons)
        fake_batch = fake_batch.to(args.device)
        # Reconstruct with the VAE
        x_tilde = model.ae_model.decode(fake_batch)
        # Reconstruct with the synth engine
        if (args.synthesize == True and train == False and ((var_param[idx[:5]].mean().item() > 0.15) or ((args.semantic_dim > -1) and (l == 0)))):
            out_batch = model.regression_model(fake_batch)
            # NOTE(review): this branch reshapes with args.latent_dims while the
            # sweep above uses model.ae_model.latent_dims -- confirm they agree.
            if (args.loss in ['multinomial']):
                tmp = out_batch.view(out_batch.shape[0], -1, args.latent_dims).max(dim=1)[1]
                out_batch = tmp.float() / (args.n_classes - 1.)
            if (args.loss in ['multi_mse']):
                out_batch = out_batch.view(out_batch.shape[0], -1, args.latent_dims)
                out_batch = out_batch[:, -1, :]
            print('      - Generate audio for latent ' + str(l))
            from synth.synthesize import synthesize_batch
            audio = synthesize_batch(out_batch.cpu(), test_loader.dataset.final_params, args.engine, args.generator, args.param_defaults, args.rev_idx, orig_wave=None, name=None)
            save_batch_audio(audio, args.base_audio + '_meta_parameters_z' + str(l) + '_v' + str(var_param[idx[:5]].mean().item()))
            # Now check how this parameter act on various sounds
            n_ins = 32 if ((args.semantic_dim > -1) and (l == 0)) else 4
            for s in range(n_ins):
                print('          - Generate audio for meta-modified ' + str(s))
                tmp_data = in_data[s].clone().unsqueeze(0).repeat(n_recons, 1)
                tmp_data[:, l] = torch.linspace(-4, 4, n_recons)
                tmp_data = model.regression_model(tmp_data)
                if (args.loss in ['multinomial']):
                    tmp = tmp_data.view(tmp_data.shape[0], -1, args.latent_dims).max(dim=1)[1]
                    tmp_data = tmp.float() / (args.n_classes - 1.)
                if (args.loss in ['multi_mse']):
                    tmp_data = tmp_data.view(tmp_data.shape[0], -1, args.latent_dims)
                    tmp_data = tmp_data[:, -1, :]
                # Synthesize meta-modified test example
                audio = synthesize_batch(tmp_data.cpu(), test_loader.dataset.final_params, args.engine, args.generator, args.param_defaults, args.rev_idx, orig_wave=None, name=None)
                save_batch_audio(audio, args.base_audio + '_meta_parameters_z' + str(l) + '_b' + str(s))
        if len(x_tilde.shape) > 3:
            x_tilde = x_tilde[:,0]
        # One cell per reconstruction; sized from n_recons so that values
        # other than the default 8 do not index past the grid.
        inner = gridspec.GridSpecFromSubplotSpec(1, n_recons,
            subplot_spec=outer[l*3+1], wspace=0.1, hspace=0.1)
        for n in range(n_recons):
            ax = plt.Subplot(fig, inner[n])
            ax.imshow(x_tilde[n].detach().cpu().numpy(), aspect='auto')
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            fig.add_subplot(ax)
        # Unscale and un-log output
        x_tilde_full = (x_tilde_full * test_loader.dataset.vars["mel"]) + test_loader.dataset.means["mel"]
        if (args.data in ['mel',"mel_mfcc"]):
            x_tilde_full = torch.exp(x_tilde_full)
        x_tilde_full = x_tilde_full[:,0]
        # Compute descriptors
        descs = compute_descriptors(x_tilde_full.detach().cpu().numpy())
        ax = plt.Subplot(fig, outer[l*3+2])
        ax.plot(descs)
        fig.add_subplot(ax)
    # Just fake plots for legends (reuses idx / out from the last latent)
    fake = torch.linspace(1, len(idx), len(idx)).repeat(out.shape[0], 1)
    ax = plt.Subplot(fig, outer[latent_dims*3])
    ax.plot(fake.numpy())
    ax.legend(test_loader.dataset.final_params)
    fig.add_subplot(ax)
    fake = torch.linspace(1, len(descriptors), len(descriptors)).repeat(out.shape[0], 1)
    ax = plt.Subplot(fig, outer[latent_dims*3+2])
    ax.plot(fake.numpy())
    ax.legend(descriptors)
    fig.add_subplot(ax)
    # Save under the explicit name when given, else under the default image
    # prefix (evaluation mode only)
    if (name is not None):
        plt.savefig(name + '_meta_parameters.pdf')
        plt.close()
    if (train == False and name is None):
        plt.savefig(args.base_img + '_meta_parameters.pdf')
        plt.close()
Esempio n. 58
0
        # Accumulate the training loss since the last report.
        running_loss += loss.item()

        # Every `print_every` steps, run a full pass over the test loader.
        if steps % print_every == 0:
            test_loss = 0
            accuracy = 0
            # Switch to evaluation mode and disable gradient tracking while
            # measuring test metrics.
            model.eval()
            with torch.no_grad():
                for inputs, labels in testloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    logps = model.forward(inputs)
                    batch_loss = criterion(logps, labels)

                    test_loss += batch_loss.item()

                    # Calculate accuracy
                    # The output is exponentiated before taking the top class
                    # (presumably the model emits log-probabilities -- confirm).
                    ps = torch.exp(logps)
                    top_p, top_class = ps.topk(1, dim=1)
                    # Reshape labels to match top_class for an element-wise comparison.
                    equals = top_class == labels.view(*top_class.shape)
                    # Mean of the 0/1 matches is this batch's accuracy.
                    accuracy += torch.mean(equals.type(
                        torch.FloatTensor)).item()

            # Losses and accuracy are averaged over the reporting window /
            # number of test batches respectively.
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {running_loss/print_every:.3f}.. "
                  f"Test loss: {test_loss/len(testloader):.3f}.. "
                  f"Test accuracy: {accuracy/len(testloader)}")
            # Reset the window and return to training mode.
            running_loss = 0
            model.train()

# Destination of the serialized model, relative to the current working directory.
PATH = Path("./model_q1.pth")

# Serialize the whole model object (not only its state_dict) to PATH.
torch.save(model, PATH)
Esempio n. 59
0
def run_SAM(in_data,
            skeleton=None,
            is_mixed=False,
            device="cpu",
            train=10000,
            test=1,
            batch_size=-1,
            lr_gen=.001,
            lr_disc=.01,
            lambda1=0.001,
            lambda2=0.0000001,
            nh=None,
            dnh=None,
            verbose=True,
            losstype="fgan",
            functionalComplexity="n_hidden_units",
            sampletype="sigmoidproba",
            dagstart=0,
            dagloss=False,
            dagpenalization=0.05,
            dagpenalization_increase=0.0,
            categorical_threshold=50,
            linear=False,
            numberHiddenLayersG=2,
            numberHiddenLayersD=2,
            idx=0):
    """Run one SAM training and return the averaged graph-sampler probabilities.

    Generators (``SAM_generators``) are trained jointly with a graph sampler
    and, unless ``losstype == "mse"``, against a discriminator
    (``SAM_discriminator``). During the last ``test`` epochs the
    sampler probabilities are accumulated; their sum divided by ``test`` is
    returned.

    Args:
        in_data: pandas DataFrame with one column per node.
        skeleton: optional numpy array, converted to a float32 tensor and
            handed to the generators and the graph sampler as mask.
        is_mixed: if True, columns with fewer than ``categorical_threshold``
            dummy levels are one-hot encoded and the others are scaled;
            otherwise every column is scaled.
        device: torch device for the data and the modules.
        train: number of training epochs.
        test: number of extra epochs whose sampler probabilities are averaged.
        batch_size: mini-batch size; ``-1`` uses the whole dataset.
        lr_gen: Adam learning rate for generators, graph and neuron samplers.
        lr_disc: Adam learning rate for the discriminator.
        lambda1: weight of the structural penalty (sum of the drawn graph),
            divided by the number of rows.
        lambda2: weight of the functional penalty, divided by the number of rows.
        nh: forwarded to ``SAM_generators``; also the first dimension of the
            neuron sampler.
        dnh: forwarded to ``SAM_discriminator``.
        verbose: show a tqdm progress bar with the current losses.
        losstype: ``"mse"``, ``"gan"`` or ``"fgan"``.
        functionalComplexity: ``"n_hidden_units"`` or ``"l2_norm"``
            (only consulted when ``linear`` is False).
        sampletype: ``"sigmoid"``, ``"sigmoidproba"`` or ``"gumbleproba"``.
        dagstart: the DAG penalty is applied once ``epoch > train * dagstart``.
        dagloss: enable the DAG penalty.
        dagpenalization: initial coefficient of the DAG penalty.
        dagpenalization_increase: per-epoch increase of that coefficient.
        categorical_threshold: see ``is_mixed``.
        linear: forwarded to the generators; disables neuron sampling and the
            functional penalty.
        numberHiddenLayersG: forwarded to ``SAM_generators``.
        numberHiddenLayersD: forwarded to ``SAM_discriminator``.
        idx: not used inside this function.

    Returns:
        numpy array of shape ``(n_nodes, n_nodes)``.

    Raises:
        ValueError: on an unknown ``losstype``, ``functionalComplexity`` or
            ``sampletype``.
    """
    # Fail fast on options that would otherwise only surface as an
    # unbound-local error inside the first training batch.
    if losstype not in ("mse", "gan", "fgan"):
        raise ValueError('Unknown loss type: {}'.format(losstype))
    if not linear and functionalComplexity not in ("n_hidden_units",
                                                   "l2_norm"):
        raise ValueError('Unknown functional complexity: {}'.format(
            functionalComplexity))

    list_nodes = list(in_data.columns)
    if is_mixed:
        onehotdata = []
        for i in range(len(list_nodes)):
            # One-hot encode the column once and reuse it for both the
            # cardinality test and the encoded data.
            dummies = pd.get_dummies(in_data.iloc[:, i]).values
            if dummies.shape[1] < categorical_threshold:
                onehotdata.append(dummies)
            else:
                onehotdata.append(scale(in_data.iloc[:, [i]].values))
        cat_sizes = [i.shape[1] for i in onehotdata]

        data = np.concatenate(onehotdata, 1)
    else:
        data = scale(in_data[list_nodes].values)
        cat_sizes = None

    nb_var = len(list_nodes)
    data = data.astype('float32')
    data = th.from_numpy(data).to(device)
    if batch_size == -1:
        batch_size = data.shape[0]

    # Penalties are expressed per sample.
    lambda1 = lambda1 / data.shape[0]
    lambda2 = lambda2 / data.shape[0]

    cols = data.size()[1]
    # Get the list of indexes to ignore
    if skeleton is not None:
        skeleton = th.from_numpy(skeleton.astype('float32'))

    sam = SAM_generators((batch_size, cols),
                         nh,
                         skeleton=skeleton,
                         cat_sizes=cat_sizes,
                         linear=linear,
                         numberHiddenLayersG=numberHiddenLayersG).to(device)

    sam.reset_parameters()
    g_optimizer = th.optim.Adam(list(sam.parameters()), lr=lr_gen)

    if losstype != "mse":
        discriminator = SAM_discriminator(
            cols,
            dnh,
            numberHiddenLayersD,
            mask=sam.categorical_matrix,
        ).to(device)
        discriminator.reset_parameters()
        d_optimizer = th.optim.Adam(discriminator.parameters(), lr=lr_disc)
        criterion = th.nn.BCEWithLogitsLoss()
    else:
        criterion = th.nn.MSELoss()
        # Placeholder so the progress bar can always report a value.
        disc_loss = th.zeros(1)

    if sampletype == "sigmoid":
        graph_sampler = SimpleMatrixConnection(len(list_nodes),
                                               mask=skeleton).to(device)
    elif sampletype == "sigmoidproba":
        graph_sampler = MatrixSampler(len(list_nodes),
                                      mask=skeleton,
                                      gumble=False).to(device)
    elif sampletype == "gumbleproba":
        graph_sampler = MatrixSampler(len(list_nodes),
                                      mask=skeleton,
                                      gumble=True).to(device)
    else:
        raise ValueError('Unknown Graph sampler')

    graph_sampler.weights.data.fill_(2)

    graph_optimizer = th.optim.Adam(graph_sampler.parameters(), lr=lr_gen)

    if not linear and functionalComplexity == "n_hidden_units":
        neuron_sampler = MatrixSampler((nh, len(list_nodes)),
                                       mask=False,
                                       gumble=True).to(device)
        neuron_optimizer = th.optim.Adam(list(neuron_sampler.parameters()),
                                         lr=lr_gen)

    _true = th.ones(1).to(device)
    _false = th.zeros(1).to(device)
    output = th.zeros(len(list_nodes), len(list_nodes)).to(device)

    data_iterator = DataLoader(data,
                               batch_size=batch_size,
                               shuffle=True,
                               drop_last=True)

    # RUN
    if verbose:
        pbar = tqdm(range(train + test))
    else:
        pbar = range(train + test)
    for epoch in pbar:
        for i_batch, batch in enumerate(data_iterator):

            if losstype != "mse":
                d_optimizer.zero_grad()

            # Train the discriminator

            drawn_graph = graph_sampler()

            if not linear and functionalComplexity == "n_hidden_units":
                drawn_neurons = neuron_sampler()

            if linear or functionalComplexity != "n_hidden_units":
                generated_variables = sam(batch, drawn_graph)
            else:
                generated_variables = sam(batch, drawn_graph, drawn_neurons)

            if losstype != "mse":
                # Detached so this step only updates the discriminator.
                disc_vars_d = discriminator(generated_variables.detach(),
                                            batch)
                true_vars_disc = discriminator(batch)

                if losstype == "gan":
                    disc_loss = sum([criterion(gen, _false.expand_as(gen)) for gen in disc_vars_d]) / nb_var \
                                     + criterion(true_vars_disc, _true.expand_as(true_vars_disc))
                    # Gen Losses per generator: multiply py the number of channels
                elif losstype == "fgan":

                    disc_loss = th.mean(th.exp(disc_vars_d - 1), [0, 2]).sum(
                    ) / nb_var - th.mean(true_vars_disc)

                disc_loss.backward()
                d_optimizer.step()

            ### OPTIMIZING THE GENERATORS
            g_optimizer.zero_grad()
            graph_optimizer.zero_grad()

            if not linear and functionalComplexity == "n_hidden_units":
                neuron_optimizer.zero_grad()

            if losstype == "mse":
                gen_loss = criterion(generated_variables, batch)
            else:
                disc_vars_g = discriminator(generated_variables, batch)

                if losstype == "gan":
                    # Gen Losses per generator: multiply py the number of channels
                    gen_loss = sum([
                        criterion(gen, _true.expand_as(gen))
                        for gen in disc_vars_g
                    ])
                elif losstype == "fgan":
                    gen_loss = -th.mean(th.exp(disc_vars_g - 1), [0, 2]).sum()

            filters = graph_sampler.get_proba()
            struc_loss = lambda1 * drawn_graph.sum()

            if linear:
                func_loss = 0
            else:
                if functionalComplexity == "n_hidden_units":
                    func_loss = lambda2 * drawn_neurons.sum()

                elif functionalComplexity == "l2_norm":
                    l2_reg = th.Tensor([0.]).to(device)
                    for param in sam.parameters():
                        l2_reg += th.norm(param)

                    func_loss = lambda2 * l2_reg

            regul_loss = struc_loss + func_loss

            # Optional: prune edges and sam parameters before dag search

            if dagloss and epoch > train * dagstart:
                dag_constraint = notears_constr(filters * filters)

                # The DAG coefficient grows linearly with the epochs elapsed
                # since the penalty was switched on.
                loss = gen_loss + regul_loss + (
                    dagpenalization + (epoch - train * dagstart) *
                    dagpenalization_increase) * dag_constraint
            else:
                loss = gen_loss + regul_loss
            if verbose and epoch % 20 == 0 and i_batch == 0:
                pbar.set_postfix(gen=gen_loss.item() / cols,
                                 disc=disc_loss.item(),
                                 regul_loss=regul_loss.item(),
                                 tot=loss.item())

            # No backward pass in the very last epoch.
            if epoch < train + test - 1:
                loss.backward()

            # Accumulate sampler probabilities once training epochs are over.
            if epoch >= train:
                output.add_(filters.data)

            g_optimizer.step()
            graph_optimizer.step()
            if not linear and functionalComplexity == "n_hidden_units":
                neuron_optimizer.step()

    return output.div_(test).cpu().numpy()
Esempio n. 60
0
def image_histogram2d(
    image: torch.Tensor,
    min: float = 0.0,
    max: float = 255.0,
    n_bins: int = 256,
    bandwidth: Optional[float] = None,
    centers: Optional[torch.Tensor] = None,
    return_pdf: bool = False,
    kernel: str = "triangular",
    eps: float = 1e-10,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Estimate the histogram of the input image(s).

    The calculation uses kernel density estimation with the selected kernel
    (triangular by default).

    Args:
        image: Input tensor to compute the histogram with shape
          :math:`(H, W)`, :math:`(C, H, W)` or :math:`(B, C, H, W)`.
        min: Lower end of the interval (inclusive).
        max: Upper end of the interval (inclusive). Ignored when
          :attr:`centers` is specified.
        n_bins: The number of histogram bins. Ignored when
          :attr:`centers` is specified (it is still used to derive the
          default :attr:`bandwidth`).
        bandwidth: Smoothing factor. If not specified,
          :math:`(bandwidth = (max - min) / n_bins)`.
        centers: Centers of the bins with shape :math:`(n_bins,)`.
          If not specified, it is calculated as centers of
          equal width bins of [min, max] range.
        return_pdf: If True, also return probability densities for
          each bin.
        kernel: kernel to perform kernel density estimation
          ``(`triangular`, `gaussian`, `uniform`, `epanechnikov`)``.
        eps: Small constant added to the normalization term of the
          densities to avoid a division by zero.

    Returns:
        Computed histogram of shape :math:`(bins)`, :math:`(C, bins)`,
          :math:`(B, C, bins)`.
        Computed probability densities of shape :math:`(bins)`, :math:`(C, bins)`,
          :math:`(B, C, bins)`, if return_pdf is ``True``. Tensor of zeros with shape
          of the histogram otherwise.

    Raises:
        TypeError: if an argument has an unexpected type.
        ValueError: if :attr:`centers` is not one-dimensional or
          :attr:`kernel` is unknown.
    """
    if image is not None and not isinstance(image, torch.Tensor):
        raise TypeError(
            f"Input image type is not a torch.Tensor. Got {type(image)}.")

    if centers is not None and not isinstance(centers, torch.Tensor):
        raise TypeError(
            f"Bins' centers type is not a torch.Tensor. Got {type(centers)}.")

    if centers is not None and len(centers.shape) > 0 and centers.dim() != 1:
        raise ValueError(
            f"Bins' centers must be a torch.Tensor of the shape (n_bins,). Got {centers.shape}."
        )

    if not isinstance(min, float):
        raise TypeError(
            f'Type of lower end of the range is not a float. Got {type(min)}.')

    if not isinstance(max, float):
        # Report the type of the offending argument (max, not min).
        raise TypeError(
            f"Type of upper end of the range is not a float. Got {type(max)}.")

    if not isinstance(n_bins, int):
        raise TypeError(
            f"Type of number of bins is not an int. Got {type(n_bins)}.")

    if bandwidth is not None and not isinstance(bandwidth, float):
        raise TypeError(
            f"Bandwidth type is not a float. Got {type(bandwidth)}.")

    if not isinstance(return_pdf, bool):
        raise TypeError(
            f"Return_pdf type is not a bool. Got {type(return_pdf)}.")

    if bandwidth is None:
        bandwidth = (max - min) / n_bins
    if centers is None:
        centers = min + bandwidth * (torch.arange(
            n_bins, device=image.device, dtype=image.dtype).float() + 0.5)
    # Put the bins on a new leading axis so they broadcast against the image:
    # u has one slice per bin holding the scaled distance of every pixel.
    centers = centers.reshape(-1, 1, 1, 1, 1)
    u = torch.abs(image.unsqueeze(0) - centers) / bandwidth
    if kernel == "triangular":
        mask = (u <= 1).to(u.dtype)
        kernel_values = (1 - u) * mask
    elif kernel == "gaussian":
        kernel_values = torch.exp(-0.5 * u**2)
    elif kernel == "uniform":
        mask = (u <= 1).to(u.dtype)
        kernel_values = torch.ones_like(u, dtype=u.dtype,
                                        device=u.device) * mask
    elif kernel == "epanechnikov":
        mask = (u <= 1).to(u.dtype)
        kernel_values = (1 - u**2) * mask
    else:
        raise ValueError(f"Kernel must be 'triangular', 'gaussian', "
                         f"'uniform' or 'epanechnikov'. Got {kernel}.")

    # Sum over the spatial axes and move the bins axis last.
    hist = torch.sum(kernel_values, dim=(-2, -1)).permute(1, 2, 0)
    if return_pdf:
        normalization = torch.sum(hist, dim=-1, keepdim=True) + eps
        pdf = hist / normalization
    else:
        pdf = torch.zeros_like(hist)

    # Drop only the leading axes introduced by broadcasting; squeezing by
    # position keeps the bins axis even when n_bins == 1.
    if image.dim() == 2:
        hist = hist.squeeze(0).squeeze(0)
        pdf = pdf.squeeze(0).squeeze(0)
    elif image.dim() == 3:
        hist = hist.squeeze(0)
        pdf = pdf.squeeze(0)
    return hist, pdf