Example #1
    def forward(self, frame, policy):
        self.B = frame.size()[0]

        #Predict mask
        mask = self.predict_mask(frame)  #[B,2,210,160]
        mask = mask.repeat(1,3,1,1)

        #frame: [B,6,210,160]
        masked_frame = frame * mask


        log_dist_mask = policy.action_logdist(masked_frame)
        log_dist_true = policy.action_logdist(frame)

        action_dist_kl = torch.sum((log_dist_true - log_dist_mask)*torch.exp(log_dist_true), dim=1) #[B]
        action_dist_kl = torch.mean(action_dist_kl) # * 1000

        mask = mask.view(self.B, -1)
        mask_sum = torch.mean(torch.sum(mask, dim=1)) * .000001

        loss = action_dist_kl + mask_sum

        return loss, action_dist_kl, mask_sum
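The KL term above is the discrete KL divergence KL(p_true || p_mask) computed directly from log-distributions. As a standalone sketch (the helper name is illustrative, not from the original):

import torch

def kl_from_logdists(log_p, log_q):
    # KL(p || q) = sum_a p(a) * (log p(a) - log q(a)), one value per batch row
    return torch.sum(torch.exp(log_p) * (log_p - log_q), dim=1)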
Example #2
    def forward(self, frame, DQNs):
        self.B = frame.size()[0]

        blurred_frame = self.blur_frame(frame)

        #Predict mask
        blur_weighting = self.predict_precision(frame)  #[B,1,480,640]
        blur_weighting = blur_weighting.repeat(1,3,1,1)

        mixed_frame = frame * blur_weighting + (1.-blur_weighting)*blurred_frame


        difs = []
        for i in range(len(DQNs)):
            q_mask = DQNs[i](mixed_frame)
            q_real = DQNs[i](frame)

            dif = torch.mean((q_mask - q_real)**2)  # scalar
            difs.append(dif)

        difs = torch.stack(difs)
        dif = torch.mean(difs)


        blur_weighting = blur_weighting.view(self.B, -1)
        mask_sum = torch.mean(torch.sum(blur_weighting, dim=1)) * .0000001

        loss = dif + mask_sum

        return loss, dif, mask_sum
Example #3
    def calculate_loss(self, x, beta=1., average=False):
        '''
        :param x: input image(s)
        :param beta: a hyperparam for warmup
        :param average: whether to average loss or not
        :return: value of a loss function
        '''
        # pass through VAE
        x_mean, x_logvar, z_q, z_q_mean, z_q_logvar = self.forward(x)

        # RE
        if self.args.input_type == 'binary':
            RE = log_Bernoulli(x, x_mean, dim=1)
        elif self.args.input_type == 'gray' or self.args.input_type == 'continuous':
            RE = -log_Logistic_256(x, x_mean, x_logvar, dim=1)
        else:
            raise Exception('Wrong input type!')

        # KL
        log_p_z = self.log_p_z(z_q)
        log_q_z = log_Normal_diag(z_q, z_q_mean, z_q_logvar, dim=1)
        KL = -(log_p_z - log_q_z)

        loss = - RE + beta * KL

        if average:
            loss = torch.mean(loss)
            RE = torch.mean(RE)
            KL = torch.mean(KL)

        return loss, RE, KL
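log_Bernoulli, log_Logistic_256, and log_p_z are project helpers not shown here. For reference, a minimal log_Normal_diag consistent with how it is called above (diagonal-Gaussian log-density, reduced over dim) might look like:

import math
import torch

def log_Normal_diag(x, mean, log_var, dim=1):
    # log N(x; mean, exp(log_var)), summed over `dim`
    log_normal = -0.5 * (math.log(2. * math.pi) + log_var
                         + torch.pow(x - mean, 2) / torch.exp(log_var))
    return torch.sum(log_normal, dim)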
Example #4
def prepare_model():
    since = time.time()

    num_epochs = 1
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train']:

            mean = torch.zeros(3)
            std = torch.zeros(3)
            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data
                now_batch_size,c,h,w = inputs.shape
                mean += torch.sum(torch.mean(torch.mean(inputs,dim=3),dim=2),dim=0)
                std += torch.sum(torch.std(inputs.view(now_batch_size,c,h*w),dim=2),dim=0)
                
            print(mean/dataset_sizes['train'])
            print(std/dataset_sizes['train'])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    return 
Example #5
def evaluate(model):
    model.eval()
    running_loss = [0., 0., 0.]
    epoch_loss = 0.
    display_step = 100
    for batch_idx, (x, c) in enumerate(test_loader):
        x, c = x.to(device), c.to(device)
        log_p, logdet = model(x, c)
        log_p, logdet = torch.mean(log_p), torch.mean(logdet)
        loss = -(log_p + logdet)

        running_loss[0] += loss.item() / display_step
        running_loss[1] += log_p.item() / display_step
        running_loss[2] += logdet.item() / display_step
        epoch_loss += loss.item()

        if (batch_idx + 1) % display_step == 0:
            print('Global Step : {}, [{}, {}] [Log pdf, Log p(z), Log Det] : {}'
                  .format(global_step, epoch, batch_idx + 1, np.array(running_loss)))
            running_loss = [0., 0., 0.]
        del x, c, log_p, logdet, loss
    del running_loss
    epoch_loss /= len(test_loader)
    print('Evaluation Loss : {:.4f}'.format(epoch_loss))
    return epoch_loss
Example #6
    def forward(self, frame, DQNs):
        self.B = frame.size()[0]

        #Predict mask
        mask = self.predict_mask(frame)  #[B,2,210,160]
        mask = mask.repeat(1,3,1,1)
        #frame: [B,6,210,160]
        masked_frame = frame * mask

        difs = []
        for i in range(len(DQNs)):
            q_mask = DQNs[i](masked_frame)
            q_real = DQNs[i](frame)
            dif = torch.mean((q_mask-q_real)**2)  # scalar
            difs.append(dif)

        difs = torch.stack(difs)
        dif = torch.mean(difs)


        mask = mask.view(self.B, -1)
        mask_sum = torch.mean(torch.sum(mask, dim=1)) * .0000001

        loss = dif + mask_sum

        return loss, dif, mask_sum
Example #7
    def predictive_elbo(self, x, k, s):
        # No pW or qW

        self.B = x.size()[0] #batch size
        # self.k = k  #number of z samples aka particles P
        # self.s = s  #number of W samples

        elbo1s = []
        for i in range(s):

            Ws, logpW, logqW = self.sample_W()  #_ , [1], [1]

            mu, logvar = self.encode(x)  #[B,Z]
            z, logpz, logqz = self.sample_z(mu, logvar, k=k) #[P,B,Z], [P,B]

            x_hat = self.decode(Ws, z) #[P,B,X]
            logpx = log_bernoulli(x_hat, x)  #[P,B]

            elbo = logpx + logpz - logqz #[P,B]
            if k>1:
                max_ = torch.max(elbo, 0)[0] #[B]
                elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_ #[B]
            elbo1s.append(elbo)

        elbo1s = torch.stack(elbo1s) #[S,B]
        if s>1:
            max_ = torch.max(elbo1s, 0)[0] #[B]
            elbo1s = torch.log(torch.mean(torch.exp(elbo1s - max_), 0)) + max_ #[B]

        elbo = torch.mean(elbo1s) #[1]
        return elbo
Example #8
    def forward(self, z_seq, a_seq, term_seq):

        h = torch.zeros(1,self.h_size).cuda()
        z_losses = []
        term_losses = []
        for t in range(len(term_seq)-1):

            inter = self.encode_az(a_seq[t], z_seq[t])
            h = self.update_h(h, inter)
            z_pred, term_pred = self.predict_output(h, inter)

            z_loss = torch.mean((z_seq[t+1] - z_pred)**2)
            term_loss = F.binary_cross_entropy_with_logits(input=term_pred, target=term_seq[t+1])

            z_losses.append(z_loss)
            term_losses.append(term_loss)

        z_loss = torch.mean(torch.stack(z_losses))
        term_loss = torch.mean(torch.stack(term_losses)) 

        loss = z_loss + term_loss 

        return loss, z_loss, term_loss
Example #9
    def forward(self, x, k=1):
        
        self.B = x.size()[0]
        mu, logvar = self.encode(x)
        z, logpz, logqz = self.sample(mu, logvar, k=k)  #[P,B,Z]
        x_hat = self.decode(z)  #[PB,X]
        x_hat = x_hat.view(k, self.B, -1)
        logpx = log_bernoulli(x_hat, x)  #[P,B]

        elbo = logpx + logpz - logqz  #[P,B]

        if k>1:
            max_ = torch.max(elbo, 0)[0] #[B]
            elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_ #[B]

        elbo = torch.mean(elbo) #[1]

        #for printing
        logpx = torch.mean(logpx)
        logpz = torch.mean(logpz)
        logqz = torch.mean(logqz)
        self.x_hat_sigmoid = F.sigmoid(x_hat)

        return elbo, logpx, logpz, logqz
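Several of the VAE examples here repeat the same numerically stable log-mean-exp over the particle dimension to form the importance-weighted bound. A small helper capturing that pattern (illustrative, not part of the original code):

import torch

def logmeanexp(elbo, dim=0):
    # log (1/P) * sum_p exp(elbo_p), stabilized by subtracting the max
    max_ = torch.max(elbo, dim)[0]
    return torch.log(torch.mean(torch.exp(elbo - max_), dim)) + max_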
Example #10
def get_paf_and_heatmap(model, img_raw, scale_search, param_stride=8, box_size=368):
    multiplier = [scale * box_size / img_raw.shape[0] for scale in scale_search]

    heatmap_avg = torch.zeros((len(multiplier), 19, img_raw.shape[0], img_raw.shape[1])).cuda()
    paf_avg = torch.zeros((len(multiplier), 38, img_raw.shape[0], img_raw.shape[1])).cuda()

    for i, scale in enumerate(multiplier):
        img_test = cv2.resize(img_raw, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        img_test_pad, pad = pad_right_down_corner(img_test, param_stride, param_stride)
        img_test_pad = np.transpose(np.float32(img_test_pad[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5

        feed = Variable(torch.from_numpy(img_test_pad)).cuda()
        output1, output2 = model(feed)

        print(output1.size())
        print(output2.size())

        heatmap = nn.UpsamplingBilinear2d((img_raw.shape[0], img_raw.shape[1])).cuda()(output2)

        paf = nn.UpsamplingBilinear2d((img_raw.shape[0], img_raw.shape[1])).cuda()(output1)

        heatmap_avg[i] = heatmap[0].data
        paf_avg[i] = paf[0].data

    heatmap_avg = torch.transpose(torch.transpose(torch.squeeze(torch.mean(heatmap_avg, 0)), 0, 1), 1, 2).cuda()
    heatmap_avg = heatmap_avg.cpu().numpy()

    paf_avg = torch.transpose(torch.transpose(torch.squeeze(torch.mean(paf_avg, 0)), 0, 1), 1, 2).cuda()
    paf_avg = paf_avg.cpu().numpy()

    return paf_avg, heatmap_avg
Example #11
    def forward(self, x, k):

        self.B = x.size()[0] #batch size

        #Encode
        mu, logvar = self.encode(x)  #[B,Z]
        z, logpz, logqz = self.sample(mu, logvar, k=k) #[P,B,Z], [P,B]

        #Decode
        x_hat = self.decode(z) #[P,B,X]
        logpx = log_bernoulli(x_hat, x)  #[P,B]

        #Compute elbo
        elbo = logpx + logpz - logqz #[P,B]
        if k>1:
            max_ = torch.max(elbo, 0)[0] #[B]
            elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_ #[B]
            
        elbo = torch.mean(elbo) #[1]
        logpx = torch.mean(logpx)
        logpz = torch.mean(logpz)
        logqz = torch.mean(logqz)


        return elbo, logpx, logpz, logqz
Example #12
    def train_step(self, state_batch, mcts_probs, winner_batch, lr):
        """perform a training step"""
        # wrap in Variable
        if self.use_gpu:
            state_batch = Variable(torch.FloatTensor(state_batch).cuda())
            mcts_probs = Variable(torch.FloatTensor(mcts_probs).cuda())
            winner_batch = Variable(torch.FloatTensor(winner_batch).cuda())
        else:
            state_batch = Variable(torch.FloatTensor(state_batch))
            mcts_probs = Variable(torch.FloatTensor(mcts_probs))
            winner_batch = Variable(torch.FloatTensor(winner_batch))

        # zero the parameter gradients
        self.optimizer.zero_grad()
        # set learning rate
        set_learning_rate(self.optimizer, lr)

        # forward
        log_act_probs, value = self.policy_value_net(state_batch)
        # define the loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        # Note: the L2 penalty is incorporated in optimizer
        value_loss = F.mse_loss(value.view(-1), winner_batch)
        policy_loss = -torch.mean(torch.sum(mcts_probs*log_act_probs, 1))
        loss = value_loss + policy_loss
        # backward and optimize
        loss.backward()
        self.optimizer.step()
        # calc policy entropy, for monitoring only
        entropy = -torch.mean(
                torch.sum(torch.exp(log_act_probs) * log_act_probs, 1)
                )
        return loss.data[0], entropy.data[0]
Example #13
    def forward(self, x, k=1):
        self.k = k
        self.B = x.size()[0]
        mu, logvar = self.encode(x)
        z, logpz, logqz = self.sample(mu, logvar, k=k)
        x_hat, logpW, logqW = self.decode(z)

        logpx = log_bernoulli(x_hat, x)  #[P,B]


        elbo = logpx + logpz - logqz + (logpW - logqW)*.00000001  #[P,B]

        if k>1:
            max_ = torch.max(elbo, 0)[0] #[B]
            elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_ #[B]

        elbo = torch.mean(elbo) #[1]

        #for printing
        logpx = torch.mean(logpx)
        logpz = torch.mean(logpz)
        logqz = torch.mean(logqz)
        self.x_hat_sigmoid = F.sigmoid(x_hat)

        return elbo, logpx, logpz, logqz, logpW, logqW
Example #14
    def forward(self, x, k=1):
        
        self.B = x.size()[0]
        mu, logvar = self.encode(x)
        z, logpz, logqz = self.sample(mu, logvar, k=k)
        x_mean, x_logvar = self.decode(z)  #[P,B,1]
        logpx = lognormal_decoder(x, x_mean, x_logvar)  #[P,B]

        elbo = logpx + logpz - logqz  #[P,B]


        if k>1:
            max_ = torch.max(elbo, 0)[0] #[B]
            elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_ #[B]

        elbo = torch.mean(elbo) #[1]

        #for printing
        logpx = torch.mean(logpx)
        logpz = torch.mean(logpz)
        logqz = torch.mean(logqz)

        return elbo, logpx, logpz, logqz
Example #15
def singleTagLoss(pred_tag, keypoints):
    """
    associative embedding loss for one image
    """
    eps = 1e-6
    tags = []
    pull = 0
    for i in keypoints:
        tmp = []
        for j in i:
            if j[1]>0:
                tmp.append(pred_tag[j[0]])
        if len(tmp) == 0:
            continue
        tmp = torch.stack(tmp)
        tags.append(torch.mean(tmp, dim=0))
        pull = pull +  torch.mean((tmp - tags[-1].expand_as(tmp))**2)

    if len(tags) == 0:
        return make_input(torch.zeros([1]).float()), make_input(torch.zeros([1]).float())

    tags = torch.stack(tags)[:,0]

    num = tags.size()[0]
    size = (num, num, tags.size()[1])
    A = tags.unsqueeze(dim=1).expand(*size)
    B = A.permute(1, 0, 2)

    diff = A - B
    diff = torch.pow(diff, 2).sum(dim=2)[:,:,0]
    push = torch.exp(-diff)
    push = (torch.sum(push) - num)
    return push/((num - 1) * num + eps) * 0.5, pull/(num + eps)
Example #16
def global_pooling(x):
	# input x [n, c, h, w]
	# output l [n, c]
	s = torch.mean(x, dim=-1)
	s = torch.mean(s, dim=-1)

	return s
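On newer PyTorch versions the same global average pooling can be written in a single call by passing a tuple of dims (assuming a version where reductions accept dim tuples):

import torch

def global_pooling_v2(x):
    # mean over the spatial dims: [n, c, h, w] -> [n, c]
    return torch.mean(x, dim=(-2, -1))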
Example #17
    def angle_length_loss(y_pred, y_true, weights):
        y_true = y_true.permute(0, 2, 3, 1)
        y_pred = y_pred.permute(0, 2, 3, 1)
        weights = weights.permute(0, 2, 3, 1)

        # Single threshold

        # score_per_bundle = {}
        # bundles = ExpUtils.get_bundle_names(HP.CLASSES)[1:]

        nr_of_classes = int(y_true.shape[-1] / 3.)
        scores = torch.zeros(nr_of_classes)

        for idx in range(nr_of_classes):
            y_pred_bund = y_pred[:, :, :, (idx * 3):(idx * 3) + 3].contiguous()
            y_true_bund = y_true[:, :, :, (idx * 3):(idx * 3) + 3].contiguous()  # [x,y,z,3]
            weights_bund = weights[:, :, :, (idx * 3)].contiguous()  # [x,y,z]

            angles = PytorchUtils.angle_last_dim(y_pred_bund, y_true_bund)
            angles_weighted = angles / weights_bund
            #norm lengths to 0-1 to be more equal to angles?? -> peaks are already around 1 -> ok
            lengths = (torch.norm(y_pred_bund, 2., -1) - torch.norm(y_true_bund, 2, -1)) ** 2
            lengths_weighted = lengths * weights_bund

            # Divide by weights.max otherwise lens would be way bigger
            # Flip angles to make it a minimization problem
            combined = -angles_weighted + lengths_weighted / weights_bund.max()

            scores[idx] = torch.mean(combined)

        return torch.mean(scores)
Example #18
    def forward(self, frame, policies):
        self.B = frame.size()[0]

        #Predict mask
        pre_mask = self.predict_mask_nosigmoid(frame)
        mask = F.sigmoid(pre_mask)

        masked_frame = frame * mask
        kls = []
        for i in range(len(policies)):
            policy = policies[i]

            log_dist_mask = policy.action_logdist(masked_frame)
            log_dist_true = policy.action_logdist(frame)

            action_dist_kl = torch.sum((log_dist_true - log_dist_mask)*torch.exp(log_dist_true), dim=1) #[B]
            action_dist_kl = torch.mean(action_dist_kl) # * 1000
            kls.append(action_dist_kl)

        kls = torch.stack(kls)  #[policies]
        action_dist_kl = torch.mean(kls)  #[1], mean over policies

        pre_mask = pre_mask.view(self.B, -1)
        mask_cost = torch.abs(pre_mask + 20)
        mask_cost = torch.mean(mask_cost) * .01

        loss = action_dist_kl + mask_cost

        return loss, action_dist_kl, mask_cost
Example #19
    def encode_and_logprob(self, x):

        for i in range(len(self.first_half_weights)-1):
            x = self.act_func(self.first_half_weights[i](x))

        mean = self.first_half_weights[-1](x)
        logvar = self.q_logvar(x)

        # Sample with the reparameterization trick
        eps = Variable(torch.randn(1, self.z_size))
        x = (torch.exp(.5*logvar) * eps) + mean

        logq = -torch.mean(  logvar.sum(1) + ((x - mean).pow(2)/torch.exp(logvar)).sum(1))
        logp = torch.mean( x.pow(2).sum(1))


        return x, logq+logp
Example #20
    def calculate_loss(self, x, beta=1., average=False):
        # pass through VAE
        x_mean, x_logvar, z1_q, z1_q_mean, z1_q_logvar, z2_q, z2_q_mean, z2_q_logvar, z1_p_mean, z1_p_logvar = self.forward(x)

        # RE
        if self.args.input_type == 'binary':
            RE = log_Bernoulli(x, x_mean, dim=1)
        elif self.args.input_type == 'gray' or self.args.input_type == 'continuous':
            RE = -log_Logistic_256(x, x_mean, x_logvar, dim=1)
        else:
            raise Exception('Wrong input type!')

        # KL
        log_p_z1 = log_Normal_diag(z1_q, z1_p_mean, z1_p_logvar, dim=1)
        log_q_z1 = log_Normal_diag(z1_q, z1_q_mean, z1_q_logvar, dim=1)
        log_p_z2 = self.log_p_z2(z2_q)
        log_q_z2 = log_Normal_diag(z2_q, z2_q_mean, z2_q_logvar, dim=1)
        KL = -(log_p_z1 + log_p_z2 - log_q_z1 - log_q_z2)

        # full loss
        loss = -RE + beta * KL

        if average:
            loss = torch.mean(loss)
            RE = torch.mean(RE)
            KL = torch.mean(KL)

        return loss, RE, KL
Example #21
    def setUp(self, length=3, factor=10, count=1000000,
              seed=None, dtype=torch.float64, device=None):
        '''Set up the test values.

        Args:
            length: Size of the vector.
            factor: To multiply the mean and standard deviation.
            count: Number of samples for Monte-Carlo estimation.
            seed: Seed for the random number generator.
            dtype: The data type.
            device: In which device.
        '''
        if seed is not None:
            torch.manual_seed(seed)

        # input mean and covariance
        self.mu = torch.randn(length, dtype=dtype, device=device) * factor
        self.cov = rand.definite(length, dtype=dtype, device=device,
                                 positive=True, semi=False, norm=factor**2)
        self.var = self.cov.diag()

        # Monte-Carlo estimation of the output mean and variance
        normal = torch.distributions.MultivariateNormal(self.mu, self.cov)
        out_samples = normal.sample((count,)).clamp_(min=0.0)
        self.mc_mu = torch.mean(out_samples, dim=0)
        self.mc_var = torch.var(out_samples, dim=0)
        normal = torch.distributions.MultivariateNormal(self.mu * 0, self.cov)
        out_samples = normal.sample((count,)).clamp_(min=0.0)
        mean = torch.mean(out_samples, dim=0)
        self.mc_zm_cov = cov(out_samples)
        self.mc_zm_corr = self.mc_zm_cov + outer(mean)
Example #22
def cos_sim(in0,in1):
    in0_norm = normalize_tensor(in0)
    in1_norm = normalize_tensor(in1)
    N = in0.size()[0]
    X = in0.size()[2]
    Y = in0.size()[3]

    return torch.mean(torch.mean(torch.sum(in0_norm*in1_norm,dim=1).view(N,1,X,Y),dim=2).view(N,1,1,Y),dim=3).view(N)
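The chained view/mean calls reduce the per-pixel similarity map to one scalar per image; a simpler equivalent (assuming normalize_tensor L2-normalizes along the channel dim) is:

def cos_sim_v2(in0, in1):
    # cosine similarity per pixel, averaged over all spatial positions
    sim = torch.sum(normalize_tensor(in0) * normalize_tensor(in1), dim=1)  # [N,X,Y]
    return sim.view(in0.size(0), -1).mean(dim=1)  # [N]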
Example #23
def torch_pearsonr(x, y):  # https://github.com/pytorch/pytorch/issues/1254
    mean_x = torch.mean(x)
    mean_y = torch.mean(y)
    xm = x.sub(mean_x)
    ym = y.sub(mean_y)
    r_num = xm.dot(ym)
    r_den = torch.norm(xm, 2) * torch.norm(ym, 2)
    r_val = r_num / r_den
    return r_val
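A quick sanity check (illustrative): a perfectly linear relationship should give a correlation of 1.

x = torch.tensor([1., 2., 3., 4.])
y = 2. * x + 1.
print(torch_pearsonr(x, y))  # ~1.0 up to floating-point error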
Example #24
    def forward(self, x=None, warmup=1., inf_net=None):
        # x: [B,3,112,112]
        # q: [B,L] 
        # inf type: 0 is both, 1 is only x, 2 is only y
        # dec type: 0 is both, 1 is only x, 2 is only y

        outputs = {}

        if inf_net is None:
            mu, logvar = self.inference_net(x)
        else:
            mu, logvar = inf_net.inference_net(x)

        z, logpz, logqz = self.sample(mu, logvar) 

        z_dec = self.z_to_dec(z)

        B = z_dec.shape[0]

        # Decode Image
        x_hat = self.image_decoder(z_dec)
        alpha = torch.sigmoid(x_hat)

        beta = Beta(alpha*self.beta_scale, (1.-alpha)*self.beta_scale)
        x_noise = torch.clamp(x + torch.FloatTensor(x.shape).uniform_(0., 1./256.).cuda(), min=1e-5, max=1-1e-5)
        logpx = beta.log_prob(x_noise)  #[B,3,112,112]; uniform noise dequantizes the pixels

        logpx = torch.sum(logpx.view(B, -1), 1)  #[B]

        log_ws = logpx + logpz - logqz

        outputs['logpx'] = torch.mean(logpx)
        outputs['x_recon'] = alpha
        outputs['welbo'] = torch.mean(logpx + warmup*( logpz - logqz))
        outputs['elbo'] = torch.mean(log_ws)
        outputs['logws'] = log_ws
        outputs['z'] = z
        outputs['logpz'] = torch.mean(logpz)
        outputs['logqz'] = torch.mean(logqz)
        outputs['logvar'] = logvar

        return outputs
Example #25
 def ganLossG(self, d_fake):
     if config.GAN_SETTING == "WGAN":
         loss = -torch.mean(d_fake)
     elif config.GAN_SETTING == "LSGAN":
         loss = 0.5 * torch.mean((d_fake - 1)**2)
     else:
         real_labels = Variable(torch.ones(d_fake.size(0)).cuda())
         fake_labels = Variable(torch.zeros(d_fake.size(0)).cuda())
         loss = self.bce_loss(d_fake, real_labels)
     return loss
Example #26
def MVNError(output, gt):
    outMean = torch.mean(output)
    outStd = torch.std(output)
    output = (output - outMean)/outStd
    gtMean = torch.mean(gt)
    gtStd = torch.std(gt)
    gt = (gt - gtMean)/gtStd
    d = output - gt
    diff = torch.sqrt(torch.mean(d * d))
    return diff
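MVNError is the RMSE between the two inputs after each has been standardized to zero mean and unit variance, so it is invariant to affine rescaling of either argument. For example (illustrative):

a = torch.randn(100)
print(MVNError(a, a))          # 0.0: identical after normalization
print(MVNError(a, 3 * a + 5))  # also ~0.0: the shift and scale are normalized away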
Example #27
    def forward(self, x=None, warmup=1., inf_net=None):

        outputs = {}
        B = x.shape[0]

        if inf_net is None:
            z, logits = self.q.sample(x)
        else:
            # both branches must yield the Bernoulli logits used below
            z, logits = inf_net.sample(x)

        probs_q = torch.sigmoid(logits)
        probs_q = torch.clamp(probs_q, min=.00000001, max=.9999999)
        probs_p = torch.ones(B, self.z_size).cuda() *.5
        KL = probs_q*torch.log(probs_q/probs_p) + (1-probs_q)*torch.log((1-probs_q)/(1-probs_p))
        KL = torch.sum(KL, dim=1)

        # Decode Image
        x_hat = self.generator.forward(z)
        alpha = torch.sigmoid(x_hat)
        beta = Beta(alpha*self.beta_scale, (1.-alpha)*self.beta_scale)
        x_noise = torch.clamp(x + torch.FloatTensor(x.shape).uniform_(0., 1./256.).cuda(), min=1e-5, max=1-1e-5)
        logpx = beta.log_prob(x_noise) #[120,3,112,112]  # add uniform noise here

        logpx = torch.sum(logpx.view(B, -1),1) # [PB]  * self.w_logpx

        log_ws = logpx - KL #+ logpz - logqz

        outputs['logpx'] = torch.mean(logpx)
        outputs['x_recon'] = alpha
        outputs['welbo'] = torch.mean(logpx + warmup*(KL))
        outputs['elbo'] = torch.mean(log_ws)
        outputs['logws'] = log_ws
        outputs['z'] = z
        outputs['logpz'] = torch.zeros(1) #torch.mean(logpz)
        outputs['logqz'] = torch.mean(KL)

        return outputs
Example #28
 def compute_accuracy(self, x, y, dataset):
     if dataset == 'CelebA':
         x = F.sigmoid(x)
         predicted = self.threshold(x)
         correct = (predicted == y).float()
         accuracy = torch.mean(correct, dim=0) * 100.0
     else:
         _, predicted = torch.max(x, dim=1)
         correct = (predicted == y).float()
         accuracy = torch.mean(correct) * 100.0
     return accuracy
Example #29
def n_mpjpe(predicted, target):
    """
    Normalized MPJPE (scale only), adapted from:
    https://github.com/hrhodin/UnsupervisedGeometryAwareRepresentationLearning/blob/master/losses/poses.py
    """
    assert predicted.shape == target.shape
    
    norm_predicted = torch.mean(torch.sum(predicted**2, dim=3, keepdim=True), dim=2, keepdim=True)
    norm_target = torch.mean(torch.sum(target*predicted, dim=3, keepdim=True), dim=2, keepdim=True)
    scale = norm_target / norm_predicted
    return mpjpe(scale * predicted, target)
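mpjpe itself is not shown in this example; a definition consistent with how it is called above (mean Euclidean distance over joints, after the scale correction) would be:

def mpjpe(predicted, target):
    # mean per-joint position error: Euclidean distance over the last dim
    assert predicted.shape == target.shape
    return torch.mean(torch.norm(predicted - target, dim=len(target.shape) - 1))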
Example #30
        def get_image_mean(img_tensor):
            """
            :param img_tensor: torch.FloatTensor with shape [3, 480, 640]
            :return: torch.FloatTensor with shape [3]
            """
            img_mean = torch.mean(img_tensor, 1)
            img_mean = torch.mean(img_mean, 1)
            return img_mean
Example #31
 def mse(self, pred, label, weight):
     loss = weight * (pred - label) ** 2
     return torch.mean(loss)
Example #32
def train_NAVAR(data,
                maxlags=5,
                hidden_nodes=256,
                dropout=0,
                epochs=200,
                learning_rate=1e-4,
                batch_size=300,
                lambda1=0,
                val_proportion=0.0,
                weight_decay=0,
                check_every=1000,
                hidden_layers=1,
                normalize=True,
                split_timeseries=False,
                lstm=False):
    """
    Trains a Neural Additive Vector Autoregression (NAVAR) model on time series data and scores the
    potential causal links between variables.

    Args:
        data:  ndarray
            T (time points) x N (variables) input data
        maxlags: int
            Maximum number of time lags
        hidden_nodes: int
            Number of hidden nodes in each layers
        dropout: float
            Dropout probability in the hidden layers
        epochs: int
            Number of training epochs
        learning_rate: float
            Learning rate for Adam optimizer
        batch_size: int
            The size of the training batches
        lambda1: float
            Parameter for penalty to the contributions
        val_proportion: float
            Proportion of the dataset used for validation
        weight_decay: float
            Weight decay used in neural networks
        check_every: int
            Every 'check_every'th epoch we print training progress
        hidden_layers: int
            Number of hidden layers in the neural networks
        normalize: bool
            Indicates whether we should normalize every variable
        split_timeseries: int
            If the original time series consists of multiple shorter time series, this argument should indicate the
            original time series length. Otherwise should be zero.
        lstm: bool
            Indicates whether we should use the LSTM model (instead of MLP).

    Returns:
        causal_matrix: ndarray
            N (variables) x N (variables) array containing the scores for every causal link.
            causal_matrix[i, j] indicates the score for potential link i -> j

        contributions: ndarray
            N^2 x training_examples array containing the contributions from and to every variable
            for every sample in the training_set

        loss_val: float
            Validation loss of the model after training
    """
    # T is the number of time steps, N the number of variables
    T, N = data.shape

    # initialize the NAVAR model
    if lstm:
        model = NAVARLSTM(N,
                          hidden_nodes,
                          maxlags,
                          dropout=dropout,
                          hidden_layers=hidden_layers)
    else:
        model = NAVAR(N,
                      hidden_nodes,
                      maxlags,
                      dropout=dropout,
                      hidden_layers=hidden_layers)

    # use Mean Squared Error and the Adam optimizer
    criterion = torch.nn.MSELoss(reduction='mean')
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)

    # obtain the training and validation data
    dataset = DataLoader(data,
                         maxlags,
                         normalize=normalize,
                         val_proportion=val_proportion,
                         split_timeseries=split_timeseries,
                         lstm=lstm)
    X_train, Y_train = dataset.train_Xs, dataset.train_Ys
    X_val, Y_val = dataset.val_Xs, dataset.val_Ys
    # push model and data to GPU if available
    if torch.cuda.is_available():
        model = model.cuda()
        X_train = X_train.cuda()
        Y_train = Y_train.cuda()
        if X_val is not None:
            X_val = X_val.cuda()
            Y_val = Y_val.cuda()

    num_training_samples = X_train.shape[0]
    total_loss = 0
    loss_val = 0

    # start of training loop
    batch_counter = 0
    for t in range(1, epochs + 1):
        #obtain batches
        batch_indeces_list = []
        if batch_size < num_training_samples:
            batch_perm = np.random.choice(num_training_samples,
                                          size=num_training_samples,
                                          replace=False)
            for i in range(int(num_training_samples / batch_size) + 1):
                start = i * batch_size
                batch_i = batch_perm[start:start + batch_size]
                if len(batch_i) > 0:
                    batch_indeces_list.append(batch_perm[start:start +
                                                         batch_size])
        else:
            batch_indeces_list = [np.arange(num_training_samples)]

        for batch_indeces in batch_indeces_list:
            batch_counter += 1
            X_batch = X_train[batch_indeces]
            Y_batch = Y_train[batch_indeces]

            # forward pass to calculate predictions and contributions
            predictions, contributions = model(X_batch)

            # calculate the loss
            loss_pred = criterion(predictions, Y_batch)
            loss_l1 = (lambda1 / N) * torch.mean(
                torch.sum(torch.abs(contributions), dim=1))
            loss = loss_pred + loss_l1
            total_loss += loss.item()  # detach so the graph is not retained across batches

            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # every 'check_every' epochs we calculate and print the validation loss
        if t % check_every == 0:
            model.eval()
            if val_proportion > 0.0:
                val_pred, val_contributions = model(X_val)
                loss_val = criterion(val_pred, Y_val)
            model.train()

            print(
                f'iteration {t}. Loss: {total_loss/batch_counter}  Val loss: {loss_val}'
            )
            total_loss = 0
            batch_counter = 0

    # use the trained model to calculate the causal scores
    model.eval()

    # X_train can be too big to fit in the GPU, then this call raises "RuntimeError: CUDA out of memory."
    if split_timeseries:
        y_pred, contributions = model(X_train[:batch_size])
    else:
        y_pred, contributions = model(X_train)
    causal_matrix = torch.std(contributions,
                              dim=0).view(N, N).detach().cpu().numpy()

    return causal_matrix, contributions, loss_val
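A minimal invocation, assuming train_NAVAR and its NAVAR/DataLoader dependencies are importable and data is a T x N array (illustrative only):

import numpy as np

data = np.random.randn(1000, 5)  # T=1000 time points, N=5 variables
causal_matrix, contributions, loss_val = train_NAVAR(
    data, maxlags=5, epochs=200, check_every=50, val_proportion=0.1)
print(causal_matrix.shape)  # (5, 5): score for every potential link i -> j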
Example #33
                            generator=generator1, 
                            discriminator=discriminator1, 
                            EP=EP,
                            arguments=arguments,
                            criterion=criterion,
                            conditional_gen=False,
                            source_num=1)



    elif tr_method == 'ML':
        if loss == 'Euclidean': 
            criterion = nn.MSELoss()
        elif loss == 'Poisson':
            eps = 1e-20
            criterion = lambda lam, tar: torch.mean(-tar*torch.log(lam+eps) + lam)
        generative_trainer(loader_mix=loader_mix,
                           train_loader=loader1,
                           generator=generator1, 
                           EP=EP,
                           arguments=arguments,
                           criterion=criterion,
                           conditional_gen=False)
        
    # save models
    savepath = os.path.join(os.getcwd(), 'model_parameters')
    if not os.path.exists(savepath):
        os.mkdir(savepath) 

    ut.save_models([generator1], [discriminator1], exp_info,
                    savepath, arguments)
Example #34
def top2gating(logits: Tensor, capacity_factor: float,
               min_capacity: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """Implements Top2Gating on logits."""
    # everything is in fp32 in this function
    gates = F.softmax(logits, dim=1)

    capacity = _capacity(gates, torch.tensor(capacity_factor * 2),
                         torch.tensor(min_capacity))

    # Create a mask for 1st's expert per token
    indices1_s = torch.argmax(gates, dim=1)
    num_experts = int(gates.shape[1])
    mask1 = F.one_hot(indices1_s, num_classes=num_experts)

    # Create a mask for 2nd's expert per token using Gumbel-max trick
    # https://timvieira.github.io/blog/post/2014/07/31/gumbel-max-trick/
    logits_w_noise = logits + gumbel_rsample(logits.shape,
                                             device=logits.device)
    # Replace top-expert with min value
    logits_except1 = logits_w_noise.masked_fill(mask1.bool(), float("-inf"))
    indices2_s = torch.argmax(logits_except1, dim=1)
    mask2 = F.one_hot(indices2_s, num_classes=num_experts)

    # Compute locations in capacity buffer
    locations1 = torch.cumsum(mask1, dim=0) - 1
    locations2 = torch.cumsum(mask2, dim=0) - 1
    # Update 2nd's location by accounting for locations of 1st
    locations2 += torch.sum(mask1, dim=0, keepdim=True)

    # gating decisions
    exp_counts = torch.sum(mask1, dim=0).detach().to('cpu')

    # Compute l_aux
    me = torch.mean(gates, dim=0)
    ce = torch.mean(mask1.float(), dim=0)
    l_aux = torch.mean(me * ce) * num_experts * num_experts

    # Remove locations outside capacity from mask
    mask1 *= torch.lt(locations1, capacity)
    mask2 *= torch.lt(locations2, capacity)

    # Store the capacity location for each token
    locations1_s = torch.sum(locations1 * mask1, dim=1)
    locations2_s = torch.sum(locations2 * mask2, dim=1)

    # Normalize gate probabilities
    mask1_float = mask1.float()
    mask2_float = mask2.float()
    gates1_s = einsum("se,se->s", gates, mask1_float)
    gates2_s = einsum("se,se->s", gates, mask2_float)
    denom_s = gates1_s + gates2_s
    # Avoid divide-by-zero
    denom_s = torch.clamp(denom_s, min=torch.finfo(denom_s.dtype).eps)
    gates1_s /= denom_s
    gates2_s /= denom_s

    # Calculate combine_weights and dispatch_mask
    gates1 = einsum("s,se->se", gates1_s, mask1_float)
    gates2 = einsum("s,se->se", gates2_s, mask2_float)
    locations1_sc = _one_hot_to_float(locations1_s, capacity)
    locations2_sc = _one_hot_to_float(locations2_s, capacity)
    combine1_sec = einsum("se,sc->sec", gates1, locations1_sc)
    combine2_sec = einsum("se,sc->sec", gates2, locations2_sc)
    combine_weights = combine1_sec + combine2_sec
    dispatch_mask = combine_weights.bool()

    return l_aux, combine_weights, dispatch_mask, exp_counts
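The l_aux term is the GShard-style load-balancing loss: the mean gate probability per expert dotted with the fraction of tokens whose top-1 choice is that expert, scaled by num_experts^2. A standalone sketch of just that piece (illustrative):

import torch
import torch.nn.functional as F

def load_balancing_loss(logits):
    gates = F.softmax(logits, dim=1)                       # [tokens, experts]
    mask1 = F.one_hot(torch.argmax(gates, dim=1),
                      num_classes=gates.shape[1]).float()  # top-1 assignments
    me = torch.mean(gates, dim=0)   # mean gate probability per expert
    ce = torch.mean(mask1, dim=0)   # fraction of tokens routed to each expert
    return torch.mean(me * ce) * gates.shape[1] ** 2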
Example #35
def _run(data_file_path,
         dataset,
         data_generator,
         num_batches,
         vocabulary_size,
         context_size,
         num_noise_words,
         vec_dim,
         num_epochs,
         batch_size,
         lr,
         model_ver,
         vec_combine_method,
         save_all,
         generate_plot,
         model_ver_is_dbow,
         gpu):

    if torch.cuda.is_available():
        torch.cuda.set_device(gpu)
        print(torch.cuda.current_device())

    if model_ver_is_dbow:
        model = DBOW(vec_dim, num_docs=len(dataset), num_words=vocabulary_size)
    else:
        model = DM(vec_dim, num_docs=len(dataset), num_words=vocabulary_size)

    cost_func = NegativeSampling()
    optimizer = Adam(params=model.parameters(), lr=lr)

    if torch.cuda.is_available():
        model.cuda()

    print("Dataset comprised of {:d} documents.".format(len(dataset)))
    print("Vocabulary size is {:d}.\n".format(vocabulary_size))
    print("Training started.")

    best_loss = float("inf")
    prev_model_file_path = None

    for epoch_i in range(num_epochs):
        epoch_start_time = time.time()
        loss = []

        for batch_i in range(num_batches):
            batch = next(data_generator)
            if torch.cuda.is_available():
                batch.cuda_()

            if model_ver_is_dbow:
                x = model.forward(batch.doc_ids, batch.target_noise_ids)
            else:
                x = model.forward(
                    batch.context_ids,
                    batch.doc_ids,
                    batch.target_noise_ids)

            x = cost_func.forward(x)

            loss.append(x.item())
            model.zero_grad()
            x.backward()
            optimizer.step()
            _print_progress(epoch_i, batch_i, num_batches)

        # end of epoch
        loss = torch.mean(torch.FloatTensor(loss))
        is_best_loss = loss < best_loss
        best_loss = min(loss, best_loss)

        state = {
            'epoch': epoch_i + 1,
            'model_state_dict': model.state_dict(),
            'best_loss': best_loss,
            'optimizer_state_dict': optimizer.state_dict()
        }

        prev_model_file_path = save_training_state(
            data_file_path,
            model_ver,
            vec_combine_method,
            context_size,
            num_noise_words,
            vec_dim,
            batch_size,
            lr,
            epoch_i,
            loss,
            state,
            save_all,
            generate_plot,
            is_best_loss,
            prev_model_file_path,
            model_ver_is_dbow)

        epoch_total_time = round(time.time() - epoch_start_time)
        print(" ({:d}s) - loss: {:.4f}".format(epoch_total_time, loss))
Example #36
def get_loss(y_, y):
    return torch.mean((y_ - y)**2)  # torch.mean() computes the mean; MSE between prediction and target
Example #37
    def log_test_results(self, n_epoch):
        '''
            Calculates errors on test dataset and logs them using TBVisualiser
        '''
        test_start_time = time.time()

        # Set maximum number of batches to test
        max_test_iters = min(self.cfg['General'][0]['max_test_iters'],
                             len(self.dataloader_test))

        if max_test_iters == 0:
            max_test_iters = 1

        # Set model to evaluation mode
        self.model.is_train = False
        self.model.eval()

        # Evaluate max_test_iters batches
        test_errors = OrderedDict()
        test_scalars = OrderedDict()
        for i_test_batch, test_batch in enumerate(self.dataloader_test):

            if i_test_batch == max_test_iters:
                break

            reconstr_image, _ = self.model.forward(test_batch)
            errors = self.model.get_current_errors()

            # Save errors from current batch
            for label, error in errors.items():
                if label in test_errors:
                    test_errors[label] += error
                else:
                    test_errors[label] = error

            scalars = self.model.get_current_scalars()

            for label, scalar in scalars.items():
                if label in test_scalars:
                    if label not in ['au_predictions', 'au_ground_truths']:
                        test_scalars[label] += scalar
                    else:
                        test_scalars[label] = torch.cat(
                            (test_scalars[label], scalar), dim=0)
                else:
                    test_scalars[label] = scalar

            # Log first image and reconstructed image
            if i_test_batch == 0:
                original_image = visualisation_utils.tensor2im(
                    test_batch[0][0])
                reconstr_image = visualisation_utils.tensor2im(
                    reconstr_image[0])

                self.tb_visualiser.log_images(
                    {
                        'Original Image': original_image,
                        'Reconstructed Image': reconstr_image
                    },
                    self.total_steps,
                    is_train=False)
        # Normalise errors
        for label in test_errors.keys():
            test_errors[label] /= max_test_iters

        # Log errors to tensorboard
        test_duration = time.time() - test_start_time
        self.tb_visualiser.plot_scalars(test_errors,
                                        self.total_steps,
                                        is_train=False)
        visualisation_utils.print_current_test_errors(n_epoch, test_duration,
                                                      test_errors)

        # Normalise scalars
        for label in test_scalars.keys():
            if label not in ['au_predictions', 'au_ground_truths']:
                test_scalars[label] /= max_test_iters

        # Calculate F1 scores
        f1_denominator = 2 * test_scalars['true_pos'] + test_scalars['false_neg'] \
                       + test_scalars['false_pos']
        f1_denominator += torch.finfo(torch.float).tiny
        individual_f1_scores = 2 * test_scalars['true_pos'] / f1_denominator

        all_tp = torch.sum(test_scalars['true_pos'])
        all_fn = torch.sum(test_scalars['false_neg'])
        all_fp = torch.sum(test_scalars['false_pos']) + torch.finfo(
            torch.float).tiny

        average_f1_score = 2 * all_tp / (2 * all_tp + all_fn + all_fp)

        f1_scores = {
            'F1 Score for AU1': individual_f1_scores[0].item(),
            'F1 Score for AU2': individual_f1_scores[1].item(),
            'F1 Score for AU4': individual_f1_scores[2].item(),
            'F1 Score for AU5': individual_f1_scores[3].item(),
            'F1 Score for AU6': individual_f1_scores[4].item(),
            'F1 Score for AU9': individual_f1_scores[5].item(),
            'F1 Score for AU12': individual_f1_scores[6].item(),
            'F1 Score for AU15': individual_f1_scores[7].item(),
            'F1 Score for AU17': individual_f1_scores[8].item(),
            'F1 Score for AU20': individual_f1_scores[9].item(),
            'F1 Score for AU25': individual_f1_scores[10].item(),
            'F1 Score for AU26': individual_f1_scores[11].item(),
            'Average of F1 Scores': torch.mean(individual_f1_scores).item(),
            'Overall F1 Score': average_f1_score.item()
        }

        # Calculate Accuracies
        num_predictions = test_scalars['true_pos'] + test_scalars['true_neg'] \
                            + test_scalars['false_pos'] + test_scalars['false_neg']

        accuracy_tensor = (test_scalars['true_pos'] +
                           test_scalars['true_neg']) / num_predictions

        accuracies = {
            'Accuracy for AU1': accuracy_tensor[0].item(),
            'Accuracy for AU2': accuracy_tensor[1].item(),
            'Accuracy for AU4': accuracy_tensor[2].item(),
            'Accuracy for AU5': accuracy_tensor[3].item(),
            'Accuracy for AU6': accuracy_tensor[4].item(),
            'Accuracy for AU9': accuracy_tensor[5].item(),
            'Accuracy for AU12': accuracy_tensor[6].item(),
            'Accuracy for AU15': accuracy_tensor[7].item(),
            'Accuracy for AU17': accuracy_tensor[8].item(),
            'Accuracy for AU20': accuracy_tensor[9].item(),
            'Accuracy for AU25': accuracy_tensor[10].item(),
            'Accuracy for AU26': accuracy_tensor[11].item(),
            'Average Accuracy': torch.mean(accuracy_tensor).item()
        }

        # Calculate 2AFC Scores
        component_2afc_tensor, average_2afc = metric_utils.compute_2AFC(
            test_scalars['au_ground_truths'], test_scalars['au_predictions'])

        values_2afc = {
            '2AFC Score for AU1': component_2afc_tensor[0].item(),
            '2AFC Score for AU2': component_2afc_tensor[1].item(),
            '2AFC Score for AU4': component_2afc_tensor[2].item(),
            '2AFC Score for AU5': component_2afc_tensor[3].item(),
            '2AFC Score for AU6': component_2afc_tensor[4].item(),
            '2AFC Score for AU9': component_2afc_tensor[5].item(),
            '2AFC Score for AU12': component_2afc_tensor[6].item(),
            '2AFC Score for AU15': component_2afc_tensor[7].item(),
            '2AFC Score for AU17': component_2afc_tensor[8].item(),
            '2AFC Score for AU20': component_2afc_tensor[9].item(),
            '2AFC Score for AU25': component_2afc_tensor[10].item(),
            '2AFC Score for AU26': component_2afc_tensor[11].item(),
            'Overall 2AFC Score': average_2afc.item()
        }

        del test_scalars['true_pos']
        del test_scalars['true_neg']
        del test_scalars['false_pos']
        del test_scalars['false_neg']
        del test_scalars['au_ground_truths']
        del test_scalars['au_predictions']

        test_scalars['F1 Scores'] = f1_scores
        test_scalars['Accuracies'] = accuracies
        test_scalars['2AFC Scores'] = values_2afc

        # Log Metrics
        self.tb_visualiser.plot_scalars(test_scalars,
                                        self.total_steps,
                                        is_train=False)

        # Set model back to training mode
        self.model.is_train = True
        self.model.train()
Example #38
def train(fv, model_name, criterion, balance=False, batchsize=64, size=0):
    if fv == "matlab":
        dloader = matloader
    else:
        dloader = fvloader

    train_data = dloader.load_train_data(size=size, balance=balance, fv=fv)
    val_data = dloader.load_val_data(size=size, fv=fv)
    test_data = dloader.load_test_data(size=size, fv=fv)
    # model_name = "transformer_%s_size%d_bce" % (fv, size)
    model_dir = os.path.join("./modeldir/%s" % model_name)
    model_pth = os.path.join(model_dir, "model.pth")

    writer = tensorboardX.SummaryWriter(model_dir)

    if os.path.exists(model_pth):
        print("------load model--------")
        model = torch.load(model_pth)
    else:
        # model = Transformer(fv, NUM_HEADS=4, NUM_LAYERS=3).cuda()
        model = Transformer(fv).cuda()
    model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.0001, weight_decay=0.001)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #         optimizer, factor=0.5,
    #         patience=30, min_lr=1e-4)

    epochs = 2000
    step = 1
    val_step = 1
    max_f1 = 0.0

    for e in range(epochs):
        model.train()
        print("------epoch--------", e)
        st = time.time()

        train_shuffle = dloader.shuffle(train_data)
        for item in dloader.batch_fv(train_shuffle, batch=batchsize):

            # for name, param in model.named_parameters():
            #     writer.add_histogram(
            #         name, param.clone().cpu().data.numpy(), step)

            # writer.add_histogram(
            #     "grad/"+name, param.grad.clone().cpu().data.numpy(), step)
            model.zero_grad()

            genes, nimgs, labels, timesteps = item
            inputs = torch.from_numpy(nimgs).type(torch.cuda.FloatTensor)

            gt = torch.from_numpy(labels).type(torch.cuda.FloatTensor)
            pd = model(inputs)

            # loss = criterion(pd, gt)
            all_loss = criterion(pd, gt)
            label_loss = torch.mean(all_loss, dim=0)
            loss = torch.mean(label_loss)
            # for i in range(6):
            #     writer.add_scalar("train sl_%d_loss" % i,
            #                       label_loss[i].item(), step)

            train_pd = torch_util.threshold_tensor_batch(pd)
            np_pd = train_pd.data.cpu().numpy()
            torch_util.torch_metrics(
                labels, np_pd, writer, step, mode="train")

            writer.add_scalar("train loss", loss, step)
            loss.backward()
            optimizer.step()
            step += 1

        et = time.time()
        writer.add_scalar("train time", et - st, e)
        for param_group in optimizer.param_groups:
            writer.add_scalar("lr", param_group['lr'], e)

        # run_origin_train(model, imbtrain_data, writer, e, criterion)

        if e % 1 == 0:
            val_loss, val_f1 = run_val(
                model, dloader, val_data, writer, val_step, criterion)
            # scheduler.step(val_loss)
            val_step += 1
            if e == 0:
                start_loss = val_loss
                min_loss = start_loss

            # if val_loss > 2 * min_loss:
            #     print("early stopping at %d" % e)
            #     break
            # if e % 50 == 0:
            #     pt = os.path.join(model_dir, "%d.pt" % e)
            #     torch.save(model.state_dict(), pt)
            #     result = os.path.join(model_dir, "result_epoch%d.txt" % e)
            #     run_test(model, test_data, result)

            if min_loss > val_loss or max_f1 < val_f1:
                if min_loss > val_loss:
                    print("---------save best----------", "loss", val_loss)
                    min_loss = val_loss
                if max_f1 < val_f1:
                    print("---------save best----------", "f1", val_f1)
                    max_f1 = val_f1
                torch.save(model, model_pth)
                result = os.path.join(model_dir, "result_epoch%d.txt" % e)
                run_test(model, dloader, test_data, result)
Example #39
 def __init__(self, tensor):
     """tensor is taken as a sample to calculate the mean and std"""
     self.mean = torch.mean(tensor)
     self.std = torch.std(tensor)
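Only the constructor is shown; a plausible completion that applies the stored statistics (an assumption, not from the original) is:

class Normalizer:
    def __init__(self, tensor):
        """tensor is taken as a sample to calculate the mean and std"""
        self.mean = torch.mean(tensor)
        self.std = torch.std(tensor)

    def __call__(self, tensor):
        # standardize with the stored sample statistics (hypothetical method)
        return (tensor - self.mean) / self.std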
Example #40
 def __call__(self, input, target):
     # Average across channels in order to get the final score
     class_idx = torch.arange(input.shape[1]).to(input.device)
     input = torch.argmax(input, axis=1)==class_idx[:,None,None,None,None]
     input = input.transpose(1,0)
     return torch.mean(compute_per_channel_dice(input, target, epsilon=self.epsilon))
Example #41
def main(args):
    ts = time.strftime('%Y-%b-%d-%H:%M:%S', time.gmtime())

    splits = ['train', 'valid'] + (['test'] if args.test else [])

    datasets = OrderedDict()
    for split in splits:
        datasets[split] = PTB(data_dir=args.data_dir,
                              split=split,
                              create_data=args.create_data,
                              max_sequence_length=args.max_sequence_length,
                              min_occ=args.min_occ)

    model = SentenceVAE(vocab_size=datasets['train'].vocab_size,
                        sos_idx=datasets['train'].sos_idx,
                        eos_idx=datasets['train'].eos_idx,
                        pad_idx=datasets['train'].pad_idx,
                        unk_idx=datasets['train'].unk_idx,
                        max_sequence_length=args.max_sequence_length,
                        embedding_size=args.embedding_size,
                        rnn_type=args.rnn_type,
                        hidden_size=args.hidden_size,
                        word_dropout=args.word_dropout,
                        embedding_dropout=args.embedding_dropout,
                        latent_size=args.latent_size,
                        num_layers=args.num_layers,
                        bidirectional=args.bidirectional)

    if torch.cuda.is_available():
        model = model.cuda()

    print(model)

    if args.tensorboard_logging:
        writer = SummaryWriter(
            os.path.join(args.logdir, experiment_name(args, ts)))
        writer.add_text("model", str(model))
        writer.add_text("args", str(args))
        writer.add_text("ts", ts)

    save_model_path = os.path.join(args.save_model_path, ts)
    os.makedirs(save_model_path)

    total_steps = (len(datasets["train"]) // args.batch_size) * args.epochs
    print("Train dataset size", total_steps)

    def kl_anneal_function(anneal_function, step):
        if anneal_function == 'identity':
            return 1
        if anneal_function == 'linear':
            if args.warmup is None:
                return 1 - (total_steps - step) / total_steps
            else:
                warmup_steps = (total_steps / args.epochs) * args.warmup
                return 1 - (warmup_steps - step
                            ) / warmup_steps if step < warmup_steps else 1.0

    ReconLoss = torch.nn.NLLLoss(size_average=False,
                                 ignore_index=datasets['train'].pad_idx)

    def loss_fn(logp, target, length, mean, logv, anneal_function, step):

        # cut-off unnecessary padding from target, and flatten
        target = target[:, :torch.max(length).data[0]].contiguous().view(-1)
        logp = logp.view(-1, logp.size(2))

        # Negative Log Likelihood
        recon_loss = ReconLoss(logp, target)

        # KL Divergence
        KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
        KL_weight = kl_anneal_function(anneal_function, step)

        return recon_loss, KL_loss, KL_weight

    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.Tensor
    step = 0
    for epoch in range(args.epochs):

        for split in splits:

            data_loader = DataLoader(dataset=datasets[split],
                                     batch_size=args.batch_size,
                                     shuffle=split == 'train',
                                     num_workers=cpu_count(),
                                     pin_memory=torch.cuda.is_available())

            tracker = defaultdict(tensor)

            # Enable/Disable Dropout
            if split == 'train':
                model.train()
            else:
                model.eval()

            for iteration, batch in enumerate(data_loader):

                batch_size = batch['input'].size(0)

                for k, v in batch.items():
                    if torch.is_tensor(v):
                        batch[k] = to_var(v)

                # Forward pass
                logp, mean, logv, z = model(batch['input'], batch['length'])

                # loss calculation
                recon_loss, KL_loss, KL_weight = loss_fn(
                    logp, batch['target'], batch['length'], mean, logv,
                    args.anneal_function, step)

                if split == 'train':
                    loss = (recon_loss + KL_weight * KL_loss) / batch_size
                else:
                    # report complete elbo when validation
                    loss = (recon_loss + KL_loss) / batch_size

                # backward + optimization
                if split == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # bookkeeping
                tracker['negELBO'] = torch.cat(
                    (tracker['negELBO'], loss.data.unsqueeze(0)))

                if args.tensorboard_logging:
                    neg_elbo = (recon_loss + KL_loss) / batch_size
                    writer.add_scalar("%s/Negative_ELBO" % split.upper(),
                                      neg_elbo.data[0],
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/Recon_Loss" % split.upper(),
                                      recon_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Loss" % split.upper(),
                                      KL_loss.data[0] / batch_size,
                                      epoch * len(data_loader) + iteration)
                    writer.add_scalar("%s/KL_Weight" % split.upper(),
                                      KL_weight,
                                      epoch * len(data_loader) + iteration)

                if (iteration % args.print_every == 0
                        or iteration + 1 == len(data_loader)):
                    logger.info(
                        "%s Batch %04d/%i, Loss %9.4f, Recon-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                        % (split.upper(), iteration, len(data_loader) - 1,
                           loss.item(), recon_loss.item() / batch_size,
                           KL_loss.item() / batch_size, KL_weight))

                if split == 'valid':
                    if 'target_sents' not in tracker:
                        tracker['target_sents'] = list()
                    tracker['target_sents'] += idx2word(
                        batch['target'].data,
                        i2w=datasets['train'].get_i2w(),
                        pad_idx=datasets['train'].pad_idx)
                    tracker['z'] = torch.cat((tracker['z'], z.data), dim=0)

            logger.info("%s Epoch %02d/%i, Mean Negative ELBO %9.4f" %
                        (split.upper(), epoch, args.epochs,
                         torch.mean(tracker['negELBO'])))

            if args.tensorboard_logging:
                writer.add_scalar("%s-Epoch/NegELBO" % split.upper(),
                                  torch.mean(tracker['negELBO']), epoch)

            # save a dump of all sentences and the encoded latent space
            if split == 'valid':
                dump = {
                    'target_sents': tracker['target_sents'],
                    'z': tracker['z'].tolist()
                }
                os.makedirs(os.path.join('dumps', ts), exist_ok=True)
                with open(
                        os.path.join('dumps', ts, 'valid_E%i.json' % epoch),
                        'w') as dump_file:
                    json.dump(dump, dump_file)

            # save checkpoint
            if split == 'train':
                checkpoint_path = os.path.join(save_model_path,
                                               "E%i.pytorch" % (epoch))
                torch.save(model.state_dict(), checkpoint_path)
                logger.info("Model saved at %s" % checkpoint_path)

    if args.num_samples:
        torch.cuda.empty_cache()
        model.eval()
        with torch.no_grad():
            print(f"Generating {args.num_samples} samples")
            generations, _ = model.inference(n=args.num_samples)
            vocab = datasets["train"].i2w

            print(
                "Sampled latent codes from z ~ N(0, I), generated sentences:")
            for i, generation in enumerate(generations, start=1):
                sentence = [vocab[str(word.item())] for word in generation]
                print(f"{i}:", " ".join(sentence))
    def message(self, x_j, pos_i, pos_j):
        if x_j is None:
            x_j = pos_j

        # Get offsets from points
        offsets = self.offset_mlp(x_j)

        # Reshape offsets to shape [SUM n_neighbors(n_points), n_kpoints, kp_dim]
        offsets = offsets.view((-1, self.num_points, self.kernel_dim))

        # Rescale offset for this layer
        offsets *= self.kp_extent

        # Center every neighborhood [SUM n_neighbors(n_points), dim]
        neighbors = pos_j - pos_i

        # Number of points
        n_points = neighbors.shape[0]

        # Get points kernels and add offsets
        K_points = self.kernel
        K_points = K_points.float().view((-1, 3)).unsqueeze(0)
        K_points_deformed = K_points + offsets
        self.internal_losses["permissive_loss"] = permissive_loss(
            K_points_deformed, self.radius)

        # Get all difference matrices [SUM n_neighbors(n_points), n_kpoints, dim]
        neighbors = neighbors.unsqueeze(1)

        differences = neighbors - K_points_deformed

        sq_distances = (differences**2).sum(-1)

        # Get Kernel point influences [n_points, n_kpoints, n_neighbors]
        if self.KP_influence == "constant":
            # Every point gets an influence of 1.
            all_weights = torch.ones_like(sq_distances)

        elif self.KP_influence == "linear":
            # Influence decreases linearly with distance and reaches zero at d = kp_extent.
            all_weights = 1.0 - (torch.sqrt(sq_distances) / (self.kp_extent))
            all_weights[all_weights < 0] = 0.0

        elif self.KP_influence == "square":
            # Influence decreases with the square of the distance and reaches zero at d = kp_extent.
            all_weights = 1.0 - (sq_distances / (self.kp_extent**2))
            all_weights[all_weights < 0] = 0.0

        else:
            raise ValueError(
                "Unknown influence function type (config.KP_influence)")

        neighbors_1nn = torch.argmin(sq_distances, dim=-1)

        # Fitting Loss
        sq_distances_min = sq_distances.gather(1, neighbors_1nn.unsqueeze(-1))
        sq_distances_min /= self.radius**2  # to be independent of the layer
        self.internal_losses["fitting_loss"] = torch.mean(sq_distances_min)

        weights = all_weights.gather(1, neighbors_1nn.unsqueeze(-1))

        K_weights = self.kernel_weight
        K_weights = torch.index_select(K_weights, 0,
                                       neighbors_1nn.view(-1)).view(
                                           (n_points, self.in_features,
                                            self.out_features))

        # Get the features of each neighborhood [n_points, n_neighbors, in_fdim]
        features = x_j

        # Apply distance weights [n_points, n_kpoints, in_fdim]
        weighted_features = torch.einsum("nb, nc -> nc", weights, features)

        # Apply network weights [n_kpoints, n_points, out_fdim]
        out_features = torch.einsum("na, nac -> nc", weighted_features,
                                    K_weights)

        return out_features
def permissive_loss(deformed_kpoints, radius):
    """This loss is responsible to penalize deformed_kpoints to
    move outside from the radius defined for the convolution
    """
    norm_deformed_normalized = F.normalize(deformed_kpoints) / float(radius)
    return torch.mean(norm_deformed_normalized[norm_deformed_normalized > 1.0])
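Note that torch.mean over an empty selection returns NaN, so permissive_loss yields NaN whenever no normalized coordinate exceeds 1.0. A guarded variant, kept as a sketch rather than the original implementation:

import torch
import torch.nn.functional as F

def permissive_loss_safe(deformed_kpoints, radius):
    # Same computation as permissive_loss above, but returns 0 instead of
    # NaN when no kernel point falls outside the radius.
    norm_deformed_normalized = F.normalize(deformed_kpoints) / float(radius)
    outside = norm_deformed_normalized[norm_deformed_normalized > 1.0]
    if outside.numel() == 0:
        return torch.tensor(0.0, device=deformed_kpoints.device)
    return torch.mean(outside)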
Example #44
0
        trainLoss = 0.
        train_estimation_relative_error = 0
        for batch, [trainX, trainY] in enumerate(tqdm(trainLoader, ncols=10)):
            # break

            nbatch += 1
            trainX = trainX.to(device)
            trainY = trainY.to(device)
            trainX = torch.sin(trainX)
            trainY = OneHotLabel(trainY, n_output)
            batch_train_repeatX, batch_train_repeatY = KMeansRepeatX(trainX, repeat_n), KMeansRepeatY(trainY,
                                                                                                      repeat_n)
            pre = net.forward(batch_train_repeatX, train=True, BP=BP_train)

            loss = CELoss(pre, batch_train_repeatY)
            trainLoss += torch.mean(loss).detach().cpu().numpy()
            if BP_train:
                net.backward(batch_train_repeatY, BP_train)
                net.update_params(learning_rate, BP_train)
            else:
                net.backward(loss, BP_train)
                net.update_params(learning_rate, BP_train, method)
        trainLoss /= nbatch
        train_loss.append(trainLoss)
        print('train epoch:{} loss:{}'.format(epoch, trainLoss))
        if ((epoch + 1) % 100 == 0):
            learning_rate *= 0.8
            print('Learning rate decayed to {}'.format(learning_rate))
        loss = 0.
        N = 0.
        n = 0.
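OneHotLabel, KMeansRepeatX and KMeansRepeatY are used above but not defined in this snippet. A plausible minimal sketch of the one-hot helper (a hypothetical reconstruction, assuming integer class labels):

import torch
import torch.nn.functional as F

def OneHotLabel(labels, n_output):
    # [B] integer class labels -> [B, n_output] one-hot float targets
    return F.one_hot(labels.long(), num_classes=n_output).float()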
Example #45
0
 def __call__(self, rnn_output):
     if self.beta == .0:
         return .0
     l2 = torch.sqrt(torch.sum(torch.pow(rnn_output, 2), dim=-1))
     l2 = self.beta * torch.mean(torch.pow(l2[:, 1:] - l2[:, :-1], 2))
     return l2
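This __call__ penalizes abrupt changes in the L2 norm of consecutive RNN hidden states, scaled by beta. A minimal usage sketch, assuming the enclosing class only stores beta and rnn_output has shape [batch, time, hidden] (the wrapper class here is hypothetical):

import torch

class NormStabilizer:
    # Hypothetical wrapper around the __call__ shown above.
    def __init__(self, beta):
        self.beta = beta

    def __call__(self, rnn_output):
        if self.beta == .0:
            return .0
        l2 = torch.sqrt(torch.sum(torch.pow(rnn_output, 2), dim=-1))  # [B, T]
        return self.beta * torch.mean(torch.pow(l2[:, 1:] - l2[:, :-1], 2))

penalty = NormStabilizer(beta=1e-4)(torch.randn(8, 20, 64))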
Example #46
0
    # adjust learning rate
    if i % DECAY_EPOCH == 0:
        LR_D = LR_D / 2
        LR_G = LR_G / 2
        adjust_learning_rate(optim_D, LR_D)
        adjust_learning_rate(optim_G, LR_G)


    artist_paintings = artist_works()
    G_ideas = torch.randn(BATCH_SIZE, N_IDEAS)
    G_paintings = G(G_ideas)
    
    prob_artist0 = D(artist_paintings)  # D tries to increase this
    prob_artist1 = D(G_paintings)       # D tries to reduce this
    
    D_loss = -torch.mean(torch.log(prob_artist0) + torch.log(1. - prob_artist1))
    G_loss = -torch.mean(torch.log(prob_artist1))
    
    if i % 3 == 0:
        optim_D.zero_grad()
        D_loss.backward(retain_graph = True) # reuse computational graph
        optim_D.step()
    
    optim_G.zero_grad()
    G_loss.backward()
    optim_G.step()
    
    if i % 100 == 0:  # plotting
        plt.cla()
        plt.plot(PAINT_POINTS[0], G_paintings.data.numpy()[0], c='#4AD631', lw=3, label='Generated painting',)
        plt.plot(PAINT_POINTS[0], 2 * np.power(PAINT_POINTS[0], 2) + 1, c='#74BCFF', lw=3, label='upper bound')
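adjust_learning_rate is called at the top of this loop but not shown; a minimal sketch of the usual implementation (hypothetical, matching the call signature used above):

def adjust_learning_rate(optimizer, lr):
    # Set the same learning rate on every parameter group.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr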
	def train(self):
		svhn_iter = iter(self.svhn_loader)
		mnist_iter = iter(self.mnist_loader)
		iter_per_epoch = min(len(svhn_iter), len(mnist_iter))

		# .next() is Python 2 only; use the builtin next() instead
		fixed_svhn = self.np_to_var(next(svhn_iter)[0])
		fixed_mnist = self.np_to_var(next(mnist_iter)[0])

		for step in range(self.train_iters+1):
			if (step+1) % iter_per_epoch == 0:
				mnist_iter = iter(self.mnist_loader)
				svhn_iter = iter(self.svhn_loader)

			svhn, _ = next(svhn_iter)
			svhn = self.np_to_var(svhn)
			mnist, _ = next(mnist_iter)
			mnist = self.np_to_var(mnist)

			#============ train D ============#
			# real images
			self.reset_grad()

			out = self.Dx(mnist)
			Dx_loss = torch.mean((out-1)**2)

			out = self.Dy(svhn)
			Dy_loss = torch.mean((out-1)**2)

			D_real_loss = Dx_loss + Dy_loss
			D_real_loss.backward()

			self.D_optim.step()

			# fake images
			self.reset_grad()

			out = self.Dy(self.Gxy(mnist))
			Dy_loss = torch.mean(out**2)

			out = self.Dx(self.Gyx(svhn))
			Dx_loss = torch.mean(out**2)

			D_fake_loss = Dx_loss + Dy_loss
			D_fake_loss.backward()

			self.D_optim.step()

			#============ train G ============#
			# mnist-svhn-mnist cycle
			self.reset_grad()

			mnist_to_svhn = self.Gxy(mnist)
			out = self.Dy(mnist_to_svhn)
			mnist_reconst = self.Gyx(mnist_to_svhn)

			# adversarial loss
			G_loss = torch.mean((out-1)**2)
			# cycle-consistency loss
			G_loss += torch.mean((mnist - mnist_reconst)**2)

			G_loss.backward()
			self.G_optim.step()

			# svhn-mnist-svhn cycle
			self.reset_grad()

			svhn_to_mnist = self.Gyx(svhn)
			out = self.Dx(svhn_to_mnist)
			svhn_reconst = self.Gxy(svhn_to_mnist)

			# adversarial loss
			G_loss = torch.mean((out-1)**2)
			# cycle-consistency loss
			G_loss += torch.mean((svhn - svhn_reconst)**2)

			G_loss.backward()
			self.G_optim.step()

			# print logs
			if (step+1) % self.log_step == 0:
				print('Step [%d/%d], d_real_loss: %.4f, d_fake_loss: %.4f, g_loss: %.4f'
					% (step+1, self.train_iters, D_real_loss.item(), D_fake_loss.item(), G_loss.item()))

			if (step+1) % self.sample_step == 0:
				fake_mnist = self.Gyx(fixed_svhn)
				fake_svhn = self.Gxy(fixed_mnist)

				mnist, fake_mnist = self.var_to_np(fixed_mnist), self.var_to_np(fake_mnist)
				svhn , fake_svhn = self.var_to_np(fixed_svhn), self.var_to_np(fake_svhn)

				merged = merge_images(mnist, fake_svhn)
				path = os.path.join(self.sample_path, 'sample-%d-m-s.png' % (step+1))
				# scipy.misc.imsave was removed in SciPy >= 1.2;
				# imageio.imwrite (requires `import imageio`) is the usual replacement
				imageio.imwrite(path, merged)

				print('Saved %s' % path)

				merged = merge_images(svhn, fake_mnist)
				path = os.path.join(self.sample_path, 'sample-%d-s-m.png' % (step+1))
				imageio.imwrite(path, merged)

				print('Saved %s' % path)

			if (step+1) % 5000 == 0:
				Gxy_path = os.path.join(self.model_path, 'Gxy-%d.pkl' % (step+1))
				Gyx_path = os.path.join(self.model_path, 'Gyx-%d.pkl' % (step+1))

				Dx_path = os.path.join(self.model_path, 'Dx-%d.pkl' % (step+1))
				Dy_path = os.path.join(self.model_path, 'Dy-%d.pkl' % (step+1))

				torch.save(self.Gxy.state_dict(), Gxy_path)
				torch.save(self.Gyx.state_dict(), Gyx_path)
				torch.save(self.Dx.state_dict(), Dx_path)
				torch.save(self.Dy.state_dict(), Dy_path)
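The helpers np_to_var, var_to_np and reset_grad are used throughout train() but not shown; plausible minimal sketches (hypothetical reconstructions, keeping the class's method style):

	def np_to_var(self, x):
		# move a batch onto the GPU when one is available
		if torch.cuda.is_available():
			x = x.cuda()
		return x

	def var_to_np(self, x):
		# bring a tensor back to a NumPy array on the CPU
		return x.data.cpu().numpy()

	def reset_grad(self):
		# clear gradients of both optimizers before each backward pass
		self.G_optim.zero_grad()
		self.D_optim.zero_grad()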
Example #48
0
def mean(input, dim):
    return th.mean(input, dim=dim)
Example #49
0
    def train(self):
        #torch.autograd.set_detect_anomaly(True)
        """
        Main training loop
        Helpful URL: https://github.com/balakg/posewarp-cvpr2018/blob/master/code/posewarp_gan_train.py
        """

        for epoch in range(self.num_epochs):
            num_batches = len(self.train_dataset_loader)
            # Initialize running averages
            disc_losses = AverageMeter()
            train_disc_accuracies = AverageMeter()
            tot_losses = AverageMeter()
            train_accuracies = AverageMeter()

            for batch_id, batch_data in enumerate(self.train_dataset_loader):
                self.gan.train()  # Set the model to train mode
                self.vgg_loss_network.eval()
                current_step = epoch * num_batches + batch_id

                # Get data from dataset (`async` is a reserved word in
                # Python 3.7+; non_blocking is the replacement keyword)
                src_img = batch_data['im'].cuda(non_blocking=True)
                target_img = batch_data['target_im'].cuda(non_blocking=True)
                src_iuv = batch_data['im_iuv'].cuda(non_blocking=True)
                target_iuv = batch_data['target_iuv'].cuda(non_blocking=True)
                #pdb.set_trace()

                # ============
                # Run predictive GAN on source image
                _, classification_src = self.gan(src_img, src_iuv, target_iuv, use_gt=False)
                # Run predictive GAN on target image
                _ , classification_tgt = self.gan(target_img, src_iuv, target_iuv, use_gt=True)
                # Create discriminator groundtruth
                # For src, we create zeros
                # For tgt, we create ones
                disc_gt_src = torch.zeros(classification_src.shape[0], 1, dtype=torch.float32).cuda()
                disc_gt_tgt = torch.ones(classification_src.shape[0], 1, dtype=torch.float32).cuda()
                disc_gt = torch.cat((disc_gt_src, disc_gt_tgt), dim=0).cuda(non_blocking=True)

                classification_all = torch.cat((classification_src, classification_tgt) , dim=0)
                # Train Discriminator network
                disc_loss = self._optimizeDiscriminator(classification_all, disc_gt)
                disc_losses.update(disc_loss.item(), disc_gt.shape[0])
                disc_acc = 100.0 * torch.mean( ( torch.round(F.softmax(classification_all, dim=1)) == disc_gt ).float() )

                train_disc_accuracies.update(disc_acc.item(), disc_gt.shape[0])

                print("Epoch: {}, Batch {}/{} has Discriminator loss {}, and acc {}".format(epoch, batch_id, num_batches, disc_losses.avg, train_disc_accuracies.avg))
                # Start training GAN first for several iterations
                if current_step < self.start_disc_iters:
                    print("Discriminator training only: {}/{}\n".format(current_step,self.start_disc_iters))
                    continue
               
                # ============
                # Optimize the GAN
                # Note that now we use disc_gt_tgt which are 1's
                generated_img, classification_src = self.gan(src_img, src_iuv, target_iuv, use_gt=False)
                tot_loss = self._optimizeGAN(generated_img, target_img, classification_src, disc_gt_tgt)
                tot_losses.update(tot_loss.item(), disc_gt_tgt.shape[0])

                acc = 100.0 * torch.mean( ( torch.round(F.softmax(classification_src, dim=1)) == disc_gt_tgt ).float() )

                tot_losses.update(tot_loss.item(), disc_gt_tgt.shape[0])
                train_accuracies.update(acc.item(), disc_gt_tgt.shape[0])

                # Not adjusting learning rate currently
                # if epoch % 100 == 99:
                #     self._adjust_learning_rate(epoch)
                # # Not Clipping Weights
                # self._clip_weights()

                if current_step % self.log_freq == 0:
                    print("Epoch: {}, Batch {}/{} has loss {}, and acc {}".format(epoch, batch_id, num_batches, tot_losses.avg, train_accuracies.avg))
                    # TODO: you probably want to plot something here
                    self.txwriter.add_scalar('train/discriminator_loss', disc_losses.avg, current_step)
                    self.txwriter.add_scalar('train/total_loss', tot_losses.avg, current_step)
                    self.txwriter.add_scalar('train/discriminator_acc', train_accuracies.avg, current_step)
                """
                Visualize some images
                """
                if current_step % self.display_freq == 0:
                    name1 = '{0}_{1}_{2}'.format(epoch, current_step, "image1")
                    name2 = '{0}_{1}_{2}'.format(epoch, current_step, "image2")
                    name3 = '{0}_{1}_{2}'.format(epoch, current_step, "gan_image")
                    im1 = denormalizeImage(src_img[0,:,:,:].cpu().numpy())
                    im2 = denormalizeImage(target_img[0,:,:,:].cpu().numpy())
                    im3 = denormalizeImage(generated_img[0,:,:,:].detach().cpu().numpy())
                    self.txwriter.add_image("Image1/"+name1,im1)
                    self.txwriter.add_image("Image2/"+name2,im2)
                    self.txwriter.add_image("GAN/"+name3,im3)
                """
                TODO : Test accuracies
                if current_step % self.test_freq == 0:#self._test_freq-1:
                    self._model.eval()
                    val_accuracy = self.validate()
                    print("Epoch: {} has val accuracy {}".format(epoch, val_accuracy))
                    self.txwriter.add_scalar('test/acc', val_accuracy, current_step)
                """
                """
                Save Model periodically
                """
                if (current_step % self.save_freq == 0) and current_step > 0:
                    save_name = 'model_checkpoint.pth'
                    torch.save(self.gan.state_dict(), save_name)
                    print('Saved model to {}'.format(save_name))
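AverageMeter is used above for the running losses and accuracies but is not defined in the snippet; a minimal sketch of the common implementation (matching the .update(value, n) / .avg usage):

class AverageMeter:
    """Tracks the running average of a scalar metric."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count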
Example #50
0
    def learn(self):
        self.learn_iter += 1
        # for x in self.Actor_target.state_dict().keys():
        #     eval('self.Actor_target.' + x + '.data.mul_((1-TAU))')
        #     eval('self.Actor_target.' + x + '.data.add_(TAU*self.Actor_eval.' + x + '.data)')
        # for x in self.Critic_target.state_dict().keys():
        #     eval('self.Critic_target.' + x + '.data.mul_((1-TAU))')
        #     eval('self.Critic_target.' + x + '.data.add_(TAU*self.Critic_eval.' + x + '.data)')

        # for target_param, param in zip(net_target.parameters(), net.parameters()):
        #     target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau)
        # for k, v in self.eval_critic_net.state_dict().items():
        #     self.target_critic_net.state_dict()[k].copy_(self.tau * v + (1-self.tau) * self.target_critic_net.state_dict()[k])
        # for k, v in self.eval_actor_net.state_dict().items():
        #     self.target_actor_net.state_dict()[k].copy_(self.tau * v + (1-self.tau) * self.target_actor_net.state_dict()[k])

        batch_data = self.memory.sample(self.batch_size)
        s0, a0, r1, s1 = zip(*batch_data)
        s0 = torch.tensor(s0, dtype=torch.float)
        a0 = torch.tensor(a0, dtype=torch.float).view(self.batch_size,
                                                      len(self.action_bounds))
        r1 = torch.tensor(r1, dtype=torch.float).view(self.batch_size, -1)
        s1 = torch.tensor(s1, dtype=torch.float)

        # Select action according to policy and add clipped noise

        # Input (s, a), output q
        q_s0_a0_1 = self.eval_critic_net1(s0, a0)
        q_s0_a0_2 = self.eval_critic_net2(s0, a0)
        # Input (s_, a_), output q_ for q_target
        # compute a_ (the next action)
        noise = (torch.randn_like(a0) * self.policy_noise_clip * 2).clamp(
            -self.policy_noise_clip, self.policy_noise_clip)
        a1 = self.target_actor_net(s1).detach() + noise
        action_bound = self.action_bounds.expand_as(a1)
        a1[a1 < -action_bound] = -action_bound[a1 < -action_bound]
        a1[a1 > action_bound] = action_bound[a1 > action_bound]

        q_s1_a1_1 = self.target_critic_net1(s1, a1).detach()
        q_s1_a1_2 = self.target_critic_net2(s1, a1).detach()
        q_s1_a1 = torch.min(q_s1_a1_1, q_s1_a1_2)
        q_target = r1 + self.gamma * q_s1_a1

        loss_critic = nn.MSELoss()(q_s0_a0_1, q_target) + nn.MSELoss()(
            q_s0_a0_2, q_target)

        # Critic learning step
        # td_error = R + GAMMA * ct(bs_, at(bs_)) - ce(s, ba) updates ce;
        # ae(s) here is the action ba stored in memory; pushing ce's Q
        # toward Q_target makes the critic's evaluation more accurate
        # loss = (Q(st, at) - (rt + r*Q'(st+1, u'(st+1))))**2
        self.optimizer_critic1.zero_grad()
        self.optimizer_critic2.zero_grad()
        loss_critic.backward()
        self.optimizer_critic1.step()
        self.optimizer_critic2.step()
        loss_actor = 0
        # Actor learning step
        # https://zhuanlan.zhihu.com/p/84321382
        # Delayed policy updates
        if self.learn_iter % self.policy_delay == 0:
            actor_a = self.eval_actor_net(s0)
            critic_q = self.eval_critic_net1(s0, actor_a)
            # loss = -q = -ce(s, ae(s)) updates ae; ae(s) = a, ae(s_) = a_
            # if a is a correct action, then its Q should be closer to 0
            loss_actor = -torch.mean(critic_q)

            self.optimizer_actor.zero_grad()
            loss_actor.backward()
            self.optimizer_actor.step()
            # Update the frozen target models
            for param, target_param in zip(
                    self.eval_critic_net1.parameters(),
                    self.target_critic_net1.parameters()):
                target_param.data.copy_(self.tau * param.data +
                                        (1 - self.tau) * target_param.data)
            for param, target_param in zip(
                    self.eval_critic_net2.parameters(),
                    self.target_critic_net2.parameters()):
                target_param.data.copy_(self.tau * param.data +
                                        (1 - self.tau) * target_param.data)
            for param, target_param in zip(self.eval_actor_net.parameters(),
                                           self.target_actor_net.parameters()):
                target_param.data.copy_(self.tau * param.data +
                                        (1 - self.tau) * target_param.data)

        return loss_critic, loss_actor
Example #51
0
def kl_loss(code):
    return torch.mean(torch.pow(code, 2))
Example #52
0
            w_comp_p = int(w_p / 16) + 1
            h_mask_p.append(h_comp_p)
            w_mask_p.append(w_comp_p)

        x = x.cuda()
        y = y.cuda()
        xp = xp.cuda()
        # out is CNN featuremaps
        output_highfeature = encoder(x)
        # print("out: ", output_highfeature.size())
        # print("output_highfeature: ", output_highfeature.size())
        output_highfeature_p = encoder(xp)
        x_mean = []
        xp_mean = []
        for i, j in zip(output_highfeature, output_highfeature_p):
            x_mean.append(float(torch.mean(i)))
            xp_mean.append(float(torch.mean(j)))
        # x_mean = torch.mean(output_highfeature)
        # x_mean = float(x_mean)
        for i in range(batch_size):
            decoder_hidden_init[i] = decoder_hidden_init[i] * x_mean[i]
            decoder_hidden_init[i] = torch.tanh(decoder_hidden_init[i])
            decoder_hidden_init_p[i] = decoder_hidden_init_p[i] * xp_mean[i]
            decoder_hidden_init_p[i] = torch.tanh(decoder_hidden_init_p[i])
            # decoder_hidden_init[]

        # dense_input is the height and output_area is the width (bb) of the feature map
        output_area1 = output_highfeature.size()
        output_area1_p = output_highfeature_p.size()

        output_area = output_area1[3]
Example #53
0
def top1gating(
        logits: Tensor,
        capacity_factor: float,
        min_capacity: int,
        used_token: Tensor = None,
        noisy_gate_policy: Optional[str] = None,
        drop_tokens: bool = True,
        use_rts: bool = True,
        use_tutel: bool = False) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """Implements Top1Gating on logits."""
    if noisy_gate_policy == 'RSample':
        logits_w_noise = logits + gumbel_rsample(logits.shape,
                                                 device=logits.device)
    # everything is in fp32 in this function
    gates = F.softmax(logits, dim=1)

    capacity = _capacity(gates, torch.tensor(capacity_factor),
                         torch.tensor(min_capacity))

    # Create a mask for 1st's expert per token
    # noisy gating
    indices1_s = torch.argmax(
        logits_w_noise if noisy_gate_policy == 'RSample' else gates, dim=1)
    num_experts = int(gates.shape[1])
    mask1 = F.one_hot(indices1_s, num_classes=num_experts)

    # mask only used tokens
    if used_token is not None:
        mask1 = einsum("s,se->se", used_token, mask1)

    # gating decisions
    exp_counts = torch.sum(mask1, dim=0).detach().to('cpu')

    # if we don't want to drop any tokens
    if not drop_tokens:
        new_capacity = torch.max(exp_counts).to(logits.device)
        dist.all_reduce(new_capacity,
                        op=dist.ReduceOp.MAX,
                        group=dist.group.WORLD)
        capacity = new_capacity

    # Compute l_aux
    me = torch.mean(gates, dim=0)
    ce = torch.mean(mask1.float(), dim=0)
    l_aux = torch.sum(me * ce) * num_experts

    # Random Token Selection
    if use_rts:
        uniform = exp_selection_uniform_map.get(logits.device)
        if uniform is None:
            uniform = torch.distributions.uniform.Uniform(
                low=torch.tensor(0.0, device=logits.device),
                high=torch.tensor(1.0, device=logits.device)).rsample
            exp_selection_uniform_map[logits.device] = uniform

        mask1_rand = mask1 * uniform(mask1.shape)
    else:
        mask1_rand = mask1

    assert logits.shape[0] >= min_capacity, (
        "No. of tokens (batch-size) should be at least min_capacity. "
        "Either set min_capacity to 0 or increase your batch size.")

    top_idx = _top_idx(mask1_rand, capacity)

    new_mask1 = mask1 * torch.zeros_like(mask1).scatter_(0, top_idx, 1)
    mask1 = new_mask1

    if use_tutel:
        # Tutel doesn't support index values masked with zero
        # so we need to replace masked indices with -1
        indices_mask = mask1.sum(dim=1) * num_experts - 1
        indices1_s = torch.min(indices1_s, indices_mask)

    # Compute locations in capacity buffer
    if use_tutel:
        locations1 = tutel_moe.fast_cumsum_sub_one(mask1)
    else:
        locations1 = torch.cumsum(mask1, dim=0) - 1

    if use_tutel:
        gates1_s = (gates * mask1).sum(dim=1)
        locations1_s = torch.sum(locations1 * mask1, dim=1)
        return l_aux, capacity, num_experts, [
            indices1_s,
        ], [
            locations1_s,
        ], [
            gates1_s,
        ], exp_counts

    # Store the capacity location for each token
    locations1_s = torch.sum(locations1 * mask1, dim=1)

    # Normalize gate probabilities
    mask1_float = mask1.float()
    gates = gates * mask1_float

    locations1_sc = _one_hot_to_float(locations1_s, capacity)
    combine_weights = einsum("se,sc->sec", gates, locations1_sc)

    dispatch_mask = combine_weights.bool()

    return l_aux, combine_weights, dispatch_mask, exp_counts
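The auxiliary term l_aux = num_experts * sum(me * ce) is the load-balancing loss: me is the mean gate probability per expert, ce is the fraction of tokens routed to each expert, and their dot product is smallest when routing is uniform. A standalone sketch of just that computation (a hypothetical helper, not the library API):

import torch
import torch.nn.functional as F

def load_balancing_loss(logits):
    gates = F.softmax(logits, dim=1)                       # [tokens, experts]
    num_experts = gates.shape[1]
    mask1 = F.one_hot(torch.argmax(gates, dim=1), num_classes=num_experts)
    me = torch.mean(gates, dim=0)          # mean gate probability per expert
    ce = torch.mean(mask1.float(), dim=0)  # fraction of tokens per expert
    return torch.sum(me * ce) * num_experts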
    def __train_pass__(self, epoch, is_training=True):
        st_epoch = time.time()
        if (is_training):
            self.G.train()
            status = 'TRAIN'
        else:
            self.G.eval()
            status = 'EVAL'

        g_time = 0.0
        for i, batch in enumerate(self.dataloader):
            if (i >= len(self.dataloader) - 2):
                break
            st_batch = time.time()

            if (self.opt_parser.comb_fan_awing):
                image_in, image_out, fan_pred_landmarks = batch
                fan_pred_landmarks = fan_pred_landmarks.reshape(
                    -1, 68, 3).detach().cpu().numpy()
            elif (self.opt_parser.add_audio_in):
                image_in, image_out, audio_in = batch
                audio_in = audio_in.reshape(-1, 1, 256, 256).to(device)
            else:
                image_in, image_out = batch

            with torch.no_grad():
                # # online landmark (AwingNet)
                image_in, image_out = \
                    image_in.reshape(-1, 3, 256, 256).to(device), image_out.reshape(-1, 3, 256, 256).to(device)
                inputs = image_out
                outputs, boundary_channels = self.fa_model(inputs)
                pred_heatmap = outputs[-1][:, :-1, :, :].detach().cpu()
                pred_landmarks, _ = get_preds_fromhm(pred_heatmap)
                pred_landmarks = pred_landmarks.numpy() * 4

                # online landmark (FAN) -> replace jaw + eye brow in AwingNet
                if (self.opt_parser.comb_fan_awing):
                    fl_jaw_eyebrow = fan_pred_landmarks[:, 0:27, 0:2]
                    fl_rest = pred_landmarks[:, 51:, :]
                    # np.int was removed in NumPy 1.24; use the builtin int
                    pred_landmarks = np.concatenate([fl_jaw_eyebrow, fl_rest],
                                                    axis=1).astype(int)

            # draw landmarks on a white background
            img_fls = []
            for pred_fl in pred_landmarks:
                img_fl = np.ones(shape=(256, 256, 3)) * 255.0
                if (self.opt_parser.comb_fan_awing):
                    img_fl = vis_landmark_on_img74(img_fl, pred_fl)  # 74x2
                else:
                    img_fl = vis_landmark_on_img98(img_fl, pred_fl)  # 98x2
                img_fls.append(img_fl.transpose((2, 0, 1)))
            img_fls = np.stack(img_fls, axis=0).astype(np.float32) / 255.0
            image_fls_in = torch.tensor(img_fls,
                                        requires_grad=False).to(device)

            if (self.opt_parser.add_audio_in):
                # print(image_fls_in.shape, image_in.shape, audio_in.shape)
                image_in = torch.cat([image_fls_in, image_in, audio_in], dim=1)
            else:
                image_in = torch.cat([image_fls_in, image_in], dim=1)

            # image_in, image_out = \
            #     image_in.reshape(-1, 6, 256, 256).to(device), image_out.reshape(-1, 3, 256, 256).to(device)

            # image2image net fp
            g_out = self.G(image_in)
            g_out = torch.tanh(g_out)

            loss_l1 = self.criterionL1(g_out, image_out)
            loss_vgg, loss_style = self.criterionVGG(g_out,
                                                     image_out,
                                                     style=True)

            loss_vgg, loss_style = torch.mean(loss_vgg), torch.mean(loss_style)

            loss = loss_l1 + loss_vgg + loss_style
            if (is_training):
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            # log
            if (self.opt_parser.write):
                self.writer.add_scalar('loss',
                                       loss.cpu().detach().numpy(), self.count)
                self.writer.add_scalar('loss_l1',
                                       loss_l1.cpu().detach().numpy(),
                                       self.count)
                self.writer.add_scalar('loss_vgg',
                                       loss_vgg.cpu().detach().numpy(),
                                       self.count)
                self.count += 1

            # save image to track training process
            if (i % self.opt_parser.jpg_freq == 0):
                vis_in = np.concatenate([
                    image_in[0, 3:6].cpu().detach().numpy().transpose(
                        (1, 2, 0)),
                    image_in[0, 0:3].cpu().detach().numpy().transpose(
                        (1, 2, 0))
                ],
                                        axis=1)
                vis_out = np.concatenate([
                    image_out[0].cpu().detach().numpy().transpose(
                        (1, 2, 0)), g_out[0].cpu().detach().numpy().transpose(
                            (1, 2, 0))
                ],
                                         axis=1)
                vis = np.concatenate([vis_in, vis_out], axis=0)
                os.makedirs(os.path.join(self.opt_parser.jpg_dir,
                                         self.opt_parser.name),
                            exist_ok=True)
                cv2.imwrite(
                    os.path.join(self.opt_parser.jpg_dir, self.opt_parser.name,
                                 'e{:03d}_b{:04d}.jpg'.format(epoch, i)),
                    vis * 255.0)
            # save ckpt
            if (i % self.opt_parser.ckpt_last_freq == 0):
                self.__save_model__('last', epoch)

            print(
                "Epoch {}, Batch {}/{}, loss {:.4f}, l1 {:.4f}, vggloss {:.4f}, styleloss {:.4f} time {:.4f}"
                .format(epoch, i,
                        len(self.dataset) // self.opt_parser.batch_size,
                        loss.cpu().detach().numpy(),
                        loss_l1.cpu().detach().numpy(),
                        loss_vgg.cpu().detach().numpy(),
                        loss_style.cpu().detach().numpy(),
                        time.time() - st_batch))

            g_time += time.time() - st_batch

            if (self.opt_parser.test_speed):
                if (i >= 100):
                    break

        print('Epoch time usage:',
              time.time() - st_epoch, 'I/O time usage:',
              time.time() - st_epoch - g_time, '\n=========================')
        if (self.opt_parser.test_speed):
            exit(0)
        if (epoch % self.opt_parser.ckpt_epoch_freq == 0):
            self.__save_model__('{:02d}'.format(epoch), epoch)
def optimize_model():

    if len(memory) < BATCH_SIZE:
        return

    transitions = memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*transitions))

    # Compute a boolean mask of non-final states and concatenate the batch
    # elements (uint8 masks are deprecated in favor of bool since PyTorch 1.2)
    non_final_mask = torch.tensor(tuple(
        map(lambda s: s is not None, batch.next_state)),
                                  device=device,
                                  dtype=torch.bool)

    # Divide memory into different tensors
    non_final_next_states = torch.cat(
        [s for s in batch.next_state if s is not None])
    state_batch = torch.cat(batch.state)
    action_batch = torch.cat(batch.action).view(BATCH_SIZE, 1)
    reward_batch = torch.cat(batch.reward)

    # Create state-action (s,a) tensor for input into the critic network with taken actions
    state_action = torch.cat([state_batch, action_batch], -1)

    # Compute Q(s,a) using critic network
    state_action_values = critic_nn(state_action)

    # Compute deterministic next state action using actor target network
    next_action = target_actor_nn(non_final_next_states).detach()

    # Compute next timestep state-action (s,a) tensor for non-final next states
    next_state_action = torch.zeros(BATCH_SIZE, 4, device=device)
    next_state_action[non_final_mask, :] = torch.cat(
        [non_final_next_states, next_action], -1)

    # Compute next state values at t+1 using target critic network
    next_state_values = target_critic_nn(next_state_action).detach()

    # Compute expected state action values y[i]= r[i] + Q'(s[i+1], a[i+1])
    expected_state_action_values = reward_batch.view(
        BATCH_SIZE, 1) + GAMMA * next_state_values

    # Critic loss by mean squared error
    loss_critic = F.mse_loss(state_action_values, expected_state_action_values)

    # Optimize the critic network
    optimizer_critic.zero_grad()
    loss_critic.backward()

    for param in critic_nn.parameters():
        param.grad.data.clamp_(-1, 1)

    optimizer_critic.step()

    #optimize actor
    # Actor actions
    state_actor_action = actor_nn(state_batch)
    # State-actor-actions tensor
    state_actor_action_values = torch.cat([state_batch, state_actor_action],
                                          -1)
    # Loss
    loss_actor = -1 * torch.mean(critic_nn(state_actor_action_values))
    optimizer_actor.zero_grad()
    loss_actor.backward()
    for param in actor_nn.parameters():
        param.grad.data.clamp_(-1, 1)
    optimizer_actor.step()

    # Soft parameter update
    update_targets(target_critic_nn, critic_nn)
    update_targets(target_actor_nn, actor_nn)
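update_targets is called at the end but not shown; a minimal sketch of the usual Polyak soft update (TAU is a hypothetical constant):

TAU = 0.001  # hypothetical soft-update rate

def update_targets(target_net, source_net):
    # target <- TAU * source + (1 - TAU) * target
    for target_param, param in zip(target_net.parameters(),
                                   source_net.parameters()):
        target_param.data.copy_(TAU * param.data +
                                (1.0 - TAU) * target_param.data)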
Example #56
0
 def forward(self, true, pred):
     loss = true - pred
     return torch.mean(torch.log(torch.cosh(loss + 1e-12)))
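Log-cosh behaves like squared error for small residuals (log cosh r is about r**2/2) and like absolute error for large ones (about |r| - log 2), which makes it less sensitive to outliers than MSE. A quick check on dummy data:

import torch

residual = torch.tensor([0.01, 0.1, 1.0, 5.0, 10.0])
log_cosh = torch.log(torch.cosh(residual + 1e-12))
print(log_cosh)            # ~r**2/2 for small r, ~|r| - log(2) for large r
print(residual ** 2 / 2)   # compare against the quadratic regime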
Example #57
0
 def forward(self, x):
     mean = torch.mean(x, 1, keepdim=True)
     variance = torch.mean((x - mean)**2, 1, keepdim=True)
     x = (x - mean) * torch.rsqrt(variance + self.eps)
     x = x * self.gamma + self.beta
     return x
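This forward pass is layer normalization over dim 1 with a learned scale (gamma) and shift (beta). A self-contained module sketch with assumed parameter shapes:

import torch
import torch.nn as nn

class LayerNorm1d(nn.Module):
    # Sketch of the forward above as a module (parameter shapes assumed).
    def __init__(self, num_features, eps=1e-5):
        super().__init__()
        self.gamma = nn.Parameter(torch.ones(1, num_features))
        self.beta = nn.Parameter(torch.zeros(1, num_features))
        self.eps = eps

    def forward(self, x):
        mean = torch.mean(x, 1, keepdim=True)
        variance = torch.mean((x - mean) ** 2, 1, keepdim=True)
        x = (x - mean) * torch.rsqrt(variance + self.eps)
        return x * self.gamma + self.beta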
Example #58
0
 def forward(self, true, pred):
     return torch.mean(torch.abs(true - pred))
Example #59
0
    def forward(self, map_pred, map_gtd):
        # map_pred: predicted saliency map; map_gtd: ground-truth density map
        map_pred = map_pred.float()
        map_gtd = map_gtd.float()

        map_pred = map_pred.view(1, -1) # flatten the prediction into a 1 x N row vector
        map_gtd = map_gtd.view(1, -1) # flatten the ground truth into a 1 x N row vector

        min1 = torch.min(map_pred)
        max1 = torch.max(map_pred)
        # print("min1 and max1 are (saliecny map):", min1, max1)
        map_pred = (map_pred - min1) / (max1 - min1 + self.epsilon) # min-max normalization for keeping KL loss non-NAN

        min2 = torch.min(map_gtd)
        max2 = torch.max(map_gtd)
        # print("min2 and max2 are (fixation points) :", min2, max2)
        map_gtd = (map_gtd - min2) / (max2 - min2 + self.epsilon) # min-max normalization to keep the loss non-NaN
        
        map_gtd_id_1 = torch.gt(map_gtd, 0.5)
        map_gtd_id_0 = torch.lt(map_gtd, 0.5)
        map_gtd_id_00 = torch.eq(map_gtd, 0.5)
        map_gtd[map_gtd_id_1] = 1.0
        map_gtd[map_gtd_id_0] = 0.0
        map_gtd[map_gtd_id_00] = 0.0

        map_pred_mean = torch.mean(map_pred) # calculating the mean value of tensor
        map_pred_mean = map_pred_mean.item() # change the tensor into a number

        map_pred_std = torch.std(map_pred) # calculate the standard deviation
        map_pred_std = map_pred_std.item() # change the tensor into a number 

        map_pred = (map_pred - map_pred_mean) / (map_pred_std + self.epsilon) # normalization

        NSS = map_pred * map_gtd
        # print("early NSS is :", NSS)
        '''
        dim_NSS = NSS.size()
        print("dim_NSS is :", dim_NSS)
        dim_NSS = dim_NSS[1]
        sum_nss = 0
        dim_sum = 0
        
        for idxnss in range(0, dim_NSS):
            if (NSS[0, idxnss] > 0.05): # # should not be 0, because there are a lot of 0.00XXX in map1_NSS due to float format
                sum_nss += NSS[0, idxnss]
                dim_sum += 1
        
        NSS = sum_nss / dim_sum
        '''
        # NSS = NSS # should not add anything, because there are a lot of 0.00XXX values in map1_NSS due to float format
        # id = torch.nonzero(NSS)
        id = torch.gt(NSS, 0.1) # find out the id of NSS > 0.1
        bignss = NSS[id]
        # print(bignss)
        if(len(bignss) == 0): # NSS[id] is empty 
            id = torch.gt(NSS, -0.00000001) # lower the threshold (the comparison value must stay a float, not an integer)
            bignss = NSS[id]
        # NSS = torch.sum(NSS[id])
        # NSS = torch.mean(NSS)
        NSS = torch.mean(bignss)
        
        NSS = -NSS # negate: the larger the NSS, the better, so we minimize -NSS
        return NSS 
Example #60
0
 def forward(self, input):
     return input / torch.sqrt(
         torch.mean(input**2, dim=1, keepdim=True) + 1e-8)
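This is pixel-wise feature normalization (PixelNorm, as popularized by progressive GANs): each feature vector is divided by its root-mean-square across channels, so every spatial position ends up with unit RMS. A quick usage sketch:

import torch

x = torch.randn(4, 8, 16, 16)                               # [B, C, H, W]
y = x / torch.sqrt(torch.mean(x ** 2, dim=1, keepdim=True) + 1e-8)
print(torch.mean(y ** 2, dim=1)[0, 0, 0])                   # ~= 1.0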