コード例 #1
0
ファイル: rejector.py プロジェクト: lewisKit/pyro
 def rsample(self, sample_shape=torch.Size()):
     """Draw a sample with parallel, batched accept-reject sampling.

     A whole batch is proposed with ``self.propose`` and every element is
     accepted with probability ``exp(self.log_prob_accept(x))`` clamped to
     ``[0, 1]``.  Elements that are still rejected are overwritten from
     fresh proposal batches until every element has been accepted.

     :param sample_shape: forwarded to ``self.propose`` when non-empty;
         otherwise ``self.propose()`` is called without arguments.
     :return: the tensor returned by the first proposal, with rejected
         entries replaced in place by later accepted proposals.
     """

     def _propose_and_flip():
         # One proposal round: a candidate batch plus a 0/1 byte mask that
         # marks which elements passed the acceptance coin flip.
         candidate = self.propose(sample_shape) if sample_shape else self.propose()
         accept_prob = torch.exp(self.log_prob_accept(candidate)).clamp_(0.0, 1.0)
         return candidate, torch.bernoulli(accept_prob).byte()

     x, done = _propose_and_flip()
     while not done.all():
         candidate, flips = _propose_and_flip()
         # Only elements that have not been accepted yet may be replaced.
         newly_accepted = flips & ~done
         if not newly_accepted.any():
             continue
         x[newly_accepted] = candidate[newly_accepted]
         done |= newly_accepted
     return x
コード例 #2
0
    def pixelcnn_generate(self, z1, z2):
        """Generate images pixel by pixel with the PixelCNN decoder.

        Starting from an all-zero canvas, ``self.p_x`` is evaluated once per
        spatial position (rows first, then columns).  The pixel at that
        position is sampled from the predicted distribution and written into
        the canvas that is fed to the next evaluation.

        For ``'binary'`` inputs the pixel is a Bernoulli draw from the
        predicted mean.  For ``'gray'`` / ``'continuous'`` inputs it is a
        logistic sample ``mean + exp(logvar) * logit(u)`` rounded down to a
        multiple of ``1/256``.

        :param z1: latent code passed to ``self.p_x``; its first dimension
            sets the batch size of the canvas.
        :param z2: second latent code passed to ``self.p_x``.
        :return: the mean map of the last ``self.p_x`` evaluation, viewed as
            ``(batch, input_size[0], input_size[1], input_size[2])``.
        """
        n_channels = self.args.input_size[0]
        height = self.args.input_size[1]
        width = self.args.input_size[2]
        use_cuda = self.args.cuda
        input_type = self.args.input_type

        # Sampling from PixelCNN
        x_zeros = torch.zeros((z1.size(0), n_channels, height, width))
        if use_cuda:
            x_zeros = x_zeros.cuda()

        for i in range(height):
            for j in range(width):
                samples_mean, samples_logvar = self.p_x(Variable(x_zeros, volatile=True), z1, z2)
                samples_mean = samples_mean.view(samples_mean.size(0), n_channels, height, width)

                if input_type == 'binary':
                    probs = samples_mean[:, :, i, j].data
                    x_zeros[:, :, i, j] = torch.bernoulli(probs).float()
                    samples_gen = samples_mean

                elif input_type in ('gray', 'continuous'):
                    binsize = 1. / 256.
                    samples_logvar = samples_logvar.view(samples_mean.size(0), n_channels, height, width)
                    means = samples_mean[:, :, i, j].data
                    logvar = samples_logvar[:, :, i, j].data
                    # sample from logistic distribution; the noise follows the
                    # same device switch as the canvas instead of always
                    # being moved to the GPU
                    u = torch.rand(means.size())
                    if use_cuda:
                        u = u.cuda()
                    y = torch.log(u) - torch.log(1. - u)
                    sample = means + torch.exp(logvar) * y
                    x_zeros[:, :, i, j] = torch.floor(sample / binsize) * binsize
                    samples_gen = samples_mean

        return samples_gen
コード例 #3
0
ファイル: corrupter.py プロジェクト: cai-lw/KBGAN
 def corrupt(self, src, rel, dst):
     """Replace one side of every (src, rel, dst) triple by a random entity.

     A Bernoulli draw with probability ``self.bern_prob[rel]`` decides, per
     triple, which side is corrupted: on a 1 the source is replaced and the
     destination kept, on a 0 the destination is replaced and the source
     kept.  Replacement ids come from ``choice(self.n_ent, len(src))``.

     :param src: tensor of source entity ids.
     :param rel: tensor of relation ids, used to index ``self.bern_prob``.
     :param dst: tensor of destination entity ids.
     :return: tuple ``(src_out, dst_out)`` of tensors built from numpy arrays.
     """
     replace_src = torch.bernoulli(self.bern_prob[rel]).numpy().astype('int64')
     keep_src = 1 - replace_src
     ent_random = choice(self.n_ent, len(src))
     corrupted_src = keep_src * src.numpy() + replace_src * ent_random
     corrupted_dst = replace_src * dst.numpy() + keep_src * ent_random
     return torch.from_numpy(corrupted_src), torch.from_numpy(corrupted_dst)
コード例 #4
0
ファイル: layers.py プロジェクト: AlliedToasters/elko_den
 def forward(self, x):
     """Apply a linear map whose weights are scaled by a noise factor.

     In training mode the factor is a fresh ``bernoulli(self.probs) - 0.5``
     draw; otherwise it is ``0.0``, so the linear term vanishes and only
     the bias (if any) remains.

     :param x: input passed to ``F.linear``.
     :return: ``F.linear(x, self.W * eps)``, plus ``self.bias`` when it is
         not ``None``.
     """
     eps = Variable(torch.bernoulli(self.probs) - 0.5) if self.training else 0.0
     output = F.linear(x, self.W * eps)
     if self.bias is None:
         return output
     return output + self.bias
コード例 #5
0
ファイル: binomial.py プロジェクト: lewisKit/pyro
 def sample(self, sample_shape=torch.Size()):
     """Sample counts by summing independent Bernoulli trials.

     Every output element gets ``max(total_count.max(), 1)`` Bernoulli
     draws with probability ``self.probs``.  Draws whose trial index is not
     below the element's own ``total_count`` are zeroed before summing.
     No gradients are recorded.

     :param sample_shape: passed to ``self._extended_shape`` to obtain the
         shape of the result.
     :return: tensor of success counts with the extended shape.
     """
     with torch.no_grad():
         counts = self.total_count
         n_trials = max(int(counts.max()), 1)
         trial_shape = self._extended_shape(sample_shape) + (n_trials,)
         trial_probs = self.probs.unsqueeze(-1).expand(trial_shape)
         draws = torch.bernoulli(trial_probs)
         if counts.min() != n_trials:
             # Some elements need fewer trials than the maximum: blank out
             # the surplus draws for them.
             trial_index = torch.arange(n_trials, out=counts.new_empty(n_trials))
             draws.masked_fill_(trial_index >= counts.unsqueeze(-1), 0.)
         return draws.sum(dim=-1)
コード例 #6
0
    def decode(self, x):
        """Run the second half of the network with a random 0/1 mask per layer.

        For every layer except the last, the pre-activation is multiplied
        by a Bernoulli(0.5) mask of shape ``(1, self.arch_2[i][1])`` (shared
        across the batch) and then passed through ``self.act_func``.  The
        last layer is applied without mask or activation.

        :param x: input to the first layer of ``self.second_half_weights``.
        :return: output of the last layer.
        """
        n_masked_layers = len(self.second_half_weights) - 1
        for idx in range(n_masked_layers):
            pre_act = self.second_half_weights[idx](x)  # [B,D]
            keep_probs = torch.ones(1, self.arch_2[idx][1]) * .5
            mask = Variable(torch.bernoulli(keep_probs).type(self.dtype))
            x = self.act_func(mask * pre_act)
        return self.second_half_weights[-1](x)
コード例 #7
0
ファイル: seq2seq.py プロジェクト: chubbymaggie/tranX
    def decode(self, src_encodings, src_sents_len, dec_init_vec, tgt_sents_var):
        """
        Run the decoder over the target sentence with input feeding and
        collect the score returned by ``self.step`` at every decoding step.

        :param src_encodings: Variable(src_sent_len, batch_size, hidden_size * 2)
        :param src_sents_len: list[int]
        :param dec_init_vec: tuple((batch_size, hidden_size))
        :param tgt_sents_var: Variable(tgt_sent_len, batch_size)
        :return:
            scores: the per-step scores stacked along a new leading
            dimension, one entry for every target position except the last
        """
        make_tensor = src_encodings.data.new
        batch_size = src_encodings.size(1)

        # (batch_size, query_len, hidden_size * 2)
        src_encodings = src_encodings.permute(1, 0, 2)
        # (batch_size, query_len, hidden_size)
        src_encodings_att_linear = self.att_src_linear(src_encodings)
        # (batch_size, src_sent_len)
        src_sent_masks = nn_utils.length_array_to_mask_tensor(src_sents_len, cuda=self.cuda)
        # (tgt_sent_len, batch_size, embed_size)
        tgt_token_embed = self.tgt_embed(tgt_sents_var)

        # decoder state and the attentional vector fed back into the next step
        state = dec_init_vec
        att_prev = Variable(make_tensor(batch_size, self.hidden_size).zero_(), requires_grad=False)

        per_step_scores = []
        # start from `<s>`, until y_{T-1}: the last target position is never fed in
        step_inputs = tgt_token_embed.split(split_size=1)
        for t, y_prev_embed in enumerate(step_inputs[:-1]):
            # split() keeps the first dim
            y_prev_embed = y_prev_embed.squeeze(0)
            if t > 0 and self.decoder_word_dropout:
                # (batch_size): zero out the whole previous-word embedding
                # for the examples whose mask entry is 0
                keep = Variable(torch.bernoulli(
                    make_tensor(batch_size).fill_(1 - self.decoder_word_dropout)))
                y_prev_embed = y_prev_embed * keep.unsqueeze(1)

            # input feeding: concatenate y_tm1 and the previous attentional vector
            step_input = torch.cat([y_prev_embed, att_prev], 1)

            (h_t, cell_t), att_t, score_t = self.step(step_input, state,
                                                      src_encodings, src_encodings_att_linear,
                                                      src_sent_masks=src_sent_masks)
            per_step_scores.append(score_t)

            att_prev = att_t
            state = (h_t, cell_t)

        return torch.stack(per_step_scores)
コード例 #8
0
ファイル: corrupter.py プロジェクト: cai-lw/KBGAN
 def corrupt(self, src, rel, dst, keep_truth=True):
     """Build ``self.n_sample`` candidates per triple by corrupting one side.

     A Bernoulli draw with probability ``self.bern_prob[rel]`` decides, per
     triple, whether the source (draw = 1) or the destination (draw = 0)
     is replaced by random entity ids from ``choice(self.n_ent, ...)``.

     :param src: tensor of source entity ids.
     :param rel: tensor of relation ids, used to index ``self.bern_prob``.
     :param dst: tensor of destination entity ids.
     :param keep_truth: when true, column 0 of the outputs keeps the
         original triple and only the remaining columns are corrupted.
     :return: tuple ``(src_out, rel_out, dst_out)``, each of shape
         ``(len(src), self.n_sample)``.
     """
     n = len(src)
     corrupt_src = torch.bernoulli(self.bern_prob[rel]).numpy().astype('bool')
     corrupt_dst = ~corrupt_src
     src_out = np.tile(src.numpy(), (self.n_sample, 1)).transpose()
     dst_out = np.tile(dst.numpy(), (self.n_sample, 1)).transpose()
     rel_out = rel.unsqueeze(1).expand(n, self.n_sample)
     # First column that may be overwritten: 1 leaves the true triple in
     # column 0, 0 corrupts every column.
     first_col = 1 if keep_truth else 0
     ent_random = choice(self.n_ent, (n, self.n_sample - first_col))
     src_out[corrupt_src, first_col:] = ent_random[corrupt_src]
     dst_out[corrupt_dst, first_col:] = ent_random[corrupt_dst]
     return torch.from_numpy(src_out), rel_out, torch.from_numpy(dst_out)
コード例 #9
0
    def draw(self, N):
        '''
            Draw N samples from multinomial

            A bucket index is drawn uniformly from ``[0, K)`` for every
            sample, where ``K`` is the length of ``self.alias``.  A
            Bernoulli draw with probability ``self.prob[bucket]`` then
            decides whether the bucket index itself or ``self.alias[bucket]``
            is returned.

            :param N: number of samples to draw.
            :return: tensor of ``N`` indices.
        '''
        K = self.alias.size(0)

        # Allocate the bucket indices with the same type and on the same
        # device as the alias table, so the sampler is not tied to CUDA.
        kk = self.alias.new(N).random_(0, K)
        prob = self.prob.index_select(0, kk)
        alias = self.alias.index_select(0, kk)
        # b is 1 where the bucket itself is kept and 0 where its alias is used
        b = torch.bernoulli(prob)
        oq = kk.mul(b.long())
        oj = alias.mul((1-b).long())

        return oq + oj
コード例 #10
0
ファイル: layers.py プロジェクト: ericsolo/python
    def forward(self, x0, x1, x2, x3):
        """Mix the eight block outputs into three streams.

        While training with ``self.p > 0`` every block output is multiplied
        by its own Bernoulli(1 - p) gate.  Otherwise each stream is the
        plain sum of its block outputs scaled by ``1 - p``.  With
        ``self.integrate`` set, ``x1``, ``x2`` and ``x3`` are added to the
        corresponding stream before the ReLU.

        :return: tuple ``(x0, relu(out1), relu(out2), relu(out3))``; ``x0``
            is passed through unchanged.
        """
        stochastic = self.p > 0 and self.training
        if stochastic:
            # One independent 0/1 gate per block, drawn before the blocks run.
            gates = torch.bernoulli((1.0 - self.p) * torch.ones(8))

        b01 = self.block01(x0)
        b11 = self.block11(x1)
        b21 = self.block21(x2)
        b12 = self.block12(x1)
        b22 = self.block22(x2)
        b32 = self.block32(x3)
        b23 = self.block23(x2)
        b33 = self.block33(x3)

        if stochastic:
            out1 = gates[0] * b01 + gates[1] * b11 + gates[2] * b21
            out2 = gates[3] * b12 + gates[4] * b22 + gates[5] * b32
            out3 = gates[6] * b23 + gates[7] * b33
        else:
            out1 = (1 - self.p) * (b01 + b11 + b21)
            out2 = (1 - self.p) * (b12 + b22 + b32)
            out3 = (1 - self.p) * (b23 + b33)

        if self.integrate:
            out1 += x1
            out2 += x2
            out3 += x3

        return x0, self.relu(out1), self.relu(out2), self.relu(out3)
コード例 #11
0
ファイル: training.py プロジェクト: jramapuram/vae_vampprior
def train_vae(epoch, args, train_loader, model, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    The KL weight ``beta`` is 1 when ``args.warmup`` is 0; otherwise it is
    ``epoch / args.warmup`` capped at 1.  It is printed once and passed to
    ``model.calculate_loss`` for every batch.  With
    ``args.dynamic_binarization`` set, each batch is replaced by a Bernoulli
    sample of itself before the loss is evaluated.

    :return: tuple ``(model, train_loss, train_re, train_kl)`` where the
        last three are per-batch averages; ``train_re`` accumulates the
        negated ``RE`` returned by ``model.calculate_loss``.
    """
    # set model in training mode
    model.train()

    # warm-up coefficient for this epoch
    beta = 1. if args.warmup == 0 else min(1., 1. * epoch / args.warmup)
    print('beta: {}'.format(beta))

    loss_sum = 0
    re_sum = 0
    kl_sum = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)

        # dynamic binarization: resample the binary input from the pixel intensities
        x = torch.bernoulli(data) if args.dynamic_binarization else data

        optimizer.zero_grad()
        loss, RE, KL = model.calculate_loss(x, beta, average=True)
        loss.backward()
        optimizer.step()

        # NOTE: `.data[0]` relies on the loss terms being indexable tensors
        loss_sum += loss.data[0]
        re_sum += -RE.data[0]
        kl_sum += KL.data[0]

    # the loss terms are already averaged over the batch size, so only the
    # number of batches is divided out here
    n_batches = len(train_loader)
    loss_sum /= n_batches
    re_sum /= n_batches
    kl_sum /= n_batches

    return model, loss_sum, re_sum, kl_sum
コード例 #12
0
    def net(self, x_input):
        """Two-layer network with a random 0/1 mask on the hidden activations.

        The hidden activations ``a1(l1(x_input))`` are multiplied by a
        Bernoulli(0.5) mask of shape ``(1, 50)``, i.e. one mask row that is
        broadcast over the batch, and then passed through ``self.l2``.

        :param x_input: input to ``self.l1``.
        :return: output of ``self.l2``.
        """
        hidden = self.a1(self.l1(x_input))

        # A single mask row shared by every example in the batch.
        keep_probs = hidden.data.new(1, 50).fill_(0.5)
        mask = Variable(torch.bernoulli(keep_probs))

        return self.l2(hidden * mask)
コード例 #13
0
ファイル: distributions.py プロジェクト: Northrend/pytorch
 def sample(self):
     """Draw one Bernoulli sample per entry of ``self.probs``.

     :return: tensor of zeros and ones shaped like ``self.probs``.
     """
     draws = torch.bernoulli(self.probs)
     return draws
コード例 #14
0
ファイル: gaussian.py プロジェクト: codes-kzhan/sparse-hyper
    def forward_inner(self, input, means, sigmas, values, bias, train=True):
        """Apply the sparse transformation described by index tuples and values.

        The continuous index tuples (``means`` with spread ``sigmas``) are
        turned into integer indices, flattened to matrix indices with
        ``flatten_indices_mat``, and the resulting (index, value) pairs are
        multiplied with the flattened input through ``util.sparsemult``.
        The product is reshaped to ``(batchsize, *self.out_size)``.

        :param input: input batch; the first dimension is the batch size and
            the remaining dimensions are flattened before the multiplication.
        :param means: continuous index tuples; unpacked as ``(b, k, r)`` in
            the subsampling branch.
        :param sigmas: spread of each index tuple; passed to
            ``self.discretize`` or used as the scale of a Normal distribution
            in the reinforce branch.
        :param values: values attached to the index tuples.
        :param bias: added to the output when ``self.bias_type`` is
            ``Bias.DENSE``.
        :param train: when false, the indices are simply the rounded means.
        :return: ``y``, or ``(y, dists, samples)`` when ``self.reinforce``
            and ``train`` are both set.
        """

        # NOTE(review): t0total and the t0 timestamps below are assigned but
        # never read inside this method.
        t0total = time.time()

        # Per-dimension index range: output dimensions followed by the
        # non-batch input dimensions.
        rng = tuple(self.out_size) + tuple(input.size()[1:])

        batchsize = input.size()[0]

        # NB: due to batching, real_indices has shape batchsize x K x rank(W)
        #     real_values has shape batchsize x K

        # print('--------------------------------')
        # for i in range(util.prod(sigmas.size())):
        #     print(sigmas.view(-1)[i].data[0])

        # turn the real values into integers in a differentiable way
        t0 = time.time()

        if train:
            if not self.reinforce:
                if self.subsample is None:
                    indices, props, values = self.discretize(
                        means,
                        sigmas,
                        values,
                        rng=rng,
                        additional=self.additional,
                        use_cuda=self.use_cuda,
                        relative_range=self.relative_range)

                    # weight each value by the proportion returned by discretize
                    values = values * props
                else:  # select a small proportion of the indices to learn over

                    b, k, r = means.size()

                    prop = torch.cuda.FloatTensor([
                        self.subsample
                    ]) if self.use_cuda else torch.FloatTensor(
                        [self.subsample])

                    # Redraw the 0/1 selection over the k index tuples until
                    # at least one tuple is selected.
                    selection = None
                    while (selection is None) or (float(selection.sum()) < 1):
                        selection = torch.bernoulli(prop.expand(k)).byte()

                    # Broadcast the per-tuple selection to the shapes of
                    # means, sigmas and values.
                    mselection = selection.unsqueeze(0).unsqueeze(2).expand_as(
                        means)
                    sselection = selection.unsqueeze(0).unsqueeze(2).expand_as(
                        sigmas)
                    vselection = selection.unsqueeze(0).expand_as(values)

                    # Split every tensor into the selected ("in") and the
                    # unselected ("out") tuples.
                    means_in, means_out = means[mselection].view(
                        b, -1, r), means[~mselection].view(b, -1, r)
                    sigmas_in, sigmas_out = sigmas[sselection].view(
                        b, -1, r), sigmas[~sselection].view(b, -1, r)
                    values_in, values_out = values[vselection].view(
                        b, -1), values[~vselection].view(b, -1)

                    # The unselected tuples are cut off from the graph.
                    means_out = means_out.detach()
                    values_out = values_out.detach()

                    # NOTE(review): unlike the call in the non-subsampling
                    # branch, this call does not pass relative_range — confirm
                    # that this is intended.
                    indices_in, props, values_in = self.discretize(
                        means_in,
                        sigmas_in,
                        values_in,
                        rng=rng,
                        additional=self.additional,
                        use_cuda=self.use_cuda)
                    values_in = values_in * props

                    # Unselected tuples just use their rounded means.
                    indices_out = means_out.data.round().long()

                    indices = torch.cat([indices_in, indices_out], dim=1)
                    values = torch.cat([values_in, values_out], dim=1)

            else:  # reinforce approach
                # Sample the indices from a Normal around the means; dists and
                # samples are returned to the caller at the end of the method.
                dists = torch.distributions.Normal(means, sigmas)
                samples = dists.sample()

                indices = samples.data.round().long()

                # if the sampling puts the indices out of bounds, we just clip to the min and max values
                indices[indices < 0] = 0

                rngt = torch.tensor(data=rng,
                                    device='cuda' if self.use_cuda else 'cpu')

                maxes = rngt.unsqueeze(0).unsqueeze(0).expand_as(means) - 1
                indices[indices > maxes] = maxes[indices > maxes]

        else:  # not train, just use the nearest indices
            indices = means.round().long()

        if self.use_cuda:
            indices = indices.cuda()

        # # Create bias for permutation matrices
        # TAU = 1
        # if SINKHORN_ITS is not None:
        #     values = values / TAU
        #     for _ in range(SINKHORN_ITS):
        #         values = util.normalize(indices, values, rng, row=True)
        #         values = util.normalize(indices, values, rng, row=False)

        # translate tensor indices to matrix indices
        t0 = time.time()

        # mindices, flat_size = flatten_indices(indices, input.size()[1:], self.out_shape, self.use_cuda)
        mindices, flat_size = flatten_indices_mat(indices,
                                                  input.size()[1:],
                                                  self.out_size)

        # NB: mindices is not an autograd Variable. The error-signal for the indices passes to the hypernetwork
        #     through 'values', which are a function of both the real_indices and the real_values.

        ### Create the sparse weight tensor

        # -- Turns out we don't have autograd over sparse tensors yet (let alone over the constructor arguments). For
        #    now, we'll do a slow, naive multiplication.

        x_flat = input.view(batchsize, -1)
        ly = prod(self.out_size)

        # NOTE(review): this zero-filled buffer is replaced by the reshaped
        # sparsemult result further down and is never read in between.
        y_flat = torch.cuda.FloatTensor(
            batchsize, ly) if self.use_cuda else FloatTensor(batchsize, ly)
        y_flat.fill_(0.0)

        sparsemult = util.sparsemult(self.use_cuda)

        t0 = time.time()

        # Prevent segfault
        assert not util.contains_nan(values.data)

        # presumably per-example index offsets, so that the whole batch can
        # be handled as a single sparse product — confirm in self.bmult
        bm = self.bmult(flat_size[1], flat_size[0],
                        mindices.size()[1], batchsize, self.use_cuda)
        bfsize = Variable(flat_size * batchsize)

        # Flatten indices, values and input over the batch dimension.
        bfindices = mindices + bm
        bfindices = bfindices.view(1, -1, 2).squeeze(0)
        vindices = Variable(bfindices.t())

        bfvalues = values.view(1, -1).squeeze(0)
        bfx = x_flat.view(1, -1).squeeze(0)

        # print(vindices.size(), bfvalues.size(), bfsize, bfx.size())
        bfy = sparsemult(vindices, bfvalues, bfsize, bfx)

        # Restore the batch dimension of the product.
        y_flat = bfy.unsqueeze(0).view(batchsize, -1)

        y_shape = [batchsize]
        y_shape.extend(self.out_size)

        y = y_flat.view(y_shape)  # reshape y into a tensor

        ### Handle the bias
        if self.bias_type == Bias.DENSE:
            y = y + bias
        if self.bias_type == Bias.SPARSE:  # untested!
            pass

        if self.reinforce and train:
            return y, dists, samples
        else:
            return y
コード例 #15
0
 def sample_v(self, y):
     """Compute activation probabilities from ``y`` and sample binary states.

     The probabilities are ``sigmoid(y @ self.W + self.b)``; the bias is
     expanded to the shape of the matrix product.

     :param y: 2-D input to ``torch.mm``.
     :return: tuple ``(probabilities, bernoulli_sample)``.
     """
     pre_activation = torch.mm(y, self.W)
     pre_activation = pre_activation + self.b.expand_as(pre_activation)
     p_v_given_h = torch.sigmoid(pre_activation)
     sampled = torch.bernoulli(p_v_given_h)
     return p_v_given_h, sampled
コード例 #16
0
ファイル: RNN.py プロジェクト: pfriesch/PhnKWS
    def forward(self, x):
        """Run the stacked recurrent layers over the input sequence.

        ``x`` is indexed as ``(time, batch, feature)``.  Every layer applies
        its feed-forward transform ``self.wh[i]`` to all time steps at once
        and then unrolls the recurrence
        ``h_t = act(wh_out[t] + uh(h_{t-1})) * drop_mask`` step by step.
        In the bidirectional case the time-flipped sequence is appended
        along the batch axis and split off again after the recurrence.

        :param x: input sequence tensor.
        :return: hidden states of the last layer for every time step.
        """
        # Optional layer / batch normalisation of the raw input
        if bool(self.rnn_use_laynorm_inp):
            x = self.ln0((x))

        if bool(self.rnn_use_batchnorm_inp):
            flat_norm = self.bn0(x.view(x.shape[0] * x.shape[1], x.shape[2]))
            x = flat_norm.view(x.shape[0], x.shape[1], x.shape[2])

        for layer in range(self.N_rnn_lay):
            width = self.rnn_lay[layer]
            keep_prob = 1 - self.rnn_drop[layer]

            # Initial state; the bidirectional case holds forward and
            # backward sequences side by side along the batch axis.
            n_rows = 2 * x.shape[1] if self.bidir else x.shape[1]
            h_init = torch.zeros(n_rows, width)
            if self.bidir:
                x = torch.cat([x, flip(x, 0)], 1)

            # Dropout: one sampled mask reused for all time steps while
            # training, a constant scaling factor otherwise.
            if self.test_flag == False:
                drop_mask = torch.bernoulli(
                    torch.Tensor(n_rows, width).fill_(keep_prob))
            else:
                drop_mask = torch.FloatTensor([keep_prob])

            if self.use_cuda:
                h_init = h_init.cuda()
                drop_mask = drop_mask.cuda()

            # Feed-forward affine transformation of all steps in parallel
            wh_out = self.wh[layer](x)

            # Optional batch norm over the flattened (time * batch) axis
            if self.rnn_use_batchnorm[layer]:
                wh_shape = wh_out.shape
                flat_wh = self.bn_wh[layer](
                    wh_out.view(wh_shape[0] * wh_shape[1], wh_shape[2]))
                wh_out = flat_wh.view(wh_shape[0], wh_shape[1], wh_shape[2])

            # Unroll the recurrence over the time steps
            ht = h_init
            hiddens = []
            for k in range(x.shape[0]):
                ht = self.act[layer](wh_out[k] + self.uh[layer](ht)) * drop_mask

                if self.rnn_use_laynorm[layer]:
                    ht = self.ln[layer](ht)

                hiddens.append(ht)

            h = torch.stack(hiddens)

            # Split the two directions off the batch axis again and join
            # them along the feature axis.
            if self.bidir:
                half = int(x.shape[1] / 2)
                h_f = h[:, 0:half]
                h_b = flip(h[:, half:x.shape[1]].contiguous(), 0)
                h = torch.cat([h_f, h_b], 2)

            # Input of the next layer
            x = h

        return x
コード例 #17
0
 def update_noise(self, x):
     """Resample the 0/1 noise tensor to match the shape of ``x``.

     Stores ``1 - self.rate`` in ``self.p`` and replaces ``self.noise.data``
     with Bernoulli draws of that probability expanded to ``x.shape``.
     ``self.rate`` has to be a tensor, since ``expand`` is called on the
     result.

     :param x: tensor whose shape the new noise takes.
     """
     target_shape = x.shape
     self.p = 1 - self.rate
     fresh_noise = torch.bernoulli(self.p.expand(target_shape))
     self.noise.data = fresh_noise
コード例 #18
0
def train_both_networks(num_epochs, dataloader, netD, netG, d_labelSmooth, outputDir,
						model_option =1,binary = False, epoch_interval = 1):
	"""Alternately train the discriminator ``netD`` and the generator ``netG``.

	For every batch the discriminator is updated on the real images and on
	detached generator output, then the generator is updated so that the
	discriminator labels its output as real.  Progress is printed for every
	batch, sample grids are written every 100 batches and both state dicts
	are saved every ``epoch_interval`` epochs.

	The function works on module-level objects that are not defined here:
	``input``, ``label``, ``noise``, ``bernoulli_prob``, ``fixed_noise``,
	``nz``, ``real_label``, ``fake_label``, ``criterion``, ``criterion_MSE``,
	``optimizerD`` and ``optimizerG``.

	:param num_epochs: number of passes over ``dataloader``.
	:param dataloader: iterable of ``(images, _)`` batches.
	:param netD: discriminator network.
	:param netG: generator network; returns ``(fake, z_prediction)``.
	:param d_labelSmooth: amount subtracted from ``real_label`` for the
		discriminator targets on real images.
	:param outputDir: directory for sample images and checkpoints.
	:param model_option: with ``2`` the generator is additionally trained to
		predict its input noise (MSE between ``z_prediction`` and ``noise``).
	:param binary: draw the noise as +/-1 values from ``bernoulli_prob``
		instead of a standard normal.
	:param epoch_interval: checkpoint every this many epochs.
	"""
	# NOTE: the result is not used below; the call is kept as in the original.
	use_gpu = tc.cuda.is_available()

	for epoch in range(num_epochs):
		for i, data in enumerate(dataloader, 0):
			start_iter = time.time()

			############################
			# (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
			# 1A - Train the discriminator on the real data
			###########################
			netD.zero_grad()
			real_cpu, _ = data
			batch_size = real_cpu.size(0)
			input.data.resize_(real_cpu.size()).copy_(real_cpu)
			label.data.resize_(batch_size).fill_(real_label - d_labelSmooth) # use smooth label for discriminator

			output = netD(input)
			errD_real = criterion(output, label)
			errD_real.backward()

			# mean discriminator output on the real batch
			D_x = output.data.mean()

			#######################################################
			# 1B - Train the discriminator on generated data
			#######################################################
			noise.data.resize_(batch_size, nz, 1, 1)
			if binary:
				# map the 0/1 Bernoulli draws to -1/+1
				bernoulli_prob.resize_(noise.data.size())
				noise.data.copy_(2*(torch.bernoulli(bernoulli_prob)-0.5))
			else:
				noise.data.normal_(0, 1)
			fake,z_prediction = netG(noise)
			label.data.fill_(fake_label)
			output = netD(fake.detach()) # ".detach()" avoids backprop through G
			errD_fake = criterion(output, label)
			errD_fake.backward() # gradients for fake/real will be accumulated
			D_G_z1 = output.data.mean()
			errD = errD_real + errD_fake
			optimizerD.step() # .step() can be called once the gradients are computed

			#######################################################
			# (2) Update G network: maximize log(D(G(z)))
			#  Train the generator with the output of the discriminator
			#  (the discriminator itself is not stepped here)
			#######################################################
			netG.zero_grad()
			label.data.fill_(real_label) # fake labels are real for generator cost
			output = netD(fake)
			errG = criterion(output, label)
			# The graph is kept because errG_z below backpropagates through
			# the generator a second time.  `retain_graph` replaces the
			# removed `retain_variables` keyword.
			errG.backward(retain_graph=True)
			if model_option == 2: # with z predictor
				errG_z = criterion_MSE(z_prediction, noise)
				errG_z.backward()
			D_G_z2 = output.data.mean()
			optimizerG.step()

			end_iter = time.time()

			#Print the info
			print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f Elapsed %.2f s'
				  % (epoch, num_epochs, i, len(dataloader),
					 errD.data[0], errG.data[0], D_x, D_G_z1, D_G_z2, end_iter-start_iter))

			#Save a grid with the pictures from the dataset, up until 64
			if i % 100 == 0:
				# the first 64 samples from the mini-batch are saved.
				vutils.save_image(real_cpu[0:64,:,:,:],
						'%s/real_samples.png' % outputDir, nrow=8)
				fake,_ = netG(fixed_noise)
				vutils.save_image(fake.data[0:64,:,:,:],
						'%s/fake_samples_epoch_%03d.png' % (outputDir, epoch), nrow=8)
		if epoch % epoch_interval == 0:
			# do checkpointing
			torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (outputDir, epoch))
			torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (outputDir, epoch))
コード例 #19
0
def train_both_networks(num_epochs, dataloader, netD, netG, d_labelSmooth, outputDir,
						model_option =1,binary = False, epoch_interval = 1):
	"""Alternately train the discriminator ``netD`` and the generator ``netG``.

	Per batch: one discriminator step on the real images plus detached
	generator output, followed by one generator step against the updated
	discriminator.  Progress is printed for every batch, sample grids are
	written every 100 batches and both state dicts are saved every
	``epoch_interval`` epochs.

	Module-level objects used but not defined here: ``input``, ``label``,
	``noise``, ``bernoulli_prob``, ``fixed_noise``, ``nz``, ``real_label``,
	``fake_label``, ``criterion``, ``criterion_MSE``, ``optimizerD`` and
	``optimizerG``.
	"""
	# The result is not used further down.
	use_gpu = tc.cuda.is_available()

	for epoch in range(num_epochs):
		for i, data in enumerate(dataloader, 0):
			tic = time.time()

			# ---------------------------------------------------------
			# (1) Discriminator update: maximize log(D(x)) + log(1 - D(G(z)))
			# 1A - real images
			# ---------------------------------------------------------
			netD.zero_grad()
			real_batch, _ = data
			n_real = real_batch.size(0)  # batch size is read from the data itself
			# copy the image batch into the pre-allocated input tensor
			input.data.resize_(real_batch.size()).copy_(real_batch)
			# targets for real images: real_label reduced by the smoothing amount
			label.data.resize_(n_real).fill_(real_label - d_labelSmooth)

			d_out = netD(input)
			loss_d_real = criterion(d_out, label)
			loss_d_real.backward()

			# mean discriminator output on the real batch
			D_x = d_out.data.mean()

			# ---------------------------------------------------------
			# 1B - generated images
			# ---------------------------------------------------------
			# noise of shape (batch, nz, 1, 1)
			noise.data.resize_(n_real, nz, 1, 1)
			if not binary:
				noise.data.normal_(0, 1)
			else:
				# +/-1 noise obtained from Bernoulli draws
				bernoulli_prob.resize_(noise.data.size())
				noise.data.copy_(2*(torch.bernoulli(bernoulli_prob)-0.5))

			generated, z_prediction = netG(noise)
			label.data.fill_(fake_label)
			# detach so that this backward pass does not reach the generator
			d_out = netD(generated.detach())
			loss_d_fake = criterion(d_out, label)
			loss_d_fake.backward()  # accumulates on top of the real-batch gradients
			D_G_z1 = d_out.data.mean()
			loss_d = loss_d_real + loss_d_fake
			optimizerD.step()

			# ---------------------------------------------------------
			# (2) Generator update: maximize log(D(G(z)))
			#     (the discriminator is not stepped here)
			# ---------------------------------------------------------
			netG.zero_grad()
			# the generator wants its images to be labelled as real
			label.data.fill_(real_label)
			d_out = netD(generated)
			loss_g = criterion(d_out, label)
			# keep the graph: the z-predictor loss below runs a second
			# backward pass through the generator
			loss_g.backward(retain_graph=True)

			if model_option == 2:  # with z predictor
				loss_z = criterion_MSE(z_prediction, noise)
				loss_z.backward()

			# mean discriminator output on the generated batch after the D step
			D_G_z2 = d_out.data.mean()
			optimizerG.step()

			elapsed = time.time() - tic

			print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f Elapsed %.2f s'
				  % (epoch, num_epochs, i, len(dataloader),
					 loss_d.data[0], loss_g.data[0], D_x, D_G_z1, D_G_z2, elapsed))

			# Every 100 batches: save up to 64 real images and 64 images
			# generated from the fixed noise.
			if i % 100 == 0:
				vutils.save_image(real_batch[0:64,:,:,:],
						'%s/real_samples.png' % outputDir, nrow=8)
				preview, _ = netG(fixed_noise)
				vutils.save_image(preview.data[0:64,:,:,:],
						'%s/fake_samples_epoch_%03d.png' % (outputDir, epoch), nrow=8)

		# checkpoint both networks
		if epoch % epoch_interval == 0:
			torch.save(netG.state_dict(), '%s/netG_epoch_%d.pth' % (outputDir, epoch))
			torch.save(netD.state_dict(), '%s/netD_epoch_%d.pth' % (outputDir, epoch))
コード例 #20
0
def train(epoch,
          train_loader,
          model,
          opt,
          args,
          logger,
          nfef_meter=None,
          nfeb_meter=None):
    """Train ``model`` for one pass over ``train_loader``.

    The warm-up coefficient is ``min(epoch / max(args.warmup, 1),
    args.max_beta)``.  With ``args.dynamic_binarization`` set, every batch
    is replaced by a Bernoulli sample of itself.  When ``'cnf'`` occurs in
    ``args.flow`` the forward and backward function-evaluation counts are
    fed to ``nfef_meter`` / ``nfeb_meter``.

    :return: the array of per-batch losses, or
        ``(train_loss, nfef_meter, nfeb_meter)`` when ``'cnf'`` occurs in
        ``args.flow``.
    """
    model.train()

    n_batches = len(train_loader)
    train_loss = np.zeros(n_batches)
    train_bpd = np.zeros(n_batches)
    uses_cnf = 'cnf' in args.flow

    num_data = 0

    # set warmup coefficient
    beta = min((epoch * 1.) / max(args.warmup, 1.), args.max_beta)
    logger.info('beta = {:5.4f}'.format(beta))

    end = time.time()
    for batch_idx, (data, _) in enumerate(train_loader):
        if args.cuda:
            data = data.cuda()

        if args.dynamic_binarization:
            data = torch.bernoulli(data)

        data = data.view(-1, *args.input_size)

        opt.zero_grad()
        x_mean, z_mu, z_var, ldj, z0, zk = model(data, is_eval=False, epoch=epoch)

        if uses_cnf:
            f_nfe = count_nfe(model)

        loss, rec, kl, bpd = calculate_loss(
            x_mean, data, z_mu, z_var, z0, zk, ldj, args, beta=beta)

        loss.backward()

        if uses_cnf:
            # evaluations spent in the backward pass = total - forward
            b_nfe = count_nfe(model) - f_nfe

            nfef_meter.update(f_nfe)
            nfeb_meter.update(b_nfe)

        loss_value = loss.item()
        train_loss[batch_idx] = loss_value
        train_bpd[batch_idx] = bpd

        opt.step()

        rec = rec.item()
        kl = kl.item()

        num_data += len(data)

        now = time.time()
        batch_time = now - end
        end = time.time()

        if batch_idx % args.log_interval != 0:
            continue

        perc = 100. * batch_idx / n_batches
        if args.input_type == 'binary':
            template = (
                'Epoch {:3d} [{:5d}/{:5d} ({:2.0f}%)] | Time {:.3f} | Loss {:11.6f} | '
                'Rec {:11.6f} | KL {:11.6f}')
            log_msg = template.format(
                epoch, num_data, len(train_loader.sampler), perc,
                batch_time, loss_value, rec, kl)
        else:
            tmp = 'Epoch {:3d} [{:5d}/{:5d} ({:2.0f}%)] | Time {:.3f} | Loss {:11.6f} | Bits/dim {:8.6f}'
            head = tmp.format(
                epoch, num_data, len(train_loader.sampler), perc,
                batch_time, loss_value, bpd)
            log_msg = head + '\trec: {:11.3f}\tkl: {:11.6f}'.format(rec, kl)
        if uses_cnf:
            log_msg += ' | NFE Forward {:.0f}({:.1f}) | NFE Backward {:.0f}({:.1f})'.format(
                f_nfe, nfef_meter.avg, b_nfe, nfeb_meter.avg)
        logger.info(log_msg)

    # Epoch summary
    if args.input_type == 'binary':
        logger.info('====> Epoch: {:3d} Average train loss: {:.4f}'.format(
            epoch,
            train_loss.sum() / n_batches))
    else:
        logger.info(
            '====> Epoch: {:3d} Average train loss: {:.4f}, average bpd: {:.4f}'
            .format(epoch,
                    train_loss.sum() / n_batches,
                    train_bpd.sum() / n_batches))

    if uses_cnf:
        return train_loss, nfef_meter, nfeb_meter
    return train_loss
コード例 #21
0
def test_basic_block():
    """Check that ``BasicBlock`` assembles the expected layers and output sizes.

    Eight configurations are built: plain convolution vs. partial
    convolution, with and without a normalisation layer, each one without
    and with upsampling. For every configuration the test verifies which
    layer types the block contains and the size of its output (and of the
    returned mask for the partial-convolution variants).

    NOTE(review): judging by the assertions below, the three leading
    positional flags of ``BasicBlock`` appear to be (upsample, use partial
    conv, skip the norm layer) -- confirm against the ``BasicBlock``
    definition.
    """
    block = BasicBlock
    x = Variable(torch.randn(1, 3, 64, 64))
    # Random binary masks: one at input resolution, one at the upsampled one.
    mask = Variable(torch.bernoulli(torch.rand(1, 1, 64, 64)))
    mask2 = Variable(torch.bernoulli(torch.rand(1, 1, 128, 128)))

    # without upsample
    cfg = {
        'in_channels': 3,
        'out_channels': 4,
        'kernel_size': 3,
        'stride': 2,
        'padding': 1
    }
    # conv-bn-relu
    print('conv-bn-relu')
    b1 = block(False, False, False, **cfg)
    out1 = b1(x)
    assert_block_contains(b1, ['Conv2d', 'BatchNorm2d', 'ReLU'])
    assert_block_not_contains(b1, ['Upsample'])
    assert_size_match(out1.size(), [1, 4, 32, 32])

    # conv-relu
    print('conv-relu')
    b2 = block(False, False, True, **cfg)
    out2 = b2(x)
    assert_block_contains(b2, ['Conv2d', 'ReLU'])
    assert_block_not_contains(b2, ['BatchNorm2d', 'Upsample'])
    assert_size_match(out2.size(), [1, 4, 32, 32])

    # pconv-in-lrelu
    print('pconv-in-lrelu')
    b3 = block(False,
               True,
               False,
               **cfg,
               norm=nn.InstanceNorm2d,
               activation=nn.LeakyReLU(0.2))
    out3, _mask_ = b3(x, mask)
    assert_block_contains(b3, ['PartialConv2d', 'InstanceNorm2d', 'LeakyReLU'])
    assert_block_not_contains(b3, ['BatchNorm2d', 'ReLU', 'Upsample'])
    # Each size check is its own statement: chaining them with ``and`` would
    # skip the mask check whenever the helper returns a falsy value (e.g.
    # ``None``), which is what assertion-style helpers usually return.
    assert_size_match(out3.size(), [1, 4, 32, 32])
    assert_size_match(_mask_.size(), [1, 1, 32, 32])

    # pconv-sigmoid
    print('pconv-sigmoid')
    b4 = block(False, True, True, **cfg, activation=nn.Sigmoid())
    out4, _mask_ = b4(x, mask)
    assert_block_contains(b4, ['PartialConv2d', 'Sigmoid'])
    assert_block_not_contains(b4, ['BatchNorm2d', 'ReLU', 'Upsample'])
    assert_size_match(out4.size(), [1, 4, 32, 32])
    assert_size_match(_mask_.size(), [1, 1, 32, 32])

    # with upsample
    cfg = {
        'in_channels': 3,
        'out_channels': 4,
        'kernel_size': 3,
        'stride': 1,
        'padding': 1
    }
    # conv-bn-relu
    print('upsample + conv-bn-relu')
    b1 = block(True, False, False, **cfg)
    out1 = b1(x)
    assert_block_contains(b1, ['Conv2d', 'BatchNorm2d', 'ReLU', 'Upsample'])
    assert_size_match(out1.size(), [1, 4, 128, 128])

    # conv-relu
    print('upsample + conv-relu')
    b2 = block(True, False, True, **cfg)
    out2 = b2(x)
    assert_block_contains(b2, ['Conv2d', 'ReLU', 'Upsample'])
    assert_block_not_contains(b2, ['BatchNorm2d'])
    assert_size_match(out2.size(), [1, 4, 128, 128])

    # pconv-in-lrelu
    print('upsample + pconv-in-lrelu')
    b3 = block(True,
               True,
               False,
               **cfg,
               norm=nn.InstanceNorm2d,
               activation=nn.LeakyReLU(0.2))
    out3, _mask_ = b3(x, mask2)
    assert_block_contains(
        b3, ['PartialConv2d', 'InstanceNorm2d', 'LeakyReLU', 'Upsample'])
    assert_block_not_contains(b3, ['BatchNorm2d', 'ReLU'])
    assert_size_match(out3.size(), [1, 4, 128, 128])
    assert_size_match(_mask_.size(), [1, 1, 128, 128])

    # pconv-sigmoid
    print('upsample + pconv-sigmoid')
    b4 = block(True, True, True, **cfg, activation=nn.Sigmoid())
    out4, _mask_ = b4(x, mask2)
    assert_block_contains(b4, ['PartialConv2d', 'Sigmoid', 'Upsample'])
    assert_block_not_contains(b4, ['BatchNorm2d', 'ReLU'])
    assert_size_match(out4.size(), [1, 4, 128, 128])
    assert_size_match(_mask_.size(), [1, 1, 128, 128])
コード例 #22
0
def main(args):
    """Sample from, evaluate, or train a VAE on a pre-saved image dataset.

    The data file at ``args.data_file`` is expected to unpack into three
    tensors (train, validation, test). Depending on ``args`` the function
    runs in one of three modes:

    * ``args.sample_from`` set: load that checkpoint, draw 400 prior samples,
      write two image grids under ``samples/<dataset>`` and return.
    * ``args.eval`` set: load ``args.load_path``, report test metrics, the
      number of active units and the importance-weighted NLL, then return.
    * otherwise: train with KL-weight annealing and, when
      ``args.aggressive`` is set, an inner loop that updates only the
      encoder before each decoder update. The learning rate is decayed after
      the validation loss fails to improve, and training stops after
      ``max_decay`` decays. The best checkpoint is finally re-evaluated on
      the test set.

    NOTE(review): ``clip_grad``, ``decay_epoch``, ``lr_decay`` and
    ``max_decay`` are read as module-level names that are not defined in
    this function -- confirm they exist at file level.

    Args:
        args: parsed command-line namespace; ``args.device`` is set here as
            a side effect.
    """
    if args.save_path == '':
        make_savepath(args)
        seed(args)

    if args.cuda:
        print('using cuda')

    print(args)

    device = torch.device("cuda" if args.cuda else "cpu")
    args.device = device

    opt_dict = {"not_improved": 0, "lr": 1., "best_loss": 1e4}

    all_data = torch.load(args.data_file)
    x_train, x_val, x_test = all_data

    x_train = x_train.to(device)
    x_val = x_val.to(device)
    x_test = x_test.to(device)
    # The datasets carry all-zero placeholder labels of width ``y_size``.
    y_size = 1
    y_train = x_train.new_zeros(x_train.size(0), y_size)
    y_val = x_train.new_zeros(x_val.size(0), y_size)
    y_test = x_train.new_zeros(x_test.size(0), y_size)
    print(torch.__version__)
    train_data = torch.utils.data.TensorDataset(x_train, y_train)
    val_data = torch.utils.data.TensorDataset(x_val, y_val)
    test_data = torch.utils.data.TensorDataset(x_test, y_test)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=True)
    print('Train data: %d batches' % len(train_loader))
    print('Val data: %d batches' % len(val_loader))
    print('Test data: %d batches' % len(test_loader))
    sys.stdout.flush()

    # Log roughly five times per epoch. Clamp to 1 so that a loader with
    # fewer than five batches does not make ``iter_ % log_niter`` divide by
    # zero.
    log_niter = max(1, len(train_loader) // 5)

    encoder = ResNetEncoderV2(args)
    decoder = PixelCNNDecoderV2(args)

    vae = VAE(encoder, decoder, args).to(device)

    # ---- Mode 1: sample from a saved checkpoint and exit. ----
    if args.sample_from != '':
        save_dir = "samples/%s" % args.dataset
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        vae.load_state_dict(torch.load(args.sample_from))
        vae.eval()
        with torch.no_grad():
            sample_z = vae.sample_from_prior(400).to(device)
            sample_x, sample_probs = vae.decoder.decode(sample_z, False)
        # The checkpoint's base name minus its last three characters
        # (presumably a ".pt" suffix) is reused in the image file names.
        image_file = 'sample_binary_from_%s.png' % (
            args.sample_from.split('/')[-1][:-3])
        save_image(sample_x.data.cpu(),
                   os.path.join(save_dir, image_file),
                   nrow=20)
        image_file = 'sample_cont_from_%s.png' % (
            args.sample_from.split('/')[-1][:-3])
        save_image(sample_probs.data.cpu(),
                   os.path.join(save_dir, image_file),
                   nrow=20)

        return

    # ---- Mode 2: evaluate a saved checkpoint and exit. ----
    if args.eval:
        print('begin evaluation')
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=50,
                                                  shuffle=True)
        vae.load_state_dict(torch.load(args.load_path))
        vae.eval()
        with torch.no_grad():
            test(vae, test_loader, "TEST", args)
            au, au_var = calc_au(vae, test_loader)
            print("%d active units" % au)
            # print(au_var)

            calc_iwnll(vae, test_loader, args)

        return

    # ---- Mode 3: training. ----
    enc_optimizer = optim.Adam(vae.encoder.parameters(), lr=0.001)
    dec_optimizer = optim.Adam(vae.decoder.parameters(), lr=0.001)
    opt_dict['lr'] = 0.001

    iter_ = 0
    best_loss = 1e4
    best_kl = best_nll = best_ppl = 0
    decay_cnt = pre_mi = best_mi = mi_not_improved = 0
    aggressive_flag = True if args.aggressive else False
    vae.train()
    start = time.time()

    # Linear KL annealing from ``args.kl_start`` to 1.0 over ``args.warm_up``
    # epochs' worth of iterations.
    kl_weight = args.kl_start
    anneal_rate = (1.0 - args.kl_start) / (args.warm_up * len(train_loader))

    for epoch in range(args.epochs):
        report_kl_loss = report_rec_loss = 0
        report_num_examples = 0
        for datum in train_loader:
            batch_data, _ = datum
            # Binarise the batch by sampling each pixel as a Bernoulli draw.
            batch_data = torch.bernoulli(batch_data)
            batch_size = batch_data.size(0)

            report_num_examples += batch_size

            # kl_weight = 1.0
            kl_weight = min(1.0, kl_weight + anneal_rate)

            # Aggressive phase: repeatedly update the encoder alone on
            # freshly drawn batches, checking every 10 sub-iterations whether
            # the average loss has stopped decreasing.
            sub_iter = 1
            batch_data_enc = batch_data
            burn_num_examples = 0
            burn_pre_loss = 1e4
            burn_cur_loss = 0
            while aggressive_flag and sub_iter < 100:

                enc_optimizer.zero_grad()
                dec_optimizer.zero_grad()

                burn_num_examples += batch_data_enc.size(0)
                loss, loss_rc, loss_kl = vae.loss(batch_data_enc,
                                                  kl_weight,
                                                  nsamples=args.nsamples)

                burn_cur_loss += loss.sum().item()
                loss = loss.mean(dim=-1)

                loss.backward()
                torch.nn.utils.clip_grad_norm_(vae.parameters(), clip_grad)

                enc_optimizer.step()

                id_ = np.random.choice(x_train.size(0),
                                       args.batch_size,
                                       replace=False)

                batch_data_enc = torch.bernoulli(x_train[id_])

                if sub_iter % 10 == 0:
                    burn_cur_loss = burn_cur_loss / burn_num_examples
                    if burn_pre_loss - burn_cur_loss < 0:
                        break
                    burn_pre_loss = burn_cur_loss
                    burn_cur_loss = burn_num_examples = 0

                sub_iter += 1

            # print(sub_iter)

            enc_optimizer.zero_grad()
            dec_optimizer.zero_grad()

            loss, loss_rc, loss_kl = vae.loss(batch_data,
                                              kl_weight,
                                              nsamples=args.nsamples)

            loss = loss.mean(dim=-1)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(vae.parameters(), clip_grad)

            loss_rc = loss_rc.sum()
            loss_kl = loss_kl.sum()

            # During the aggressive phase the encoder was already updated in
            # the inner loop, so only the decoder steps here.
            if not aggressive_flag:
                enc_optimizer.step()

            dec_optimizer.step()

            report_rec_loss += loss_rc.item()
            report_kl_loss += loss_kl.item()

            if iter_ % log_niter == 0:
                train_loss = (report_rec_loss +
                              report_kl_loss) / report_num_examples
                if aggressive_flag or epoch == 0:
                    vae.eval()
                    with torch.no_grad():
                        mi = calc_mi(vae, val_loader)
                        au, _ = calc_au(vae, val_loader)

                    vae.train()

                    print('epoch: %d, iter: %d, avg_loss: %.4f, kl: %.4f, mi: %.4f, recon: %.4f,' \
                           'au %d, time elapsed %.2fs' %
                           (epoch, iter_, train_loss, report_kl_loss / report_num_examples, mi,
                           report_rec_loss / report_num_examples, au, time.time() - start))
                else:
                    print('epoch: %d, iter: %d, avg_loss: %.4f, kl: %.4f, recon: %.4f,' \
                          'time elapsed %.2fs' %
                          (epoch, iter_, train_loss, report_kl_loss / report_num_examples,
                          report_rec_loss / report_num_examples, time.time() - start))
                sys.stdout.flush()

                report_rec_loss = report_kl_loss = 0
                report_num_examples = 0

            iter_ += 1

            # Once per epoch, stop the aggressive phase after the validation
            # mutual information has failed to beat its best value 5 times.
            if aggressive_flag and (iter_ % len(train_loader)) == 0:
                vae.eval()
                # Evaluation only: no autograd graph is needed, matching the
                # other calc_mi call above.
                with torch.no_grad():
                    cur_mi = calc_mi(vae, val_loader)
                vae.train()
                if cur_mi - best_mi < 0:
                    mi_not_improved += 1
                    if mi_not_improved == 5:
                        aggressive_flag = False
                        print("STOP BURNING")

                else:
                    best_mi = cur_mi

                pre_mi = cur_mi

        print('kl weight %.4f' % kl_weight)
        print('epoch: %d, VAL' % epoch)

        vae.eval()

        with torch.no_grad():
            loss, nll, kl = test(vae, val_loader, "VAL", args)
            au, au_var = calc_au(vae, val_loader)
            print("%d active units" % au)
            # print(au_var)

        if loss < best_loss:
            print('update best loss')
            best_loss = loss
            best_nll = nll
            best_kl = kl
            torch.save(vae.state_dict(), args.save_path)

        # No improvement: count it, and after ``decay_epoch`` such epochs
        # reload the best checkpoint and restart both optimizers at a
        # decayed learning rate.
        if loss > best_loss:
            opt_dict["not_improved"] += 1
            if opt_dict["not_improved"] >= decay_epoch:
                opt_dict["best_loss"] = loss
                opt_dict["not_improved"] = 0
                opt_dict["lr"] = opt_dict["lr"] * lr_decay
                vae.load_state_dict(torch.load(args.save_path))
                decay_cnt += 1
                print('new lr: %f' % opt_dict["lr"])
                enc_optimizer = optim.Adam(vae.encoder.parameters(),
                                           lr=opt_dict["lr"])
                dec_optimizer = optim.Adam(vae.decoder.parameters(),
                                           lr=opt_dict["lr"])
        else:
            opt_dict["not_improved"] = 0
            opt_dict["best_loss"] = loss

        if decay_cnt == max_decay:
            break

        if epoch % args.test_nepoch == 0:
            with torch.no_grad():
                loss, nll, kl = test(vae, test_loader, "TEST", args)

        vae.train()

    # compute importance weighted estimate of log p(x)
    vae.load_state_dict(torch.load(args.save_path))
    vae.eval()
    with torch.no_grad():
        loss, nll, kl = test(vae, test_loader, "TEST", args)
        au, au_var = calc_au(vae, test_loader)
        print("%d active units" % au)
        # print(au_var)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=50,
                                              shuffle=True)

    with torch.no_grad():
        calc_iwnll(vae, test_loader, args)
コード例 #23
0
    netG.load_state_dict(torch.load(opt.netG))
print(netG)

# Initialise the discriminator weights, then optionally restore them from
# the checkpoint path given in opt.netD.
netD.apply(weights_init)
if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))
print(netD)

# Two loss modules are created: binary cross-entropy and mean-squared error.
criterion = nn.BCELoss()
criterion_MSE = nn.MSELoss()

# Buffers for image batches and latent noise; they are only allocated here,
# not filled with values.
# NOTE(review): `input` shadows the Python builtin of the same name.
input = torch.FloatTensor(opt.batchSize, 3, opt.imageSize, opt.imageSize)
noise = torch.FloatTensor(opt.batchSize, nz, 1, 1)
# Fixed latent batch: fair coin flips in binary mode, otherwise standard
# normal draws.
if opt.binary:
    bernoulli_prob = torch.FloatTensor(opt.batchSize, nz, 1, 1).fill_(0.5)
    fixed_noise = torch.bernoulli(bernoulli_prob)
else:
    fixed_noise = torch.FloatTensor(opt.batchSize, nz, 1, 1).normal_(0, 1)
label = torch.FloatTensor(opt.batchSize)
real_label = 1
fake_label = 0

# Move the networks, losses and buffers to the GPU when requested.
if opt.cuda:
    netD.cuda()
    netG.cuda()
    criterion.cuda()
    criterion_MSE.cuda()
    input, label = input.cuda(), label.cuda()
    noise, fixed_noise = noise.cuda(), fixed_noise.cuda()

input = Variable(input)
コード例 #24
0
def forward(numeric, train=True, printHere=False):
    """Run one forward pass of the memory-filter / language-model pipeline.

    The input word indices are replicated ``args.NUMBER_OF_REPLICATES``
    times and prefixed with the stored ``beginning`` context. A per-position
    retention probability (``memory_hidden``) is computed, a Bernoulli
    keep/drop decision is sampled from it, and dropped positions are
    replaced by index 0. The language model then predicts only the final
    word from the filtered sequence.

    The returned loss combines: a score-function term (detached
    reward-minus-baseline times the log-probability of the sampled
    decisions), an optional entropy bonus, a squared-error term that trains
    the baseline, and an L2 penalty on the three retention-logit components.

    Side effects: updates the globals ``hidden``, ``beginning`` and the four
    running averages; prints diagnostics when ``printHere`` is true, and to
    stderr whenever ``updatesCount`` is a multiple of 5000.

    Args:
        numeric: pair ``(numeric, numeric_chars)``; only the first element is
            used below.
        train: not referenced in this function body.
        printHere: when true, print per-position diagnostics for replicate 0.

    Returns:
        Tuple ``(loss, n)`` where ``n`` is the number of elements in the
        target tensor.
    """
    global hidden
    global beginning
    global beginning_chars
    if hidden is None:
        hidden = None
        beginning = zeroBeginning
#       beginning_chars = zeroBeginning_chars
    elif hidden is not None:
        # Carry the recurrent state over from the previous call, detached
        # from its graph; entries selected by the sampled `forRestart` mask
        # are reset to the zero state / zero beginning.
        hidden1 = Variable(hidden[0]).detach()
        hidden2 = Variable(hidden[1]).detach()
        forRestart = bernoulli.sample()
        hidden1 = torch.where(
            forRestart.unsqueeze(0).unsqueeze(2) == 1, zeroHidden, hidden1)
        hidden2 = torch.where(
            forRestart.unsqueeze(0).unsqueeze(2) == 1, zeroHidden, hidden2)
        hidden = (hidden1, hidden2)
        beginning = torch.where(
            forRestart.unsqueeze(0) == 1, zeroBeginning, beginning)

#        beginning_chars = torch.where(forRestart.unsqueeze(0).unsqueeze(2) == 1, zeroBeginning_chars, beginning_chars)

    numeric, numeric_chars = numeric

    #      print(numeric.size())
    # Replicate the input along dim 1 and prepend the stored beginning rows.
    numeric = numeric.expand(-1, args.NUMBER_OF_REPLICATES)
    numeric = torch.cat([beginning, numeric], dim=0)
    embedded_everything = word_embeddings(numeric)

    # Positional embeddings
    numeric_positions = torch.LongTensor(range(args.sequence_length +
                                               1)).cuda().unsqueeze(1)
    embedded_positions = positional_embeddings(numeric_positions)
    numeric_embedded = memory_word_pos_inter(embedded_positions)
    #      numeric_transformed = memory_mlp_inner_from_pos(numeric_embedded)
    #      print(numeric_transformed.size(), embedded_everything.size())

    # Retention probabilities
    # The word embeddings are detached here, so the retention model does not
    # back-propagate into them.
    memory_byword_inner = memory_mlp_inner(embedded_everything.detach())
    memory_hidden_logit_per_wordtype = memory_mlp_outer(
        relu(memory_byword_inner))
    #print(embedded_positions.size(), embedded_everything.size())
    #print(memory_bilinear(embedded_positions).size(), embedded_everything.size())
    attention_bilinear_term = torch.bmm(
        memory_bilinear(embedded_positions),
        relu(memory_mlp_inner_bilinear(
            embedded_everything.detach())).transpose(1, 2)).transpose(1, 2)
    #print(

    # Retention logit = position term + word term + bilinear interaction term.
    memory_hidden_logit = numeric_embedded + memory_hidden_logit_per_wordtype + attention_bilinear_term
    #     print("----")
    #      print(numeric_embedded.size(), memory_hidden_logit_per_wordtype.size())
    #      print(positional_embeddings.weight)
    #     print(numeric_embedded)
    #      print(memory_mlp_outer(relu(memory_mlp_inner(embedded_everything.detach()))))
    memory_hidden = sigmoid(memory_hidden_logit)

    #     forWords = memory_mlp_outer(relu(memory_mlp_inner(embedded_everything.detach())))
    #    print(numeric_transformed.size(), forWords.size())
    #   interaction = torch.bmm(memory_word_pos_inter(forWords).transpose(0,1), numeric_transformed.transpose(0,1).transpose(1,2))

    #  memory_hidden = sigmoid(memory_linear_position(numeric_embedded) + interaction + memory_linear_word(forWords))
    # quit()

    #memory_hidden = (numeric_transformed + sigmoid(memory_mlp_outer(relu(memory_mlp_inner(embedded_everything.detach()))))

    # Baseline predictions for prediction loss
    # Computed from the (detached) embedding of the last position and scaled
    # into the range (0, 10) by the sigmoid.
    baselineValues = 10 * sigmoid(
        perword_baseline_outer(
            relu(perword_baseline_inner(
                embedded_everything[-1].detach())))).squeeze(1)
    assert tuple(baselineValues.size()) == (args.NUMBER_OF_REPLICATES, )

    # Noise decisions
    # Sample keep (1) / drop (0) per position and record the log-probability
    # of the sampled decision; 1e-10 guards against log(0).
    memory_filter = torch.bernoulli(input=memory_hidden)
    bernoulli_logprob = torch.where(memory_filter == 1,
                                    torch.log(memory_hidden + 1e-10),
                                    torch.log(1 - memory_hidden + 1e-10))
    bernoulli_logprob_perBatch = bernoulli_logprob.mean(dim=0)
    if args.entropy_weight > 0:
        entropy = -(
            memory_hidden * torch.log(memory_hidden + 1e-10) +
            (1 - memory_hidden) * torch.log(1 - memory_hidden + 1e-10)).mean()
    else:
        # Placeholder value; the entropy is only reported, not used in the
        # loss, when the weight is not positive.
        entropy = -1.0
    memory_filter = memory_filter.squeeze(2)
    # Dropped positions are replaced by index 0.
    numeric_noised = torch.where(
        memory_filter == 1, numeric, 0 * numeric
    )  #[[x if random.random() > args.deletion_rate else 0 for x in y] for y in numeric.cpu().t()]

    # Input to language model
    input_tensor = Variable(numeric_noised[:-1], requires_grad=False)
    # Target
    target_tensor = Variable(numeric[1:], requires_grad=False)

    #      baselineValues = perword_baseline(target_tensor[-1]).squeeze(1)

    embedded = word_embeddings(input_tensor)
    if TRAIN_LM:
        # Dropout on the embeddings plus a sampled multiplicative mask that
        # is shared across positions (leading dimension of size 1).
        embedded = char_dropout(embedded)
        mask = bernoulli_input.sample()
        mask = mask.view(1, args.batchSize, 2 * args.word_embedding_size)
        embedded = embedded * mask

    # The global `hidden` is overwritten with the new recurrent state here.
    out, hidden = rnn_drop(embedded, hidden)

    # Only aim to predict the last word
    out = out[-1:]

    if TRAIN_LM:
        mask = bernoulli_output.sample()
        mask = mask.view(1, args.batchSize, args.hidden_dim)
        out = out * mask

    logits = output(out)
    log_probs = logsoftmax(logits)

    # Prediction Loss
    lossTensor = print_loss(log_probs.view(
        -1,
        len(itos) + 3), target_tensor[-1].view(-1)).view(
            -1, args.NUMBER_OF_REPLICATES)  # , args.batchSize is 1

    # Reward, term 1
    negativeRewardsTerm1 = lossTensor.mean(dim=0)

    # Reward, term 2
    # Regularization towards lower retention rates
    negativeRewardsTerm2 = memory_filter.mean(dim=0)

    # Overall Reward
    negativeRewardsTerm = negativeRewardsTerm1 + args.RATE_WEIGHT * negativeRewardsTerm2

    # baselineValues: the baselines for the prediction loss (term 1)
    # memory_hidden: baseline for term 2
    # Important to detach all but the baseline values

    # Reward Minus Baseline
    # Detached surprisal and mean retention
    rewardMinusBaseline = (
        negativeRewardsTerm.detach() - baselineValues -
        args.RATE_WEIGHT * memory_hidden.mean(dim=0).squeeze(dim=1).detach())
    # Important to detach from the baseline!!!
    loss = (rewardMinusBaseline.detach() *
            bernoulli_logprob_perBatch.squeeze(1)).mean()
    if args.entropy_weight > 0:
        loss -= args.entropy_weight * entropy

    # Loss for trained baseline
    loss += args.reward_multiplier_baseline * rewardMinusBaseline.pow(2).mean()

    # L2 penalty on the three components of the retention logit.
    loss += args.bilinear_l2 * (numeric_embedded.pow(2).mean() +
                                memory_hidden_logit_per_wordtype.pow(2).mean()
                                + attention_bilinear_term.pow(2).mean())

    ############################
    # Construct running averages
    # Exponential-moving-average decay factor, scaled by the batch size.
    factor = 0.9996**args.batchSize

    # Update running averages
    global runningAverageBaselineDeviation
    global runningAveragePredictionLoss
    global runningAverageReward
    global expectedRetentionRate

    expectedRetentionRate = factor * expectedRetentionRate + (
        1 - factor) * float(memory_hidden.mean())
    runningAverageBaselineDeviation = factor * runningAverageBaselineDeviation + (
        1 - factor) * float((rewardMinusBaseline).abs().mean())
    runningAveragePredictionLoss = factor * runningAveragePredictionLoss + (
        1 - factor) * round(float(negativeRewardsTerm1.mean()), 3)
    runningAverageReward = factor * runningAverageReward + (
        1 - factor) * float(negativeRewardsTerm.mean())
    ############################

    if printHere:
        # Per-position diagnostics for the first replicate (column 0).
        losses = lossTensor.data.cpu().numpy()
        numericCPU = numeric.cpu().data.numpy()
        numeric_noisedCPU = numeric_noised.cpu().data.numpy()
        memory_hidden_CPU = memory_hidden[:, 0, 0].cpu().data.numpy()
        memory_hidden_logit_per_wordtype_cpu = memory_hidden_logit_per_wordtype.cpu(
        ).data
        attention_bilinear_term = attention_bilinear_term.cpu().data
        numeric_embedded_cpu = numeric_embedded.cpu().data
        print(("NONE", itos_total[numericCPU[0][0]]))
        for i in range((args.sequence_length)):
            print(
                (losses[0][0] if i == args.sequence_length - 1 else None,
                 itos_total[numericCPU[i + 1][0]],
                 itos_total[numeric_noisedCPU[i + 1][0]], memory_hidden_CPU[i +
                                                                            1],
                 float(baselineValues[0]) if i == args.sequence_length -
                 1 else "", float(numeric_embedded_cpu[i + 1, 0, 0]),
                 float(memory_hidden_logit_per_wordtype_cpu[i + 1, 0, 0]),
                 float(attention_bilinear_term[i + 1, 0, 0])))

        print(lossTensor.view(-1))
        print(baselineValues.view(-1))
        print("EMPIRICAL DEVIATION FROM BASELINE",
              (lossTensor - baselineValues).abs().mean())

        print("PREDICTION_LOSS", runningAveragePredictionLoss, "\tTERM2",
              round(float(negativeRewardsTerm2.mean()),
                    3), "\tAVERAGE_RETENTION", expectedRetentionRate,
              "\tDEVIATION FROM BASELINE", runningAverageBaselineDeviation,
              "\tREWARD", runningAverageReward, "\tENTROPY", float(entropy))
    # `updatesCount` is read from module scope; it is not modified here.
    if updatesCount % 5000 == 0:
        print("\t".join([
            str(x)
            for x in ("PREDICTION_LOSS", runningAveragePredictionLoss,
                      "\tTERM2", round(float(negativeRewardsTerm2.mean()), 3),
                      "\tAVERAGE_RETENTION", expectedRetentionRate,
                      "\tDEVIATION FROM BASELINE",
                      runningAverageBaselineDeviation, "\tREWARD",
                      runningAverageReward, "\tENTROPY", float(entropy))
        ]),
              file=sys.stderr)

    #runningAveragePredictionLoss = 0.95 * runningAveragePredictionLoss + (1-0.95) * float(negativeRewardsTerm1.mean())

    return loss, target_tensor.view(-1).size()[0]
コード例 #25
0
def main(seed=0, p_destroy=0):
    """Measure test accuracy of a trained network after zeroing random synapses.

    A saved network is loaded with learning disabled, each weight of the
    ``('X', 'Y')`` connection is set to zero independently with probability
    ``p_destroy``, and the network is run on cropped, intensity-scaled MNIST
    test images. Accuracy curves are updated every ``update_interval``
    examples, and the mean and maximum accuracy of each scheme are appended
    as one row to ``synapse_robust.csv`` under ``results_path``.

    NOTE(review): the default tensor type is set to CUDA, so a GPU is
    required. ``ROOT_DIR``, ``data_path`` and ``results_path`` are read from
    module scope.

    Args:
        seed: seed for the NumPy and PyTorch random number generators.
        p_destroy: probability with which each synapse weight is set to zero.
    """
    model = '0_16_2_250_4_0.01_0.99_60000_250.0_250_1.0_0.05_1e-07_0.5_0.2_10_250.pt'

    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    torch.cuda.manual_seed_all(seed)

    crop = 4
    time = 250
    n_filters = 250
    intensity = 0.5
    n_examples = 10000
    n_classes = 10

    # Load network.
    network = load_network(
        os.path.join(
            ROOT_DIR, 'params', 'mnist', 'crop_locally_connected', model
        ), learning=False
    )

    # Freeze the network: no weight updates, no adaptive-threshold changes,
    # no weight normalisation.
    network.connections['X', 'Y'].update_rule = NoOp(
        connection=network.connections['X', 'Y'], nu=network.connections['X', 'Y'].nu
    )
    network.layers['Y'].theta_decay = 0
    network.layers['Y'].theta_plus = 0
    network.connections['X', 'Y'].norm = None

    for l in network.layers:
        network.layers[l].dt = network.dt

    for c in network.connections:
        network.connections[c].dt = network.dt

    network.layers['Y'].lbound = None
    network.layers['Y'].one_spike = True

    # Destroy `p_destroy` percentage of synapses (set to 0).
    mask = torch.bernoulli(p_destroy * torch.ones(network.connections['X', 'Y'].w.size())).byte()
    network.connections['X', 'Y'].w[mask] = 0

    conv_size = network.connections['X', 'Y'].conv_size
    conv_prod = int(np.prod(conv_size))
    n_neurons = n_filters * conv_prod

    # Voltage recording for excitatory and inhibitory layers.
    voltage_monitor = Monitor(network.layers['Y'], ['v'], time=time)
    network.add_monitor(voltage_monitor, name='output_voltage')

    # Load MNIST data.
    dataset = MNIST(path=data_path, download=True, shuffle=True)

    images, labels = dataset.get_test()
    images *= intensity
    images = images[:, crop:-crop, crop:-crop]

    update_interval = 250

    # Record spikes during the simulation.
    spike_record = torch.zeros(update_interval, time, n_neurons)

    # Neuron assignments and spike proportions.
    path = os.path.join(
        ROOT_DIR, 'params', 'mnist', 'crop_locally_connected', f'auxiliary_{model}'
    )
    # Use a context manager so the file handle is closed after loading.
    with open(path, 'rb') as f:
        assignments, proportions, rates, ngram_scores = torch.load(f)

    # Sequence of accuracy estimates.
    curves = {'all': [], 'proportion': [], 'ngram': []}
    predictions = {
        scheme: torch.Tensor().long() for scheme in curves.keys()
    }

    spikes = {}
    for layer in set(network.layers):
        spikes[layer] = Monitor(network.layers[layer], state_vars=['s'], time=time)
        network.add_monitor(spikes[layer], name=f'{layer}_spikes')

    start = t()
    for i in range(n_examples):
        if i % 10 == 0:
            print(f'Progress: {i} / {n_examples} ({t() - start:.4f} seconds)')
            start = t()

        if i % update_interval == 0 and i > 0:
            # Labels of the `update_interval` examples just processed.
            if i % len(labels) == 0:
                current_labels = labels[-update_interval:]
            else:
                current_labels = labels[i % len(images) - update_interval:i % len(images)]

            # Update and print accuracy evaluations.
            curves, preds = update_curves(
                curves, current_labels, n_classes, spike_record=spike_record, assignments=assignments,
                proportions=proportions, ngram_scores=ngram_scores, n=2
            )
            print_results(curves)

            for scheme in preds:
                predictions[scheme] = torch.cat([predictions[scheme], preds[scheme]], -1)

        # Get next input sample.
        image = images[i % len(images)].contiguous().view(-1)
        sample = poisson(datum=image, time=time, dt=1)
        inpts = {'X': sample}

        # Run the network on the input.
        network.run(inpts=inpts, time=time)

        # If the output layer produced fewer than 5 spikes, double the input
        # intensity and re-run, at most 3 times.
        retries = 0
        while spikes['Y'].get('s').sum() < 5 and retries < 3:
            retries += 1
            image *= 2
            sample = poisson(datum=image, time=time, dt=1)
            inpts = {'X': sample}
            network.run(inpts=inpts, time=time)

        # Add to spikes recording.
        spike_record[i % update_interval] = spikes['Y'].get('s').t()

        network.reset_()  # Reset state variables.

    print(f'Progress: {n_examples} / {n_examples} ({t() - start:.4f} seconds)')

    # Advance past the last index so the final batch of examples is scored
    # with the same label-slicing logic as inside the loop.
    i += 1

    if i % len(labels) == 0:
        current_labels = labels[-update_interval:]
    else:
        current_labels = labels[i % len(images) - update_interval:i % len(images)]

    # Update and print accuracy evaluations.
    curves, preds = update_curves(
        curves, current_labels, n_classes, spike_record=spike_record, assignments=assignments,
        proportions=proportions, ngram_scores=ngram_scores, n=2
    )
    print_results(curves)

    for scheme in preds:
        predictions[scheme] = torch.cat([predictions[scheme], preds[scheme]], -1)

    print('Average accuracies:\n')
    for scheme in curves.keys():
        print('\t%s: %.2f' % (scheme, float(np.mean(curves[scheme]))))

    # Save results to disk.
    results = [
        np.mean(curves['all']), np.mean(curves['proportion']), np.mean(curves['ngram']),
        np.max(curves['all']), np.max(curves['proportion']), np.max(curves['ngram'])
    ]

    to_write = [str(x) for x in [seed, p_destroy] + results]
    name = 'synapse_robust.csv'
    results_file = os.path.join(results_path, name)

    # Write the header once. It names all eight columns of a data row (the
    # two run parameters followed by the six statistics in `results`).
    if not os.path.isfile(results_file):
        with open(results_file, 'w') as f:
            f.write(
                'random_seed,p_destroy,'
                'mean_all,mean_proportion,mean_ngram,'
                'max_all,max_proportion,max_ngram\n'
            )

    with open(results_file, 'a') as f:
        f.write(','.join(to_write) + '\n')
コード例 #26
0
ファイル: dvrl_modules.py プロジェクト: isaurabh19/dvrl
    def training_step(self, batch, batch_idx):
        """Run one update step of the data-value estimator.

        The batch is ``(x, y)`` or ``(x, y, is_corrupted)``. Selection
        probabilities are obtained from this module's forward pass, a binary
        selection vector is sampled from them, and the prediction model is
        fitted on the selected samples via ``dvrl_fit``. The prediction model
        is then scored on the validation dataloader, and the loss weights the
        log-probability of the sampled selection by the validation accuracy
        relative to ``self.validation_performance``, plus a penalty on the
        mean selection probability leaving the exploration band.

        Returns:
            dict with keys ``'loss'`` and ``'val_accuracy'``.
        """
        if len(batch) == 2:
            x, y = batch
            is_corrupted = None
        else:
            x, y, is_corrupted = batch

        # Per-sample selection probabilities.
        estimated_dv = torch.sigmoid(self(x, y)).squeeze()

        selection_vector = torch.bernoulli(estimated_dv).detach()
        if selection_vector.sum() == 0:
            # Nothing was selected: redraw with probability 0.5 per sample.
            fallback_probs = 0.5 * torch.ones_like(estimated_dv)
            selection_vector = torch.bernoulli(fallback_probs).detach()

        # The selection vector is detached so that operations inside the
        # prediction model are not tracked with respect to the estimator.
        training_accuracy = self.prediction_model.dvrl_fit(
            x, y, selection_vector)

        # Log-likelihood of the sampled selection under the probabilities.
        eps = self.hparams.epsilon
        picked_term = selection_vector * torch.log(estimated_dv + eps)
        skipped_term = (1.0 - selection_vector) * torch.log(
            1.0 - estimated_dv + eps)
        log_prob = torch.sum(picked_term + skipped_term)

        # Penalise a mean probability above the threshold or below
        # (1 - threshold).
        mean_dv = torch.mean(estimated_dv.squeeze())
        zero = torch.tensor(0.0, device=estimated_dv.device)
        upper_excess = torch.max(mean_dv - self.exploration_threshold, zero)
        lower_excess = torch.max(
            (1.0 - self.exploration_threshold) - mean_dv, zero)
        exploration_bonus = upper_excess + lower_excess

        cross_entropy_loss_sum = 0.0

        accuracy_tracker = pl.metrics.Accuracy(compute_on_step=False)

        if is_corrupted is not None:
            # Diagnostics only: mean estimated value of the corrupted and of
            # the clean samples, computed in eval mode without gradients.
            with torch.no_grad():
                self.dve.eval()
                groups = (('mean_corrupted_dve', is_corrupted),
                          ('mean_clean_dve', ~is_corrupted))
                for tag, flags in groups:
                    idx = torch.where(flags)[0]
                    self.log(tag,
                             torch.sigmoid(self(x[idx], y[idx])).mean(),
                             prog_bar=True)

            self.dve.train()

        for val_batch in self.validation_dataloader:
            if len(val_batch) == 2:
                x_val, y_val = val_batch
            else:
                x_val, y_val, _ = val_batch
            with torch.no_grad():
                self.prediction_model.eval()
                logits = self.prediction_model(x_val.cuda()).cpu()
                accuracy_tracker(logits.detach().cpu(), y_val.detach().cpu())
                cross_entropy_loss_sum += F.cross_entropy(logits,
                                                          y_val,
                                                          reduction='sum')

        mean_cross_entropy_loss = cross_entropy_loss_sum / self.val_split
        val_accuracy = accuracy_tracker.compute()

        advantage = val_accuracy - self.validation_performance
        dve_loss = -advantage * log_prob + 1.e3 * exploration_bonus

        # Moving average of the validation cross-entropy over a window of T.
        window = self.hparams.T
        self.baseline_delta = (window - 1) * self.baseline_delta / window + \
                              mean_cross_entropy_loss / window

        progress_metrics = (
            ('val_accuracy', val_accuracy),
            ('training_accuracy', training_accuracy),
            ('estimated_dv_sum', estimated_dv.sum()),
            ('estimated_dv_mean', estimated_dv.mean()),
            ('estimated_dv_std', estimated_dv.std()),
            ('exploration_bonus', exploration_bonus),
        )
        for metric_name, metric_value in progress_metrics:
            self.log(metric_name, metric_value, prog_bar=True, on_step=True)

        return {'loss': dve_loss, 'val_accuracy': val_accuracy}
コード例 #27
0
ファイル: copia.py プロジェクト: zenna/dddt
 def bern_eq(*shape):
   """Return a tensor of the given shape filled with fair (p = 0.5) coin flips.

   The result is passed through the file's ``cuda`` helper before returning.
   """
   fair_probs = torch.ones(*shape) * 0.5
   coin_flips = torch.bernoulli(fair_probs)
   return cuda(coin_flips)
コード例 #28
0
 def sample_v(self, y):
     """Compute sigmoid(y @ W + b) and draw a Bernoulli sample from it.

     Returns:
         Tuple ``(probabilities, sample)``.
     """
     weighted = torch.mm(y, self.W)  # inputs times the weight matrix
     # Broadcast the bias to the shape of the weighted input before adding.
     bias = self.b.expand_as(weighted)
     p_v_given_h = torch.sigmoid(weighted + bias)
     drawn = torch.bernoulli(p_v_given_h)
     return p_v_given_h, drawn
コード例 #29
0
ファイル: rbm.py プロジェクト: proticom/Deep_Learning_A_Z
 def sample_h(self, x):
     """Compute sigmoid(x @ W^T + a) and draw a Bernoulli sample from it.

     Returns:
         Tuple ``(probabilities, sample)``.
     """
     weighted = torch.mm(x, self.W.t())
     # Broadcast the bias to the shape of the weighted input before adding.
     bias = self.a.expand_as(weighted)
     p_h_given_v = torch.sigmoid(weighted + bias)
     drawn = torch.bernoulli(p_h_given_v)
     return p_h_given_v, drawn
コード例 #30
0
def random_mask(x, p, training):
    """Build a dropout-style keep mask shaped like ``x``.

    Args:
        x: Tensor whose shape and device the mask should match.
        p: Drop probability; each mask entry is 1 with probability ``1 - p``.
        training: When falsy no mask is sampled and the scalar ``1.`` is
            returned instead.

    Returns:
        A float 0/1 tensor on ``x.device`` when ``training`` is truthy,
        otherwise the float ``1.``.
    """
    if not training:
        return 1.
    keep_prob = (1. - p) * torch.ones(x.shape)
    # Follow the device of ``x`` instead of unconditionally calling ``.cuda()``,
    # which raised on CPU-only machines and ignored which device ``x`` is on.
    return torch.bernoulli(keep_prob).to(x.device)
コード例 #31
0
ファイル: pcd_ebm_ema.py プロジェクト: wgrathwohl/GWG_release
 def preprocess(data):
     """Optionally binarize ``data`` by Bernoulli sampling.

     When ``args.dynamic_binarization`` is truthy, every entry of ``data`` is
     used as the probability of drawing a 1; otherwise ``data`` is returned
     unchanged.
     """
     if not args.dynamic_binarization:
         return data
     return torch.bernoulli(data)
コード例 #32
0
 def __call__(self, img):
     """Return a copy of ``img`` with randomly chosen positions set to 255.

     Each position of the first two dimensions is selected independently with
     probability ``self.p``. ``img`` itself is not modified.
     """
     rows, cols = img.shape[0], img.shape[1]
     probs = torch.Tensor(rows, cols).fill_(self.p)
     selected = torch.bernoulli(probs).numpy() == 1
     noisy = img.copy()
     noisy[selected] = 255
     return noisy
コード例 #33
0
ファイル: utils.py プロジェクト: km01/myrl
 def sample(theta):
     """Draw one Bernoulli sample per entry of ``theta`` (used as probabilities).

     The result is the raw sample; no gradient path back to ``theta`` is
     constructed here.
     """
     return torch.bernoulli(theta)
コード例 #34
0
 def __call__(self, img):
     """Apply ``self.transform`` to ``img`` with probability ``self.prob``.

     A single Bernoulli draw decides; when it is not 1 the input is returned
     untouched.
     """
     coin = torch.bernoulli(torch.Tensor([self.prob]))
     fire = coin[0] == 1
     return self.transform(img) if fire else img
コード例 #35
0
 def sample_h(self, x):
     """Compute ``sigmoid(x @ W^T + a)`` and a Bernoulli draw from it.

     Returns the pair ``(probabilities, sample)``.
     """
     scores = torch.mm(x, self.W.t())
     scores_with_bias = scores + self.a.expand_as(scores)
     p_hidden = scores_with_bias.sigmoid()
     return p_hidden, torch.bernoulli(p_hidden)
コード例 #36
0
if args.model in ('VAE', 'ConditionalVAE', 'VIS'):
    # Load MNIST; images are converted with ToTensor() and their values are
    # later used as per-pixel Bernoulli probabilities.
    train_dataset = datasets.MNIST(root='./data/',
                                   train=True,
                                   transform=transforms.ToTensor(),
                                   download=True)
    test_dataset = datasets.MNIST(root='./data/',
                                  train=False,
                                  transform=transforms.ToTensor())

    print(len(train_dataset))
    print(len(test_dataset))

    # Seed before binarizing so the sampled 0/1 images are reproducible.
    torch.manual_seed(args.seed)
    train_img = torch.stack([torch.bernoulli(d[0]) for d in train_dataset])
    train_label = torch.LongTensor([d[1] for d in train_dataset])
    test_img = torch.stack([torch.bernoulli(d[0]) for d in test_dataset])
    test_label = torch.LongTensor([d[1] for d in test_dataset])
    print(train_img.size(), train_label.size(), test_img.size(),
          test_label.size())

    # Hold out the last 10000 training examples for validation and train on
    # everything before them. The previous slice ``[:10000]`` kept only the
    # first 10000 examples and silently dropped the rest of the training data.
    val_img = train_img[-10000:].clone()
    val_label = train_label[-10000:].clone()
    train_img = train_img[:-10000]
    train_label = train_label[:-10000]

    train = torch.utils.data.TensorDataset(train_img, train_label)
    val = torch.utils.data.TensorDataset(val_img, val_label)
    test = torch.utils.data.TensorDataset(test_img, test_label)
コード例 #37
0
ファイル: recommender_system.py プロジェクト: kv83821/kv83821
 def sample_v(self, y):
     """Return ``(prob_v_given_h, sample)`` for the input ``y``.

     Probabilities are ``sigmoid(y @ W + bias_visible)``; ``W`` is used as-is
     (no transpose) in this direction.
     """
     hidden_to_visible = y.mm(self.W)
     bias = self.bias_visible.expand_as(hidden_to_visible)
     visible_probs = torch.sigmoid(hidden_to_visible + bias)
     visible_sample = torch.bernoulli(visible_probs)
     return visible_probs, visible_sample
コード例 #38
0
    def decode(self, mol_vec, prob_decode):
        """Decode a tree of ``MolTreeNode`` objects from ``mol_vec``.

        The tree is grown depth-first with an explicit stack: at every step the
        model decides whether to stop expanding the node on top of the stack
        (backtrack) or to attach a new child chosen from the five candidate
        vocabulary ids.

        Args:
            mol_vec: Latent vector with a single row (it is concatenated with
                ``(1, hidden_size)`` tensors along ``dim=1``).
            prob_decode: If truthy, the stop decision is sampled with
                ``torch.bernoulli`` and candidates are drawn with
                ``torch.multinomial``; otherwise both are chosen greedily.

        Returns:
            ``(root, all_nodes)``: the root node and the list of all created
            nodes in creation order.
        """
        stack = []
        init_hidden = create_var(torch.zeros(1,self.hidden_size))
        zero_pad = create_var(torch.zeros(1,1,self.hidden_size))

        #Root Prediction
        root_hidden = torch.cat([init_hidden, mol_vec], dim=1)
        root_hidden = nn.ReLU()(self.W(root_hidden))
        root_score = self.W_o(root_hidden)
        _,root_wid = torch.max(root_score, dim=1)
        # ``.item()`` yields a plain Python int; the legacy ``.data[0]`` form
        # returns a 0-dim tensor on current PyTorch.
        root_wid = root_wid.item()

        root = MolTreeNode(self.vocab.get_smiles(root_wid))
        root.wid = root_wid
        root.idx = 0
        stack.append( (root, self.vocab.get_slots(root.wid)) )

        all_nodes = [root]
        # h[(i, j)] holds the message computed from node i towards node j.
        h = {}
        for step in range(MAX_DECODE_LEN):
            node_x,fa_slot = stack[-1]
            cur_h_nei = [ h[(node_y.idx,node_x.idx)] for node_y in node_x.neighbors ]
            if len(cur_h_nei) > 0:
                cur_h_nei = torch.stack(cur_h_nei, dim=0).view(1,-1,self.hidden_size)
            else:
                cur_h_nei = zero_pad

            cur_x = create_var(torch.LongTensor([node_x.wid]))
            cur_x = self.embedding(cur_x)

            #Predict stop
            cur_h = cur_h_nei.sum(dim=1)
            stop_hidden = torch.cat([cur_x,cur_h,mol_vec], dim=1)
            stop_hidden = nn.ReLU()(self.U(stop_hidden))
            stop_score = nn.Sigmoid()(self.U_s(stop_hidden) * 20).squeeze()

            if prob_decode:
                backtrack = (torch.bernoulli(1.0 - stop_score.data).item() == 1)
            else:
                # ``stop_score`` is squeezed to a single element; indexing it
                # with ``[0]`` raises on current PyTorch, so read it via
                # ``.item()`` exactly like the sampling branch above.
                backtrack = (stop_score.item() < 0.5)

            if not backtrack: #Forward: Predict next clique
                new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r, self.W_h)
                pred_hidden = torch.cat([new_h,mol_vec], dim=1)
                pred_hidden = nn.ReLU()(self.W(pred_hidden))
                pred_score = nn.Softmax(dim=1)(self.W_o(pred_hidden) * 20)
                if prob_decode:
                    # Diagnostic output when the softmax mass exceeds 1.
                    if(pred_score.data.squeeze().sum().item() > 1):
                        print(pred_score.data.squeeze().sum().item())
                    sort_wid = torch.multinomial(pred_score.data.squeeze(), 5)
                else:
                    _,sort_wid = torch.sort(pred_score, dim=1, descending=True)
                    sort_wid = sort_wid.data.squeeze()
                sort_wid = sort_wid.cpu().numpy()
                next_wid = None
                # Take the first of the five candidates that is compatible
                # with the current node.
                for wid in sort_wid[:5]:
                    slots = self.vocab.get_slots(wid)
                    node_y = MolTreeNode(self.vocab.get_smiles(wid))
                    if have_slots(fa_slot, slots) and can_assemble(node_x, node_y):
                        next_wid = wid
                        next_slots = slots
                        break

                if next_wid is None:
                    backtrack = True #No more children can be added
                else:
                    node_y = MolTreeNode(self.vocab.get_smiles(next_wid))
                    node_y.wid = next_wid
                    node_y.idx = step + 1
                    node_y.neighbors.append(node_x)
                    h[(node_x.idx,node_y.idx)] = new_h[0]
                    stack.append( (node_y,next_slots) )
                    all_nodes.append(node_y)

            if backtrack: #Backtrack, use if instead of else
                if len(stack) == 1:
                    break #At root, terminate

                node_fa,_ = stack[-2]
                # Message back to the parent excludes what came from the parent.
                cur_h_nei = [ h[(node_y.idx,node_x.idx)] for node_y in node_x.neighbors if node_y.idx != node_fa.idx ]
                if len(cur_h_nei) > 0:
                    cur_h_nei = torch.stack(cur_h_nei, dim=0).view(1,-1,self.hidden_size)
                else:
                    cur_h_nei = zero_pad

                new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r, self.W_h)
                h[(node_x.idx,node_fa.idx)] = new_h[0]
                node_fa.neighbors.append(node_x)
                stack.pop()

        return root, all_nodes
コード例 #39
0
ファイル: bernoulli.py プロジェクト: Magica-Chen/pyro
 def sample(self):
     """
     Draw one Bernoulli sample per entry of ``self.ps`` and wrap it in a ``Variable``.

     Sampling is done on ``self.ps.data``, i.e. on the underlying tensor.
     """
     draws = torch.bernoulli(self.ps.data)
     return Variable(draws)
コード例 #40
0
 def sample(self, sample_shape=torch.Size()):
     """Draw Bernoulli samples with ``self.probs`` expanded to the requested shape.

     The target shape comes from ``self._extended_shape(sample_shape)``.
     """
     target_shape = self._extended_shape(sample_shape)
     expanded_probs = self.probs.expand(target_shape)
     return torch.bernoulli(expanded_probs)
コード例 #41
0
ファイル: distributions.py プロジェクト: Northrend/pytorch
 def sample_n(self, n):
     """Draw ``n`` independent Bernoulli samples of ``self.probs``.

     The result has a new leading dimension of size ``n`` in front of the
     shape of ``self.probs``.
     """
     stacked_shape = (n, *self.probs.size())
     replicated = self.probs.expand(*stacked_shape)
     return torch.bernoulli(replicated)
コード例 #42
0
ファイル: neural_nets.py プロジェクト: yunzqq/pytorch-kaldi
    def forward(self, x,lab,test_flag):
      """Run the stacked, hand-unrolled LSTM on ``x`` and compute the loss.

      ``x`` is stepped through along dim 0 (one slice per iteration of the
      inner loop) and dim 1 sizes the initial state. ``lab`` is the target
      passed to ``self.criterion``. ``test_flag == 0`` selects the sampled
      dropout mask; any other value uses a constant scale instead.

      Returns the list ``[loss, err, pout]``.

      NOTE(review): ``loss``/``err``/``pout`` are only assigned when
      ``self.cost`` is ``"nll"`` or ``"mse"``; any other value ends in a
      NameError at the return statement.
      """
    
      # initial state
      # (doubled along dim 0 when the reversed copy of x is concatenated below)
      if self.bidir or self.twin_reg:
          h_init = Variable(torch.zeros(2*x.shape[1], self.hidden_dim))
      else:
          h_init = Variable(torch.zeros(x.shape[1],self. hidden_dim))   
           
      # Drop mask initialization             
      # test_flag == 0: one Bernoulli(1 - drop_rate) mask, reused for every step
      # and layer; otherwise activations are scaled by the constant 1 - drop_rate.
      if test_flag==0:
         drop_mask=Variable(torch.bernoulli(torch.Tensor(h_init.shape[0],h_init.shape[1]).fill_(1-self.drop_rate)))
      else:
         drop_mask=Variable(torch.FloatTensor([1-self.drop_rate]))
          
      if self.use_cuda:
          x=x.cuda()
          lab=lab.cuda()
          h_init=h_init.cuda()
          drop_mask=drop_mask.cuda()
          
      # accumulator for the twin regularisation term
      if self.twin_reg:
          reg=0
          
      # optional pre-processing of the input by self.cnn
      if self.cnn_pre:
          x=self.cnn(x)
          
      # Processing hidden layers
      for i in range(self.N_hid):
        
        # frame concatenation for bidirectional RNNs
        # (x and its flipped copy are stacked along dim 1)
        if self.bidir or self.twin_reg: 
            x=torch.cat([x,flip(x,0)],1)
           
        # Feed-forward affine transformation (done in parallel)
        wfx_out=self.wfx[i](x)
        wix_out=self.wix[i](x)
        wox_out=self.wox[i](x)
        wcx_out=self.wcx[i](x)

        
        # Applying batch norm
        # (the first two dims are flattened for the norm layer and restored afterwards)
        if self.use_batchnorm:
         wfx_out_bn=self.bn_wfx[i](wfx_out.view(wfx_out.shape[0]*wfx_out.shape[1],wfx_out.shape[2]))
         wfx_out=wfx_out_bn.view(wfx_out.shape[0],wfx_out.shape[1],wfx_out.shape[2])
         
         wix_out_bn=self.bn_wix[i](wix_out.view(wix_out.shape[0]*wix_out.shape[1],wix_out.shape[2]))
         wix_out=wix_out_bn.view(wix_out.shape[0],wix_out.shape[1],wix_out.shape[2])
   
         wox_out_bn=self.bn_wox[i](wox_out.view(wox_out.shape[0]*wox_out.shape[1],wox_out.shape[2]))
         wox_out=wox_out_bn.view(wox_out.shape[0],wox_out.shape[1],wox_out.shape[2])

         wcx_out_bn=self.bn_wcx[i](wcx_out.view(wcx_out.shape[0]*wcx_out.shape[1],wcx_out.shape[2]))
         wcx_out=wcx_out_bn.view(wcx_out.shape[0],wcx_out.shape[1],wcx_out.shape[2])           
        
        # Skip connections: the first layer subtracts zeros, later layers
        # subtract the stacked pre-activations of the previous layer.
        if i==0 and self.skip_conn:
          prev_pre_act= Variable(torch.zeros(wfx_out.shape[0],wfx_out.shape[1],wfx_out.shape[2]))
          if self.use_cuda:
            prev_pre_act=prev_pre_act.cuda()
          
        if i>0 and self.skip_conn:
          prev_pre_act=pre_act    

        # Processing time steps
        hiddens = []
        pre_act = []
        c=h_init
        h=h_init
        
        for k in range(x.shape[0]):
          
          # forget, input and output gates
          ft=self.act_gate(wfx_out[k]+self.ufh[i](h))
          it=self.act_gate(wix_out[k]+self.uih[i](h))
          ot=self.act_gate(wox_out[k]+self.uoh[i](h))
          
          # candidate pre-activation
          at=wcx_out[k]+self.uch[i](h)
             
          if self.skip_conn:
              pre_act.append(at)
              at=at-prev_pre_act[k]
              
             
           
          if self.use_laynorm:
              at=self.ln[i](at)
              
          # the drop mask is applied to the candidate only, not to the carried state
          c=it*self.act(at)*drop_mask+ft*c
          h=ot*self.act(c)
          
          hiddens.append(h)
          
    
        # stacking hidden states
        h=torch.stack(hiddens)
        if self.skip_conn:
         pre_act=torch.stack(pre_act)

         
        # bidirectional concatenations
        # (first half of dim 1 is the forward pass, second half the reversed pass)
        if self.bidir:
         h_f=h[:,0:int(x.shape[1]/2)]
         h_b=flip(h[:,int(x.shape[1]/2):x.shape[1]].contiguous(),0)
         h=torch.cat([h_f,h_b],2)
         
        # twin regularisation: mean squared difference between the two halves
        if self.twin_reg:
          if not(self.bidir):
            h_f=h[:,0:int(x.shape[1]/2)]
            h_b=flip(h[:,int(x.shape[1]/2):x.shape[1]].contiguous(),0)
            h=h_f
          reg=reg+torch.mean((h_f - h_b)**2)
        
        # setup x for the next hidden layer
        x=h

      # computing output (done in parallel)
      out=self.fco(h)

        
      # computing loss
      if self.cost=="nll":
        pout=F.log_softmax(out,dim=2)
        pred=torch.max(pout,dim=2)[1]
        loss=self.criterion(pout.view(h.shape[0]*h.shape[1],-1), lab.view(-1)) 
        # fraction of positions whose predicted index differs from the label
        err = torch.sum((pred!=lab).float())/(h.shape[0]*h.shape[1])
        
      if self.cost=="mse":
        loss=self.criterion(out, lab)
        pout=out
        # no classification error is defined for this cost; a zero placeholder is returned
        err=Variable(torch.FloatTensor([0]))
        
      if self.twin_reg:
          loss=loss+self.twin_w*reg
        
      return [loss,err,pout]
コード例 #43
0
 def setup_reparam_mask(self, N):
     """Sample a length-``N`` 0/1 mask by rejection sampling.

     Each entry is 1 with probability 0.30. A draw is accepted only if it
     contains at least one 1 and fewer than ``0.40 * N`` ones.

     Args:
         N: Length of the mask; must be at least 3.

     Returns:
         A float tensor of shape ``(N,)`` with entries in {0, 1}.

     Raises:
         ValueError: If ``N < 3``. No integer count of ones lies strictly
             between 0.5 and ``0.40 * N`` in that case, so the sampling loop
             could never terminate.
     """
     if N < 3:
         raise ValueError(
             "setup_reparam_mask needs N >= 3 to find a valid mask, got %r" % (N,))
     while True:
         mask = torch.bernoulli(0.30 * torch.ones(N))
         n_ones = torch.sum(mask)
         if 0.5 < n_ones < 0.40 * N:
             return mask
コード例 #44
0
def evaluate_vae(args, model, train_loader, data_loader, epoch, dir, mode):
    """Evaluate ``model`` on ``data_loader`` and write diagnostic plots.

    The loss, reconstruction error and KL term returned by
    ``model.calculate_loss`` are accumulated over all batches and divided by
    the number of batches.

    In ``mode == 'validation'`` reconstructions of the second batch
    (``batch_idx == 1``) are plotted under ``dir + 'reconstruction/'``.
    In ``mode == 'test'`` real images, reconstructions and generations are
    plotted, and lower bounds plus the test log-likelihood are computed.

    Returns:
        ``(loss, re, kl)`` or, in test mode,
        ``(loss, re, kl, log_likelihood_test, log_likelihood_train,
        elbo_test, elbo_train)``. ``log_likelihood_train`` is always ``0.``
        because its computation is disabled below.

    NOTE(review): the ``reconstruction/`` directory is only created when
    ``epoch == 1``; later epochs assume it already exists.
    """
    # set loss to 0
    evaluate_loss = 0
    evaluate_re = 0
    evaluate_kl = 0
    # set model to evaluation mode
    model.eval()

    # evaluate
    for batch_idx, (data, target) in enumerate(data_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # legacy Variable API (``volatile=True``)
        data, target = Variable(data, volatile=True), Variable(target)

        x = data

        # calculate loss function
        loss, RE, KL = model.calculate_loss(x, average=True)

        # RE is accumulated with its sign flipped
        evaluate_loss += loss.data[0]
        evaluate_re += -RE.data[0]
        evaluate_kl += KL.data[0]

        # print N digits
        if batch_idx == 1 and mode == 'validation':
            if epoch == 1:
                if not os.path.exists(dir + 'reconstruction/'):
                    os.makedirs(dir + 'reconstruction/')
                # VISUALIZATION: plot real images
                plot_images(args, data.data.cpu().numpy()[0:9], dir + 'reconstruction/', 'real', size_x=3, size_y=3)
            x_mean = model.reconstruct_x(x)
            plot_images(args, x_mean.data.cpu().numpy()[0:9], dir + 'reconstruction/', str(epoch), size_x=3, size_y=3)

    if mode == 'test':
        # load all data
        test_data = Variable(data_loader.dataset.data_tensor)
        test_target = Variable(data_loader.dataset.target_tensor)
        full_data = Variable(train_loader.dataset.data_tensor)

        if args.cuda:
            test_data, test_target, full_data = test_data.cuda(), test_target.cuda(), full_data.cuda()

        # only the training data is re-binarized here; the test data is used as loaded
        if args.dynamic_binarization:
            full_data = torch.bernoulli(full_data)

        # print(model.means(model.idle_input))

        # VISUALIZATION: plot real images
        plot_images(args, test_data.data.cpu().numpy()[0:25], dir, 'real', size_x=5, size_y=5)

        # VISUALIZATION: plot reconstructions
        samples = model.reconstruct_x(test_data[0:25])

        plot_images(args, samples.data.cpu().numpy(), dir, 'reconstructions', size_x=5, size_y=5)

        # VISUALIZATION: plot generations
        samples_rand = model.generate_x(25)

        plot_images(args, samples_rand.data.cpu().numpy(), dir, 'generations', size_x=5, size_y=5)

        if args.prior == 'vampprior':
            # VISUALIZE pseudoinputs
            pseudoinputs = model.means(model.idle_input).cpu().data.numpy()

            plot_images(args, pseudoinputs[0:25], dir, 'pseudoinputs', size_x=5, size_y=5)

        # CALCULATE lower-bound (test data)
        t_ll_s = time.time()
        elbo_test = model.calculate_lower_bound(test_data, MB=args.MB)
        t_ll_e = time.time()
        print('Test lower-bound value {:.2f} in time: {:.2f}s'.format(elbo_test, t_ll_e - t_ll_s))

        # CALCULATE lower-bound (training data)
        t_ll_s = time.time()
        elbo_train = model.calculate_lower_bound(full_data, MB=args.MB)
        t_ll_e = time.time()
        print('Train lower-bound value {:.2f} in time: {:.2f}s'.format(elbo_train, t_ll_e - t_ll_s))

        # CALCULATE log-likelihood (test data)
        t_ll_s = time.time()
        log_likelihood_test = model.calculate_likelihood(test_data, dir, mode='test', S=args.S, MB=args.MB)
        t_ll_e = time.time()
        print('Test log_likelihood value {:.2f} in time: {:.2f}s'.format(log_likelihood_test, t_ll_e - t_ll_s))

        # Training log-likelihood is not computed (placeholder 0.); the timing
        # printed below therefore measures nothing.
        t_ll_s = time.time()
        log_likelihood_train = 0. #model.calculate_likelihood(full_data, dir, mode='train', S=args.S, MB=args.MB)) #commented because it takes too much time
        t_ll_e = time.time()
        print('Train log_likelihood value {:.2f} in time: {:.2f}s'.format(log_likelihood_train, t_ll_e - t_ll_s))

    # calculate final loss
    evaluate_loss /= len(data_loader)  # loss function already averages over batch size
    evaluate_re /= len(data_loader)  # re already averages over batch size
    evaluate_kl /= len(data_loader)  # kl already averages over batch size
    if mode == 'test':
        return evaluate_loss, evaluate_re, evaluate_kl, log_likelihood_test, log_likelihood_train, elbo_test, elbo_train
    else:
        return evaluate_loss, evaluate_re, evaluate_kl
コード例 #45
0
ファイル: rbm.py プロジェクト: proticom/Deep_Learning_A_Z
 def sample_v(self, y):
     """Return visible probabilities ``sigmoid(y @ W + b)`` and a Bernoulli draw from them."""
     product = torch.mm(y, self.W)
     shifted = product + self.b.expand_as(product)
     p_visible = shifted.sigmoid()
     v_sample = torch.bernoulli(p_visible)
     return p_visible, v_sample
コード例 #46
0
 def setup_reparam_mask(self, N):
     """Return a random 0/1 mask of length ``N`` drawn by rejection sampling.

     Entries are independent Bernoulli(0.30) draws; the mask is resampled
     until the number of ones is at least 1 and strictly below ``0.40 * N``.

     Args:
         N: Number of mask entries; must be at least 3.

     Returns:
         A float tensor of shape ``(N,)`` containing only 0s and 1s.

     Raises:
         ValueError: If ``N < 3``, because no count of ones can satisfy the
             acceptance test and the loop would spin forever.
     """
     if N < 3:
         raise ValueError(
             "setup_reparam_mask needs N >= 3 to find a valid mask, got %r" % (N,))
     upper = 0.40 * N
     while True:
         mask = torch.bernoulli(0.30 * torch.ones(N))
         total = torch.sum(mask)
         if total < upper and total > 0.5:
             return mask
コード例 #47
0
ファイル: types.py プロジェクト: zenna/dddt
 def sample(*shape):
   """Return a ``Variable`` of 0/1 values of the given shape, each 1 with probability 0.5.

   The sampled tensor is passed through the ``cuda`` helper before wrapping.
   """
   probs = torch.ones(*shape).fill_(0.5)
   bits = cuda(torch.bernoulli(probs))
   return Variable(bits)
コード例 #48
0
ファイル: vae.py プロジェクト: anihamde/cs287-s18
alpha = args.alpha

# Load MNIST; ToTensor() output is used below as per-pixel Bernoulli probabilities.
train_dataset = datasets.MNIST(root='./data/',
                            train=True, 
                            transform=transforms.ToTensor(),
                            download=True)
test_dataset = datasets.MNIST(root='./data/',
                           train=False, 
                           transform=transforms.ToTensor())

print(len(train_dataset))
print(len(test_dataset))

# Fixed seed so that the binarized images are the same on every run.
torch.manual_seed(3435)
# Binarize every image once, up front, by sampling each pixel.
train_img = torch.stack([torch.bernoulli(d[0]) for d in train_dataset])
train_label = torch.LongTensor([d[1] for d in train_dataset])
test_img = torch.stack([torch.bernoulli(d[0]) for d in test_dataset])
test_label = torch.LongTensor([d[1] for d in test_dataset])
print(train_img.size(), train_label.size(), test_img.size(), test_label.size())

# No separate validation split is loaded above, so the last 10000 training
# points are held out as the validation set.
val_img = train_img[-10000:].clone()
val_label = train_label[-10000:].clone()
# Keep everything except the held-out tail for training.
train_img = train_img[:-10000]
train_label = train_label[:-10000]

train = torch.utils.data.TensorDataset(train_img, train_label)
val = torch.utils.data.TensorDataset(val_img, val_label)
test = torch.utils.data.TensorDataset(test_img, test_label)
コード例 #49
0

input = torch.FloatTensor(batch_size, 3, imageSize, imageSize)
print(input.size())
noise = torch.FloatTensor(batch_size, nz, 1, 1)
print(noise.size())


# In[22]:

#parser.add_argument('--binary', action='store_true', help='z from bernoulli distribution, with prob=0.5')
binary=False
#Ele testa pergunta se vc quer que o seu Z venha da distribuição bernoulli
if binary:
	bernoulli_prob = torch.FloatTensor(batch_size, nz, 1, 1).fill_(0.5)
	fixed_noise = torch.bernoulli(bernoulli_prob)
else:
	fixed_noise = torch.FloatTensor(batch_size, nz, 1, 1).normal_(0, 1)


# In[23]:

label = torch.FloatTensor(batch_size)
real_label = 1
fake_label = 0


# ### Broadcast para CUDA, se quiser

# In[24]:
コード例 #50
0
ファイル: topology.py プロジェクト: yiJamong/SNN-with-KIST
    def __init__(
        self,
        source: Nodes,
        target: Nodes,
        nu: Optional[Union[float, Sequence[float]]] = None,
        reduction: Optional[callable] = None,
        weight_decay: Optional[float] = None,
        **kwargs
    ) -> None:
        # language=rst
        """
        Instantiates a :code:`Connection` object with sparse weights.

        Exactly one of the keyword arguments ``w`` and ``sparsity`` must be given.
        When only ``sparsity`` is given, a random sparse weight matrix of shape
        ``(*source.shape, *target.shape)`` is generated.

        :param source: A layer of nodes from which the connection originates.
        :param target: A layer of nodes to which the connection connects.
        :param nu: Learning rate for both pre- and post-synaptic events.
        :param reduction: Method for reducing parameter updates along the minibatch
            dimension.
        :param weight_decay: Constant multiple to decay weights by on each iteration.

        Keyword arguments:

        :param torch.Tensor w: Strengths of synapses.
        :param float sparsity: Fraction of sparse connections to use.
        :param LearningRule update_rule: Modifies connection parameters according to
            some rule.
        :param float wmin: Minimum allowed value on the connection weights.
        :param float wmax: Maximum allowed value on the connection weights.
        :param float norm: Total weight per target neuron normalization constant.
        """
        super().__init__(source, target, nu, reduction, weight_decay, **kwargs)

        w = kwargs.get("w", None)
        self.sparsity = kwargs.get("sparsity", None)

        assert (
            w is not None
            and self.sparsity is None
            or w is None
            and self.sparsity is not None
        ), 'Only one of "weights" or "sparsity" must be specified'

        if w is None and self.sparsity is not None:
            # Each potential synapse is kept with probability 1 - sparsity.
            i = torch.bernoulli(
                1 - self.sparsity * torch.ones(*source.shape, *target.shape)
            )
            if self.wmin == -np.inf or self.wmax == np.inf:
                # At least one bound is infinite: keep uniform [0, 1) values,
                # clamped to whatever finite bound exists.
                v = torch.clamp(
                    torch.rand(*source.shape, *target.shape)[i.bool()],
                    self.wmin,
                    self.wmax,
                )
            else:
                # Both bounds finite: draw uniformly from [wmin, wmax).
                v = self.wmin + torch.rand(*source.shape, *target.shape)[i.bool()] * (
                    self.wmax - self.wmin
                )
            # Pass the full shape explicitly. Without it the size is inferred
            # from the largest sampled index, so trailing rows/columns that
            # happen to receive no connection would be silently dropped.
            w = torch.sparse.FloatTensor(i.nonzero().t(), v, i.shape)
        elif w is not None and self.sparsity is None:
            assert w.is_sparse, "Weight matrix is not sparse (see torch.sparse module)"
            if self.wmin != -np.inf or self.wmax != np.inf:
                w = torch.clamp(w, self.wmin, self.wmax)

        self.w = Parameter(w, requires_grad=False)
コード例 #51
0
def train_our(num_epochs, dataloader, netD, netG, d_labelSmooth, outputDir,
						model_option =1,binary = False, epoch_interval = 1,
						D_steps = 1, G_steps = 1):
	"""Train a GAN for ``num_epochs`` epochs.

	Each epoch makes ``D_steps`` full passes over ``dataloader`` updating the
	discriminator ``netD``, followed by ``G_steps`` full passes updating the
	generator ``netG``. Both step counts raise ``ValueError`` once the pass
	index exceeds 3. After every epoch a summary line is printed and sample
	images are saved; model checkpoints are saved every ``epoch_interval``
	epochs.

	The function relies on names that are not defined here and must exist at
	module level: ``input``, ``label``, ``noise``, ``nz``, ``real_label``,
	``fake_label``, ``bernoulli_prob``, ``fixed_noise``, ``criterion``,
	``criterion_MSE``, ``optimizerD``, ``optimizerG``, ``vutils``,
	``save_images`` and ``save_models``.

	:param d_labelSmooth: amount subtracted from ``real_label`` for the
		discriminator's real-data targets.
	:param model_option: when equal to 2, an extra MSE loss between the
		generator's ``z_prediction`` and ``noise`` is back-propagated.
	:param binary: if truthy, noise is drawn from {-1, +1} via a Bernoulli
		sample; otherwise from a standard normal.

	NOTE(review): the generator loop never recomputes ``fake`` or
	``z_prediction``; it reuses the values left over from the last
	discriminator batch and ignores the ``data`` it iterates over.
	"""
	# NOTE(review): use_gpu is assigned but never read in this function.
	use_gpu = tc.cuda.is_available()

	for epoch in range(num_epochs):
		start_iter = time.time()  
		# Running sums over all batches of the epoch (not averaged per batch).
		D_x = 0
		D_G_z1 = 0
		D_G_z2 = 0
		errD_acum = 0
		errG_acum = 0
		
		for z in range(D_steps):
			if z > 3:
				raise ValueError('KEEP IT LOW!')
			print('z', z)
			for j, data in enumerate(dataloader, 0):


				############################
				# (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
				# 1A - Train the detective network in the Real Dataset
				###########################
				# train with real
				netD.zero_grad()
				real_cpu, _ = data
				# Save a grid of real samples once, on the very first pass.
				if (epoch == 0 and z == 0 ):
					vutils.save_image(real_cpu[0:64,:,:,:],
					'%s/real_samples.png' % outputDir, nrow=8)
				
				batch_size = real_cpu.size(0)
				input.data.resize_(real_cpu.size()).copy_(real_cpu)
				label.data.resize_(batch_size).fill_(real_label - d_labelSmooth) # use smooth label for discriminator

				output = netD(input)
				errD_real = criterion(output, label)
				errD_real.backward()

				#######################################################

				#######################################################
				# 1B - Train the detective network in the False Dataset
				#######################################################

				D_x += output.data.mean()
				print()
				# train with fake
				noise.data.resize_(batch_size, nz, 1, 1)
				if binary:
					# Map Bernoulli {0, 1} samples to {-1, +1}.
					bernoulli_prob.resize_(noise.data.size())
					noise.data.copy_(2*(torch.bernoulli(bernoulli_prob)-0.5))
				else:
					noise.data.normal_(0, 1)
				fake,z_prediction = netG(noise)
				label.data.fill_(fake_label)
				output = netD(fake.detach()) # add ".detach()" to avoid backprop through G
				errD_fake = criterion(output, label)
				errD_fake.backward() # gradients for fake/real will be accumulated
				# ERROR MEAN
				D_G_z1 += output.data.mean()
				
				errD_acum += errD_real.data[0] + errD_fake.data[0]
				
				optimizerD.step() # .step() can be called once the gradients are computed

				#######################################################
		
		# Pause point to inspect what is happening
		
		for a in range(G_steps):
			print('interacao = ',a, 'de ',G_steps )
			for i, data in enumerate(dataloader, 0):


				# G_steps > D_steps (G_steps \geq D_steps)
				if a > 3:
					raise ValueError('KEEP IT LOW!')
				#######################################################
				# (2) Update G network: maximize log(D(G(z)))
				#  Train the faker with the output from the Detective (but don't train the Detective)
				#######################################################
				# (debug) print the index of the iteration that is about to fail


				# (debug) break into pdb at iteration 150
				netG.zero_grad()
				label.data.fill_(real_label) # fake labels are real for generator cost
				output = netD(fake)
				errG = criterion(output, label)
				# Legacy keyword: keeps the graph so it can be back-propagated again.
				errG.backward(retain_variables=True) # True if backward through the graph for the second time
				
				if model_option == 2: # with z predictor
					errG_z = criterion_MSE(z_prediction, noise)
					errG_z.backward()
				D_G_z2 += output.data.mean()
				errG_acum += errG.data[0]
				
				optimizerG.step()


		print('epoch = ',epoch)
		
		end_iter = time.time()        
		#Print the info
		print('[%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f Elapsed %.2f s'
			  % (epoch, num_epochs, errD_acum/D_steps, errG_acum/G_steps, D_x, D_G_z1, D_G_z2, end_iter-start_iter))

		print('chegou no print')
		#Save a grid with the pictures from the dataset, up until 64
		save_images(netG = netG, noise = fixed_noise, outputDir = outputDir, epoch = epoch)
		   
		if epoch % epoch_interval == 0:
			# do checkpointing
			save_models(netG = netG, netD = netD, outputDir = outputDir, epoch = epoch)
コード例 #52
0
ファイル: LSTM.py プロジェクト: pfriesch/PhnKWS
    def forward(self, x):
        """Run the stacked, hand-unrolled LSTM layers over ``x``.

        ``x`` is stepped through along dim 0 (one slice per iteration of the
        inner loop); dim 1 sizes the initial state and dim 2 is the feature
        dimension seen by the affine layers. Returns the stacked hidden states
        of the last layer.
        """

        # Applying Layer/Batch Norm
        if bool(self.lstm_use_laynorm_inp):
            x = self.ln0((x))

        if bool(self.lstm_use_batchnorm_inp):
            # flatten the first two dims for the norm layer, then restore them
            x_bn = self.bn0(x.view(x.shape[0] * x.shape[1], x.shape[2]))
            x = x_bn.view(x.shape[0], x.shape[1], x.shape[2])

        for i in range(self.N_lstm_lay):

            # Initial state and concatenation
            # (bidirectional: x and its flipped copy are stacked along dim 1)
            if self.bidir:
                h_init = torch.zeros(2 * x.shape[1], self.lstm_lay[i])
                x = torch.cat([x, flip(x, 0)], 1)
            else:
                h_init = torch.zeros(x.shape[1], self.lstm_lay[i])

            # Drop mask initialization (same mask for all time steps)
            # With test_flag set, a constant scale of 1 - drop rate is used instead.
            if self.test_flag == False:
                drop_mask = torch.bernoulli(
                    torch.Tensor(h_init.shape[0],
                                 h_init.shape[1]).fill_(1 - self.lstm_drop[i]))
            else:
                drop_mask = torch.FloatTensor([1 - self.lstm_drop[i]])

            # created on CPU above, then moved next to the input
            h_init = h_init.to(x.device)
            drop_mask = drop_mask.to(x.device)

            # Feed-forward affine transformations (all steps in parallel)
            wfx_out = self.wfx[i](x)
            wix_out = self.wix[i](x)
            wox_out = self.wox[i](x)
            wcx_out = self.wcx[i](x)

            # Apply batch norm if needed (all steps in parallel)
            if self.lstm_use_batchnorm[i]:
                wfx_out_bn = self.bn_wfx[i](wfx_out.view(
                    wfx_out.shape[0] * wfx_out.shape[1], wfx_out.shape[2]))
                wfx_out = wfx_out_bn.view(wfx_out.shape[0], wfx_out.shape[1],
                                          wfx_out.shape[2])

                wix_out_bn = self.bn_wix[i](wix_out.view(
                    wix_out.shape[0] * wix_out.shape[1], wix_out.shape[2]))
                wix_out = wix_out_bn.view(wix_out.shape[0], wix_out.shape[1],
                                          wix_out.shape[2])

                wox_out_bn = self.bn_wox[i](wox_out.view(
                    wox_out.shape[0] * wox_out.shape[1], wox_out.shape[2]))
                wox_out = wox_out_bn.view(wox_out.shape[0], wox_out.shape[1],
                                          wox_out.shape[2])

                wcx_out_bn = self.bn_wcx[i](wcx_out.view(
                    wcx_out.shape[0] * wcx_out.shape[1], wcx_out.shape[2]))
                wcx_out = wcx_out_bn.view(wcx_out.shape[0], wcx_out.shape[1],
                                          wcx_out.shape[2])

            # Processing time steps
            hiddens = []
            ct = h_init
            ht = h_init

            for k in range(x.shape[0]):

                # LSTM equations
                ft = torch.sigmoid(wfx_out[k] + self.ufh[i](ht))
                it = torch.sigmoid(wix_out[k] + self.uih[i](ht))
                ot = torch.sigmoid(wox_out[k] + self.uoh[i](ht))
                # the drop mask multiplies the candidate update only
                ct = it * self.act[i](wcx_out[k] + self.uch[i]
                                      (ht)) * drop_mask + ft * ct
                ht = ot * self.act[i](ct)

                if self.lstm_use_laynorm[i]:
                    ht = self.ln[i](ht)

                hiddens.append(ht)

            # Stacking hidden states
            h = torch.stack(hiddens)

            # Bidirectional concatenations
            # (first half of dim 1 is the forward pass, second half the
            # reversed pass, flipped back before concatenation on dim 2)
            if self.bidir:
                h_f = h[:, 0:int(x.shape[1] / 2)]
                h_b = flip(h[:, int(x.shape[1] / 2):x.shape[1]].contiguous(),
                           0)
                h = torch.cat([h_f, h_b], 2)

            # Setup x for the next hidden layer
            x = h

        return x
コード例 #53
0
# Create input and output groups of neurons.
input_group = nodes.Input(n=n_input)  # n_input input nodes (the original note says 100).
output_group = nodes.LIFNodes(n=n_output)  # n_output LIF output nodes (the original note says 500).

network.add_layer(input_group, name='input')
network.add_layer(output_group, name='output')

# Input -> output connection.
# Unit Gaussian feed-forward weights.
w = torch.randn(n_input, n_output)
forward_conn = topology.Connection(input_group, output_group, w=w)

# Output -> output connection.
# Random 0/1 recurrent weights (each entry is 1 with its own uniformly drawn
# probability), with 1 subtracted on the diagonal.
# NOTE(review): the original comment called these "inhibitory", but the
# off-diagonal entries are non-negative - confirm the intended sign.
w = torch.bernoulli(torch.rand(n_output, n_output)) - torch.diag(torch.ones(n_output))
recurrent_conn = topology.Connection(output_group, output_group, w=w)

network.add_connection(forward_conn, source='input', target='output')
network.add_connection(recurrent_conn, source='output', target='output')

# Monitor input and output spikes during the simulation.
for l in network.layers:
    monitor = monitors.Monitor(network.layers[l], state_vars=['s'], time=time)
    network.add_monitor(monitor, name=l)

# Create Bernoulli input spikes for `time` timesteps. Each entry has its own
# probability drawn uniformly from [0, 0.05).
# NOTE(review): the original comment said "Bernoulli(0.1)", which does not
# match the expression below.
inpts = {'input': torch.bernoulli(0.05 * torch.rand(time, n_input))}

# Run network simulation for `time` timesteps.
network.run(inpts=inpts, time=time)
コード例 #54
0
 def sample(self, probas):
     """Draw Bernoulli samples from ``probas`` and return them detached from the graph."""
     draws = torch.bernoulli(probas)
     return draws.detach()
コード例 #55
0
    def forward(ctx, input):
        """Return a Bernoulli sample drawn element-wise from ``input``.

        ``ctx`` is accepted but not used here.
        """
        sampled = torch.bernoulli(input)
        return sampled
コード例 #56
0
 def sample_h(self, x):
     """Return ``(p_h_given_v, sample)`` where the probabilities are ``sigmoid(x @ W + b)``."""
     pre_activation = x @ self.W + self.b
     probs = pre_activation.sigmoid()
     return probs, torch.bernoulli(probs)
コード例 #57
0
ファイル: binomial.py プロジェクト: Jsmilemsj/pytorch
 def sample(self, sample_shape=torch.Size()):
     """Sample by summing ``self.total_count`` independent Bernoulli draws.

     ``self.probs`` gets a trailing dimension of size ``total_count``, one
     Bernoulli draw is taken per entry, and that dimension is summed away.
     Everything runs under ``torch.no_grad()``.
     """
     trial_shape = self._extended_shape(sample_shape) + (self.total_count,)
     with torch.no_grad():
         per_trial_probs = self.probs.unsqueeze(-1).expand(trial_shape)
         successes = torch.bernoulli(per_trial_probs)
         return successes.sum(dim=-1)
コード例 #58
0
 def sample_v(self, y):
     """Return ``(p_v_given_h, sample)`` where the probabilities are ``sigmoid(y @ W^T + a)``."""
     logits = y @ self.W.t() + self.a
     visible_probs = logits.sigmoid()
     visible_sample = torch.bernoulli(visible_probs)
     return visible_probs, visible_sample
コード例 #59
0
 def sample(self, sample_shape=torch.Size()):
     """Return Bernoulli draws of shape ``self._extended_shape(sample_shape)``.

     ``self.probs`` is expanded to that shape and used as the per-entry
     probability of drawing a 1.
     """
     full_shape = self._extended_shape(sample_shape)
     return torch.bernoulli(self.probs.expand(full_shape))
コード例 #60
0
def tmp_lambda(x):
    """Return one Bernoulli draw per entry of ``x``, using ``x`` as probabilities."""
    binarized = torch.bernoulli(x)
    return binarized