Example no. 1
    def forward(self, adj, temp=10, hard=False, beta=1):
        self.assign_tensor = gumbel_softmax(self.params,
                                            temp=temp,
                                            hard=hard,
                                            beta=beta)
        self.assign_tensor_t = torch.transpose(self.assign_tensor, 0, 1)

        super_adj = self.assign_tensor_t @ adj @ self.assign_tensor  # A' = S^T*A*S
        return super_adj
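
Most of the snippets in this listing call a project-local gumbel_softmax helper rather than PyTorch's built-in, and the exact signature differs from example to example. For reference, here is a minimal sketch of a straight-through helper matching the (params, temp, hard, beta) call above; treating beta as a scale on the logits is an assumption based on the keyword name, not something the source confirms.

import torch
import torch.nn.functional as F

def gumbel_softmax(logits, temp=1.0, hard=False, beta=1.0):
    # Sample Gumbel(0, 1) noise: -log(Exp(1)) is Gumbel-distributed.
    gumbels = -torch.empty_like(logits).exponential_().log()
    y_soft = F.softmax((beta * logits + gumbels) / temp, dim=-1)
    if not hard:
        return y_soft
    # Straight-through: one-hot argmax in the forward pass,
    # gradient of the soft sample in the backward pass.
    index = y_soft.argmax(dim=-1, keepdim=True)
    y_hard = torch.zeros_like(y_soft).scatter_(-1, index, 1.0)
    return y_hard - y_soft.detach() + y_soft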
Example no. 2
def ncut_loss(adj, embeddings, temp=1.0, hard=True):
    # assign_tensor = F.gumbel_softmax(embeddings, tau=temp, hard=hard)
    assign_tensor = gumbel_softmax(embeddings, temp, hard)
    assign_tensor_t = torch.transpose(assign_tensor, 0, 1)
    super_adj = assign_tensor_t @ adj @ assign_tensor  # A' = S^T*A*S
    vol = super_adj.sum(1)
    diag = torch.diagonal(super_adj)
    norm_cut = (vol - diag) / (vol + 1e-20)
    loss = norm_cut.sum()
    return loss
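
A hypothetical call on a tiny random graph (the 6-node adjacency and the 2-cluster embeddings below are made up for illustration) shows the expected shapes; with the straight-through sample the loss stays differentiable even though hard defaults to True.

import torch

adj = torch.randint(0, 2, (6, 6)).float()
adj = ((adj + adj.t()) > 0).float()                 # symmetric 6-node adjacency
embeddings = torch.randn(6, 2, requires_grad=True)  # per-node logits over 2 clusters
loss = ncut_loss(adj, embeddings)                   # temp=1.0, hard=True by default
loss.backward()                                     # gradients reach embeddings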
Example no. 3
def build_encoder(temperature, hard):
    x = Input(batch_shape=(batch_size, n))
    h_encoded = Dense(hidden_dim, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias, activation='tanh')(x)
    z = Dense(m * nb_classes, kernel_regularizer=l2(decay), bias_regularizer=l2(decay), use_bias=use_bias)(h_encoded)

    logits_z = Reshape((m, nb_classes))(z)  # batch x m * nb_classes -> batch x m x nb_classes
    q_z = Softmax()(logits_z)
    log_q_z = Lambda(lambda x: K.log(x + K.epsilon()))(q_z)

    z = Lambda(lambda x: gumbel_softmax(x, temperature, hard))(logits_z)

    z = Flatten()(z)
    q_z = Flatten()(q_z)
    log_q_z = Flatten()(log_q_z)

    return x, z, q_z, log_q_z
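
The encoder returns q_z and log_q_z so that a KL term against a uniform categorical prior can be added to the training loss; the source does not show that term, but a minimal sketch following the standard categorical-VAE recipe would be:

import numpy as np
from keras import backend as K

def kl_to_uniform(q_z, log_q_z, nb_classes):
    # KL(q(z|x) || Uniform(nb_classes)) = sum_i q_i * (log q_i - log(1/K)),
    # summed over the m flattened categorical variables.
    return K.sum(q_z * (log_q_z - np.log(1.0 / nb_classes)), axis=-1)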
Example no. 4
    def forward(self, images, temperature, hard):
        batch_size = images.shape[0]

        hidden = self.encoder(images.view(batch_size, -1))
        cell = torch.zeros((batch_size, 128)).to(self.device)

        seq = []
        qs = []

        z = torch.zeros(batch_size, self.categorical_dim).to(self.device)
        z[:, 0] = 1.

        while len(seq) < 2:
            hidden, cell = self.rnn(z, (hidden, cell))
            z = self.fc2(hidden)
            qs.append(z)
            z = gumbel_softmax(z, temperature, True)
            seq.append(z)

        return seq, qs
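
The (hidden, cell) state tuple implies self.rnn is an nn.LSTMCell. The constructor is not part of the excerpt; a sketch of the module attributes this forward pass assumes (the class name and the 28*28 image size are placeholders, the 128 hidden size is taken from the hard-coded cell state above) is:

import torch.nn as nn

class SeqEncoder(nn.Module):  # hypothetical class name
    def __init__(self, categorical_dim, device, image_size=28 * 28):
        super().__init__()
        self.categorical_dim = categorical_dim
        self.device = device
        self.encoder = nn.Linear(image_size, 128)     # images -> initial LSTM hidden state
        self.rnn = nn.LSTMCell(categorical_dim, 128)  # input: previous one-hot sample
        self.fc2 = nn.Linear(128, categorical_dim)    # hidden state -> categorical logits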
Example no. 5
    def forward(self, s_t, c_prev, temp, hard):
        if self.use_recurrent:
            q = self.recurrent_encode(s_t, c_prev)
        else:
            x = torch.cat((s_t, c_prev), -1)
            q = self.encode(
                x.view(-1, self.state_dim +
                       self.latent_dim * self.categorical_dim))
        q_y = q.view(q.size(0), self.latent_dim, self.categorical_dim)
        c_t = gumbel_softmax(q_y,
                             temp,
                             self.latent_dim,
                             self.categorical_dim,
                             hard,
                             device=self.device)

        action_pred = self.decode_action(s_t, c_t, c_prev)
        next_state_pred = self.decode_next_state(s_t, c_t, c_prev)

        return action_pred, next_state_pred, c_t
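
Unlike the earlier examples, this call passes latent_dim and categorical_dim into gumbel_softmax and expects a flattened code back (note how c_prev is consumed as a latent_dim * categorical_dim vector). A sketch of a helper with that signature, modeled on the common categorical-VAE recipe and not taken from the source, is:

import torch
import torch.nn.functional as F

def gumbel_softmax(logits, temp, latent_dim, categorical_dim, hard=False, device="cpu"):
    # logits: (batch, latent_dim, categorical_dim); each of the latent_dim
    # categorical variables is sampled independently. The device argument is
    # kept for signature compatibility; noise is created on logits' device.
    gumbels = -torch.empty_like(logits).exponential_().log()
    y_soft = F.softmax((logits + gumbels) / temp, dim=-1)
    if hard:
        index = y_soft.argmax(dim=-1, keepdim=True)
        y_hard = torch.zeros_like(y_soft).scatter_(-1, index, 1.0)
        y_soft = y_hard - y_soft.detach() + y_soft  # straight-through estimator
    return y_soft.view(-1, latent_dim * categorical_dim)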
Example no. 6
    def forward(self, x, temp=1):
        logits = self.fc2(F.relu(self.fc1(x))).view(-1, 2)
        result = gumbel_softmax(logits, temp, hard=True)
        return logits, result
Example no. 7
    def symbol2code(self,
                    inputs,
                    logits=None,
                    hparams=None,
                    reuse=None,
                    output_embb=False,
                    is_training=False,
                    output_logits=False,
                    logits_bn_overwrite=None):
        """Maps a batch of symbols into a batch of codes.

    Args:
      inputs: a id `Tensor` of shape (d1, d2, ..., dx).
      logits: a logits `Tensor` of shape (d1, d2, ..., dx, K, D), if given,
        inputs will be ignored, otherwise it is indexed from self._code_logits
        using inputs.

    Returns:
      According to the generator, one of the following code would be returned:
        A `Tensor` of (d1, d2, ..., dx, D) where each symbol is replaced by a
          D-dimensional code.
        A `Tensor` of (d1, d2, ..., dx, D, K) where each symbol is replaced by a
          D-dimensional code in one-hot embedding format.
      code_embs: only not None for vq method, others will be created in embed()
    """
        if hparams is None:
            hparams = self._hparams
        get_hparam = util.hparam_fn(hparams, prefix="ec")
        logits_bn = get_hparam("logits_bn")
        code_generator = get_hparam("code_generator")
        code_dropout = get_hparam("code_dropout")
        hard = get_hparam("hard_code_output")
        STE_softmax = get_hparam("STE_softmax_transform")
        entropy_reg = get_hparam("entropy_reg")
        decay_method = get_hparam("temperature_decay_method")
        decay_steps = get_hparam("temperature_decay_steps")
        decay_rate = get_hparam("temperature_decay_rate")
        t_init = get_hparam("temperature_init")
        t_low = get_hparam("temperature_lower_bound")
        if decay_method == "none":
            temperature = 1.0
        else:
            temperature = (decay_method, decay_steps, decay_rate, t_init,
                           t_low)

        if reuse is None:
            # Reuse the variable scope from the second call onwards.
            reuse = hasattr(self, "_symbol2code_reuse")
            self._symbol2code_reuse = True

        with tf.variable_scope("symbol2code", reuse=reuse):
            code_embs = None
            # One-hot encoding output for all code_generators except preassign.
            if code_generator == "preassign":
                codes = tf.nn.embedding_lookup(self._code, inputs)
            else:
                if logits is None:
                    logits = tf.nn.embedding_lookup(self._code_logits, inputs)
                if logits_bn_overwrite is not None:
                    logits_bn = logits_bn_overwrite
                if logits_bn > 0.:
                    if decay_method == "none":
                        center, scale = True, True
                    else:
                        center, scale = False, False
                    logits = logits_bn * tf.layers.batch_normalization(
                        logits,
                        training=is_training,
                        center=center,
                        scale=scale)
                if code_generator == "gumbel_softmax":
                    codes, codes_soft, _ = gb.gumbel_softmax(
                        logits,
                        temperature=temperature,
                        entropy_reg=entropy_reg,
                        random=is_training,  # Use argmax for test.
                        straight_through=True,
                        is_training=is_training)
                    codes = codes if hard or (
                        not is_training) else codes_soft  # Use hard for test.
                elif code_generator == "STE_argmax":
                    codes = gb.straight_through(
                        logits,
                        thresholding=False,
                        softmax=STE_softmax,
                        hard=hard or (not is_training),  # Use hard for test.
                        temperature=temperature,
                        entropy_reg=entropy_reg,
                        is_training=is_training)
                elif code_generator == "STE_threshold":
                    codes = gb.straight_through(
                        logits,
                        thresholding=True,
                        softmax=STE_softmax,
                        hard=hard or (not is_training),  # Use hard for test.
                        temperature=temperature,
                        entropy_reg=entropy_reg,
                        is_training=is_training)
                else:
                    raise ValueError(
                        "Unknown code_generator {}".format(code_generator))

        if code_dropout > 0. and is_training:
            # Randomly drop out codes for each example and each of the D code dimensions.
            codes_shape = codes.shape.as_list()
            noise_shape = [1] * len(codes_shape)
            noise_shape[0], noise_shape[-2] = tf.shape(codes)[0], self._D
            codes = tf.nn.dropout(codes,
                                  keep_prob=1. - code_dropout,
                                  noise_shape=noise_shape)

        if output_embb:
            if not self._emb_baseline:
                raise ValueError(
                    "output_embb can only be True when emb_baseline=True")
            embs = tf.nn.embedding_lookup(self._embb, inputs)
            if output_logits:
                return codes, code_embs, embs, logits
            else:
                return codes, code_embs, embs
        else:
            if output_logits:
                return codes, code_embs, logits
            else:
                return codes, code_embs
Example no. 8
    edge_index = edge_index.cuda()

optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=lr,
                             weight_decay=weight_decay)

smallest_loss = 1e20
embeddings_np = None
best_at = 0

for epoch in tqdm(range(num_epochs)):
    model.train()
    model.zero_grad()
    logits, embeddings = model(features, edge_index)
    assign_tensor = gumbel_softmax(embeddings, temp=args.temp, hard=args.hard)
    #assign_tensor =  F.softmax(embeddings,-1)
    assign_tensor_t = torch.transpose(assign_tensor, 0, 1)
    super_adj = assign_tensor_t @ adj @ assign_tensor  # A' = S^T*A*S
    vol = super_adj.sum(1)
    diag = torch.diagonal(super_adj)
    norm_cut = (vol - diag) / (vol + 1e-20)
    #print(torch.max(norm_cut), torch.min(norm_cut))
    loss = norm_cut.sum()  # + torch.sqrt(((norm_cut - norm_cut.mean())**2).sum()) * 10

    loss.backward()
    optimizer.step()
    # if loss.item() < smallest_loss:
    if epoch % 100 == 0:
        #import pdb;pdb.set_trace()
Example no. 9
w = torch.FloatTensor(4, 2)
init.xavier_normal_(w)
w = Variable(w, requires_grad=True)

optimizer = optim.Adam([w], lr=1e-1)

BATCH_SIZE = 30

st = torch.FloatTensor(BATCH_SIZE, 4)
dreward = torch.Tensor([-1]).expand(BATCH_SIZE)
for ep in range(1000):
    st.uniform_(-0.05, 0.05)
    vst = Variable(st)
    reward = Variable(torch.zeros(BATCH_SIZE))
    multiplier = 1
    notdone = Variable(torch.ones(BATCH_SIZE))
    for i in range(800):
        logits = vst @ w
        y = gumbel_softmax(logits, tau=1, hard=True)
        vst, r, d = step_cartpole(vst, y)
        tester = notdone * (1 - d) * r * multiplier
        reward.add_(tester)
        notdone = (d == 0).float()
        multiplier *= 0.99
        if (d.data > 0).all(): break
    if ep % 5 == 0:
        print("Num steps", i)
        optimizer.zero_grad()
    reward.backward(dreward)
    optimizer.step()
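
This snippet uses the tau/hard keyword names, which match PyTorch's built-in torch.nn.functional.gumbel_softmax, so the sampling step could equivalently be written with the standard function:

import torch.nn.functional as F

# Equivalent sampling step with the built-in straight-through Gumbel-softmax;
# logits has shape (BATCH_SIZE, 2), one row of action logits per environment.
y = F.gumbel_softmax(logits, tau=1, hard=True)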
Example no. 10
    model = model.cuda()
    adj = adj.cuda()
    features = features.cuda()
    edge_index = edge_index.cuda()

optimizer = torch.optim.Adam(filter(lambda p : p.requires_grad, model.parameters()), lr=lr, weight_decay=weight_decay)

smallest_loss = 1e20
embeddings_np = None 
best_at = 0

for epoch in tqdm(range(num_epochs)):
    model.train()
    model.zero_grad()
    embeddings = model(features, edge_index)
    assign_tensor = gumbel_softmax(embeddings, temp=0.1, hard=True)
    assign_tensor_t = torch.transpose(assign_tensor, 0, 1)
    super_adj = assign_tensor_t @ adj @ assign_tensor  # A' = S^T*A*S
    vol = super_adj.sum(1)
    diag = torch.diagonal(super_adj)
    norm_cut = (vol - diag) / (vol + 1e-20)
    loss = norm_cut.sum()

    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), 2.0)
    optimizer.step()
    if loss.item() < smallest_loss:
        smallest_loss = loss.item()
    embeddings_np = embeddings.cpu().detach().numpy()
    X_train = embeddings_np[idx_train]
    Y_train = labels[idx_train]
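
The excerpt ends right after X_train and Y_train are gathered; a typical follow-up, sketched here as an assumption rather than code from the source, is to probe the frozen embeddings with a simple linear classifier:

from sklearn.linear_model import LogisticRegression

# Hypothetical probe on the embeddings and labels collected above.
clf = LogisticRegression(max_iter=1000).fit(X_train, Y_train)
print("train accuracy:", clf.score(X_train, Y_train))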
Example no. 11
def train(epoch, args):
    featureNet.train()
    maskNet.train()
    fcNet.train()
    train_loss = 0
    classification_loss = 0
    correct = 0
    total = 0
    batch_idx = 0
    ds = ImageDataset(args.dataset,
                      dataset_load,
                      'data/casia_landmark.txt',
                      name=args.net + ':train',
                      bs=args.bs,
                      shuffle=True,
                      nthread=6,
                      imagesize=128)
    global n_iter
    while True:
        if batch_idx % 100 == 0:
            print(batch_idx)
        n_iter += 1
        img, label = ds.get()
        if img is None: break
        inputs = torch.from_numpy(img).float()
        targets = torch.from_numpy(label[:, 0]).long()
        if use_cuda: inputs, targets = inputs.cuda(), targets.cuda()

        optimizerMask.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        features = featureNet(inputs)
        mask = maskNet(features)
        mask = gumbel_softmax(mask)
        # print(mask.size())
        maskedFeatures = torch.mul(mask, features)
        # print(features.shape, mask.shape, maskedFeatures.shape)

        outputs = fcNet(maskedFeatures)
        outputs1 = outputs[0]  # 0=cos_theta 1=phi_theta
        _, predicted = torch.max(outputs1.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        # training the advNet:
        lossAdv = criterion(outputs, targets)
        # print(conv2d(mask, laplacianKernel, stride=1, groups=1).size())
        lossCompact = torch.sum(
            conv2d(mask, laplacianKernel, stride=1, groups=1))
        # lossSize: L1 norm of the mask to make the mask sparse.
        lossSize = F.l1_loss(mask,
                             target=torch.ones_like(mask),
                             reduction='sum')
        # print("advnet:", - criterion2(outputs1, targets).data/10, lossCompact.data/1000000, lossSize.data/10000)
        writer.add_scalar('Loss/adv-classification',
                          -criterion2(outputs1, targets) / 100, n_iter)
        writer.add_scalar('Loss/adv-compactness', lossCompact / 1000000,
                          n_iter)
        writer.add_scalar('Loss/adv-size', lossSize / 10000, n_iter)
        loss = -criterion2(
            outputs1, targets) / 100 + lossCompact / 1000000 + lossSize / 10000
        writer.add_scalar('Accuracy/adv-totalLoss', loss, n_iter)
        lossd = loss.data
        loss.backward(retain_graph=True)
        optimizerMask.step()

        optimizerFC.zero_grad()
        lossC = criterion(outputs, targets)
        lossClassification = lossC.data
        lossC.backward()
        optimizerFC.step()
        classification_loss += lossClassification
        train_loss += loss.data

        # print("classification loss:", classification_loss / (batch_idx + 1))
        writer.add_scalar('Loss/classn-loss',
                          classification_loss / (batch_idx + 1), n_iter)
        writer.add_scalar('Loss/adv-avgloss', train_loss / (batch_idx + 1),
                          n_iter)
        # printoneline(dt(),'Te=%d Loss=%.4f | AccT=%.4f%% (%d/%d) %.4f %.2f %d\n'
        # % (epoch,train_loss/(batch_idx+1), 100.0*correct/total, correct, total,
        # lossd, criterion.lamb, criterion.it))
        writer.add_scalar('Accuracy/classification', 100 * correct / total,
                          n_iter)
        # writer.add_scalar
        writer.add_scalar('Accuracy/correct', correct, n_iter)
        batch_idx += 1
        # break
    print('')
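
The compactness term above convolves the mask with a laplacianKernel defined outside this excerpt. A plausible definition, assumed here for a single-channel spatial mask rather than taken from the source, is the standard 3x3 discrete Laplacian:

import torch

# Hypothetical kernel for the compactness loss; the (1, 1, 3, 3) shape assumes
# the mask is a single-channel map of shape (N, 1, H, W).
laplacianKernel = torch.tensor([[0., 1., 0.],
                                [1., -4., 1.],
                                [0., 1., 0.]]).view(1, 1, 3, 3)
if use_cuda:
    laplacianKernel = laplacianKernel.cuda()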