def forward(self, adj, temp=10, hard=False, beta=1):
    self.assign_tensor = gumbel_softmax(self.params, temp=temp, hard=hard, beta=beta)
    self.assign_tensor_t = torch.transpose(self.assign_tensor, 0, 1)
    super_adj = self.assign_tensor_t @ adj @ self.assign_tensor  # A' = S^T * A * S
    return super_adj
def ncut_loss(adj, embeddings, temp=1.0, hard=True):
    # assign_tensor = F.gumbel_softmax(embeddings, tau=temp, hard=hard)
    assign_tensor = gumbel_softmax(embeddings, temp, hard)
    assign_tensor_t = torch.transpose(assign_tensor, 0, 1)
    super_adj = assign_tensor_t @ adj @ assign_tensor  # A' = S^T * A * S
    vol = super_adj.sum(1)
    diag = torch.diagonal(super_adj)
    norm_cut = (vol - diag) / (vol + 1e-20)
    loss = norm_cut.sum()
    return loss
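# The snippets in this section call a gumbel_softmax(logits, temp, hard) helper that is
# not defined here. Below is a minimal sketch of the usual straight-through variant,
# written as an assumption about what that helper does; the actual implementations may
# differ (some of the calls in this section pass extra arguments such as beta or
# latent/categorical dimensions).
import torch
import torch.nn.functional as F

def gumbel_softmax_sketch(logits, temp=1.0, hard=False, eps=1e-20):
    # Sample Gumbel(0, 1) noise and perturb the logits.
    u = torch.rand_like(logits)
    g = -torch.log(-torch.log(u + eps) + eps)
    y_soft = F.softmax((logits + g) / temp, dim=-1)
    if not hard:
        return y_soft
    # Straight-through: one-hot sample in the forward pass, soft gradient in backward.
    index = y_soft.argmax(dim=-1, keepdim=True)
    y_hard = torch.zeros_like(y_soft).scatter_(-1, index, 1.0)
    return (y_hard - y_soft).detach() + y_soft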
def build_encoder(temperature, hard):
    x = Input(batch_shape=(batch_size, n))
    h_encoded = Dense(hidden_dim,
                      kernel_regularizer=l2(decay),
                      bias_regularizer=l2(decay),
                      use_bias=use_bias,
                      activation='tanh')(x)
    z = Dense(m * nb_classes,
              kernel_regularizer=l2(decay),
              bias_regularizer=l2(decay),
              use_bias=use_bias)(h_encoded)
    logits_z = Reshape((m, nb_classes))(z)  # batch x (m * nb_classes) -> batch x m x nb_classes
    q_z = Softmax()(logits_z)
    log_q_z = Lambda(lambda x: K.log(x + K.epsilon()))(q_z)
    z = Lambda(lambda x: gumbel_softmax(x, temperature, hard))(logits_z)
    z = Flatten()(z)
    q_z = Flatten()(q_z)
    log_q_z = Flatten()(log_q_z)
    return x, z, q_z, log_q_z
def forward(self, images, temperature, hard):
    batch_size = images.shape[0]
    hidden = self.encoder(images.view(batch_size, -1))
    cell = torch.zeros((batch_size, 128)).to(self.device)
    seq = []
    qs = []
    z = torch.zeros(batch_size, self.categorical_dim).to(self.device)
    z[:, 0] = 1.
    while len(seq) < 2:
        hidden, cell = self.rnn(z, (hidden, cell))
        z = self.fc2(hidden)
        qs.append(z)
        z = gumbel_softmax(z, temperature, True)
        seq.append(z)
    return seq, qs
def forward(self, s_t, c_prev, temp, hard):
    if self.use_recurrent:
        q = self.recurrent_encode(s_t, c_prev)
    else:
        x = torch.cat((s_t, c_prev), -1)
        q = self.encode(
            x.view(-1, self.state_dim + self.latent_dim * self.categorical_dim))
    q_y = q.view(q.size(0), self.latent_dim, self.categorical_dim)
    c_t = gumbel_softmax(q_y, temp, self.latent_dim, self.categorical_dim, hard,
                         device=self.device)
    action_pred = self.decode_action(s_t, c_t, c_prev)
    next_state_pred = self.decode_next_state(s_t, c_t, c_prev)
    return action_pred, next_state_pred, c_t
def forward(self, x, temp=1):
    logits = self.fc2(F.relu(self.fc1(x))).view(-1, 2)
    result = gumbel_softmax(logits, temp, hard=True)
    return logits, result
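# A quick smoke test of the two-way gate pattern above, using PyTorch's built-in
# F.gumbel_softmax as a stand-in for the repo's own helper. GateNet and its sizes are
# hypothetical; the point is only that gradients reach fc1/fc2 through the hard,
# one-hot sample thanks to the straight-through estimator.
import torch
import torch.nn as nn
import torch.nn.functional as F

class GateNet(nn.Module):
    def __init__(self, in_dim=16, hidden=32):
        super().__init__()
        self.fc1 = nn.Linear(in_dim, hidden)
        self.fc2 = nn.Linear(hidden, 2)

    def forward(self, x, temp=1.0):
        logits = self.fc2(F.relu(self.fc1(x))).view(-1, 2)
        result = F.gumbel_softmax(logits, tau=temp, hard=True)
        return logits, result

net = GateNet()
_, gate = net(torch.randn(8, 16), temp=0.5)
gate[:, 1].sum().backward()
print(net.fc1.weight.grad is not None)  # True: the hard sample is still differentiable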
def symbol2code(self,
                inputs,
                logits=None,
                hparams=None,
                reuse=None,
                output_embb=False,
                is_training=False,
                output_logits=False,
                logits_bn_overwrite=None):
    """Maps a batch of symbols into a batch of codes.

    Args:
        inputs: an id `Tensor` of shape (d1, d2, ..., dx).
        logits: a logits `Tensor` of shape (d1, d2, ..., dx, K, D); if given,
            inputs will be ignored, otherwise logits are looked up from
            self._code_logits using inputs.

    Returns:
        According to the generator, one of the following codes is returned:
            A `Tensor` of shape (d1, d2, ..., dx, D) where each symbol is replaced
            by a D-dimensional code.
            A `Tensor` of shape (d1, d2, ..., dx, D, K) where each symbol is replaced
            by a D-dimensional code in one-hot embedding format.
        code_embs: only non-None for the vq method; for the others the embeddings
            are created in embed().
    """
    if hparams is None:
        hparams = self._hparams
    get_hparam = util.hparam_fn(hparams, prefix="ec")
    logits_bn = get_hparam("logits_bn")
    code_generator = get_hparam("code_generator")
    code_dropout = get_hparam("code_dropout")
    hard = get_hparam("hard_code_output")
    STE_softmax = get_hparam("STE_softmax_transform")
    entropy_reg = get_hparam("entropy_reg")
    decay_method = get_hparam("temperature_decay_method")
    decay_steps = get_hparam("temperature_decay_steps")
    decay_rate = get_hparam("temperature_decay_rate")
    t_init = get_hparam("temperature_init")
    t_low = get_hparam("temperature_lower_bound")
    if decay_method == "none":
        temperature = 1.0
    else:
        temperature = (decay_method, decay_steps, decay_rate, t_init, t_low)

    if reuse is None:
        try:
            self._symbol2code_reuse
            reuse = True
        except AttributeError:
            self._symbol2code_reuse = True
            reuse = False

    with tf.variable_scope("symbol2code", reuse=reuse):
        code_embs = None
        # One-hot encoding output for all code_generators except preassign.
        if code_generator == "preassign":
            codes = tf.nn.embedding_lookup(self._code, inputs)
        else:
            if logits is None:
                logits = tf.nn.embedding_lookup(self._code_logits, inputs)
            if logits_bn_overwrite is not None:
                logits_bn = logits_bn_overwrite
            if logits_bn > 0.:
                if decay_method == "none":
                    center, scale = True, True
                else:
                    center, scale = False, False
                logits = logits_bn * tf.layers.batch_normalization(
                    logits, training=is_training, center=center, scale=scale)
            if code_generator == "gumbel_softmax":
                codes, codes_soft, _ = gb.gumbel_softmax(
                    logits,
                    temperature=temperature,
                    entropy_reg=entropy_reg,
                    random=is_training,  # Use argmax for test.
                    straight_through=True,
                    is_training=is_training)
                codes = codes if hard or (not is_training) else codes_soft  # Use hard for test.
            elif code_generator == "STE_argmax":
                codes = gb.straight_through(
                    logits,
                    thresholding=False,
                    softmax=STE_softmax,
                    hard=hard or (not is_training),  # Use hard for test.
                    temperature=temperature,
                    entropy_reg=entropy_reg,
                    is_training=is_training)
            elif code_generator == "STE_threshold":
                codes = gb.straight_through(
                    logits,
                    thresholding=True,
                    softmax=STE_softmax,
                    hard=hard or (not is_training),  # Use hard for test.
                    temperature=temperature,
                    entropy_reg=entropy_reg,
                    is_training=is_training)
            else:
                raise ValueError("Unknown code_generator {}".format(code_generator))
            if code_dropout > 0. and is_training:
                # Randomly drop out each example and each of the D code dimensions.
                codes_shape = codes.shape.as_list()
                noise_shape = [1] * len(codes_shape)
                noise_shape[0], noise_shape[-2] = tf.shape(codes)[0], self._D
                codes = tf.nn.dropout(codes, keep_prob=1. - code_dropout,
                                      noise_shape=noise_shape)

    if output_embb:
        if not self._emb_baseline:
            raise ValueError("output_embb can only be True when emb_baseline=True")
        embs = tf.nn.embedding_lookup(self._embb, inputs)
        if output_logits:
            return codes, code_embs, embs, logits
        else:
            return codes, code_embs, embs
    else:
        if output_logits:
            return codes, code_embs, logits
        else:
            return codes, code_embs
edge_index = edge_index.cuda()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=lr, weight_decay=weight_decay)
smallest_loss = 1e20
embeddings_np = None
best_at = 0
for epoch in tqdm(range(num_epochs)):
    model.train()
    model.zero_grad()
    logits, embeddings = model(features, edge_index)
    assign_tensor = gumbel_softmax(embeddings, temp=args.temp, hard=args.hard)
    # assign_tensor = F.softmax(embeddings, -1)
    assign_tensor_t = torch.transpose(assign_tensor, 0, 1)
    super_adj = assign_tensor_t @ adj @ assign_tensor  # A' = S^T * A * S
    vol = super_adj.sum(1)
    diag = torch.diagonal(super_adj)
    norm_cut = (vol - diag) / (vol + 1e-20)
    loss = norm_cut.sum()  # + torch.sqrt(((norm_cut - norm_cut.mean())**2).sum()) * 10
    loss.backward()
    optimizer.step()
    # if loss.item() < smallest_loss:
    if epoch % 100 == 0:
        pass  # import pdb; pdb.set_trace()
w = torch.FloatTensor(4, 2)
init.xavier_normal(w)
w = Variable(w, requires_grad=True)
optimizer = optim.Adam([w], lr=1e-1)

BATCH_SIZE = 30
st = torch.FloatTensor(BATCH_SIZE, 4)
dreward = torch.Tensor([-1]).expand(BATCH_SIZE)

for ep in range(1000):
    st.uniform_(-0.05, 0.05)
    vst = Variable(st)
    reward = Variable(torch.zeros(BATCH_SIZE))
    multiplier = 1
    notdone = Variable(torch.ones(BATCH_SIZE))
    for i in range(800):
        logits = vst @ w
        y = gumbel_softmax(logits, tau=1, hard=True)
        vst, r, d = step_cartpole(vst, y)
        tester = notdone * (1 - d) * r * multiplier
        reward.add_(tester)
        notdone = (d == 0).float()
        multiplier *= 0.99
        if (d.data > 0).all():
            break
    if ep % 5 == 0:
        print("Num steps", i)
    optimizer.zero_grad()
    reward.backward(dreward)
    optimizer.step()
model = model.cuda()
adj = adj.cuda()
features = features.cuda()
edge_index = edge_index.cuda()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=lr, weight_decay=weight_decay)
smallest_loss = 1e20
embeddings_np = None
best_at = 0
for epoch in tqdm(range(num_epochs)):
    model.train()
    model.zero_grad()
    embeddings = model(features, edge_index)
    assign_tensor = gumbel_softmax(embeddings, temp=0.1, hard=True)
    assign_tensor_t = torch.transpose(assign_tensor, 0, 1)
    super_adj = assign_tensor_t @ adj @ assign_tensor  # A' = S^T * A * S
    vol = super_adj.sum(1)
    diag = torch.diagonal(super_adj)
    norm_cut = (vol - diag) / (vol + 1e-20)
    loss = norm_cut.sum()
    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), 2.0)
    optimizer.step()
    if loss.item() < smallest_loss:
        smallest_loss = loss.item()
        embeddings_np = embeddings.cpu().detach().numpy()

X_train = embeddings_np[idx_train]
Y_train = labels[idx_train]
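# One plausible continuation (an assumption, not part of the original loop): evaluate the
# saved soft-assignment embeddings with a simple downstream classifier. This assumes
# X_train and Y_train are NumPy arrays; convert with .cpu().numpy() first if labels is a
# tensor.
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, Y_train)
print("train accuracy:", clf.score(X_train, Y_train))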
def train(epoch, args):
    featureNet.train()
    maskNet.train()
    fcNet.train()
    train_loss = 0
    classification_loss = 0
    correct = 0
    total = 0
    batch_idx = 0
    ds = ImageDataset(args.dataset, dataset_load, 'data/casia_landmark.txt',
                      name=args.net + ':train', bs=args.bs, shuffle=True,
                      nthread=6, imagesize=128)
    global n_iter
    while True:
        if batch_idx % 100 == 0:
            print(batch_idx)
        n_iter += 1
        img, label = ds.get()
        if img is None:
            break
        inputs = torch.from_numpy(img).float()
        targets = torch.from_numpy(label[:, 0]).long()
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizerMask.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)

        features = featureNet(inputs)
        mask = maskNet(features)
        mask = gumbel_softmax(mask)
        maskedFeatures = torch.mul(mask, features)
        outputs = fcNet(maskedFeatures)
        outputs1 = outputs[0]  # 0 = cos_theta, 1 = phi_theta
        _, predicted = torch.max(outputs1.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        # Training the advNet (mask network).
        lossAdv = criterion(outputs, targets)
        # Compactness loss: Laplacian filter over the mask.
        lossCompact = torch.sum(conv2d(mask, laplacianKernel, stride=1, groups=1))
        # lossSize: L1 norm of the mask to make the mask sparse.
        if use_cuda:
            lossSize = F.l1_loss(mask, target=torch.ones(mask.size()).cuda(),
                                 size_average=False)
        else:
            lossSize = F.l1_loss(mask, target=torch.ones(mask.size()),
                                 size_average=False)
        writer.add_scalar('Loss/adv-classification',
                          -criterion2(outputs1, targets) / 100, n_iter)
        writer.add_scalar('Loss/adv-compactness', lossCompact / 1000000, n_iter)
        writer.add_scalar('Loss/adv-size', lossSize / 10000, n_iter)
        loss = (-criterion2(outputs1, targets) / 100
                + lossCompact / 1000000
                + lossSize / 10000)
        writer.add_scalar('Accuracy/adv-totalLoss', loss, n_iter)
        lossd = loss.data
        loss.backward(retain_graph=True)
        optimizerMask.step()

        optimizerFC.zero_grad()
        lossC = criterion(outputs, targets)
        lossClassification = lossC.data
        lossC.backward()
        optimizerFC.step()

        classification_loss += lossClassification
        train_loss += loss.data
        writer.add_scalar('Loss/classn-loss',
                          classification_loss / (batch_idx + 1), n_iter)
        writer.add_scalar('Loss/adv-avgloss',
                          train_loss / (batch_idx + 1), n_iter)
        writer.add_scalar('Accuracy/classification', 100 * correct / total, n_iter)
        writer.add_scalar('Accuracy/correct', correct, n_iter)
        batch_idx += 1
    print('')