def train_scene_discriminator(x):
    netC.zero_grad()
    if has_cuda:
        target = torch.cuda.FloatTensor(opt.batch_size, 1)
    else:
        target = torch.FloatTensor(opt.batch_size, 1)
    x1 = x[0]
    x2 = x[1]
    h_p1 = netEP(x1).detach()
    h_p2 = netEP(x2).detach()
    half = int(opt.batch_size/2)
    if has_cuda:
        rp = torch.randperm(half).cuda()
    else:
        rp = torch.randperm(half).cpu()
    h_p2[:half] = h_p2[rp]
    target[:half] = 1
    target[half:] = 0
    out = netC([h_p1, h_p2])
    bce = bce_criterion(out, Variable(target))
    bce.backward()
    optimizerC.step()
    acc = out[:half].gt(0.5).sum() + out[half:].le(0.5).sum()
    return bce.data.cpu().numpy(), acc.data.cpu().numpy()/opt.batch_size
def main():
    parser = argparse.ArgumentParser(description="parse args")
    parser.add_argument('-n', '--num-epochs', default=1000, type=int)
    parser.add_argument('-b', '--batch-size', default=N, type=int)
    parser.add_argument('--cuda', action='store_true')
    args = parser.parse_args()
    data = build_linear_dataset(N, p)
    if args.cuda:
        # make tensors and modules CUDA
        data = data.cuda()
        softplus.cuda()
        regression_model.cuda()
    for j in range(args.num_epochs):
        if args.batch_size == N:
            # use the entire data set
            epoch_loss = svi.step(data)
        else:
            # mini batch
            epoch_loss = 0.0
            perm = torch.randperm(N) if not args.cuda else torch.randperm(N).cuda()
            # shuffle data
            data = data[perm]
            # get indices of each batch
            all_batches = get_batch_indices(N, args.batch_size)
            for ix, batch_start in enumerate(all_batches[:-1]):
                batch_end = all_batches[ix + 1]
                batch_data = data[batch_start: batch_end]
                epoch_loss += svi.step(batch_data)
        if j % 100 == 0:
            print("epoch avg loss {}".format(epoch_loss/float(N)))
def random(nin, nout, nto):
    nker = nto * nout
    tbl = torch.Tensor(nker, 2)
    fi = torch.randperm(nin)
    frcntr = 0
    nfi = math.floor(nin / nto)  # number of distinct nto chunks
    totbl = tbl.select(1, 1)
    frtbl = tbl.select(1, 0)
    fitbl = fi.narrow(0, 0, (nfi * nto))  # part of fi that covers distinct chunks
    ufrtbl = frtbl.unfold(0, nto, nto)
    utotbl = totbl.unfold(0, nto, nto)
    ufitbl = fitbl.unfold(0, nto, nto)

    # start filling frtbl
    for i in range(nout):  # for each unit in target map
        ufrtbl.select(0, i).copy_(ufitbl.select(0, frcntr))
        frcntr += 1
        if frcntr - 1 == nfi:  # reset fi
            fi.copy_(torch.randperm(nin))
            frcntr = 1
    for tocntr in range(utotbl.size(0)):
        utotbl.select(0, tocntr).fill_(tocntr)
    return tbl
def main(args):
    pyro.clear_param_store()
    data = build_linear_dataset(N, p)
    if args.cuda:
        # make tensors and modules CUDA
        data = data.cuda()
        softplus.cuda()
        regression_model.cuda()
    for j in range(args.num_epochs):
        if args.batch_size == N:
            # use the entire data set
            epoch_loss = svi.step(data)
        else:
            # mini batch
            epoch_loss = 0.0
            perm = torch.randperm(N) if not args.cuda else torch.randperm(N).cuda()
            # shuffle data
            data = data[perm]
            # get indices of each batch
            all_batches = get_batch_indices(N, args.batch_size)
            for ix, batch_start in enumerate(all_batches[:-1]):
                batch_end = all_batches[ix + 1]
                batch_data = data[batch_start: batch_end]
                epoch_loss += svi.step(batch_data)
        if j % 100 == 0:
            print("epoch avg loss {}".format(epoch_loss/float(N)))
def mixup_data(x, y, alpha=1.0, use_cuda=True):
    if alpha > 0.:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1.
    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam
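# A minimal, hedged usage sketch for mixup_data above (not from the original
# source): the mixed batch is scored against both label sets, weighted by lam,
# which is the standard mixup training criterion. `net` is a toy stand-in model.
import torch
import torch.nn as nn
import numpy as np

net = nn.Linear(10, 3)
criterion = nn.CrossEntropyLoss()
x, y = torch.randn(8, 10), torch.randint(0, 3, (8,))
mixed_x, y_a, y_b, lam = mixup_data(x, y, alpha=1.0, use_cuda=False)
outputs = net(mixed_x)
loss = lam * criterion(outputs, y_a) + (1 - lam) * criterion(outputs, y_b)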
def pretrain(self, train_data, corrupter, tester):
    src, rel, dst = train_data
    n_train = len(src)
    optimizer = Adam(self.mdl.parameters())
    # optimizer = SGD(self.mdl.parameters(), lr=1e-4)
    n_epoch = self.config.n_epoch
    n_batch = self.config.n_batch
    best_perf = 0
    for epoch in range(n_epoch):
        epoch_loss = 0
        rand_idx = t.randperm(n_train)
        src = src[rand_idx]
        rel = rel[rand_idx]
        dst = dst[rand_idx]
        src_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
        src_cuda = src.cuda()
        rel_cuda = rel.cuda()
        dst_cuda = dst.cuda()
        src_corrupted = src_corrupted.cuda()
        dst_corrupted = dst_corrupted.cuda()
        for s0, r, t0, s1, t1 in batch_by_num(n_batch, src_cuda, rel_cuda, dst_cuda,
                                              src_corrupted, dst_corrupted, n_sample=n_train):
            self.mdl.zero_grad()
            loss = t.sum(self.mdl.pair_loss(Variable(s0), Variable(r), Variable(t0),
                                            Variable(s1), Variable(t1)))
            loss.backward()
            optimizer.step()
            self.mdl.constraint()
            epoch_loss += loss.data[0]
        logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
        if (epoch + 1) % self.config.epoch_per_test == 0:
            test_perf = tester()
            if test_perf > best_perf:
                self.save(os.path.join(config().task.dir, self.config.model_file))
                best_perf = test_perf
    return best_perf
def optimize_model(model, x, y, x_test, y_test, batch_size=32,
                   learning_rate=1e-4, weight_decay=1e-4):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                                 weight_decay=weight_decay)
    N = y.size(0)
    num_one_epoch = np.floor(N / batch_size).astype(np.int)
    num_epoch = np.floor(3000 / num_one_epoch).astype(np.int)
    for epoch in range(num_epoch):
        index = torch.randperm(N)
        for t in range(num_one_epoch):
            idx_start = t * batch_size
            idx_end = (t + 1) * batch_size
            y_pred = model(x[index[idx_start:idx_end], :])
            loss = torch.nn.MSELoss()(y_pred, y[index[idx_start:idx_end]])
            # print(epoch, t, loss.data[0])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    y_pred = model(x)
    loss = torch.nn.MSELoss()(y_pred, y)
    y_test_pred = model(x_test)
    test_loss = torch.nn.MSELoss()(y_test_pred, y_test)
    # print(test_loss.data[0])
    print(loss.data[0], test_loss.data[0])
    return loss.data[0], test_loss.data[0]
def train_valid_splitter(x, y, split, shuffle=True):
    '''Generate training and validation tensors from whole dataset data and label tensors

    :param x: Data tensor for whole dataset
    :type x: torch.Tensor
    :param y: Label tensor for whole dataset
    :type y: torch.Tensor
    :param split: Fraction of dataset to be used for validation
    :type split: float
    :param shuffle: If True randomize tensor order before splitting else do not randomize
    :type shuffle: bool
    :return: Training and validation tensors (training data, training labels, validation data, validation labels)
    :rtype: tuple
    '''
    num_samples_x = x.size()[0]
    num_valid_samples = math.floor(num_samples_x * split)
    if shuffle:
        indices = torch.randperm(num_samples_x)
        x, y = x[indices], y[indices]
    x_val, y_val = x[:num_valid_samples], y[:num_valid_samples]
    x, y = x[num_valid_samples:], y[num_valid_samples:]
    return x, y, x_val, y_val
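# Quick usage sketch for train_valid_splitter (toy tensors; assumes the same
# `torch` and `math` imports the function above relies on). With split=0.2 and
# 100 samples, math.floor(100 * 0.2) = 20 rows go to validation.
import math
import torch

data = torch.randn(100, 4)
labels = torch.randint(0, 2, (100,))
x_train, y_train, x_val, y_val = train_valid_splitter(data, labels, split=0.2)
assert x_train.size(0) == 80 and x_val.size(0) == 20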
def _generate_perms_and_inverses(feature_size, num_perms):
    perms = [torch.randperm(feature_size) for _ in range(num_perms)]
    inv_perms = [torch.cat([(perm == i).nonzero() for i in range(feature_size)],
                           0).squeeze() for perm in perms]
    return perms, inv_perms
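# Sanity sketch (an assumption about the intended invariant, not from the
# original source): each inv_perm should undo its perm, so indexing a
# permutation by its inverse recovers the identity ordering.
import torch

perms, inv_perms = _generate_perms_and_inverses(feature_size=6, num_perms=2)
for perm, inv in zip(perms, inv_perms):
    assert torch.equal(perm[inv], torch.arange(6))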
def drop_exp_2(r_feat_val, r_feat_train, pred):
    # incep_score, mode_score, fid
    n_mode = len(Counter(pred))
    scores = np.zeros((n_mode, 3))
    t_feat = r_feat_train.clone()
    collapsed_order = torch.randperm(n_mode).long()
    index = torch.arange(0, r_feat_train.size(0)).long()
    collapsed = torch.zeros(r_feat_train.size(0)).byte()
    Mxx = distance(r_feat_val, r_feat_val, sqrt=True)
    for i in range(n_mode):
        # Compute Score
        Mxy = distance(r_feat_val, t_feat, sqrt=True)
        Myy = distance(t_feat, t_feat, sqrt=True)
        scores[i, 0] = inception_score(t_feat)
        scores[i, 1] = mode_score(t_feat, r_feat_val)
        scores[i, 2] = fid(t_feat, r_feat_val)
        # Do drop -- fill dropped slots with remaining samples
        c = collapsed_order[i]
        collapsed[pred.eq(c)] = 1
        cidx = index[collapsed.eq(1)]
        ncidx = index[collapsed.ne(1)]
        if ncidx.dim() == 0 or cidx.dim() == 0 or ncidx.size(0) == 0:
            continue
        for j in cidx:
            copy_idx = np.random.randint(0, ncidx.size(0))
            t_feat[j] = t_feat[ncidx[copy_idx]]
    return scores
def sparse_(tensor, sparsity, std=0.01):
    r"""Fills the 2D input `Tensor` as a sparse matrix, where the
    non-zero elements will be drawn from the normal distribution
    :math:`\mathcal{N}(0, 0.01)`, as described in "Deep learning via
    Hessian-free optimization" - Martens, J. (2010).

    Args:
        tensor: an n-dimensional `torch.Tensor`
        sparsity: The fraction of elements in each column to be set to zero
        std: the standard deviation of the normal distribution used to generate
            the non-zero values

    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.sparse_(w, sparsity=0.1)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    rows, cols = tensor.shape
    num_zeros = int(math.ceil(sparsity * rows))

    with torch.no_grad():
        tensor.normal_(0, std)
        for col_idx in range(cols):
            row_indices = torch.randperm(rows)
            zero_indices = row_indices[:num_zeros]
            tensor[zero_indices, col_idx] = 0
    return tensor
def __call__(self, *inputs):
    order = th.randperm(inputs[0].dim())
    outputs = []
    for idx, _input in enumerate(inputs):
        _input = _input.index_select(0, order)
        outputs.append(_input)
    return outputs if idx > 1 else outputs[0]
def collapse_exp_1(r_feat_val, r_feat, c_feat, pred):
    # emd, mmd, acc_t, acc_f
    n_mode = c_feat.size(0)
    c_feat_repeat = c_feat[pred]
    scores = np.zeros((n_mode, 4))
    t_feat = r_feat.clone()
    index = torch.arange(0, 2000).long()
    collapsed_order = torch.randperm(n_mode).long()
    Mxx = distance(r_feat_val, r_feat_val, sqrt=False)
    for i in range(n_mode):
        # Compute Score
        Mxy = distance(r_feat_val, t_feat, sqrt=False)
        Myy = distance(t_feat, t_feat, sqrt=False)
        scores[i, 0] = wasserstein(Mxy, True)
        scores[i, 1] = mmd(Mxx, Mxy, Myy, 1)
        s = knn(Mxx, Mxy, Myy, 1, True)
        scores[i, 2], scores[i, 3] = s.acc_t, s.acc_f
        # Do collapse
        c = collapsed_order[i]
        cidx = index[pred.eq(c)]
        t_feat[cidx] = c_feat_repeat[cidx]
    return scores
def pretrain(self, train_data, corrupter, tester):
    src, rel, dst = train_data
    n_train = len(src)
    n_epoch = self.config.n_epoch
    n_batch = self.config.n_batch
    optimizer = Adam(self.mdl.parameters(), weight_decay=self.weight_decay)
    best_perf = 0
    for epoch in range(n_epoch):
        epoch_loss = 0
        if epoch % self.config.sample_freq == 0:
            rand_idx = t.randperm(n_train)
            src = src[rand_idx]
            rel = rel[rand_idx]
            dst = dst[rand_idx]
            src_corrupted, rel_corrupted, dst_corrupted = corrupter.corrupt(src, rel, dst)
            src_corrupted = src_corrupted.cuda()
            rel_corrupted = rel_corrupted.cuda()
            dst_corrupted = dst_corrupted.cuda()
        for ss, rs, ts in batch_by_num(n_batch, src_corrupted, rel_corrupted,
                                       dst_corrupted, n_sample=n_train):
            self.mdl.zero_grad()
            label = t.zeros(len(ss)).type(t.LongTensor).cuda()
            loss = t.sum(self.mdl.softmax_loss(Variable(ss), Variable(rs), Variable(ts), label))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.data[0]
        logging.info('Epoch %d/%d, Loss=%f', epoch + 1, n_epoch, epoch_loss / n_train)
        if (epoch + 1) % self.config.epoch_per_test == 0:
            test_perf = tester()
            if test_perf > best_perf:
                self.save(os.path.join(config().task.dir, self.config.model_file))
                best_perf = test_perf
    return best_perf
def val(spatial_size, Scale, precomputeStride):
    d = pickle.load(open('pickle/test.pickle', 'rb'))
    d = torchnet.dataset.ListDataset(d)
    randperm = torch.randperm(len(d))

    def perm(idx, size):
        return randperm[idx]

    def merge(tbl):
        inp = scn.InputBatch(2, spatial_size)
        center = spatial_size.float().view(1, 2) / 2
        p = torch.LongTensor(2)
        v = torch.FloatTensor([1, 0, 0])
        for char in tbl['input']:
            inp.addSample()
            for stroke in char:
                stroke = stroke.float() * (Scale - 0.01) / 255 - 0.5 * (Scale - 0.01)
                stroke += center.expand_as(stroke)
                scn.dim_fn(2, 'drawCurve')(inp.metadata.ffi, inp.features, stroke)
        inp.precomputeMetadata(precomputeStride)
        return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1}

    bd = torchnet.dataset.BatchDataset(d, 183, perm=perm, merge=merge)
    tdi = scn.threadDatasetIterator(bd)

    def iter():
        randperm = torch.randperm(len(d))
        return tdi()
    return iter
def test(self):
    if opt['model'] == 'CharCNN':
        X_train = self.dataset.df_train['text_parsed'].values
        X_test = self.dataset.df_test['text_parsed'].values
    else:
        X_train = self.dataset.df_train['ids'].values
        X_test = self.dataset.df_test['ids'].values
    Y_train = self.dataset.df_train['label'].values
    Y_test = self.dataset.df_test['label'].values
    m_train = len(X_train)
    permutation = torch.randperm(m_train)
    accuracies = []
    for start_idx in range(0, m_train, opt['batch_size']):
        indices = permutation[start_idx:start_idx + opt['batch_size']]
        if opt['model'] == 'CharCNN':
            X_train_batch, X_train_mask_batch, Y_train_batch = self.create_batch_char(X_train, Y_train, indices)
        else:
            X_train_batch, X_train_mask_batch, Y_train_batch = self.create_batch(X_train, Y_train, indices)
        Y_predict = self.model(X_train_batch, X_train_mask_batch)
        loss = self.loss(Y_predict, Y_train_batch)
        accuracy, _ = self.calculate_accuracy(Y_train_batch, Y_predict)
        accuracies.append(accuracy)
        print(loss.cpu().data.numpy(), accuracy)
        del X_train_batch, X_train_mask_batch, Y_train_batch, Y_predict
    print(sum(accuracies) / len(accuracies))
def drop_exp_1(r_feat_val, r_feat_train, pred):
    # emd, mmd, acc_t, acc_f
    n_mode = len(Counter(pred))
    scores = np.zeros((n_mode, 4))
    t_feat = r_feat_train.clone()
    collapsed_order = torch.randperm(n_mode).long()
    index = torch.arange(0, r_feat_train.size(0)).long()
    collapsed = torch.zeros(r_feat_train.size(0)).byte()
    Mxx = distance(r_feat_val, r_feat_val, sqrt=True)
    for i in range(n_mode):
        # Compute Score
        Mxy = distance(r_feat_val, t_feat, sqrt=True)
        Myy = distance(t_feat, t_feat, sqrt=True)
        scores[i, 0] = wasserstein(Mxy, False)
        scores[i, 1] = mmd(Mxx, Mxy, Myy, 1)
        s = knn(Mxx, Mxy, Myy, 1, True)
        scores[i, 2], scores[i, 3] = s.acc_t, s.acc_f
        # Do drop -- fill dropped slots with remaining samples
        c = collapsed_order[i]
        collapsed[pred.eq(c)] = 1
        cidx = index[collapsed.eq(1)]
        ncidx = index[collapsed.ne(1)]
        if ncidx.dim() == 0 or cidx.dim() == 0 or ncidx.size(0) == 0:
            continue
        for j in cidx:
            copy_idx = np.random.randint(0, ncidx.size(0))
            t_feat[j] = t_feat[ncidx[copy_idx]]
    return scores
def __iter__(self):
    rand_num = torch.randperm(self.num_per_batch).view(-1, 1) * self.batch_size
    self.rand_num = rand_num.expand(self.num_per_batch, self.batch_size) + self.range
    self.rand_num_view = self.rand_num.view(-1)
    if self.leftover_flag:
        self.rand_num_view = torch.cat((self.rand_num_view, self.leftover), 0)
    return iter(self.rand_num_view)
def __call__(self, data, subsample=True):
    # deterministically shuffle based on epoch
    g = torch.Generator()
    g.manual_seed(self.epoch)
    indices = list(torch.randperm(len(data), generator=g))
    if not subsample:
        return [data[i] for i in indices]
    return [data[i] for i in self.subsample(indices)]
def _shuffle_training_data(self):
    """
    Shuffles the training data.

    :return: None
    """
    num_examples = len(self.train_x)
    shuffled_indices = torch.randperm(num_examples)
    self.train_x = self.train_x[shuffled_indices]
    self.train_y = self.train_y[shuffled_indices]
def __iter__(self):
    indices = torch.randperm(self.num_samples)
    ret = []
    for i in indices:
        pid = self.pids[i]
        t = self.index_dic[pid]
        if len(t) >= self.num_instances:
            t = np.random.choice(t, size=self.num_instances, replace=False)
        else:
            t = np.random.choice(t, size=self.num_instances, replace=True)
        ret.extend(t)
    return iter(ret)
def random_split(dataset, lengths):
    """
    Randomly split a dataset into non-overlapping new datasets of given lengths.

    Arguments:
        dataset (Dataset): Dataset to be split
        lengths (sequence): lengths of splits to be produced
    """
    if sum(lengths) != len(dataset):
        raise ValueError("Sum of input lengths does not equal the length of the input dataset!")

    indices = randperm(sum(lengths))
    return [Subset(dataset, indices[offset - length:offset])
            for offset, length in zip(_accumulate(lengths), lengths)]
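# Hedged usage sketch for random_split above with a toy TensorDataset
# (assumes the same randperm/Subset/_accumulate imports the function uses,
# i.e. the torch.utils.data internals):
import torch
from torch.utils.data import TensorDataset

ds = TensorDataset(torch.arange(10).float())
train_ds, val_ds = random_split(ds, [8, 2])
assert len(train_ds) == 8 and len(val_ds) == 2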
def sample(self):
    """
    :returns: a random subsample of `range(size)`
    :rtype: torch.autograd.Variable of torch.LongTensor
    """
    subsample_size = self.subsample_size
    if subsample_size is None or subsample_size > self.size:
        subsample_size = self.size
    if subsample_size == self.size:
        result = Variable(torch.LongTensor(list(range(self.size))))
    else:
        result = Variable(torch.randperm(self.size)[:self.subsample_size])
    return result.cuda() if self.use_cuda else result
def _train_on_instance_mixup(self, z, x, **kwargs):
    """Perform mixup in the pixel space"""
    self._train()
    x.requires_grad = True  # for dnorm
    # Train the generator.
    self.optim['g'].zero_grad()
    alpha = self.sample_lambda(x.size(0))
    fake = self.g(z)
    xz = Variable(alpha*x.data + (1.-alpha)*fake.data)
    if self.mixup_ff:
        perm = torch.randperm(fake.size(0)).view(-1).long()
        fake_perm = fake[perm]
        xz_ff = Variable(alpha*fake.data + (1.-alpha)*fake_perm.data)
    _, d_fake = self.d(fake)
    gen_loss = self.g_loss(d_fake)
    if (kwargs['iter']-1) % self.update_g_every == 0:
        gen_loss.backward()
        self.optim['g'].step()
    # Train the discriminator.
    self.optim['d'].zero_grad()
    _, d_xz = self.d(xz.detach())
    _, d_real = self.d(x)
    _, d_fake = self.d(fake.detach())
    d_loss = self.d_loss_fake(d_xz) + self.d_loss_real(d_real) + \
        self.d_loss_fake(d_fake)
    if self.mixup_ff:
        _, d_xz_ff = self.d(xz_ff.detach())
        d_loss += self.d_loss_fake(d_xz_ff)
    d_loss.backward()
    self.optim['d'].step()
    ##################################
    # Also compute the gradient norm.
    # Grad norm for D_REAL
    _, d_real = self.d(x)
    g_norm_x = self.grad_norm(d_real, x)
    if self.dnorm > 0.:
        self.optim['d'].zero_grad()
        (g_norm_x*self.dnorm).backward()
        self.optim['d'].step()
    self.optim['d'].zero_grad()
    ##################################
    losses = {
        'g_loss': gen_loss.data.item(),
        'd_loss': d_loss.data.item(),
        'd_real_norm': g_norm_x.data.item(),
    }
    outputs = {
        'x': x.detach(),
        'gz': fake.detach(),
    }
    return losses, outputs
def prepare(dataset):
    real_idx = torch.randperm(len(dataset)).long()
    r_imgs = torch.stack([dataset[i][0] for i in tqdm(real_idx[:2000])], 0)
    r2_imgs = torch.stack([dataset[i][0] for i in tqdm(real_idx[2000:4000])], 0)
    kmeans = KMeans(n_clusters=50, n_jobs=12)
    X = r_imgs.view(2000, -1).numpy()
    kmeans.fit(X)
    centers = torch.from_numpy(kmeans.cluster_centers_).view(-1, 3, 64, 64).float()
    r_feat = get_features(r_imgs)
    r2_feat = get_features(r2_imgs)
    c_feat = get_features(centers)
    pred = distance(r_imgs, centers, False).min(1)[1].squeeze_()
    return r_imgs, r2_imgs, centers, r_feat, r2_feat, c_feat, pred
def __init__(self, *args, idx=None, split=.8, **kwargs):
    super().__init__(*args, **kwargs)
    self.idx = idx if idx is not None else torch.randperm(len(self))
    tensors_ = []
    for i in range(len(self.tensors)):
        if split > 0:
            tensors_.append(self.tensors[i][self.idx][:int(split * len(self))])
        else:
            tensors_.append(self.tensors[i][self.idx][int(split * len(self)) - 1:])
    self.tensors = tuple(tensors_)
def set_model_permutations(self):
    self.model_permutations = []
    self.model_unpermutations = []
    for n in range(1, self.N):
        permutation = list(range(2 ** (n - 1)))
        if n > 1:
            while permutation == list(range(2 ** (n - 1))):
                permutation = torch.randperm(2 ** (n - 1)).numpy().tolist()
        self.model_permutations.append(permutation)
        unpermutation = list(range(len(permutation)))
        for i in range(len(permutation)):
            unpermutation[permutation[i]] = i
        self.model_unpermutations.append(unpermutation)
def forward(self, x):
    lrt_mean = 0.0
    if self.bias is not None:
        lrt_mean = self.bias
    sigma2 = self.sigma * self.sigma
    if self.permute_sigma:
        # randperm takes an element count, not a shape tuple (cf. the
        # analogous forward below, which uses nelement())
        sigma2 = sigma2.view(-1)[torch.randperm(self.weight.nelement()).cuda()].view(self.weight.shape)
    lrt_std = Variable.sqrt(1e-16 + self.op_nobias(x * x, sigma2))
    if self.training:
        eps = Variable(lrt_std.data.new(lrt_std.size()).normal_())
    else:
        eps = 0.0
    return lrt_mean + lrt_std * eps
def forward(self, x):
    if self.zero_mean:
        lrt_mean = self.op_bias(x, 0.0 * self.weight)
    else:
        lrt_mean = self.op_bias(x, self.weight)
    sigma2 = Variable.exp(self.log_alpha) * self.weight * self.weight
    if self.permute_sigma:
        sigma2 = sigma2.view(-1)[torch.randperm(self.weight.nelement()).cuda()].view(self.weight.shape)
    lrt_std = Variable.sqrt(1e-16 + self.op_nobias(x * x, sigma2))
    if self.training:
        eps = Variable(lrt_std.data.new(lrt_std.size()).normal_())
    else:
        eps = 0.0
    return lrt_mean + lrt_std * eps
def __iter__(self):
    # deterministically shuffle based on epoch
    g = torch.Generator()
    g.manual_seed(self.epoch)
    indices = list(torch.randperm(len(self.dataset), generator=g))

    # add extra samples to make it evenly divisible
    indices += indices[:(self.total_size - len(indices))]
    assert len(indices) == self.total_size

    # subsample
    offset = self.num_samples * self.rank
    indices = indices[offset:offset + self.num_samples]
    assert len(indices) == self.num_samples

    return iter(indices)
def shuffle(self):
    data = list(zip(self.src, self.tgt))
    self.src, self.tgt = zip(*[data[i] for i in torch.randperm(len(data))])
# In[6]:

t_c = [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]  # Temperature in degrees Celsius
t_u = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]  # Unknown units
t_c = torch.tensor(t_c).unsqueeze(1)  # Add a dimension to get B x N_inputs
t_u = torch.tensor(t_u).unsqueeze(1)  # Add a dimension to get B x N_inputs

n_samples = t_u.shape[0]
n_val = int(0.2 * n_samples)

shuffled_indices = torch.randperm(n_samples)
train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]

train_t_u = t_u[train_indices]
train_t_c = t_c[train_indices]
val_t_u = t_u[val_indices]
val_t_c = t_c[val_indices]

train_t_un = 0.1 * train_t_u
val_t_un = 0.1 * val_t_u

# In[15]:
def get_data(dataset, num_bits, train=True, valid_frac=None):
    train_dataset = None
    valid_dataset = None
    test_dataset = None

    if train:
        assert valid_frac is not None

    if dataset == 'imagenet-64-fast':
        root = dataset_root('imagenet64_fast')
        c, h, w = (3, 64, 64)

        if train:
            train_dataset = data.ImageNet64Fast(root=root,
                                                train=True,
                                                download=True,
                                                transform=Preprocess(num_bits))

            num_train = len(train_dataset)
            valid_size = int(np.floor(num_train * valid_frac))
            train_size = num_train - valid_size
            train_dataset, valid_dataset = random_split(train_dataset,
                                                        (train_size, valid_size))
        else:
            test_dataset = data.ImageNet64Fast(root=root,
                                               train=False,
                                               download=True,
                                               transform=Preprocess(num_bits))

    elif dataset == 'cifar-10-fast' or dataset == 'cifar-10':
        root = dataset_root('cifar-10')
        c, h, w = (3, 32, 32)

        if dataset == 'cifar-10-fast':
            dataset_class = data.CIFAR10Fast
            train_transform = tvt.Compose([RandomHorizontalFlipTensor(),
                                           Preprocess(num_bits)])
            test_transform = Preprocess(num_bits)
        else:
            dataset_class = datasets.CIFAR10
            train_transform = tvt.Compose([tvt.RandomHorizontalFlip(),
                                           tvt.ToTensor(),
                                           Preprocess(num_bits)])
            test_transform = tvt.Compose([tvt.ToTensor(),
                                          Preprocess(num_bits)])

        if train:
            train_dataset = dataset_class(root=root,
                                          train=True,
                                          download=True,
                                          transform=train_transform)
            valid_dataset = dataset_class(
                root=root,
                train=True,
                transform=test_transform  # Note different transform.
            )

            num_train = len(train_dataset)
            indices = torch.randperm(num_train).tolist()
            valid_size = int(np.floor(valid_frac * num_train))
            train_idx, valid_idx = indices[valid_size:], indices[:valid_size]
            train_dataset = Subset(train_dataset, train_idx)
            valid_dataset = Subset(valid_dataset, valid_idx)
        else:
            test_dataset = dataset_class(root=root,
                                         train=False,
                                         download=True,
                                         transform=test_transform)

    elif dataset == 'imagenet-32' or dataset == 'imagenet-64':
        if dataset == 'imagenet-32':
            root = dataset_root('imagenet32')
            c, h, w = (3, 32, 32)
            dataset_class = data.ImageNet32
        else:
            root = dataset_root('imagenet64')
            c, h, w = (3, 64, 64)
            dataset_class = data.ImageNet64

        if train:
            train_dataset = dataset_class(
                root=root,
                train=True,
                download=True,
                transform=tvt.Compose([tvt.ToTensor(), Preprocess(num_bits)]))

            num_train = len(train_dataset)
            valid_size = int(np.floor(num_train * valid_frac))
            train_size = num_train - valid_size
            train_dataset, valid_dataset = random_split(train_dataset,
                                                        (train_size, valid_size))
        else:
            test_dataset = dataset_class(
                root=root,
                train=False,
                download=True,
                transform=tvt.Compose([tvt.ToTensor(), Preprocess(num_bits)]))

    elif dataset == 'celeba-hq-64-fast':
        root = dataset_root('celeba_hq_64_fast')
        c, h, w = (3, 64, 64)

        train_transform = tvt.Compose([RandomHorizontalFlipTensor(),
                                       Preprocess(num_bits)])
        test_transform = Preprocess(num_bits)

        if train:
            train_dataset = data.CelebAHQ64Fast(root=root,
                                                train=True,
                                                download=True,
                                                transform=train_transform)
            valid_dataset = data.CelebAHQ64Fast(
                root=root,
                train=True,
                transform=test_transform  # Note different transform.
            )

            num_train = len(train_dataset)
            indices = torch.randperm(num_train).tolist()
            valid_size = int(np.floor(valid_frac * num_train))
            train_idx, valid_idx = indices[valid_size:], indices[:valid_size]
            train_dataset = Subset(train_dataset, train_idx)
            valid_dataset = Subset(valid_dataset, valid_idx)
        else:
            test_dataset = data.CelebAHQ64Fast(root=root,
                                               train=False,
                                               download=True,
                                               transform=test_transform)

    elif dataset == 'mnist':
        root = dataset_root('mnist')
        c, h, w = (1, 28, 28)

        train_transform = tvt.Compose([tvt.ToTensor(), Preprocess(num_bits)])
        test_transform = tvt.Compose([tvt.ToTensor(), Preprocess(num_bits)])

        if train:
            train_dataset = datasets.MNIST(root=root,
                                           train=True,
                                           download=True,
                                           transform=train_transform)
            valid_dataset = datasets.MNIST(
                root=root,
                train=True,
                transform=test_transform  # Note different transform.
            )

            num_train = len(train_dataset)
            indices = torch.randperm(num_train).tolist()
            valid_size = int(np.floor(valid_frac * num_train))
            train_idx, valid_idx = indices[valid_size:], indices[:valid_size]
            train_dataset = Subset(train_dataset, train_idx)
            valid_dataset = Subset(valid_dataset, valid_idx)
        else:
            test_dataset = datasets.MNIST(root=root,
                                          train=False,
                                          download=True,
                                          transform=test_transform)

    elif dataset == 'svhn':
        root = dataset_root('svhn')
        c, h, w = (3, 32, 32)

        train_transform = tvt.Compose([tvt.ToTensor(),
                                       RandomHorizontalFlipTensor(),
                                       Preprocess(num_bits)])
        test_transform = tvt.Compose([tvt.ToTensor(), Preprocess(num_bits)])

        if train:
            train_dataset = datasets.SVHN(root=root,
                                          split='train',
                                          download=True,
                                          transform=train_transform)
            valid_dataset = datasets.SVHN(
                root=root,
                split='train',
                transform=test_transform  # Note different transform.
            )

            num_train = len(train_dataset)
            indices = torch.randperm(num_train).tolist()
            valid_size = int(np.floor(valid_frac * num_train))
            train_idx, valid_idx = indices[valid_size:], indices[:valid_size]
            train_dataset = Subset(train_dataset, train_idx)
            valid_dataset = Subset(valid_dataset, valid_idx)
        else:
            test_dataset = datasets.SVHN(root=root,
                                         split='test',
                                         download=True,
                                         transform=test_transform)

    else:
        raise RuntimeError('Unknown dataset')

    if train:
        return train_dataset, valid_dataset, (c, h, w)
    else:
        return test_dataset, (c, h, w)
def train(epoch, data):
    net.train().to(device)
    # zero the parameter gradients
    optimizer.zero_grad()
    inputs, labels = data
    # print(type(inputs))
    inputs = torch.from_numpy(np.asarray(inputs).astype(np.float32))
    permutation = torch.randperm(inputs.size()[0])
    running_loss = 0
    # print(inputs.size()[0])
    count = 0
    batch_losses = []
    for batch_idx in range(0, inputs.size()[0], BATCH_SIZE):
        t0 = time.time()
        count += 1
        optimizer.zero_grad()
        indices = permutation[batch_idx:batch_idx + BATCH_SIZE]
        batch_x, batch_y = inputs[indices], labels[indices]
        # print(batch_x.shape)
        batch_x = batch_x.reshape(batch_x.size()[0], 1, batch_x.size()[1], batch_x.size()[2])
        # print("###### ", batch_x.shape)
        outputs = net(batch_x.to(device)).to(device)
        # print(outputs.shape)
        loss = criterion(outputs.to(device), batch_y.to(device))
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        batch_losses.append(loss.item())
        sys.stdout.write('\r')
        sys.stdout.write(" Train data epoch %d [%-100s] %d/%d \t Loss:%f" %
                         (epoch, '=' * int((batch_idx / inputs.size()[0]) * 100),
                          batch_idx, inputs.size()[0], loss.item()))
        sys.stdout.flush()
        time.sleep(0.25)
        if batch_idx % inputs.size()[0] == 0:
            test_output = net(batch_x.to(device)).to(device)
            pred_y = torch.argmax(test_output, dim=1)
            print(float(np.array([(x == y) for x, y in zip(batch_y, pred_y)]).astype(int).sum()) /
                  float(batch_y.size()[0]))
            accuracy = float(np.array([(x == y) for x, y in zip(batch_y, pred_y)]).astype(int).sum()) / \
                float(batch_y.size()[0])
            print("numerateur:",
                  float(np.array([(x == y) for x, y in zip(batch_y, pred_y)]).astype(int).sum()))
            print('Epoch: ', epoch,
                  '| train loss: %.4f' % loss.cpu().data.numpy(),
                  '| train accuracy: %.2f' % accuracy)
            print("\n")
    print('Epoch {}, loss {}, took {} seconds'.format(epoch, loss.item(), time.time() - t0))
    print("\n")
def main():
    resnet = resnet34(pretrained=True, progress=True)
    extractor = nn.Sequential(*list(resnet.children())[:-1]).eval().cuda()
    resnet = list(resnet.children())[-1].eval().cuda()

    # Load an existing DeepMDS model or train a new one.
    if os.path.exists('DeepMDS/weights.pt'):
        with open('DeepMDS/layerSizes.pkl', 'rb') as f:
            layerSizes = pickle.load(f)
        deepMDS = DeepMDS(layerSizes)
        deepMDS.load_state_dict(th.load('DeepMDS/weights.pt'))
        deepMDS = deepMDS.cuda()
    else:
        layerSizes = (512, 256, 128, 64, 32)
        with open('DeepMDS/layerSizes.pkl', 'wb') as f:
            pickle.dump(layerSizes, f)
        deepMDS = DeepMDS(layerSizes)
        deepMDS = deepMDS.train().cuda()
        numLayers = len(layerSizes)
        epochs = 10
        batchSize = 1024
        lr = .005
        optimizer = th.optim.Adam(deepMDS.parameters(), lr=lr)
        while True:
            X = getDataLoader(512)
            embeddings = []
            with th.no_grad():
                for x, _ in tqdm(X):
                    x = x.cuda()
                    embeddings.append(extractor(x).squeeze(2).squeeze(2))
            X = th.cat(embeddings)
            # embeddings, _ = loadImageNetEmbeddings()
            # X = th.cuda.FloatTensor(embeddings).cuda()
            N = len(X)
            numBatches = N // batchSize
            for layersToTrain in range(1, numLayers + 1):
                print()
                print('Training layers less than:', layersToTrain)
                for epoch in range(1, epochs + 1):
                    lossSum = 0.
                    print('epoch:', epoch, '\tloss:', end=' ')
                    X = X[th.randperm(N)]
                    for b in range(numBatches):
                        x = X[b * batchSize:(b + 1) * batchSize]
                        activations = deepMDS.trainForward(x, layersToTrain)
                        dist_in1 = th.norm(x[:batchSize // 2] - x[batchSize // 2:],
                                           dim=1, keepdim=True)
                        dist_in2 = th.norm(x[0::2] - x[1::2], dim=1, keepdim=True)
                        losses = []
                        for y in activations:
                            crit = criterion(dist_in1, y[:batchSize // 2], y[batchSize // 2:])
                            if crit:
                                losses += [crit]
                            crit = criterion(dist_in2, y[0::2], y[1::2])
                            if crit:
                                losses += [crit]
                        for py, y in zip(activations, activations[1:]):
                            din = th.norm(py[:batchSize // 2] - py[batchSize // 2:],
                                          dim=1, keepdim=True)
                            crit = criterion(din, y[:batchSize // 2], y[batchSize // 2:])
                            if crit:
                                losses += [crit]
                            din = th.norm(py[0::2] - py[1::2], dim=1, keepdim=True)
                            crit = criterion(din, y[0::2], y[1::2])
                            if crit:
                                losses += [crit]
                        loss = 0.
                        if len(losses):
                            loss = sum(losses) / len(losses)
                        if loss:
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()
                            lossSum += float(loss)
                    print(lossSum / numBatches)
                th.save(deepMDS.state_dict(), 'DeepMDS/weights.pt')

    classifier = nn.Linear(layerSizes[-1], 1000).cuda()
    # either train both mds and a linear layer
    net = nn.Sequential(deepMDS, classifier).cuda()
    # or just a new linear layer
    # net = classifier
    if os.path.exists('DeepMDS/classifierWeights.pt'):
        net.load_state_dict(th.load('DeepMDS/classifierWeights.pt'))
    lossF = nn.CrossEntropyLoss()
    # lr = .00987654321
    lr = .00287654321
    optimizer = th.optim.Adam(net.parameters(), lr)
    epochs = 20
    batchSize = 256
    while True:
        X = getDataLoader(batchSize)
        embeddings = []
        with th.no_grad():
            for x, _ in tqdm(X):
                x = x.cuda()
                embeddings.append(extractor(x).squeeze(2).squeeze(2))
        X = th.cat(embeddings)
        del x, embeddings
        # embeddings, _ = loadImageNetEmbeddings()
        # X = th.cuda.FloatTensor(embeddings).cuda()
        numBatches = len(X) // batchSize
        for epoch in range(1, epochs):
            avg_loss = 0.
            trainacc = 0.
            N = X.shape[0]
            X = X[th.randperm(N)]
            for b in range(numBatches):
                with th.no_grad():
                    emb = X[b * batchSize:(b + 1) * batchSize]
                    y = resnet(emb).argmax(axis=1)
                # forward
                # yhat = net(deepMDS(emb))
                yhat = net(emb)
                # compute error
                loss = lossF(yhat, y)
                avg_loss += float(loss)
                # backprop
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                trainacc += float((yhat.argmax(axis=1) == y).sum()) / float(batchSize)
            print('epoch', epoch)
            print('TrainAcc:', trainacc / numBatches)
            th.save(net.state_dict(), 'DeepMDS/classifierWeights.pt')
            del loss, emb, y, yhat
def makeData(srcFile, tgtFile, train_oracle_file, train_src_rouge_file, srcDicts, tgtDicts):
    src, tgt = [], []
    src_raw = []
    src_rouge = []
    oracle = []
    sizes = []
    count, ignored = 0, 0

    logger.info('Processing %s & %s ...' % (srcFile, tgtFile))
    srcF = open(srcFile, encoding='utf-8')
    tgtF = open(tgtFile, encoding='utf-8')
    oracleF = open(train_oracle_file, encoding='utf-8')
    src_rougeF = open(train_src_rouge_file, encoding='utf-8')

    while True:
        sline = srcF.readline()
        tline = tgtF.readline()
        oline = oracleF.readline()
        src_rouge_line = src_rougeF.readline()

        # normal end of file
        if sline == "" and tline == "":
            break

        # source or target does not have same number of lines
        if sline == "" or tline == "" or src_rouge_line == "":
            logger.info('WARNING: source and target do not have the same number of sentences')
            break

        sline = sline.strip()
        tline = tline.strip()
        oline = oline.strip()
        src_rouge_line = src_rouge_line.strip()

        # source and/or target are empty
        if sline == "" or tline == "" or ('None' in oline) or ('nan' in src_rouge_line):
            logger.info('WARNING: ignoring an empty line (' + str(count + 1) + ')')
            continue

        srcSents = sline.split('##SENT##')[:max_doc_len]
        tgtSents = tline.split('##SENT##')
        rouge_gains = src_rouge_line.split('\t')[1:]
        srcWords = [x.split(' ')[:seq_length] for x in srcSents]
        tgtWords = ' '.join(tgtSents)
        oracle_combination = make_tuple(oline.split('\t')[0])
        # oracle_combination = [(x + 1) for x in oracle_combination] + [0]
        oracle_combination = [x for x in oracle_combination]  # no sentinel
        index_out_of_range = [x >= max_doc_len for x in oracle_combination]
        if any(index_out_of_range):
            logger.info('WARNING: oracle exceeds max_doc_len, ignoring (' + str(count + 1) + ')')
            continue

        src_raw.append(srcSents)
        src.append([srcDicts.convertToIdx(word, neusum.Constants.UNK_WORD)
                    for word in srcWords])
        tgt.append(tgtWords)
        oracle.append(torch.LongTensor(oracle_combination))
        rouge_gains = [[float(gain) for gain in x.split(' ')] for x in rouge_gains]
        # rouge_gains = [torch.FloatTensor(x) for x in rouge_gains]
        # rouge_gains = [(x - torch.min(x)) / (torch.max(x) - torch.min(x)) for x in rouge_gains][:1]
        rouge_gains = [numpy.array(x) for x in rouge_gains]
        rouge_gains = [(x - numpy.min(x)) / (numpy.max(x) - numpy.min(x)) for x in rouge_gains]
        rouge_gains = [torch.from_numpy(np_softmax(x, norm_lambda)).float()
                       for x in rouge_gains]
        src_rouge.append(rouge_gains)
        sizes += [len(srcWords)]

        count += 1

        if count % report_every == 0:
            logger.info('... %d sentences prepared' % count)

    srcF.close()
    tgtF.close()
    oracleF.close()
    src_rougeF.close()

    if shuffle == 1:
        logger.info('... shuffling sentences')
        perm = torch.randperm(len(src))
        src = [src[idx] for idx in perm]
        src_raw = [src_raw[idx] for idx in perm]
        tgt = [tgt[idx] for idx in perm]
        oracle = [oracle[idx] for idx in perm]
        src_rouge = [src_rouge[idx] for idx in perm]
        sizes = [sizes[idx] for idx in perm]

    logger.info('... sorting sentences by size')
    _, perm = torch.sort(torch.Tensor(sizes))
    src = [src[idx] for idx in perm]
    src_raw = [src_raw[idx] for idx in perm]
    tgt = [tgt[idx] for idx in perm]
    oracle = [oracle[idx] for idx in perm]
    src_rouge = [src_rouge[idx] for idx in perm]

    logger.info('Prepared %d sentences (%d ignored due to length == 0 or > %d)' %
                (len(src), ignored, seq_length))

    return src, src_raw, tgt, oracle, src_rouge
def __call__(self, data: Union[Data, HeteroData]):
    edge_types = self.edge_types
    rev_edge_types = self.rev_edge_types

    train_data, val_data, test_data = copy(data), copy(data), copy(data)

    if isinstance(data, HeteroData):
        if edge_types is None:
            raise ValueError(
                "The 'RandomLinkSplit' transform expects 'edge_types' to "
                "be specified when operating on 'HeteroData' objects")

        if not isinstance(edge_types, list):
            edge_types = [edge_types]
            rev_edge_types = [rev_edge_types]

        stores = [data[edge_type] for edge_type in edge_types]
        train_stores = [train_data[edge_type] for edge_type in edge_types]
        val_stores = [val_data[edge_type] for edge_type in edge_types]
        test_stores = [test_data[edge_type] for edge_type in edge_types]
    else:
        rev_edge_types = [None]
        stores = [data._store]
        train_stores = [train_data._store]
        val_stores = [val_data._store]
        test_stores = [test_data._store]

    for item in zip(stores, train_stores, val_stores, test_stores,
                    rev_edge_types):
        store, train_store, val_store, test_store, rev_edge_type = item

        is_undirected = self.is_undirected
        is_undirected &= not store.is_bipartite()
        is_undirected &= (rev_edge_type is None
                          or store._key == data[rev_edge_type]._key)

        edge_index = store.edge_index
        if is_undirected:
            mask = edge_index[0] <= edge_index[1]
            perm = mask.nonzero(as_tuple=False).view(-1)
            perm = perm[torch.randperm(perm.size(0), device=perm.device)]
        else:
            device = edge_index.device
            perm = torch.randperm(edge_index.size(1), device=device)

        num_val = self.num_val
        if isinstance(num_val, float):
            num_val = int(num_val * perm.numel())
        num_test = self.num_test
        if isinstance(num_test, float):
            num_test = int(num_test * perm.numel())

        num_train = perm.numel() - num_val - num_test
        if num_train <= 0:
            raise ValueError("Insufficient number of edges for training")

        train_edges = perm[:num_train]
        val_edges = perm[num_train:num_train + num_val]
        test_edges = perm[num_train + num_val:]
        train_val_edges = perm[:num_train + num_val]

        num_disjoint = self.disjoint_train_ratio
        if isinstance(num_disjoint, float):
            num_disjoint = int(num_disjoint * train_edges.numel())
        if num_train - num_disjoint <= 0:
            raise ValueError("Insufficient number of edges for training")

        # Create data splits:
        self._split(train_store, train_edges[num_disjoint:], is_undirected,
                    rev_edge_type)
        self._split(val_store, train_edges, is_undirected, rev_edge_type)
        self._split(test_store, train_val_edges, is_undirected, rev_edge_type)

        # Create negative samples:
        num_neg_train = 0
        if self.add_negative_train_samples:
            if num_disjoint > 0:
                num_neg_train = int(num_disjoint * self.neg_sampling_ratio)
            else:
                num_neg_train = int(num_train * self.neg_sampling_ratio)
        num_neg_val = int(num_val * self.neg_sampling_ratio)
        num_neg_test = int(num_test * self.neg_sampling_ratio)

        num_neg = num_neg_train + num_neg_val + num_neg_test

        size = store.size()
        if store._key is None or store._key[0] == store._key[-1]:
            size = size[0]
        neg_edge_index = negative_sampling(edge_index, size,
                                           num_neg_samples=num_neg,
                                           method='sparse')

        # Create labels:
        if num_disjoint > 0:
            train_edges = train_edges[:num_disjoint]
        self._create_label(
            store,
            train_edges,
            neg_edge_index[:, num_neg_val + num_neg_test:],
            out=train_store,
        )
        self._create_label(
            store,
            val_edges,
            neg_edge_index[:, :num_neg_val],
            out=val_store,
        )
        self._create_label(
            store,
            test_edges,
            neg_edge_index[:, num_neg_val:num_neg_val + num_neg_test],
            out=test_store,
        )

    return train_data, val_data, test_data
def main():
    parser = argparse.ArgumentParser(description="OGBL-Citation2 (Cluster-GCN)")
    parser.add_argument("--device", type=int, default=0)
    parser.add_argument("--log_steps", type=int, default=1)
    parser.add_argument("--num_partitions", type=int, default=15000)
    parser.add_argument("--num_workers", type=int, default=12)
    parser.add_argument("--num_layers", type=int, default=3)
    parser.add_argument("--hidden_channels", type=int, default=256)
    parser.add_argument("--dropout", type=float, default=0.0)
    parser.add_argument("--batch_size", type=int, default=256)
    parser.add_argument("--lr", type=float, default=0.001)
    parser.add_argument("--epochs", type=int, default=200)
    parser.add_argument("--eval_steps", type=int, default=10)
    parser.add_argument("--runs", type=int, default=10)
    args = parser.parse_args()
    print(args)

    device = f"cuda:{args.device}" if torch.cuda.is_available() else "cpu"
    device = torch.device(device)

    dataset = PygLinkPropPredDataset(name="ogbl-citation2")
    split_edge = dataset.get_edge_split()
    data = dataset[0]
    data.edge_index = to_undirected(data.edge_index, data.num_nodes)

    cluster_data = ClusterData(
        data,
        num_parts=args.num_partitions,
        recursive=False,
        save_dir=dataset.processed_dir,
    )

    loader = ClusterLoader(
        cluster_data,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
    )

    # We randomly pick some training samples that we want to evaluate on:
    torch.manual_seed(12345)
    idx = torch.randperm(split_edge["train"]["source_node"].numel())[:86596]
    split_edge["eval_train"] = {
        "source_node": split_edge["train"]["source_node"][idx],
        "target_node": split_edge["train"]["target_node"][idx],
        "target_node_neg": split_edge["valid"]["target_node_neg"],
    }

    model = GCN(
        data.x.size(-1),
        args.hidden_channels,
        args.hidden_channels,
        args.num_layers,
        args.dropout,
    ).to(device)

    predictor = LinkPredictor(args.hidden_channels, args.hidden_channels, 1,
                              args.num_layers, args.dropout).to(device)

    evaluator = Evaluator(name="ogbl-citation2")
    logger = Logger(args.runs, args)

    for run in range(args.runs):
        model.reset_parameters()
        predictor.reset_parameters()
        optimizer = torch.optim.Adam(list(model.parameters()) +
                                     list(predictor.parameters()), lr=args.lr)

        for epoch in range(1, 1 + args.epochs):
            loss = train(model, predictor, loader, optimizer, device)
            print(f"Run: {run + 1:02d}, Epoch: {epoch:02d}, Loss: {loss:.4f}")

            if epoch > 49 and epoch % args.eval_steps == 0:
                result = test(
                    model,
                    predictor,
                    data,
                    split_edge,
                    evaluator,
                    batch_size=64 * 1024,
                    device=device,
                )
                logger.add_result(run, result)

                train_mrr, valid_mrr, test_mrr = result
                print(f"Run: {run + 1:02d}, "
                      f"Epoch: {epoch:02d}, "
                      f"Loss: {loss:.4f}, "
                      f"Train: {train_mrr:.4f}, "
                      f"Valid: {valid_mrr:.4f}, "
                      f"Test: {test_mrr:.4f}")

        print("ClusterGCN")
        logger.print_statistics(run)
    print("ClusterGCN")
    logger.print_statistics()
# shape = (64, 64, 4)
# denoise_model = Models.get_denoise_model(shape)

# ===================================== Experiment 6 =====================================
# Data used for training and testing are stored on an external hard disk, 'Seagate Expansion Drive'
Inputs = np.load(
    '/media/federico/Seagate Expansion Drive/DataProject/EDS_Data/10_diffShapesBEST/Params_DataSet.npy'
)
Labels = np.load(
    '/media/federico/Seagate Expansion Drive/DataProject/EDS_Data/10_diffShapesBEST/Labels_DataSet.npy'
)

# Generate a random train/test split
split = 0.75
random_indices_poly = torch.randperm(len(Inputs))
train_split_poly = int(split * len(Inputs))
train_random_indices_poly = random_indices_poly[:train_split_poly]
test_random_indices_poly = random_indices_poly[train_split_poly:]

# Class for loading Polygons sequence from a sequence folder
denoise_generator_poly = GP.DenoiseHPatchesPoly_Exp6(
    random_indices_poly=train_random_indices_poly,
    inputs=Inputs,
    labels=Labels,
    batch_size=50)
denoise_generator_val_poly = GP.DenoiseHPatchesPoly_Exp6(
    random_indices_poly=test_random_indices_poly,
    inputs=Inputs,
    labels=Labels,
    batch_size=50)
def makeData(srcFile, tgtFile, srcDicts, tgtDicts):
    src, tgt = [], []
    sizes = []
    count, ignored = 0, 0

    print('Processing %s & %s ...' % (srcFile, tgtFile))
    srcF = codecs.open(srcFile, "r", "utf-8")
    tgtF = codecs.open(tgtFile, "r", "utf-8")

    while True:
        sline = srcF.readline()
        tline = tgtF.readline()

        # normal end of file
        if sline == "" and tline == "":
            break

        # source or target does not have same number of lines
        if sline == "" or tline == "":
            print('WARNING: source and target do not have the same number of sentences')
            break

        sline = sline.strip()
        tline = tline.strip()

        # source and/or target are empty
        if sline == "" or tline == "":
            print('WARNING: ignoring an empty line (' + str(count + 1) + ')')
            continue

        srcWords = sline.split()
        tgtWords = tline.split()

        if len(srcWords) <= opt.seq_length and len(tgtWords) <= opt.seq_length:
            srcTensor, sunky = srcDicts.convertToIdx(srcWords, onmt.Constants.UNK_WORD)
            tgtTensor, tunky = tgtDicts.convertToIdx(tgtWords,
                                                     onmt.Constants.UNK_WORD,
                                                     onmt.Constants.BOS_WORD,
                                                     onmt.Constants.EOS_WORD)
            if (not sunky and not tunky) or not opt.remove_unk:
                src += [srcTensor]
                tgt += [tgtTensor]
                sizes += [len(srcWords)]
            else:
                ignored += 1
        else:
            ignored += 1

        count += 1

        if count % opt.report_every == 0:
            print('... %d sentences prepared' % count)

    srcF.close()
    tgtF.close()

    if opt.shuffle == 1:
        print('... shuffling sentences')
        perm = torch.randperm(len(src))
        src = [src[idx] for idx in perm]
        tgt = [tgt[idx] for idx in perm]
        sizes = [sizes[idx] for idx in perm]

    print('... sorting sentences by size')
    _, perm = torch.sort(torch.Tensor(sizes))
    src = [src[idx] for idx in perm]
    tgt = [tgt[idx] for idx in perm]

    print('Prepared %d sentences (%d ignored due to length == 0 or > %d)' %
          (len(src), ignored, opt.seq_length))

    return src, tgt
def __iter__(self):
    return iter(th.randperm(self.num_samples).long())
def Reset(self):
    self.unuse_index = torch.randperm(self.num_train_sample).tolist()
def train(**options):
    logs_dir = os.path.join(options['logdir'], options['name'])
    writer = SummaryWriter(log_dir=logs_dir)

    f_train_x = h5py.File(os.path.join(options['data_path'], 'train_x.hdf5'), 'r')
    X_train = f_train_x['train_x'][:]
    f_train_x.close()
    f_train_y = h5py.File(os.path.join(options['data_path'], 'train_y.hdf5'), 'r')
    y_train = f_train_y['train_y'][:]
    f_train_y.close()
    f_val_x = h5py.File(os.path.join(options['data_path'], 'val_x.hdf5'), 'r')
    X_val = f_val_x['val_x'][:]
    f_val_x.close()
    f_val_y = h5py.File(os.path.join(options['data_path'], 'val_y.hdf5'), 'r')
    y_val = f_val_y['val_y'][:]
    f_val_y.close()

    # Define datasets
    train_dataset = ClassifierDataset(torch.from_numpy(X_train).float(),
                                      torch.from_numpy(y_train).long())
    val_dataset = ClassifierDataset(torch.from_numpy(X_val).float(),
                                    torch.from_numpy(y_val).long())

    if not options['under']:
        target_list = []
        for _, t in train_dataset:
            target_list.append(t)
        target_list = torch.tensor(target_list)
        target_list = target_list[torch.randperm(len(target_list))]
        class_count = [i for i in get_class_distribution(y_train).values()]
        class_weights = 1. / torch.tensor(class_count, dtype=torch.float)
        print(class_weights)
        class_weights_all = class_weights[target_list]
        weighted_sampler = WeightedRandomSampler(weights=class_weights_all,
                                                 num_samples=len(class_weights_all),
                                                 replacement=True)

    # Run on CUDA if available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    model = AuctionNet()
    # Check if should load checkpoints
    if options['checkpoints']:
        model.load_state_dict(torch.load(options['checkpoints']))
    model.to(device)

    # Define data loaders
    if not options['under']:
        train_loader = DataLoader(dataset=train_dataset,
                                  num_workers=12,
                                  batch_size=options['batch_size'],
                                  sampler=weighted_sampler)
    else:
        train_loader = DataLoader(dataset=train_dataset,
                                  num_workers=12,
                                  batch_size=options['batch_size'],
                                  shuffle=True)
    val_loader = DataLoader(dataset=val_dataset,
                            num_workers=12,
                            batch_size=options['batch_size'])

    if not options['under']:
        criterion = torch.nn.CrossEntropyLoss(weight=class_weights.to(device))
    else:
        criterion = torch.nn.CrossEntropyLoss()
    # optimizer = torch.optim.AdamW(model.parameters(), lr=options['lr'], weight_decay=1e-6, amsgrad=True)
    optimizer = torch.optim.AdamW(model.parameters())
    scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=10, verbose=True)

    accuracy_stats = {'train': [], "val": []}
    loss_stats = {'train': [], "val": []}

    print("Begin training.")
    for e in tqdm(range(1, options['num_epochs'] + 1)):
        # TRAINING
        train_epoch_loss = 0
        train_epoch_acc = 0
        model.train()
        for X_train_batch, y_train_batch in train_loader:
            X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
            optimizer.zero_grad()
            y_train_pred = model(X_train_batch)
            train_loss = criterion(y_train_pred, y_train_batch)
            train_acc = multi_acc(y_train_pred, y_train_batch)
            train_loss.backward()
            optimizer.step()
            train_epoch_loss += train_loss.item()
            train_epoch_acc += train_acc.item()

        # VALIDATION
        with torch.no_grad():
            val_epoch_loss = 0
            val_epoch_acc = 0
            model.eval()
            for X_val_batch, y_val_batch in val_loader:
                X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
                y_val_pred = model(X_val_batch)
                val_loss = criterion(y_val_pred, y_val_batch)
                val_acc = multi_acc(y_val_pred, y_val_batch)
                val_epoch_loss += val_loss.item()
                val_epoch_acc += val_acc.item()

        val_epoch_loss /= len(val_loader)
        val_epoch_acc /= len(val_loader)
        train_epoch_loss /= len(train_loader)
        train_epoch_acc /= len(train_loader)

        scheduler.step(val_epoch_loss)

        loss_stats['train'].append(train_epoch_loss)
        loss_stats['val'].append(val_epoch_loss)
        accuracy_stats['train'].append(train_epoch_acc)
        accuracy_stats['val'].append(val_epoch_acc)

        writer.add_scalar("Test-loss-avg", val_epoch_loss, global_step=e)
        writer.add_scalar("Train-loss-avg", train_epoch_loss, global_step=e)
        writer.add_scalar("Test-accuracy", val_epoch_acc, global_step=e)
        writer.add_scalar("Train-accuracy", train_epoch_acc, global_step=e)

        print(f'Epoch {e + 0:03}: | Train Loss: {train_epoch_loss:.5f} | '
              f'Val Loss: {val_epoch_loss:.5f} | '
              f'Train Acc: {train_epoch_acc:.3f}| '
              f'Val Acc: {val_epoch_acc:.3f}')

        torch.save(model.state_dict(),
                   '/home/wingman2/models/pref/varijanta_under_2m_1/auction' + str(e) + ".pt")
def get_zipped_dataloaders(data_path: str,
                           batch_size: int,
                           num_worker=1,
                           use_valid=False) -> (DataLoader, DataLoader, DataLoader):
    """Returns dataloader instances of the ZippedDataset for training, validation and testing.

    data_path -- Path to the zip-file for the ZippedDataset
    batch_size -- Amount of samples contained per returned tensor of the dataset

    Keyword arguments:
    num_worker -- Used for the DataSet class, should be 1 or results in runtime errors
    use_valid -- If True, the validation and test set are separate DataSets (default False)
    """
    train_transforms = transforms.Compose([
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])
    train_set = ZippedDataset(os.path.join(data_path, 'index-train.zip'),
                              os.path.join(data_path, 'index-train.txt'),
                              transform=train_transforms)
    val_set = ZippedDataset(os.path.join(data_path, 'index-val.zip'),
                            os.path.join(data_path, 'index-val.txt'))

    if use_valid:
        num_sample_valid = int(len(train_set) * 0.1)
        train_set_index = torch.randperm(len(train_set))
        train_loader = DataLoader(train_set,
                                  sampler=torch.utils.data.SubsetRandomSampler(
                                      train_set_index[:-num_sample_valid]),
                                  batch_size=batch_size,
                                  num_workers=num_worker,
                                  pin_memory=True)
        val_loader = DataLoader(train_set,
                                sampler=torch.utils.data.SubsetRandomSampler(
                                    train_set_index[-num_sample_valid:]),
                                batch_size=batch_size,
                                num_workers=num_worker,
                                pin_memory=True)
        test_loader = DataLoader(val_set,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 pin_memory=True)
    else:
        train_loader = DataLoader(train_set,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  pin_memory=True)
        val_loader = DataLoader(val_set,
                                batch_size=batch_size,
                                shuffle=True,
                                pin_memory=True)
        test_loader = val_loader

    return train_loader, val_loader, test_loader
def process_train_MAM_data(spec, config=None):
    """Process training data for the masked acoustic model"""

    dr = config['downsample_rate'] if config is not None else DR
    hidden_size = config['hidden_size'] if config is not None else HIDDEN_SIZE
    mask_proportion = config['mask_proportion'] if config is not None else MASK_PROPORTION
    mask_consecutive = config['mask_consecutive'] if config is not None else MASK_CONSECUTIVE

    with torch.no_grad():
        if len(spec) == 2:  # if self.duo_feature: dataloader will output `source_spec` and `target_spec`
            source_spec = spec[0]
            target_spec = spec[1]
        elif len(spec) == 1:
            source_spec = spec[0]
            target_spec = copy.deepcopy(spec[0])
        else:
            raise NotImplementedError(
                'Input spec should be either (spec,) or (target_spec, source_spec), where `spec` has shape BxTxD.')

        # Down sample
        spec_masked = down_sample_frames(source_spec, dr)  # (batch_size, seq_len, mel_dim * dr)
        spec_stacked = down_sample_frames(target_spec, dr)  # (batch_size, seq_len, mel_dim * dr)
        assert spec_masked.shape[1] == spec_stacked.shape[1], \
            'Input and output spectrogram should have the same shape'

        # Record length for each uttr
        spec_len = np.sum(np.sum(spec_stacked.data.numpy(), axis=-1) != 0, axis=-1)
        spec_len = [int(sl) for sl in spec_len]

        batch_size = spec_stacked.shape[0]
        seq_len = spec_stacked.shape[1]

        pos_enc = position_encoding(seq_len, hidden_size, batch_size)  # (batch_size, seq_len, hidden_size)
        mask_label = np.zeros_like(spec_stacked)
        attn_mask = np.ones((batch_size, seq_len))  # (batch_size, seq_len)

        for idx in range(len(spec_stacked)):
            # determine whether to mask / random / or do nothing to the frame
            dice = torch.rand(1).data.cpu()
            # compute valid len for consecutive masking
            valid_index_range = int(spec_len[idx] - mask_consecutive - 1)
            proportion = int(spec_len[idx] * mask_proportion // mask_consecutive)
            # draw `proportion` samples from the range (0, valid_index_range) and without replacement
            chosen_index = torch.randperm(valid_index_range).data.cpu().numpy()[:proportion]

            # mask to zero
            if bool(dice < 0.8):
                for i in range(mask_consecutive):
                    spec_masked[idx][chosen_index + i] = 0
            # replace to random frames
            elif bool(dice >= 0.8) and bool(dice < 0.9):
                random_index = torch.randperm(valid_index_range).data.cpu().numpy()[:proportion]
                for i in range(mask_consecutive):
                    spec_masked[idx][chosen_index + i] = spec_masked[idx][random_index + i]
            # do nothing
            else:
                pass

            # the gradients will be calculated on all chosen frames
            mask_label[idx][chosen_index] = 1

            # zero vectors for padding dimension
            pos_enc[idx][spec_len[idx]:] = 0
            attn_mask[idx][spec_len[idx]:] = 0

        spec_masked = spec_masked.to(dtype=torch.float32)
        pos_enc = torch.FloatTensor(pos_enc).to(dtype=torch.float32)
        mask_label = torch.ByteTensor(mask_label).to(dtype=torch.uint8)
        attn_mask = torch.FloatTensor(attn_mask).to(dtype=torch.float32)
        spec_stacked = spec_stacked.to(dtype=torch.float32)

    return spec_masked, pos_enc, mask_label, attn_mask, spec_stacked
einet.eval()
train_ll = EinsumNetwork.eval_loglikelihood_batched(einet, train_x, batch_size=batch_size)
valid_ll = EinsumNetwork.eval_loglikelihood_batched(einet, valid_x, batch_size=batch_size)
test_ll = EinsumNetwork.eval_loglikelihood_batched(einet, test_x, batch_size=batch_size)
print("[{}] train LL {} valid LL {} test LL {}".format(
    epoch_count, train_ll / train_N, valid_ll / valid_N, test_ll / test_N))
einet.train()
#####

idx_batches = torch.randperm(train_N, device=device).split(batch_size)

total_ll = 0.0
for idx in idx_batches:
    batch_x = train_x[idx, :]
    outputs = einet.forward(batch_x)
    ll_sample = EinsumNetwork.log_likelihoods(outputs)
    log_likelihood = ll_sample.sum()
    log_likelihood.backward()
    einet.em_process_batch()
    total_ll += log_likelihood.detach().item()

einet.em_update()

if fashion_mnist:
def swd(image1, image2,
        n_pyramids=None, slice_size=7, n_descriptors=128,
        n_repeat_projection=128, proj_per_repeat=4, device="cpu",
        return_by_resolution=False, pyramid_batchsize=128):
    # n_repeat_projection * proj_per_repeat = 512
    # Please change these values according to memory usage.
    # original = n_repeat_projection=4, proj_per_repeat=128
    assert image1.size() == image2.size()
    assert image1.ndim == 4 and image2.ndim == 4

    if n_pyramids is None:
        n_pyramids = int(np.rint(np.log2(image1.size(2) // 16)))
    with torch.no_grad():
        # minibatch laplacian pyramid for cuda memory reasons
        pyramid1 = minibatch_laplacian_pyramid(image1, n_pyramids, pyramid_batchsize, device=device)
        pyramid2 = minibatch_laplacian_pyramid(image2, n_pyramids, pyramid_batchsize, device=device)
        result = []

        for i_pyramid in range(n_pyramids + 1):
            # indices
            n = (pyramid1[i_pyramid].size(2) - 6) * (pyramid1[i_pyramid].size(3) - 6)
            indices = torch.randperm(n)[:n_descriptors]

            # extract patches on CPU
            # patch : 2rank (n_image*n_descriptors, slice_size**2*ch)
            p1 = extract_patches(pyramid1[i_pyramid], indices, slice_size=slice_size, device="cpu")
            p2 = extract_patches(pyramid2[i_pyramid], indices, slice_size=slice_size, device="cpu")

            p1, p2 = p1.to(device), p2.to(device)

            distances = []
            for j in range(n_repeat_projection):
                # random projection directions, shape (slice_size**2*ch, proj_per_repeat)
                rand = torch.randn(p1.size(1), proj_per_repeat).to(device)
                rand = rand / torch.std(rand, dim=0, keepdim=True)  # normalize
                # projection
                proj1 = torch.matmul(p1, rand)
                proj2 = torch.matmul(p2, rand)
                proj1, _ = torch.sort(proj1, dim=0)
                proj2, _ = torch.sort(proj2, dim=0)
                d = torch.abs(proj1 - proj2)
                distances.append(torch.mean(d))

            # swd
            result.append(torch.mean(torch.stack(distances)))

        # average over resolution
        result = torch.stack(result) * 1e3
        if return_by_resolution:
            return result.cpu()
        else:
            return torch.mean(result).cpu()
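# Smoke-test sketch for swd (random 64x64 RGB batches, so the value itself is
# meaningless; assumes the minibatch_laplacian_pyramid and extract_patches
# helpers used above are in scope). With 64-pixel inputs, n_pyramids defaults
# to log2(64 // 16) = 2.
import torch

a = torch.rand(16, 3, 64, 64)
b = torch.rand(16, 3, 64, 64)
score = swd(a, b, device="cpu")  # scalar tensor averaged over pyramid levels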
def add_whole_word_mask(self, source, p):
    is_word_start = self.word_starts(source)
    num_to_mask = int(math.ceil(is_word_start.float().sum() * p))
    num_inserts = 0
    if num_to_mask == 0:
        return source

    if self.mask_span_distribution is not None:
        lengths = self.mask_span_distribution.sample(sample_shape=(num_to_mask,))

        # Make sure we have enough to mask
        cum_length = torch.cumsum(lengths, 0)
        while cum_length[-1] < num_to_mask:
            lengths = torch.cat([lengths, self.mask_span_distribution.sample(sample_shape=(num_to_mask,))], dim=0)
            cum_length = torch.cumsum(lengths, 0)

        # Trim to masking budget
        i = 0
        while cum_length[i] < num_to_mask:
            i += 1
        lengths[i] = num_to_mask - (0 if i == 0 else cum_length[i - 1])
        num_to_mask = i + 1
        lengths = lengths[:num_to_mask]

        # Handle 0-length mask (inserts) separately
        lengths = lengths[lengths > 0]
        num_inserts = num_to_mask - lengths.size(0)
        num_to_mask -= num_inserts
        if num_to_mask == 0:
            return self.add_insertion_noise(source, num_inserts / source.size(0))

        assert (lengths > 0).all()
    else:
        lengths = torch.ones((num_to_mask,)).long()
    assert is_word_start[-1] == 0
    word_starts = is_word_start.nonzero()
    indices = word_starts[torch.randperm(word_starts.size(0))[:num_to_mask]].squeeze(1)
    mask_random = torch.FloatTensor(num_to_mask).uniform_() < self.random_ratio

    source_length = source.size(0)
    assert source_length - 1 not in indices
    to_keep = torch.ones(source_length, dtype=torch.bool)
    is_word_start[-1] = 255  # acts as a long length, so spans don't go over the end of doc
    if self.replace_length == 0:
        to_keep[indices] = 0
    else:
        # keep index, but replace it with [MASK]
        source[indices] = self.mask_idx
        source[indices[mask_random]] = torch.randint(1, len(self.vocab), size=(mask_random.sum(),))

    if self.mask_span_distribution is not None:
        assert len(lengths.size()) == 1
        assert lengths.size() == indices.size()
        lengths -= 1
        while indices.size(0) > 0:
            assert lengths.size() == indices.size()
            lengths -= is_word_start[indices + 1].long()
            uncompleted = lengths >= 0
            indices = indices[uncompleted] + 1
            mask_random = mask_random[uncompleted]
            lengths = lengths[uncompleted]
            if self.replace_length != -1:
                # delete token
                to_keep[indices] = 0
            else:
                # keep index, but replace it with [MASK]
                source[indices] = self.mask_idx
                source[indices[mask_random]] = torch.randint(1, len(self.vocab), size=(mask_random.sum(),))
    else:
        # A bit faster when all lengths are 1
        while indices.size(0) > 0:
            uncompleted = is_word_start[indices + 1] == 0
            indices = indices[uncompleted] + 1
            mask_random = mask_random[uncompleted]
            if self.replace_length != -1:
                # delete token
                to_keep[indices] = 0
            else:
                # keep index, but replace it with [MASK]
                source[indices] = self.mask_idx
                source[indices[mask_random]] = torch.randint(1, len(self.vocab), size=(mask_random.sum(),))

            assert source_length - 1 not in indices

    source = source[to_keep]

    if num_inserts > 0:
        source = self.add_insertion_noise(source, num_inserts / source.size(0))

    return source
def add_permuted_noise(self, tokens, p):
    num_words = len(tokens)
    num_to_permute = math.ceil(((num_words * 2) * p) / 2.0)
    substitutions = torch.randperm(num_words - 2)[:num_to_permute] + 1
    tokens[substitutions] = tokens[substitutions[torch.randperm(num_to_permute)]]
    return tokens
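A quick sanity check of the permutation noise above with the body inlined (so no class is needed); note the +1 offset keeps the first and last positions, which typically hold BOS/EOS, untouched:

import math
import torch

tokens = torch.arange(10)  # toy ids; the ends play the role of BOS/EOS
p = 0.3
num_to_permute = math.ceil(((len(tokens) * 2) * p) / 2.0)
substitutions = torch.randperm(len(tokens) - 2)[:num_to_permute] + 1
tokens[substitutions] = tokens[substitutions[torch.randperm(num_to_permute)]]
print(tokens)  # interior positions shuffled among themselves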
def get_indices_for_classes(data, data_classes):
    # Returns a shuffled tensor of dataset indices whose labels belong to the given classes
    indices = torch.FloatTensor(
        list((data.tensors[1].long() == class_).tolist() for class_ in data_classes)
    ).sum(0).nonzero().long().squeeze()
    return indices[torch.randperm(len(indices))]
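A hypothetical usage with a TensorDataset, which is what the data.tensors[1] access assumes:

import torch
from torch.utils.data import TensorDataset

x = torch.randn(6, 4)
y = torch.tensor([0, 1, 2, 0, 1, 2])
data = TensorDataset(x, y)
idx = get_indices_for_classes(data, data_classes=[0, 2])
print(idx)  # the indices {0, 2, 3, 5} in random order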
def train_func_MI(params, lstm, large, fc, domain_fc, criterion, optimizer,
                  raw_train_set, train_set, teach_lstm=None, teach_fc=None):
    train_loss = 0
    domain_loss = 0
    output_cluster_loss = 0
    mmd = MMD_loss()

    if teach_lstm is not None:
        raw_set = pred_label(raw_train_set[1], teach_lstm, teach_fc, params.num_to_return)
        centers = get_centers(train_set + raw_set, lstm)
        anchor = Anchor1(params.embed_size, params.nclass)

    # materialize the loaders as lists so they can be shuffled and zipped below
    data = [list(DataLoader(train, batch_size=params.batch_size, shuffle=True,
                            collate_fn=generate_batch))
            for train in raw_train_set]
    source_labeled_data = list(DataLoader(train_set, batch_size=params.text_batch_size,
                                          shuffle=True, collate_fn=generate_batch))

    if teach_lstm is not None:
        raw_set_batch = int(float(len(raw_set)) * params.text_batch_size / len(train_set))
        raw_set = list(DataLoader(raw_set, batch_size=raw_set_batch, shuffle=True,
                                  collate_fn=generate_batch))
    else:
        raw_set = source_labeled_data

    random.shuffle(source_labeled_data)
    random.shuffle(data[0])
    random.shuffle(data[1])
    random.shuffle(raw_set)
    print(len(data[0]))
    print(len(data[1]))
    print(len(source_labeled_data))
    print(len(raw_set))

    for (text1, cls1), (text2, cls2), (text3, cls3), (text4, cls4) in zip(
            data[0], data[1], source_labeled_data, raw_set):
        batch_ul, N_, dim_ = text1.size()
        batch_l, _, _ = text3.size()
        optimizer.zero_grad()
        text1, cls1 = text1.to(device), cls1.to(device)
        text2, cls2 = text2.to(device), cls2.to(device)
        text3, cls3 = text3.to(device), cls3.to(device)
        text4, cls4 = text4.to(device), cls4.to(device)
        z1 = lstm(text1)  # (batch, hidden_size)
        z2 = lstm(text2)
        z3 = lstm(text3)
        z4 = lstm(text4)

        # 1. cluster loss, 2. pseudo loss
        if teach_lstm is not None and params.cluster_lamda >= 0:
            output_z4 = anchor(z4, centers, cls4)
            output_z3 = anchor(z3, centers, cls3)
            cluster_loss = output_z4 + output_z3
            output_cluster_loss += cluster_loss
            if params.pseudo_t_lamda >= 0:
                output_z4_super = fc(z4)
                pseudo_t_loss = criterion(output_z4_super, cls4)

        text1 = torch.sum(text1, dim=1).view(batch_ul, dim_)
        text2 = torch.sum(text2, dim=1).view(batch_ul, dim_)
        text3 = torch.sum(text3, dim=1).view(batch_l, dim_)
        neg_n = params.neg_n

        # mutual-information terms I(x1, z1) + I(x2, z2), estimated with shuffled in-batch negatives
        cos = nn.CosineSimilarity(dim=1, eps=1e-6)
        z_ave_1 = text1
        z_z_ave_1_score = cos(large[0](z1), z_ave_1).view(-1, 1)
        z_z_ave_1_shuffle_score = []
        z_ave_2 = text2
        z_z_ave_2_score = cos(large[0](z2), z_ave_2).view(-1, 1)
        z_z_ave_2_shuffle_score = []
        local_loss = 0
        if params.mi_lamda_t != 0 and params.mi_lamda_s != 0:
            for i in range(neg_n):
                r = torch.randperm(z1.size(0))
                z_z_ave_1_shuffle_score.append(cos(large[0](z1), z_ave_1[r]).view(-1, 1))
                r = torch.randperm(z2.size(0))
                z_z_ave_2_shuffle_score.append(cos(large[0](z2), z_ave_2[r]).view(-1, 1))
                local_loss += (-torch.mean(z_z_ave_1_score - z_z_ave_1_shuffle_score[i]) * params.mi_lamda_s
                               - torch.mean(z_z_ave_2_score - z_z_ave_2_shuffle_score[i]) * params.mi_lamda_t)
        elif params.mi_lamda_t == 0 and params.mi_lamda_s != 0:
            for i in range(neg_n):
                r = torch.randperm(z1.size(0))
                z_z_ave_1_shuffle_score.append(cos(large[0](z1), z_ave_1[r]).view(-1, 1))
                local_loss += -torch.mean(z_z_ave_1_score - z_z_ave_1_shuffle_score[i]) * params.mi_lamda_s
        elif params.mi_lamda_t != 0 and params.mi_lamda_s == 0:
            for i in range(neg_n):
                r = torch.randperm(z2.size(0))
                z_z_ave_2_shuffle_score.append(cos(large[0](z2), z_ave_2[r]).view(-1, 1))
                ll = z_z_ave_2_score - z_z_ave_2_shuffle_score[i]
                local_loss += -torch.mean(ll) * params.mi_lamda_t

        # domain-divergence loss
        if params.lamda != 0:
            if params.domain_mode == 'kl':
                z_s = F.softmax(torch.mean(z1, dim=0).view(-1), -1)
                z_t = F.softmax(torch.mean(z2, dim=0).view(-1), -1)
                div_loss = (torch.nn.KLDivLoss(size_average=True)(z_s.log(), z_t)
                            + torch.nn.KLDivLoss(size_average=True)(z_t.log(), z_s))
            elif params.domain_mode == 'mmd':
                div_loss = mmd(z1, z2)
            elif params.domain_mode == 'adv':
                domain_output_s = domain_fc(grad_reverse(z1))
                domain_output_t = domain_fc(grad_reverse(z2))
                div_loss = criterion(domain_output_s, cls1) + criterion(domain_output_t, cls2)
        else:
            div_loss = 0

        # supervised loss
        output = fc(z3)
        super_loss = criterion(output, cls3)
        if teach_lstm is None:
            whole_loss = local_loss + params.lamda * div_loss + super_loss
        else:
            whole_loss = (local_loss + params.lamda * div_loss + super_loss
                          + pseudo_t_loss * params.pseudo_t_lamda
                          + cluster_loss * params.cluster_lamda)

        if params.mi_lamda_t == 0 and params.mi_lamda_s == 0:
            train_loss = 0
        else:
            train_loss += local_loss.item()
        if params.lamda == 0:
            domain_loss = 0
        else:
            domain_loss += div_loss.item()
        whole_loss.backward()
        optimizer.step()

    if teach_lstm is not None:
        print('cluster_loss: %lf' % output_cluster_loss.item())
    return train_loss / len(train_set), domain_loss / len(train_set)
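The repeated randperm calls above implement in-batch negatives: each representation is scored against its own input summary (positive pair) and against a randomly mismatched one (negative pair). A stripped-down sketch of that trick, with all shapes hypothetical:

import torch
import torch.nn as nn

cos = nn.CosineSimilarity(dim=1, eps=1e-6)
z = torch.randn(16, 32)        # encoder outputs
x_sum = torch.randn(16, 32)    # matching input summaries
pos = cos(z, x_sum)
r = torch.randperm(z.size(0))  # mismatched rows act as negatives
neg = cos(z, x_sum[r])
mi_loss = -(pos - neg).mean()  # push positive scores above negative ones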
def train(args, train_dataset, model, tokenizer):
    """Train the model."""
    global extracted_grads
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()

    if args.mix_option == 1:
        logger.info("Random Mixup")
    else:
        logger.info("No Mixup")

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    processor = processors[args.task_name]()
    attacker = get_attacker(args.attacker)
    train_dataloader = DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=True)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)

    # Check if saved optimizer or scheduler states exist
    if os.path.isfile(os.path.join(args.model_name_or_path, "optimizer.pt")) and os.path.isfile(
            os.path.join(args.model_name_or_path, "scheduler.pt")):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True,
        )

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps
        * (torch.distributed.get_world_size() if args.local_rank != -1 else 1),
    )
    logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(
        epochs_trained,
        int(args.num_train_epochs),
        desc="Epoch",
        disable=args.local_rank not in [-1, 0],
    )
    set_seed(args)  # Added here for reproducibility

    ## Add Mixup in Batch
    epoch = 0
    for _ in train_iterator:
        epoch += 1
        if epoch > 1 and args.iterative:
            ## augment the current train dataset with a new batch of adversarial examples
            ## generated by the current model
            orig_data = load_custom_dataset(os.path.join(args.data_dir, "train.tsv"),
                                            all_data=True, number=args.num_adv)
            clsf = ModelClassifier(tokenizer, model, args)
            attack_eval = OpenAttack.attack_evals.DefaultAttackEval(attacker, clsf, progress_bar=True)
            adv_egs = attack_eval.eval(orig_data, visualize=False, return_examples=True)
            adv_examples = processor._create_examples(adv_egs, "adv_train")
            logger.info("Epoch: {}, Number of adversarial examples added to training: {}"
                        .format(epoch, len(adv_examples)))
            adv_dataset = convert_examples_dataset(args, adv_examples, tokenizer)
            train_dataset = ConcatDataset([train_dataset, adv_dataset])

            ## start training on the augmented data (we shuffle the training data)
            # train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
            train_dataloader = DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=True)
            logger.info("Current Num examples = %d", len(train_dataset))

        epoch_iterator = train_dataloader
        for step, batch in enumerate(epoch_iterator):
            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            model.train()
            batch = tuple(t.to(args.device) for t in batch)

            ## normal training; for now, just ignore token type ids
            input_ids = batch[0]  # (bsz, len)
            attention_mask = batch[1]
            batch_size = input_ids.size(0)
            length = input_ids.size(1)
            labels = batch[3]  # (bsz,)
            logits, outputs = model(input_ids, attention_mask)  # (bsz, num_labels)
            # x_embeddings = outputs[2]  # (bsz, len, dim)
            # x_embeddings.register_hook(save_grad("x_emb"))
            # logger.info("#outputs 1: " + str(len(outputs[-1])))
            L_ori = nn.CrossEntropyLoss()(logits.view(-1, args.num_labels), labels.view(-1))

            ## RandomMix
            if args.mix_option == 1:
                idx = torch.randperm(batch_size)
                input_ids_2 = input_ids[idx]
                labels_2 = labels[idx]
                attention_mask_2 = attention_mask[idx]

                ## convert the labels to one-hot
                labels = torch.zeros(batch_size, args.num_labels).to(args.device).scatter_(1, labels.view(-1, 1), 1)
                labels_2 = torch.zeros(batch_size, args.num_labels).to(args.device).scatter_(1, labels_2.view(-1, 1), 1)

                l = np.random.beta(args.alpha, args.alpha)
                # l = max(l, 1-l)  ## not needed when only using labeled examples
                mixed_labels = l * labels + (1 - l) * labels_2

                mix_layer = np.random.choice(args.mix_layers_set, 1)[0]
                mix_layer = mix_layer - 1

                logits, outputs = model(input_ids, attention_mask, input_ids_2, attention_mask_2, l, mix_layer)
                probs = torch.softmax(logits, dim=1)  # (bsz, num_labels)
                L_mix = F.kl_div(probs.log(), mixed_labels, None, None, 'batchmean')
                loss = L_ori + L_mix
            else:
                loss = L_ori

            if args.n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            tr_loss += loss.item()
            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    logs = {}
                    if args.local_rank == -1 and args.evaluate_during_training:
                        # Only evaluate on a single GPU, otherwise metrics may not average well
                        results = evaluate(args, model, tokenizer)
                        for key, value in results.items():
                            eval_key = "eval_{}".format(key)
                            logs[eval_key] = value
                    loss_scalar = (tr_loss - logging_loss) / args.logging_steps
                    learning_rate_scalar = scheduler.get_lr()[0]
                    logs["learning_rate"] = learning_rate_scalar
                    logs["loss"] = loss_scalar
                    logging_loss = tr_loss
                    for key, value in logs.items():
                        tb_writer.add_scalar(key, value, global_step)
                    # print(json.dumps({**logs, **{"step": global_step}}))
                    logging.info("Global Step: " + str(global_step))
                    logging.info("Loss: " + str(loss_scalar))

            if args.max_steps > 0 and global_step > args.max_steps:
                train_iterator.close()
                break

    ## save the final epoch only
    if args.local_rank in [-1, 0]:
        # Save model checkpoint
        output_dir = os.path.join(args.output_dir, "final-checkpoint")
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        model_to_save = model.module if hasattr(model, "module") else model  # Take care of distributed/parallel training
        model_to_save.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)
        torch.save(args, os.path.join(output_dir, "training_args.bin"))
        logger.info("Saving model checkpoint to %s", output_dir)
        torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
        torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
        logger.info("Saving optimizer and scheduler states to %s", output_dir)

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return global_step, tr_loss / global_step
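The RandomMix branch above builds soft targets by one-hot encoding the labels with scatter_ and taking a convex combination of the original and permuted label sets. A self-contained sketch of just that step (sizes hypothetical):

import numpy as np
import torch

labels = torch.tensor([2, 0, 1])
num_labels = 3
one_hot = torch.zeros(3, num_labels).scatter_(1, labels.view(-1, 1), 1)
idx = torch.randperm(3)                        # random pairing within the batch
l = np.random.beta(1.0, 1.0)                   # mixup coefficient
mixed = l * one_hot + (1 - l) * one_hot[idx]   # each row still sums to 1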
def reset_permutation(self):
    n = len(self.data_source)
    if self.shuffle:
        perm = torch.randperm(n)
    else:
        # the original fell through with a bare int here, so .tolist() would fail;
        # keep the identity order when not shuffling
        perm = torch.arange(n)
    self._perm = perm.tolist()
def __iter__(self):
    for i in range(self.n_episodes):
        yield torch.randperm(self.n_classes)[:self.n_way]
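A hypothetical wrapper class around this __iter__, showing how each episode yields n_way distinct class ids (the class name and constructor are assumed for illustration):

import torch

class EpisodicBatchSampler:
    def __init__(self, n_classes, n_way, n_episodes):
        self.n_classes = n_classes
        self.n_way = n_way
        self.n_episodes = n_episodes

    def __iter__(self):
        for i in range(self.n_episodes):
            yield torch.randperm(self.n_classes)[:self.n_way]

for classes in EpisodicBatchSampler(n_classes=20, n_way=5, n_episodes=3):
    print(classes)  # 5 distinct class indices per episode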
# Set device
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using", DEVICE)

# Load previous model and loss history
model, hyperparameters, loss_history = RecurrentTemporalPrediction.load(args.path, DEVICE)
# Set epochs with passed argument
print("Loaded model from", args.path)

# Shuffle the recurrent weights, then resample them from a normal distribution
# matched to the (shuffled) weights' mean and std
weights = model.rnn.weight_hh_l0
shape = weights.shape
weights = weights.reshape(-1)
weights = weights[torch.randperm(weights.shape[0])]
weights_ = np.random.normal(loc=torch.mean(weights).item(),
                            scale=torch.std(weights).item(),
                            size=(1500, 1500))
model.rnn.weight_hh_l0 = torch.nn.Parameter(torch.Tensor(weights_))

# Get data loader for test data
data_loader = model.data_loader([args.dataset], split='all')
print("Loaded dataset from", args.dataset)

# Save history here
test_history = {
    'loss': [],
    'MSE_1': [],
    'MSE_2': [],
    'L1': [],
print("Epoch %d/%d" % (t + 1, args.epochs)) data_iterator = read_data_tensors(args.dataset_path, args.wv_path, batch_size=args.batch_size, maxlen=args.maxlen) for item_number, (x, texts) in enumerate(data_iterator): x = torch.from_numpy(x) # extracting bad samples from the very same batch; not sure if this is OK, so todo negative_samples = torch.stack( tuple([ x[torch.randperm(x.shape[0])[:args.neg_samples]] for _ in range(args.batch_size) ])) # prediction y_pred = model(x, negative_samples) # error computation loss = criterion(y_pred, y) optimizer.zero_grad() loss.backward() optimizer.step() # scheduler.step(epoch=t) if item_number % 1000 == 0:
def forward(self, x, adj, *args):
    B, S, C, H, W = x.size()
    x = x.view(B * S, C, H, W)
    x4_1, x4_2 = self.featuremaps(x)
    _, c, h, w = x4_1.shape

    # global branch
    x4_1 = x4_1.view(B, S, c, h, w).transpose(1, 2).contiguous()
    g_f = self.global_avg_pool(x4_1).view(B, -1)
    g_bn = self.global_bottleneck(g_f)

    # attention branch
    v_f = list()
    for idx, n in enumerate(self.total_split_list):
        v_f.append(self.parts_avgpool[idx](x4_2).view(B, S, c, n))
    v_f = torch.cat(v_f, dim=3)
    f = v_f.transpose(2, 3).contiguous().view(B, S * self.total_split, c)

    # graph propagation
    for i in range(self.num_gb):
        f = self.graph_layers[i](f, adj)
    f = f.view(B, S, self.total_split, c)
    f_fuse = self._attention_op(f)
    att_f = f_fuse.mean(dim=1).view(B, -1)
    att_bn = self.att_bottleneck(att_f)

    if not self.training:
        return torch.cat([g_bn, att_bn], dim=1)

    g_out = self.global_classifier(g_bn)
    att_out = self.att_classifier(att_bn)

    # consistent
    if self.consistent_loss and self.training:
        satt_f_list = list()
        satt_out_list = list()
        # randomly select sub frames
        assert S >= 5
        for num_frame in [S - 3, S - 2, S - 1]:
            sub_index = torch.randperm(S)[:num_frame]
            sub_index = torch.sort(sub_index)[0]
            sub_index = sub_index.long().to(f.device)
            sf = torch.gather(f, dim=1,
                              index=sub_index.view(1, num_frame, 1, 1).repeat(B, 1, self.total_split, c))
            sf_fuse = self._attention_op(sf)
            satt_f = sf_fuse.mean(dim=1).view(B, -1)
            satt_bn = self.att_bottleneck(satt_f)
            satt_out = self.att_classifier(satt_bn)
            satt_f_list.append(satt_f)
            satt_out_list.append(satt_out)

    if self.loss == {'xent'}:
        out_list = [g_out, att_out]
        if self.consistent_loss:
            out_list.extend(satt_out_list)
        return out_list
    elif self.loss == {'xent', 'htri'}:
        out_list = [g_out, att_out]
        f_list = [g_f, att_f]
        if self.consistent_loss:
            out_list.extend(satt_out_list)
            f_list.extend(satt_f_list)
        return out_list, f_list
    else:
        raise KeyError('Unsupported loss: {}'.format(self.loss))
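The sub-frame selection above sorts a truncated randperm so the sampled frames keep their temporal order. The same idea in isolation (sizes hypothetical):

import torch

S, num_frame = 8, 5
sub_index = torch.sort(torch.randperm(S)[:num_frame])[0]  # distinct, ascending frame ids
frames = torch.randn(2, S, 16)  # (B, S, C)
picked = frames[:, sub_index]   # the same random subset for every sample in the batch
print(sub_index)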
def recurrent_generator(self, advantages, num_mini_batch):
    num_processes = self.rewards.size(1)
    assert num_processes >= num_mini_batch, (
        "PPO requires the number of processes ({}) "
        "to be greater than or equal to the number of "
        "PPO mini batches ({}).".format(num_processes, num_mini_batch)
    )
    num_envs_per_batch = num_processes // num_mini_batch
    perm = torch.randperm(num_processes)
    for start_ind in range(0, num_processes, num_envs_per_batch):
        observations_batch = defaultdict(list)
        recurrent_hidden_states_batch = []
        actions_batch = []
        value_preds_batch = []
        return_batch = []
        masks_batch = []
        old_action_log_probs_batch = []
        adv_targ = []

        for offset in range(num_envs_per_batch):
            ind = perm[start_ind + offset]
            for sensor in self.observations:
                observations_batch[sensor].append(self.observations[sensor][:-1, ind])
            recurrent_hidden_states_batch.append(self.recurrent_hidden_states[0:1, ind])
            actions_batch.append(self.actions[:, ind])
            value_preds_batch.append(self.value_preds[:-1, ind])
            return_batch.append(self.returns[:-1, ind])
            masks_batch.append(self.masks[:-1, ind])
            old_action_log_probs_batch.append(self.action_log_probs[:, ind])
            adv_targ.append(advantages[:, ind])

        T, N = self.num_steps, num_envs_per_batch

        # These are all tensors of size (T, N, -1)
        for sensor in observations_batch:
            observations_batch[sensor] = torch.stack(observations_batch[sensor], 1)
        actions_batch = torch.stack(actions_batch, 1)
        value_preds_batch = torch.stack(value_preds_batch, 1)
        return_batch = torch.stack(return_batch, 1)
        masks_batch = torch.stack(masks_batch, 1)
        old_action_log_probs_batch = torch.stack(old_action_log_probs_batch, 1)
        adv_targ = torch.stack(adv_targ, 1)

        # States is just a (N, -1) tensor
        recurrent_hidden_states_batch = torch.stack(recurrent_hidden_states_batch, 1).view(N, -1)

        # Flatten the (T, N, ...) tensors to (T * N, ...)
        for sensor in observations_batch:
            observations_batch[sensor] = _flatten_helper(T, N, observations_batch[sensor])
        actions_batch = _flatten_helper(T, N, actions_batch)
        value_preds_batch = _flatten_helper(T, N, value_preds_batch)
        return_batch = _flatten_helper(T, N, return_batch)
        masks_batch = _flatten_helper(T, N, masks_batch)
        old_action_log_probs_batch = _flatten_helper(T, N, old_action_log_probs_batch)
        adv_targ = _flatten_helper(T, N, adv_targ)

        yield (
            observations_batch,
            recurrent_hidden_states_batch,
            actions_batch,
            value_preds_batch,
            return_batch,
            masks_batch,
            old_action_log_probs_batch,
            adv_targ,
        )
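The generator above shuffles the process indices once and then walks them in disjoint chunks, so every environment lands in exactly one mini-batch. The partitioning logic on its own:

import torch

num_processes, num_mini_batch = 8, 2
num_envs_per_batch = num_processes // num_mini_batch
perm = torch.randperm(num_processes)
for start_ind in range(0, num_processes, num_envs_per_batch):
    batch_envs = perm[start_ind:start_ind + num_envs_per_batch]
    print(batch_envs.tolist())  # disjoint random groups covering all envs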
from analysis import cnn
from generate import scatter1d
import torch
import matplotlib.pyplot as plt
from torchsummary import summary

BATCH_SIZE = 50

data, target = scatter1d.gen_data(2000)
data = torch.log10(data)

# shuffle points along the position axis
ind = torch.randperm(data.size(2))
data2 = data[:, :, ind]

# plt.scatter(data[0, 0], data[0, 1], s=2)
# plt.show()

target = torch.log10(target)

model = cnn.Net().cuda()
optim = torch.optim.Adam(model.parameters(), lr=1e-4)
summary(model, input_size=(2, 450), batch_size=BATCH_SIZE)
cnn.train(model, optim, data, target, batch_size=BATCH_SIZE,
          validation_size=200, epochs=50)
def __iter__(self):
    # the bare `sequential` in the source looks like a bug; assuming it is an attribute
    if self.sequential:
        return iter(self.indices)
    else:
        return iter(self.indices[x] for x in torch.randperm(len(self.indices)).long())
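Contrasting the two return paths above, with a plain list standing in for self.indices:

import torch

indices = list(range(5))
print(list(iter(indices)))                                        # sequential order
print([indices[x] for x in torch.randperm(len(indices)).long()])  # shuffled order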