def main(args, net=None):
    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)

    # load from checkpoint if we're not given an external net
    load_checkpoint = True if net is None else False
    if net is None:
        net = dp.load_predefined_extract_net(args)

    totalset = torch.utils.data.ConcatDataset([trainset, testset])
    dataloader = torch.utils.data.DataLoader(totalset, batch_size=100, shuffle=False, **kwargs)

    # copying model params from checkpoint
    if load_checkpoint:
        filename = os.path.join(outputdir, args.torchmodel)
        if os.path.isfile(filename):
            print("==> loading params from checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            net.load_state_dict(checkpoint['state_dict'])
        else:
            print("==> no checkpoint found at '{}'".format(filename))
            raise ValueError

    if use_cuda:
        net.cuda()

    print('Extracting features ...')
    features, features_dr, labels = extract(dataloader, net, use_cuda)
    print('Done.\n')

    feat_path = os.path.join(datadir, args.feat)
    if args.h5:
        import h5py
        fo = h5py.File(feat_path + '.h5', 'w')
        fo.create_dataset('labels', data=labels)
        fo.create_dataset('Z', data=np.squeeze(features_dr))
        fo.create_dataset('data', data=np.squeeze(features))
        fo.close()
    else:
        fo = open(feat_path + '.pkl', 'wb')
        pickle.dump({'labels': labels, 'Z': np.squeeze(features_dr), 'data': np.squeeze(features)}, fo, protocol=2)
        fo.close()

    return features, features_dr, labels
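# A hedged usage sketch (not part of the original file): main() above only needs an object carrying the
# attributes it reads directly (db, manualSeed, h5, torchmodel, feat), so a SimpleNamespace is enough for
# a quick feature-extraction run. The field values and checkpoint name below are illustrative; when net
# is None, dp.load_predefined_extract_net(args) may read additional fields, so either pass a prebuilt net
# or extend the namespace accordingly.
from types import SimpleNamespace

example_args = SimpleNamespace(db='mnist', manualSeed=0, h5=False,
                               torchmodel='checkpoint_4.pth.tar', feat='pretrained')
# features, features_dr, labels = main(example_args)  # uncomment to run with a trained checkpoint in place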
def __edit_lua_cb(self, widget):
    import shutil
    path = os.path.join(GLib.get_user_config_dir(), "ibus", "libpinyin")
    os.path.exists(path) or os.makedirs(path)
    path = os.path.join(path, "user.lua")
    if not os.path.exists(path):
        src = os.path.join(config.get_data_dir(), "user.lua")
        shutil.copyfile(src, path)
    os.system("xdg-open %s" % path)
def main(args):
    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    logger = None
    if args.tensorboard:
        # One should create folder for storing logs
        loggin_dir = os.path.join(outputdir, 'runs', 'pretraining')
        if not os.path.exists(loggin_dir):
            os.makedirs(loggin_dir)
        loggin_dir = os.path.join(loggin_dir, '%s' % (args.id))
        if args.clean_log:
            remove_files_in_dir(loggin_dir)
        logger = Logger(loggin_dir)

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)

    nepoch = int(np.ceil(np.array(args.niter * args.batchsize, dtype=float) / len(trainset)))
    step = int(np.ceil(np.array(args.step * args.batchsize, dtype=float) / len(trainset)))

    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batchsize, shuffle=True, **kwargs)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=True, **kwargs)

    return pretrain(args, outputdir,
                    {'nlayers': 4, 'dropout': 0.2, 'reluslope': 0.0,
                     'nepoch': nepoch, 'lrate': [args.lr], 'wdecay': [0.0], 'step': step},
                    use_cuda, trainloader, testloader, logger)
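# Worked example (illustrative numbers, not from the repo) of the iteration-to-epoch conversion used
# above: the command line specifies optimizer iterations, which are turned into full passes over the
# training set. With niter = 50000 steps, batchsize = 256 and 60000 training samples:
import numpy as np

niter, batchsize, ntrain = 50000, 256, 60000
nepoch = int(np.ceil(float(niter * batchsize) / ntrain))
print(nepoch)  # -> 214 epochs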
def compressed_data(dataset, n_samples, k, preprocess=None, algo='mknn', isPCA=None, format='mat'):
    datadir = get_data_dir(dataset)

    if format == 'pkl':
        labels, features = load_train_and_validation(load_data, datadir, n_samples)
    elif format == 'h5':
        labels, features = load_train_and_validation(load_data_h5py, datadir, n_samples)
    else:
        labels, features = load_train_and_validation(load_matdata, datadir, n_samples)

    features = feature_transformation(features, preprocessing=preprocess)

    # PCA is computed for Text dataset. Please refer RCC paper for exact details.
    features1 = features.copy()
    if isPCA is not None:
        pca = PCA(n_components=isPCA, svd_solver='full').fit(features)
        features1 = pca.transform(features)

    t0 = time()
    if algo == 'knn':
        weights = kNN(features1, k=k, measure='euclidean')
    else:
        weights = mkNN(features1, k=k, measure='cosine')
    print('The time taken for edge set computation is {}'.format(time() - t0))

    filepath = os.path.join(datadir, 'pretrained')
    if format == 'h5':
        import h5py
        fo = h5py.File(filepath + '.h5', 'w')
        fo.create_dataset('X', data=features)
        fo.create_dataset('w', data=weights[:, :2])
        fo.create_dataset('gtlabels', data=labels)
        fo.close()
    else:
        sio.savemat(filepath + '.mat', mdict={'X': features, 'w': weights[:, :2], 'gtlabels': labels})
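# A minimal call sketch for compressed_data above (values illustrative, not prescriptive): build a
# mutual-kNN edge set with k = 10 over 70000 samples read from data/mnist/, and store X, the edge index
# pairs in the first two columns of w, and the ground-truth labels in data/mnist/pretrained.mat.
# compressed_data('mnist', 70000, 10, preprocess=None, algo='mknn', format='mat')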
def main(args):
    datadir = get_data_dir(args.db)
    featurefile = os.path.join(datadir, args.feat)
    graphfile = os.path.join(datadir, args.g)
    outputfile = os.path.join(datadir, args.out)

    if os.path.isfile(featurefile) and os.path.isfile(graphfile):
        if args.h5:
            data0 = h5py.File(featurefile, 'r')
            data1 = h5py.File(graphfile, 'r')
            data2 = h5py.File(outputfile + '.h5', 'w')
        else:
            fo = open(featurefile, 'rb')
            data0 = pickle.load(fo)
            data1 = sio.loadmat(graphfile)
            fo.close()

        # sanity check: the SDAE features and the graph file must describe the same data
        x0 = data0['data'][:].astype(np.float32).reshape((len(data0['labels'][:]), -1))
        x1 = data1['X'][:].astype(np.float32).reshape((len(data1['gtlabels'].T), -1))
        a, b = np.where(x0 - x1)
        assert not a.size

        joined_data = {'gtlabels': data0['labels'][:],
                       'X': data0['data'][:].astype(np.float32),
                       'Z': data0['Z'][:].astype(np.float32),
                       'w': data1['w'][:].astype(np.float32)}

        if args.h5:
            data2.create_dataset('gtlabels', data=data0['labels'][:])
            data2.create_dataset('X', data=data0['data'][:].astype(np.float32))
            data2.create_dataset('Z', data=data0['Z'][:].astype(np.float32))
            data2.create_dataset('w', data=data1['w'][:].astype(np.float32))
            data0.close()
            data1.close()
            data2.close()
        else:
            sio.savemat(outputfile + '.mat', joined_data)

        return joined_data
    else:
        print('one or both of the input files were not found')
        raise FileNotFoundError
def makeDCCinp(args):
    # pretrained.mat or pretrained.h5 must be placed under the ../data/"db"/ directory. "db" stands for dataset
    datadir = get_data_dir(args.db)

    datafile = 'pretrained'
    if args.h5:
        datafile = os.path.join(datadir, datafile + '.h5')
    else:
        datafile = os.path.join(datadir, datafile + '.mat')
    assert os.path.exists(datafile), 'Training data not found at `{:s}`'.format(datafile)

    if args.h5:
        import h5py
        raw_data = h5py.File(datafile, 'r')
    else:
        raw_data = sio.loadmat(datafile, mat_dtype=True)

    data = raw_data['X'][:].astype(np.float32)
    Z = raw_data['Z'][:].astype(np.float32)
    # correct special case where Z is N x 1 and it gets loaded as 1 x N
    if Z.shape[0] == 1:
        Z = np.transpose(Z)
    labels = np.squeeze(raw_data['gtlabels'][:])
    pairs = raw_data['w'][:, :2].astype(int)

    if args.h5:
        raw_data.close()

    print('\n Loaded `{:s}` dataset for finetuning'.format(args.db))

    numpairs = pairs.shape[0]
    numsamples = data.shape[0]

    # Creating pairwise weights and individual sample weights for the reconstruction loss term
    R = csr_matrix((np.ones(numpairs, dtype=np.float32), (pairs[:, 0], pairs[:, 1])),
                   shape=(numsamples, numsamples))
    R = R + R.transpose()
    nconn = np.squeeze(np.array(np.sum(R, 1)))
    weights = np.average(nconn) / np.sqrt(nconn[pairs[:, 0]] * nconn[pairs[:, 1]])
    pairs = np.hstack((pairs, np.atleast_2d(weights).transpose()))

    return data, labels, pairs, Z, nconn
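# A small, self-contained sketch (not part of the original file) of how the pairwise weights above are
# formed: each edge (i, j) gets weight mean(degree) / sqrt(deg_i * deg_j), so edges between highly
# connected samples are down-weighted. The toy graph below is illustrative.
import numpy as np
from scipy.sparse import csr_matrix

toy_pairs = np.array([[0, 1], [1, 2], [2, 3], [0, 3]])        # 4 samples, 4 mutual-kNN edges
R = csr_matrix((np.ones(len(toy_pairs), dtype=np.float32),
                (toy_pairs[:, 0], toy_pairs[:, 1])), shape=(4, 4))
R = R + R.transpose()                                          # symmetrize the adjacency
deg = np.squeeze(np.array(R.sum(1)))                           # node degrees, here all 2
w = np.average(deg) / np.sqrt(deg[toy_pairs[:, 0]] * deg[toy_pairs[:, 1]])
print(w)                                                       # -> [1. 1. 1. 1.]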
def __init_user_data(self):
    # page User Data
    self.__page_user_data.show()

    self.__frame_lua_script = self.__builder.get_object("frameLuaScript")
    path = os.path.join(config.get_data_dir(), 'user.lua')
    if not os.access(path, os.R_OK):
        self.__frame_lua_script.hide()

    self.__edit_lua = self.__builder.get_object("EditLua")
    self.__edit_lua.connect("clicked", self.__edit_lua_cb)

    self.__import_dictionary = self.__builder.get_object("ImportDictionary")
    self.__import_dictionary.connect("clicked", self.__import_dictionary_cb)

    self.__clear_user_data = self.__builder.get_object("ClearUserData")
    self.__clear_user_data.connect("clicked", self.__clear_user_data_cb, "user")
    self.__clear_all_data = self.__builder.get_object("ClearAllData")
    self.__clear_all_data.connect("clicked", self.__clear_user_data_cb, "all")
def test_retval(self):
    chk = RetValChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("return-value"))
    assert (len(bugs) == 1)
def test_missing_unlock(self):
    chk = CausalityChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("missing-unlock"))
    assert (len(bugs) == 1)
def main(args, net=None):
    global oldassignment

    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    logger = None
    if args.tensorboard:
        # One should create folder for storing logs
        loggin_dir = os.path.join(outputdir, 'runs', 'DCC')
        if not os.path.exists(loggin_dir):
            os.makedirs(loggin_dir)
        loggin_dir = os.path.join(loggin_dir, '%s' % (args.id))
        if args.clean_log:
            remove_files_in_dir(loggin_dir)
        logger = Logger(loggin_dir)

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    startepoch = 0
    kwargs = {'num_workers': 5, 'pin_memory': True} if use_cuda else {}

    # setting up dataset specific objects
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)
    numeval = len(trainset) + len(testset)

    # extracting training data from the pretrained.mat file
    data, labels, pairs, Z, sampweight = makeDCCinp(args)

    # For simplicity, I have created placeholders for each dataset and model
    load_pretraining = True if net is None else False
    if net is None:
        net = dp.load_predefined_extract_net(args)

    # reshaping data for some datasets
    if args.db == 'cmnist':
        data = data.reshape((-1, 1, 28, 28))
    elif args.db == 'ccoil100':
        data = data.reshape((-1, 3, 128, 128))
    elif args.db == 'cytf':
        data = data.reshape((-1, 3, 55, 55))
    elif args.db == 'cyale':
        data = data.reshape((-1, 1, 168, 192))

    totalset = torch.utils.data.ConcatDataset([trainset, testset])

    # computing and initializing the hyperparams
    _sigma1, _sigma2, _lambda, _delta, _delta1, _delta2, lmdb, lmdb_data = computeHyperParams(pairs, Z)
    oldassignment = np.zeros(len(pairs))
    stopping_threshold = int(math.ceil(cfg.STOPPING_CRITERION * float(len(pairs))))

    # Create dataset and random batch sampler for Finetuning stage
    trainset = DCCFT_data(pairs, data, sampweight)
    batch_sampler = DCCSampler(trainset, shuffle=True, batch_size=args.batchsize)

    # copying model params from Pretrained (SDAE) weights file
    if load_pretraining:
        load_weights(args, outputdir, net)

    # creating objects for loss functions, U's are initialized to Z here
    # Criterion1 corresponds to reconstruction loss
    criterion1 = DCCWeightedELoss(size_average=True)
    # Criterion2 corresponds to sum of pairwise and data loss terms
    criterion2 = DCCLoss(Z.shape[0], Z.shape[1], Z, size_average=True)

    if use_cuda:
        net.cuda()
        criterion1 = criterion1.cuda()
        criterion2 = criterion2.cuda()

    # setting up data loader for training and testing phase
    trainloader = torch.utils.data.DataLoader(trainset, batch_sampler=batch_sampler, **kwargs)
    testloader = torch.utils.data.DataLoader(totalset, batch_size=args.batchsize, shuffle=False, **kwargs)

    # setting up optimizer - the bias params should have twice the learning rate w.r.t. weights params
    bias_params = filter(lambda x: ('bias' in x[0]), net.named_parameters())
    bias_params = list(map(lambda x: x[1], bias_params))
    nonbias_params = filter(lambda x: ('bias' not in x[0]), net.named_parameters())
    nonbias_params = list(map(lambda x: x[1], nonbias_params))

    optimizer = optim.Adam([{'params': bias_params, 'lr': 2*args.lr},
                            {'params': nonbias_params},
                            {'params': criterion2.parameters(), 'lr': args.lr},
                            ], lr=args.lr, betas=(0.99, 0.999))

    # this is needed for WARM START
    if args.resume:
        filename = outputdir + '/FTcheckpoint_%d.pth.tar' % args.level
        if os.path.isfile(filename):
            print("==> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            net.load_state_dict(checkpoint['state_dict'])
            criterion2.load_state_dict(checkpoint['criterion_state_dict'])
            startepoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            _sigma1 = checkpoint['sigma1']
            _sigma2 = checkpoint['sigma2']
            _lambda = checkpoint['lambda']
            _delta = checkpoint['delta']
            _delta1 = checkpoint['delta1']
            _delta2 = checkpoint['delta2']
        else:
            print("==> no checkpoint found at '{}'".format(filename))
            raise ValueError

    # This is the actual Algorithm
    flag = 0
    for epoch in range(startepoch, args.nepoch):
        if logger:
            logger.log_value('sigma1', _sigma1, epoch)
            logger.log_value('sigma2', _sigma2, epoch)
            logger.log_value('lambda', _lambda, epoch)

        train(trainloader, net, optimizer, criterion1, criterion2, epoch, use_cuda,
              _sigma1, _sigma2, _lambda, logger)
        Z, U, change_in_assign, assignment = test(testloader, net, criterion2, epoch, use_cuda,
                                                  _delta, pairs, numeval, flag, logger)

        if flag:
            # As long as the change in label assignment < threshold, DCC continues to run.
            # Note: This condition is always met in the very first epoch after the flag is set.
            # This false criterion is overwritten by checking for the condition twice.
            if change_in_assign > stopping_threshold:
                flag += 1
            if flag == 4:
                break

        if (epoch+1) % args.M == 0:
            _sigma1 = max(_delta1, _sigma1 / 2)
            _sigma2 = max(_delta2, _sigma2 / 2)
            if _sigma2 == _delta2 and flag == 0:
                # Start checking for stopping criterion
                flag = 1

            # Save checkpoint
            index = (epoch // args.M) * args.M
            save_checkpoint({'epoch': epoch+1,
                             'state_dict': net.state_dict(),
                             'criterion_state_dict': criterion2.state_dict(),
                             'optimizer': optimizer.state_dict(),
                             'sigma1': _sigma1,
                             'sigma2': _sigma2,
                             'lambda': _lambda,
                             'delta': _delta,
                             'delta1': _delta1,
                             'delta2': _delta2,
                             }, index, filename=outputdir)

    output = {'Z': Z, 'U': U, 'gtlabels': labels, 'w': pairs, 'cluster': assignment}
    sio.savemat(os.path.join(outputdir, 'features'), output)
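# Illustrative sketch (toy numbers, not the repo's defaults) of the annealing schedule driven above:
# every args.M epochs the scale parameters _sigma1/_sigma2 are halved until they reach their lower
# bounds _delta1/_delta2, at which point the stopping criterion starts being monitored (flag = 1).
sigma, delta, M = 8.0, 1.0, 10
for epoch in range(60):
    if (epoch + 1) % M == 0:
        sigma = max(delta, sigma / 2)   # 8 -> 4 -> 2 -> 1, then clamped at delta
print(sigma)  # -> 1.0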
def test_SSL(self):
    chk = CondChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("SSL"))
    assert (len(bugs) == 2)  # (X, Y), (Y, X)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', dest='db', type=str, default='mnist', help='name of the dataset')
    args = parser.parse_args()

    np.random.seed(cfg.RNG_SEED)
    random.seed(cfg.RNG_SEED)

    datadir = get_data_dir(args.db)
    strpath = osp.join(datadir, 'traindata.mat')

    if not os.path.exists(strpath):
        if args.db == 'mnist':
            make_mnist_data(datadir)
        elif args.db == 'reuters':
            make_reuters_data(datadir, 10000)
        elif args.db == 'ytf':
            make_misc_data(datadir, 'YTFrgb.pkl', [55, 55, 3])
        elif args.db == 'coil100':
            make_misc_data(datadir, 'coil100rgb.pkl', [128, 128, 3])
        elif args.db == 'yale':
            make_misc_data(datadir, 'yale_DoG.pkl', [168, 192, 1])
        elif args.db == 'rcv1':
            make_misc_data(datadir, 'reuters.pkl', [1, 1, 2000])
import extract_feature
import copyGraph
import DCC


class IdentityNet(nn.Module):
    """Substitute for the autoencoder, for visualizing and debugging just the clustering part"""

    def __init__(self):
        super(IdentityNet, self).__init__()

    def forward(self, x):
        # internal encoding is x and output is also just x
        return x, x


datadir = get_data_dir(dp.easy.name)
N = 600

# first create the data
X, labels = make_data.make_easy_visual_data(datadir, N)

# visualize data
# we know there are 3 classes
for c in range(3):
    x = X[labels == c, :]
    plt.scatter(x[:, 0], x[:, 1], label=str(c))
plt.legend()
plt.show()

# then construct mkNN graph
k = 50
def main():
    global args
    args = parser.parse_args()
    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    reluslope = 0.0
    kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)

    if args.db == 'mnist':
        net = extract_sdae_mnist(slope=reluslope, dim=args.dim)
    elif args.db == 'reuters' or args.db == 'reuters10k' or args.db == 'rcv1':
        net = extract_sdae_reuters(slope=reluslope, dim=args.dim)
    elif args.db == 'ytf':
        net = extract_sdae_ytf(slope=reluslope, dim=args.dim)
    elif args.db == 'coil100':
        net = extract_sdae_coil100(slope=reluslope, dim=args.dim)
    elif args.db == 'yale':
        net = extract_sdae_yale(slope=reluslope, dim=args.dim)
    elif args.db == 'cmnist':
        net = extract_convsdae_mnist(slope=reluslope)
    elif args.db == 'ccoil100':
        net = extract_convsdae_coil100(slope=reluslope)
    elif args.db == 'cytf':
        net = extract_convsdae_ytf(slope=reluslope)
    elif args.db == 'cyale':
        net = extract_convsdae_yale(slope=reluslope)

    totalset = torch.utils.data.ConcatDataset([trainset, testset])
    dataloader = torch.utils.data.DataLoader(totalset, batch_size=100, shuffle=False, **kwargs)

    # copying model params from checkpoint
    filename = os.path.join(outputdir, args.torchmodel)
    if os.path.isfile(filename):
        print("==> loading params from checkpoint '{}'".format(filename))
        checkpoint = torch.load(filename)
        net.load_state_dict(checkpoint['state_dict'])
    else:
        print("==> no checkpoint found at '{}'".format(filename))
        raise ValueError

    if use_cuda:
        net.cuda()

    print('Extracting features ...')
    features, features_dr, labels = extract(dataloader, net, use_cuda)
    print('Done.\n')

    feat_path = os.path.join(datadir, args.feat)
    if args.h5:
        import h5py
        fo = h5py.File(feat_path + '.h5', 'w')
        fo.create_dataset('labels', data=labels)
        fo.create_dataset('Z', data=np.squeeze(features_dr))
        fo.create_dataset('data', data=np.squeeze(features))
        fo.close()
    else:
        fo = open(feat_path + '.pkl', 'wb')
        cPickle.dump({'labels': labels, 'Z': np.squeeze(features_dr), 'data': np.squeeze(features)}, fo, protocol=2)
        fo.close()
def test_intovfl(self):
    chk = IntOvflChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("integer-overflow"))
    assert (len(bugs) == 1)
def test_FSB(self):
    chk = FSBChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("format-string-bug"))
    assert (len(bugs) == 1)
def test_memleak(self):
    chk = CausalityChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("memory-leak"))
    assert (len(bugs) == 1)
def test_arg(self):
    chk = ArgChecker()
    exp = Explorer(chk)
    bugs = exp.explore_parallel(config.get_data_dir("argument"))
    assert (len(bugs) == 1)
def main():
    global args, oldassignment
    args = parser.parse_args()
    datadir = get_data_dir(args.db)
    outputdir = get_output_dir(args.db)

    if args.tensorboard:
        # One should create folder for storing logs
        loggin_dir = os.path.join(outputdir, 'runs', 'DCC')
        if not os.path.exists(loggin_dir):
            os.makedirs(loggin_dir)
        configure(os.path.join(loggin_dir, '%s' % (args.id)))

    use_cuda = torch.cuda.is_available()

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)
        torch.backends.cudnn.enabled = True
        cudnn.benchmark = True

    reluslope = 0.0
    startepoch = 0
    kwargs = {'num_workers': 5, 'pin_memory': True} if use_cuda else {}

    # setting up dataset specific objects
    trainset = DCCPT_data(root=datadir, train=True, h5=args.h5)
    testset = DCCPT_data(root=datadir, train=False, h5=args.h5)
    numeval = len(trainset) + len(testset)

    # For simplicity, I have created placeholders for each dataset and model
    if args.db == 'mnist':
        net_s = extract_sdae_mnist(slope=reluslope, dim=args.dim)
        net_z = extract_sdae_mnist(slope=reluslope, dim=args.dim)
    else:
        print("db not supported: '{}'".format(args.db))
        raise ValueError("db not supported: '{}'".format(args.db))

    totalset = torch.utils.data.ConcatDataset([trainset, testset])

    # extracting training data from the pretrained.mat file
    data, labels, pairs, Z, sampweight = makeDCCinp(args)

    # computing and initializing the hyperparams
    _sigma1, _sigma2, _lambda, _delta, _delta1, _delta2, lmdb, lmdb_data = computeHyperParams(pairs, Z, args.step)
    oldassignment = np.zeros(len(pairs))
    stopping_threshold = int(math.ceil(cfg.STOPPING_CRITERION * float(len(pairs))))

    # Create dataset and random batch sampler for Finetuning stage
    trainset = DCCFT_data(pairs, data, sampweight)
    batch_sampler = DCCSampler(trainset, shuffle=True, batch_size=args.batchsize)

    # setting up data loader for training and testing phase
    trainloader = torch.utils.data.DataLoader(trainset, batch_sampler=batch_sampler, **kwargs)
    testloader = torch.utils.data.DataLoader(totalset, batch_size=args.batchsize, shuffle=False, **kwargs)

    if args.step == 1:
        pretraining_filename = os.path.join(outputdir, args.torchmodel_pretraining)
        if os.path.isfile(pretraining_filename):
            print("==> loading params from pretraining checkpoint '{}'".format(pretraining_filename))
            pretraining_checkpoint = torch.load(pretraining_filename)
        else:
            print("==> no pretraining checkpoint found at '{}'".format(pretraining_filename))
            raise ValueError("no pretraining checkpoint found at '{}'".format(pretraining_filename))

        # setting up optimizer - the bias params should have twice the learning rate w.r.t. weights params
        bias_params = filter(lambda x: ('bias' in x[0]), net_s.named_parameters())
        bias_params = list(map(lambda x: x[1], bias_params))
        nonbias_params = filter(lambda x: ('bias' not in x[0]), net_s.named_parameters())
        nonbias_params = list(map(lambda x: x[1], nonbias_params))

        # copying model params from Pretrained (SDAE) weights file
        net_s.load_state_dict(pretraining_checkpoint['state_dict'])

        criterion_sc = DCCLoss(Z.shape[0], Z.shape[1], Z, size_average=True)
        optimizer_sc = optim.Adam([{'params': bias_params, 'lr': 2*args.lr},
                                   {'params': nonbias_params},
                                   {'params': criterion_sc.parameters(), 'lr': args.lr},
                                   ], lr=args.lr, betas=(0.99, 0.999))
        criterion_rec = DCCWeightedELoss(size_average=True)  # OLD

        if use_cuda:
            net_s.cuda()
            criterion_sc = criterion_sc.cuda()
            criterion_rec = criterion_rec.cuda()

        # this is needed for WARM START
        if args.resume:
            filename = outputdir + '/FTcheckpoint_%d.pth.tar' % args.level
            if os.path.isfile(filename):
                print("==> loading checkpoint '{}'".format(filename))
                checkpoint = torch.load(filename)
                net_s.load_state_dict(checkpoint['state_dict_s'])
                criterion_sc.load_state_dict(checkpoint['criterion_state_dict_sc'])
                startepoch = checkpoint['epoch']
                optimizer_sc.load_state_dict(checkpoint['optimizer_sc'])
                _sigma1 = checkpoint['sigma1']
                _sigma2 = checkpoint['sigma2']
                _lambda = checkpoint['lambda']
                _delta = checkpoint['delta']
                _delta1 = checkpoint['delta1']
                _delta2 = checkpoint['delta2']
            else:
                print("==> no checkpoint found at '{}'".format(filename))
                raise ValueError("no checkpoint found at '{}'".format(filename))

        # This is the actual Algorithm
        flag = 0
        for epoch in range(startepoch, args.nepoch):
            print('sigma1', _sigma1, epoch)
            print('sigma2', _sigma2, epoch)
            print('lambda', _lambda, epoch)
            if args.tensorboard:
                log_value('sigma1', _sigma1, epoch)
                log_value('sigma2', _sigma2, epoch)
                log_value('lambda', _lambda, epoch)

            train_step_1(trainloader, net_s, optimizer_sc, criterion_rec, criterion_sc, epoch, use_cuda,
                         _sigma1, _sigma2, _lambda)
            Z, U, change_in_assign, assignment = test(testloader, net_s, criterion_sc, epoch, use_cuda,
                                                      _delta, pairs, numeval, flag)

            if flag:
                # As long as the change in label assignment < threshold, DCC continues to run.
                # Note: This condition is always met in the very first epoch after the flag is set.
                # This false criterion is overwritten by checking for the condition twice.
                if change_in_assign > stopping_threshold:
                    flag += 1

            if (epoch+1) % args.M == 0:
                _sigma1 = max(_delta1, _sigma1 / 2)
                _sigma2 = max(_delta2, _sigma2 / 2)
                if _sigma2 == _delta2 and flag == 0:
                    # Start checking for stopping criterion
                    flag = 1

                # Save checkpoint
                index = (epoch // args.M) * args.M
                save_checkpoint({'epoch': epoch+1,
                                 'state_dict_s': net_s.state_dict(),
                                 'criterion_state_dict_sc': criterion_sc.state_dict(),
                                 'optimizer_sc': optimizer_sc.state_dict(),
                                 'sigma1': _sigma1,
                                 'sigma2': _sigma2,
                                 'lambda': _lambda,
                                 'delta': _delta,
                                 'delta1': _delta1,
                                 'delta2': _delta2,
                                 }, index, filename=outputdir)

        sio.savemat(os.path.join(outputdir, 'features_s'),
                    {'Z': Z, 'U': U, 'gtlabels': labels, 'w': pairs, 'cluster': assignment})

    elif args.step == 2:
        filename = os.path.join(outputdir, args.torchmodel)
        if os.path.isfile(filename):
            print("==> loading params from checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
        else:
            print("==> no checkpoint found at '{}'".format(filename))
            raise ValueError("no checkpoint found at '{}'".format(filename))

        # copying model params of s encoder from step 1
        net_s.load_state_dict(checkpoint['state_dict_s'])
        # freezing net_s
        for param in net_s.parameters():
            param.requires_grad = False

        net_d = DecoderNet(1)
        criterion_d = nn.MSELoss()

        # setting up optimizer - the bias params should have twice the learning rate w.r.t. weights params
        bias_params = filter(lambda x: ('bias' in x[0]), net_z.named_parameters())
        bias_params = list(map(lambda x: x[1], bias_params))
        nonbias_params = filter(lambda x: ('bias' not in x[0]), net_z.named_parameters())
        nonbias_params = list(map(lambda x: x[1], nonbias_params))

        criterion_zc = DCCLoss(Z.shape[0], Z.shape[1], Z, size_average=True)
        optimizer_zc = optim.Adam([{'params': bias_params, 'lr': 2*args.lr},
                                   {'params': nonbias_params},
                                   {'params': criterion_zc.parameters(), 'lr': args.lr},
                                   ], lr=args.lr, betas=(0.99, 0.999))
        optimizer_d = torch.optim.Adam(net_d.parameters(), lr=0.001)
        criterion_rec = DCCWeightedELoss(size_average=True)

        if use_cuda:
            net_d.cuda()
            net_s.cuda()
            net_z.cuda()
            criterion_zc = criterion_zc.cuda()
            criterion_d = criterion_d.cuda()
            criterion_rec = criterion_rec.cuda()

        flag = 0
        for epoch in range(startepoch, args.nepoch):
            print('sigma1', _sigma1, epoch)
            print('sigma2', _sigma2, epoch)
            print('lambda', _lambda, epoch)
            if args.tensorboard:
                log_value('sigma1', _sigma1, epoch)
                log_value('sigma2', _sigma2, epoch)
                log_value('lambda', _lambda, epoch)

            train_step_2(trainloader, net_s, net_z, net_d, optimizer_zc, optimizer_d, criterion_rec,
                         criterion_zc, criterion_d, epoch, use_cuda, _sigma1, _sigma2, _lambda)
            Z, U, change_in_assign, assignment = test(testloader, net_z, criterion_zc, epoch, use_cuda,
                                                      _delta, pairs, numeval, flag)

            if flag:
                # As long as the change in label assignment < threshold, DCC continues to run.
                # Note: This condition is always met in the very first epoch after the flag is set.
                # This false criterion is overwritten by checking for the condition twice.
                if change_in_assign > stopping_threshold:
                    flag += 1

            if (epoch+1) % args.M == 0:
                _sigma1 = max(_delta1, _sigma1 / 2)
                _sigma2 = max(_delta2, _sigma2 / 2)
                if _sigma2 == _delta2 and flag == 0:
                    # Start checking for stopping criterion
                    flag = 1

                # Save checkpoint
                index = (epoch // args.M) * args.M
                save_checkpoint({'epoch': epoch+1,
                                 'state_dict_s': net_s.state_dict(),
                                 'state_dict_z': net_z.state_dict(),
                                 'state_dict_d': net_d.state_dict(),
                                 'criterion_state_dict_zc': criterion_zc.state_dict(),
                                 'optimizer_zc': optimizer_zc.state_dict(),
                                 'sigma1': _sigma1,
                                 'sigma2': _sigma2,
                                 'lambda': _lambda,
                                 'delta': _delta,
                                 'delta1': _delta1,
                                 'delta2': _delta2,
                                 }, index, filename=outputdir)

        sio.savemat(os.path.join(outputdir, 'features_z'),
                    {'Z': Z, 'U': U, 'gtlabels': labels, 'w': pairs, 'cluster': assignment})

    else:
        raise ValueError("step not recognized!")