print('similarity: ',similarity) return similarity ######################################################################################################################## # Inits print('Inits...') net=network.Net(inputsize,taskcla,nhid=args.nhid,args=args).cuda() utils.print_model_report(net) appr=approach.Appr(net,args=args) print(appr.criterion) utils.print_optimizer_config(appr.optimizer) print('-'*100) check_federated = approach.CheckFederated() similarity = [0] history_mask_back = [] history_mask_pre = [] similarities = [] for t,ncla in taskcla: print('*'*100) print('Task {:2d} ({:s})'.format(t,data[t]['name'])) print('*'*100) if 'mtl' in args.approach:
def train(args):
    """Run the sequential (continual-learning) training loop for ``args``.

    Loads the experiment via the module-level ``load_*`` helpers, trains one
    task after another, evaluates on every task seen so far, and writes the
    accuracy matrix to ``args.output`` after each task.

    Args:
        args: Parsed command-line namespace.  Fields read here: ``seed``,
            ``nepochs``, ``lr``, ``print_stats``, ``approach``, ``output``.

    Returns:
        The trained network.  On a training error the partially trained
        network is returned after the traceback is printed.

    NOTE(review): relies on module-level globals (``use_cuda``, ``dataloader``,
    ``network``, ``approach``, ``utils``) that the ``load_*`` helpers are
    assumed to set up -- confirm against the rest of the module.
    """
    # Load settings (side effects on module-level globals).
    define_seed(args)
    load_cuda_settings()
    load_experiment(args)
    load_network_and_approach(args)
    tstart = time.time()

    # Load data.
    print('Load data...')
    data, taskcla, inputsize = dataloader.get(seed=args.seed)
    print('Input size =', inputsize, '\nTask info =', taskcla)

    # Initialise network and approach.
    print('Inits...')
    net = network.Net(inputsize, taskcla)
    if use_cuda:
        net = net.cuda()
    params = utils.calculate_parameters(net)
    print('Num parameters = %s' % (params))
    if args.print_stats:
        utils.print_model_report(net, params)
    appr = approach.Appr(net, nepochs=args.nepochs, lr=args.lr, args=args,
                         use_cuda=use_cuda)
    if args.print_stats:
        utils.print_optimizer_config(appr.optimizer)
    print('-' * 100)

    try:
        # acc[t, u] / lss[t, u]: accuracy / loss on task u after training task t.
        acc = np.zeros((len(taskcla), len(taskcla)), dtype=np.float32)
        lss = np.zeros((len(taskcla), len(taskcla)), dtype=np.float32)
        for t, ncla in taskcla:
            if args.approach == 'joint':
                # Joint training accumulates the data of all tasks seen so
                # far.  We do not put it to GPU (may not fit).
                if t == 0:
                    xtrain = data[t]['train']['x']
                    ytrain = data[t]['train']['y']
                    xvalid = data[t]['valid']['x']
                    yvalid = data[t]['valid']['y']
                    task_t = t * torch.ones(xtrain.size(0)).int()
                    task_v = t * torch.ones(xvalid.size(0)).int()
                    task = [task_t, task_v]
                else:
                    xtrain = torch.cat((xtrain, data[t]['train']['x']))
                    ytrain = torch.cat((ytrain, data[t]['train']['y']))
                    xvalid = torch.cat((xvalid, data[t]['valid']['x']))
                    yvalid = torch.cat((yvalid, data[t]['valid']['y']))
                    task_t = torch.cat(
                        (task_t,
                         t * torch.ones(data[t]['train']['y'].size(0)).int()))
                    task_v = torch.cat(
                        (task_v,
                         t * torch.ones(data[t]['valid']['y'].size(0)).int()))
                    task = [task_t, task_v]
            else:
                # Sequential training uses only the current task's data.
                xtrain = data[t]['train']['x']
                ytrain = data[t]['train']['y']
                xvalid = data[t]['valid']['x']
                yvalid = data[t]['valid']['y']
                if use_cuda:
                    xtrain = xtrain.cuda()
                    ytrain = ytrain.cuda()
                    xvalid = xvalid.cuda()
                    yvalid = yvalid.cuda()
                task = t

            # Train on the current task.
            appr.train(task, xtrain, ytrain, xvalid, yvalid)

            # Test on every task seen so far.
            for u in range(t + 1):
                xtest = data[u]['test']['x']
                ytest = data[u]['test']['y']
                if use_cuda:
                    xtest = xtest.cuda()
                    ytest = ytest.cuda()
                test_loss, test_acc = appr.eval(u, xtest, ytest)
                acc[t, u] = test_acc
                lss[t, u] = test_loss

            # Save the accuracy matrix after every task.
            print('Save at ' + args.output)
            np.savetxt(args.output, acc, '%.4f')
    except Exception:
        # BUGFIX: was a bare ``except:`` which also swallowed
        # KeyboardInterrupt/SystemExit.  Catch only real errors, report the
        # traceback, and hand back the partially trained network.
        traceback.print_exc()
        return net

    # Done: print the full accuracy matrix.
    print('*' * 100)
    print('Accuracies =')
    for i in range(acc.shape[0]):
        print('\t', end='')
        for j in range(acc.shape[1]):
            print('{:5.1f}% '.format(100 * acc[i, j]), end='')
        print()
    print('*' * 100)
    print('Done!')
    print('[Elapsed time = {:.1f} h]'.format(
        (time.time() - tstart) / (60 * 60)))

    # Optionally store detailed per-task logs (if the approach keeps them).
    if hasattr(appr, 'logs'):
        if appr.logs is not None:
            # Save task names and final per-task metrics.
            from copy import deepcopy
            appr.logs['task_name'] = {}
            appr.logs['test_acc'] = {}
            appr.logs['test_loss'] = {}
            for t, ncla in taskcla:
                appr.logs['task_name'][t] = deepcopy(data[t]['name'])
                appr.logs['test_acc'][t] = deepcopy(acc[t, :])
                appr.logs['test_loss'][t] = deepcopy(lss[t, :])
            # Pickle the logs (gzip-compressed).
            import gzip
            import pickle
            with gzip.open(os.path.join(appr.logpath), 'wb') as output:
                pickle.dump(appr.logs, output, pickle.HIGHEST_PROTOCOL)
    return net
optimizer = torch.optim.SGD(myModel.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.decay) # Initilize the evaluators used to measure the performance of the system. t_classifier = trainer.EvaluatorFactory.get_evaluator("trainedClassifier") # Trainer object used for training myTrainer = trainer.TrainerFactory.get_trainer(myModel, args, optimizer, t_classifier, taskcla) ######################################################################################################################## utils.print_model_report(myModel) utils.print_optimizer_config(optimizer) print('-' * 100) # Loop tasks acc = np.zeros((len(taskcla), len(taskcla)), dtype=np.float32) lss = np.zeros((len(taskcla), len(taskcla)), dtype=np.float32) kwargs = {'num_workers': 8, 'pin_memory': True} for t, ncla in taskcla: print("tasknum:", t) # Add new classes to the train, and test iterator train_loader = train_dataset_loaders[t] test_loader = test_dataset_loaders[t] myTrainer.train(train_loader, test_loader, t)
def main():
    """Entry point: train a continual-learning model task by task on CPU.

    Builds the dataset loaders, model, optimizer, evaluator and trainer from
    the parsed command-line arguments, then trains the tasks sequentially,
    evaluating on every task seen so far and writing the accuracy matrix plus
    a per-task checkpoint to disk after each task.
    """
    args = get_args()

    ########################################################################
    # Run name drives both the result file and the checkpoint names.
    run_name = '{}_{}_{}_{}_lamb_{}_lr_{}_batch_{}_epoch_{}'.format(
        args.date, args.dataset, args.trainer, args.seed, args.lamb, args.lr,
        args.batch_size, args.nepochs)
    if args.output == '':
        args.output = './result_data/' + run_name + '.txt'

    ########################################################################
    # Seed every RNG we rely on for a reproducible run.
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    device = torch.device("cpu")
    # torch.backends.cudnn.benchmark = False

    def _ensure_dir(path, message):
        # Create *path* (announcing it first) unless it already exists.
        if not os.path.isdir(path):
            print(message)
            os.makedirs(path)

    _ensure_dir('dat', 'Make directory for dataset')

    print('Load data...')
    data_dict = None
    dataset = data_handler.DatasetFactory.get_dataset(args.dataset)
    task_info = dataset.task_info
    print('\nTask info =', task_info)

    _ensure_dir('result_data', 'Make directory for saving results')
    _ensure_dir('trained_model', 'Make directory for saving trained models')

    # One dataloader per task; the seeded class shuffle is shared between the
    # train and test splits so their label permutations agree.
    shuffle_idx = shuffle(np.arange(dataset.classes), random_state=args.seed)
    train_loaders = data_handler.make_ContinualLoaders(
        dataset.train_data,
        dataset.train_labels,
        task_info,
        transform=dataset.train_transform,
        shuffle_idx=shuffle_idx,
        data_dict=data_dict,
    )
    test_loaders = data_handler.make_ContinualLoaders(
        dataset.test_data,
        dataset.test_labels,
        task_info,
        transform=dataset.test_transform,
        shuffle_idx=shuffle_idx,
        data_dict=data_dict,
    )

    # Model, optimizer, evaluator and trainer for this experiment.
    model = networks.ModelFactory.get_model(args.dataset, args.trainer,
                                            task_info).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.decay)
    evaluator = trainer.EvaluatorFactory.get_evaluator("trainedClassifier")
    cl_trainer = trainer.TrainerFactory.get_trainer(model, args, optimizer,
                                                    evaluator, task_info)

    ########################################################################
    utils.print_model_report(model)
    utils.print_optimizer_config(optimizer)
    print('-' * 100)

    # acc[t, u] / lss[t, u]: accuracy / loss on task u after training task t.
    n_tasks = len(task_info)
    acc = np.zeros((n_tasks, n_tasks), dtype=np.float32)
    lss = np.zeros((n_tasks, n_tasks), dtype=np.float32)

    for t, ncla in task_info:
        print("tasknum:", t)
        # Train on the current task...
        cl_trainer.train(train_loaders[t], test_loaders[t], t, device)

        # ...then evaluate on every task seen so far.
        for u in range(t + 1):
            eval_iterator = torch.utils.data.DataLoader(test_loaders[u], 100,
                                                        shuffle=False)
            test_loss, test_acc = evaluator.evaluate(cl_trainer.model,
                                                     eval_iterator, u, device)
            print(
                '>>> Test on task {:2d}: loss={:.3f}, acc={:5.1f}% <<<'.format(
                    u, test_loss, 100 * test_acc))
            acc[t, u] = test_acc
            lss[t, u] = test_loss
        print('Average accuracy={:5.1f}%'.format(100 *
                                                 np.mean(acc[t, :t + 1])))

        # Persist the accuracy matrix and a per-task checkpoint.
        print('Save at ' + args.output)
        np.savetxt(args.output, acc, '%.4f')
        torch.save(model.state_dict(),
                   './trained_model/' + run_name + '_task_{}.pt'.format(t))

    # Final report: the full accuracy matrix.
    print('*' * 100)
    print('Accuracies =')
    for i in range(acc.shape[0]):
        print('\t', end='')
        for j in range(acc.shape[1]):
            print('{:5.1f}% '.format(100 * acc[i, j]), end='')
        print()
    print('*' * 100)
    print('Done!')
def main(seed=0,
         experiment='',
         approach='',
         output='',
         name='',
         nepochs=200,
         lr=0.05,
         weight_init=None,
         test_mode=None,
         log_path=None,
         **parameters):
    '''Trains an experiment given the current settings.

    Args:
        seed (int): Random seed
        experiment (str): Name of the experiment to load - choices:
            ['mnist2','pmnist','cifar','mixture']
        approach (str): Approach to take to training the experiment - choices:
            ['random','sgd','sgd-frozen','lwf','lfl','ewc','imm-mean',
             'progressive','pathnet','imm-mode','sgd-restart','joint','hat',
             'hat-test','dwa']
        output (str): Path to store the output under
        name (str): Additional experiment name for grid search
        nepochs (int): Number of epochs to iterate through
        lr (float): Learning Rate to apply
        weight_init (str): String that defines how the weights are
            initialized - it can be splitted (with `:`) between convolution
            (first) and Linear (second) layers. Options: ["xavier",
            "uniform", "normal", "ones", "zeros", "kaiming"]
        test_mode (int): Defines how many tasks to iterate through
        log_path (str): Path to store detailed logs
        **parameters: Approach dependent parameters
    '''
    # BUGFIX: ``tstart`` was used in the elapsed-time report below but never
    # assigned (NameError at the end of every run).  Start the clock here.
    tstart = time.time()

    # Check the output path.
    if output == '':
        output = '../res/' + experiment + '_' + approach + '_' + str(seed) + (
            ("_" + name) if len(name) > 0 else "") + '.txt'
    print('=' * 100)
    print('Arguments =')
    args = {
        **parameters, "seed": seed, "experiment": experiment,
        "approach": approach, "output": output, "nepochs": nepochs, "lr": lr,
        "weight_init": weight_init
    }
    for arg in args:
        print("\t{:15}: {}".format(arg, args[arg]))
    print('=' * 100)

    ########################################################################
    # Seed all RNGs; this script requires CUDA and exits without it.
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    else:
        print('[CUDA unavailable]')
        sys.exit()

    # Args -- Experiment: pick the dataloader module.
    if experiment == 'mnist2':
        from dataloaders import mnist2 as dataloader
    elif experiment == 'pmnist':
        from dataloaders import pmnist as dataloader
    elif experiment == 'cifar':
        from dataloaders import cifar as dataloader
    elif experiment == 'mixture':
        from dataloaders import mixture as dataloader

    # Args -- Approach: pick the approach module (always bound as ``appr``).
    if approach == 'random':
        from approaches import random as appr
    elif approach == 'sgd':
        from approaches import sgd as appr
    elif approach == 'sgd-restart':
        from approaches import sgd_restart as appr
    elif approach == 'sgd-frozen':
        from approaches import sgd_frozen as appr
    elif approach == 'lwf':
        from approaches import lwf as appr
    elif approach == 'lfl':
        from approaches import lfl as appr
    elif approach == 'ewc':
        from approaches import ewc as appr
    elif approach == 'imm-mean':
        from approaches import imm_mean as appr
    elif approach == 'imm-mode':
        from approaches import imm_mode as appr
    elif approach == 'progressive':
        from approaches import progressive as appr
    elif approach == 'pathnet':
        from approaches import pathnet as appr
    elif approach == 'hat-test':
        # BUGFIX: was ``import hat_test as approach``, which clobbered the
        # ``approach`` string argument and left ``appr`` unbound, crashing
        # later at ``appr.Appr(...)``.  Bind it as ``appr`` like every other
        # branch.
        from approaches import hat_test as appr
    elif approach == 'hat':
        from approaches import hat as appr
    elif approach == 'joint':
        from approaches import joint as appr
    elif approach == 'dwa':
        from approaches import dwa as appr

    # Args -- Network: pick the architecture matching experiment + approach.
    if experiment in ['mnist2', 'pmnist']:
        if approach in ['hat', 'hat-test']:
            from networks import mlp_hat as network
        elif approach == 'dwa':
            from networks import mlp_dwa as network
        else:
            from networks import mlp as network
    else:
        if approach == 'lfl':
            from networks import alexnet_lfl as network
        elif approach == 'hat':
            from networks import alexnet_hat as network
        elif approach == 'progressive':
            from networks import alexnet_progressive as network
        elif approach == 'pathnet':
            from networks import alexnet_pathnet as network
        elif approach == 'hat-test':
            from networks import alexnet_hat_test as network
        elif approach == 'dwa':
            from networks import alexnet_dwa as network
        else:
            from networks import alexnet as network

    ########################################################################
    # Load data.
    print('Load data...')
    data, taskcla, inputsize = dataloader.get(seed=seed)
    print('Input size =', inputsize, '\nTask info =', taskcla)

    # Init the network and put it on the GPU.
    print('Inits...')
    if approach == "dwa":
        # dwa: forward only the network-specific keyword parameters
        # (``dwa_net_params`` is a module-level list -- see file top).
        params = {}
        for key in parameters:
            if key in dwa_net_params:
                params[key] = parameters[key]
        net = network.Net(inputsize, taskcla, **params).cuda()
    else:
        net = network.Net(inputsize, taskcla).cuda()
    utils.print_model_report(net)

    # Optionally (re-)initialise the network weights from the spec string
    # "conv_init[,conv_bias]:linear_init[,linear_bias]".
    if weight_init is not None:
        inits = weight_init.split(":")
        conv_init = inits[0].split(",")
        conv_bias = conv_init[1] if len(conv_init) > 1 else "zeros"
        conv_init = conv_init[0]
        linear_init = inits[-1].split(",")
        linear_bias = linear_init[1] if len(linear_init) > 1 else "zeros"
        linear_init = linear_init[0]
        init_funcs = {
            "xavier": lambda x: torch.nn.init.xavier_uniform_(x, gain=1.0),
            "kaiming": lambda x: torch.nn.init.kaiming_normal_(
                x, nonlinearity="relu", mode='fan_in'),
            "normal": lambda x: torch.nn.init.normal_(x, mean=0., std=1.),
            "uniform": lambda x: torch.nn.init.uniform_(x, a=0., b=1.),
            "ones": lambda x: x.data.fill_(1.),
            "zeros": lambda x: x.data.fill_(0.)
        }
        print(
            "Init network weights:\n\tlinear weights: {}\n\tlinear bias: {}\n\tconv weights: {}\n\tconv bias: {}"
            .format(linear_init, linear_bias, conv_init, conv_bias))

        def init_weights(m):
            # Applied recursively via ``net.apply``; only touches linear and
            # conv layers (plain and dwa variants).
            if type(m) == torch.nn.Linear or type(m) == Linear_dwa:
                init_funcs[linear_init](m.weight)
                init_funcs[linear_bias](m.bias)
            if type(m) == torch.nn.Conv2d or type(m) == Conv2d_dwa:
                init_funcs[conv_init](m.weight)
                init_funcs[conv_bias](m.bias)
            # TODO: check for masks

        net.apply(init_weights)

    # Setup the approach: dwa keeps only the non-network parameters.
    params = parameters
    if approach == 'dwa':
        params = {}
        for key in parameters:
            if key not in dwa_net_params:
                params[key] = parameters[key]
    appr = appr.Appr(net, nepochs=nepochs, lr=lr, log_path=log_path, **params)
    print(appr.criterion)
    utils.print_optimizer_config(appr.optimizer)
    print('-' * 100)

    # Loop tasks; acc[t, u] = accuracy on task u after training task t.
    acc = np.zeros((len(taskcla), len(taskcla)), dtype=np.float32)
    lss = np.zeros((len(taskcla), len(taskcla)), dtype=np.float32)
    i = 0
    for t, ncla in taskcla:
        # In test mode, stop after ``test_mode`` tasks.
        i += 1
        if test_mode is not None and i > test_mode:
            print("INFO: In Test-Mode - breaking after Task {}".format(
                test_mode))
            break
        print('*' * 100)
        print('Task {:2d} ({:s})'.format(t, data[t]['name']))
        print('*' * 100)

        if approach == 'joint':
            # Get data.  We do not put it to GPU (joint accumulates all
            # tasks' data).
            if t == 0:
                xtrain = data[t]['train']['x']
                ytrain = data[t]['train']['y']
                xvalid = data[t]['valid']['x']
                yvalid = data[t]['valid']['y']
                task_t = t * torch.ones(xtrain.size(0)).int()
                task_v = t * torch.ones(xvalid.size(0)).int()
                task = [task_t, task_v]
            else:
                xtrain = torch.cat((xtrain, data[t]['train']['x']))
                ytrain = torch.cat((ytrain, data[t]['train']['y']))
                xvalid = torch.cat((xvalid, data[t]['valid']['x']))
                yvalid = torch.cat((yvalid, data[t]['valid']['y']))
                task_t = torch.cat(
                    (task_t,
                     t * torch.ones(data[t]['train']['y'].size(0)).int()))
                task_v = torch.cat(
                    (task_v,
                     t * torch.ones(data[t]['valid']['y'].size(0)).int()))
                task = [task_t, task_v]
        else:
            # Get data for the current task only, directly on the GPU.
            xtrain = data[t]['train']['x'].cuda()
            ytrain = data[t]['train']['y'].cuda()
            xvalid = data[t]['valid']['x'].cuda()
            yvalid = data[t]['valid']['y'].cuda()
            task = t

        # Train.
        appr.train(task, xtrain, ytrain, xvalid, yvalid)
        print('-' * 100)

        # Free some cache.
        print("INFO: Free cuda cache")
        torch.cuda.empty_cache()

        # Test on every task seen so far.
        for u in range(t + 1):
            xtest = data[u]['test']['x'].cuda()
            ytest = data[u]['test']['y'].cuda()
            test_loss, test_acc, metric_str = appr.eval(u, xtest, ytest)
            print(
                '>>> Test on task {:2d} - {:15s}: loss={:.3f}, acc={:5.1f}%{} <<<'
                .format(u, data[u]['name'], test_loss, 100 * test_acc,
                        metric_str))
            acc[t, u] = test_acc
            lss[t, u] = test_loss

            # Introspection (after the last task only, if logging enabled).
            if hasattr(appr, 'introspect') and appr.logs is not None and (
                    t + 1 >= len(taskcla)):
                # Randomly select a few samples from the test set.
                idx = torch.randperm(xtest.size(0))
                xrand = xtest[idx[:10]]
                yrand = ytest[idx[:10]]
                out = appr.introspect(u, xrand, yrand)
                # Pickle the output.
                print('Store task {} analytics'.format(data[u]['name']))
                with gzip.open(
                        os.path.join(
                            appr.logpath,
                            os.path.basename(output) +
                            ".task{}_{}.analysis".format(u, data[u]['name'])),
                        'wb') as intro_file:
                    pickle.dump(out, intro_file, pickle.HIGHEST_PROTOCOL)

        # Check if the result directory exists.
        if not os.path.exists(os.path.dirname(output)):
            print("create output dir")
            os.makedirs(os.path.dirname(output))
        # Save the accuracy matrix.
        print('Save at {}'.format(output))
        np.savetxt(output, acc, '%.4f')

    # Done.
    print('*' * 100)
    print('Accuracies =')
    for i in range(acc.shape[0]):
        print('\t', end='')
        for j in range(acc.shape[1]):
            print('{:5.1f}% '.format(100 * acc[i, j]), end='')
        print()
    print('*' * 100)
    print('Done!')
    print('[Elapsed time = {:.1f} h]'.format(
        (time.time() - tstart) / (60 * 60)))

    # Optionally: store logs, analytics and the model.
    if hasattr(appr, 'logs'):
        if appr.logs is not None:
            # Save task names and final metrics.
            from copy import deepcopy
            appr.logs['task_name'] = {}
            appr.logs['test_acc'] = {}
            appr.logs['test_loss'] = {}
            for t, ncla in taskcla:
                appr.logs['task_name'][t] = deepcopy(data[t]['name'])
                appr.logs['test_acc'][t] = deepcopy(acc[t, :])
                appr.logs['test_loss'][t] = deepcopy(lss[t, :])
            # Pickle the logs.
            with gzip.open(
                    os.path.join(appr.logpath,
                                 os.path.basename(output) + "_logs.gzip"),
                    'wb') as log_file:
                pickle.dump(appr.logs, log_file, pickle.HIGHEST_PROTOCOL)
            # Store the model (full and light versions).
            # NOTE(review): placed inside the logging branch because it uses
            # ``appr.logpath`` -- the collapsed original's nesting is
            # ambiguous; confirm against version history.
            model_file = os.path.join(appr.logpath,
                                      os.path.basename(output) + ".model")
            torch.save(net, model_file)
            model_file = os.path.join(appr.logpath,
                                      os.path.basename(output) + ".weights")
            torch.save(net.state_dict(), model_file)