def load_model(self, args, config):
    if args['model_path'] is not None:
        net_old = Learner.Learner(config)
        # logger.info("Loading model from path %s", args["model_path"])
        self.net = torch.load(args['model_path'] + "/net.model", map_location="cpu")
        for (n1, old_model), (n2, loaded_model) in zip(net_old.named_parameters(),
                                                       self.net.named_parameters()):
            print(n1, n2, old_model.learn, old_model.meta)
            loaded_model.learn = old_model.learn
            loaded_model.meta = old_model.meta
    else:
        self.net = Learner.Learner(config)
def load_model(args, config):
    if args['model_path'] is not None:
        net_old = Learner.Learner(config)
        # logger.info("Loading model from path %s", args["model_path"])
        net = torch.load(args['model_path'], map_location="cpu")
        for (n1, old_model), (n2, loaded_model) in zip(net_old.named_parameters(),
                                                       net.named_parameters()):
            # print(n1, n2, old_model.adaptation, old_model.meta)
            loaded_model.adaptation = old_model.adaptation
            loaded_model.meta = old_model.meta
        net.reset_vars()
    else:
        net = Learner.Learner(config)
    return net
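# Hedged usage sketch for the functional load_model above; the checkpoint path and
# dataset name are illustrative placeholders, and mf / torch refer to the module-level
# imports used throughout these scripts.
if __name__ == "__main__":
    config = mf.ModelFactory.get_model("na", "omniglot")
    net = load_model({"model_path": "../results/some_run/net.model"}, config)
    net = net.to("cuda" if torch.cuda.is_available() else "cpu")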
def load_model(self, args, config, context_config):
    if args['model_path'] is not None and False:
        pass
        assert (False)
    else:
        self.net = Learner.Learner(config, context_config)
def load_model(self, args, config, context_config, device="cpu"):
    if args['model_path'] is not None and False:
        pass
        assert (False)
    else:
        self.net = Learner.Learner(config, context_config, type="representation", device=device)
def __init__(self, args, config):
    super(MetaLearingClassification, self).__init__()
    self.update_lr = args.update_lr
    self.meta_lr = args.meta_lr
    self.update_step = args.update_step
    self.net = Learner.Learner(config)
    self.optimizer = optim.Adam(self.net.parameters(), lr=self.meta_lr)
def __init__(self, args, config): """ :param args: """ super(MetaLearnerRegression, self).__init__() self.update_lr = args.update_lr self.meta_lr = args.meta_lr self.update_step = args.update_step self.net = Learner.Learner(config) self.optimizer = optim.Adam(self.net.parameters(), lr=self.meta_lr) self.meta_optim = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, [1500, 2500, 3500], 0.1)
def __init__(self, args, config):
    super(MetaLearnerRegression, self).__init__()
    self.init_stuff(args)
    self.net = Learner.Learner(config)
    # print(self.net.parameters())
    # print(self.net.vars)
    self.init_opt()
def __init__(self, args, config): """ :param args: """ super(MetaLearnerRegression, self).__init__() self.update_lr = args.update_lr self.meta_lr = args.meta_lr self.update_step = args.update_step self.net = Learner.Learner(config) #this is the actual network architecture self.optimizer = optim.Adam(self.net.parameters(), lr=self.meta_lr) #use Adam to optimie OML objetive self.meta_optim = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, [1500, 2500, 3500], 0.3) #decay learning rate based on epoch number
def __init__(self, args, config):
    print('initializing MLearner!')
    super(MetaLearingClassification, self).__init__()
    self.init_stuff(args)
    self.net = Learner.Learner(config)
    # print(self.net.parameters())
    # print(self.net.vars)
    self.init_opt()
def __init__(self, args, config):
    super(MetaLearingClassification, self).__init__()
    self.init_stuff(args)
    self.net = Learner.Learner(config, args.init_plasticity)
    # print(self.net.parameters())
    # print(self.net.vars)
    self.init_opt()
def __init__(self, args, config, treatment):
    super(MetaLearingClassification, self).__init__()
    self.update_lr = args.update_lr
    self.meta_lr = args.meta_lr
    self.update_step = args.update_step
    neuromodulation = (treatment == "Neuromodulation")
    self.net = Learner.Learner(config, neuromodulation)
    self.optimizer = optim.Adam(self.net.parameters(), lr=self.meta_lr)
    self.meta_iteration = 0
    self.inputNM = True
    self.nodeNM = False
    self.layers_to_fix = []
def main(args):
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    my_experiment = experiment(args.name, args, "../results/", args.commit)
    logger = logging.getLogger('experiment')
    logger.setLevel(logging.INFO)

    total_clases = [900]
    keep = list(range(total_clases[0]))
    dataset = utils.remove_classes_omni(
        df.DatasetFactory.get_dataset("omniglot", train=True, path=args.data_path, all=True), keep)
    iterator_sorted = torch.utils.data.DataLoader(
        utils.iterator_sorter_omni(dataset, False, classes=total_clases),
        batch_size=128, shuffle=True, num_workers=2)
    iterator = iterator_sorted
    print(args)

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    maml = torch.load(args.model, map_location='cpu')
    if args.scratch:
        config = mf.ModelFactory.get_model("na", args.dataset)
        maml = learner.Learner(config)
    maml = maml.to(device)

    reps = []
    counter = 0
    fig, axes = plt.subplots(9, 4)
    with torch.no_grad():
        for img, target in iterator:
            print(counter)
            img = img.to(device)
            target = target.to(device)
            rep = maml(img, vars=None, bn_training=False, feature=True)
            rep = rep.view((-1, 32, 72)).detach().cpu().numpy()
            rep_instance = rep[0]
            if args.binary:
                rep_instance = (rep_instance > 0).astype(int)
            if args.max:
                rep = rep / np.max(rep)
            else:
                rep = (rep > 0).astype(int)
            if counter < 36:
                print("Adding plot")
                axes[int(counter / 4), counter % 4].imshow(rep_instance, cmap=args.color)
                axes[int(counter / 4), counter % 4].set_yticklabels([])
                axes[int(counter / 4), counter % 4].set_xticklabels([])
                axes[int(counter / 4), counter % 4].set_aspect('equal')
            counter += 1
            reps.append(rep)

    plt.subplots_adjust(wspace=0.0, hspace=0.0)
    plt.savefig(my_experiment.path + "instance_" + str(counter) + ".pdf", format="pdf")
    plt.clf()

    rep = np.concatenate(reps)
    average_activation = np.mean(rep, 0)
    plt.imshow(average_activation, cmap=args.color)
    plt.colorbar()
    plt.clim(0, np.max(average_activation))
    plt.savefig(my_experiment.path + "average_activation.pdf", format="pdf")
    plt.clf()

    instance_sparsity = np.mean((np.sum(np.sum(rep, 1), 1)) / (64 * 36))
    print("Instance sparsity = ", instance_sparsity)
    my_experiment.results["instance_sparsity"] = str(instance_sparsity)

    lifetime_sparsity = (np.sum(rep, 0) / len(rep)).flatten()
    mean_lifetime = np.mean(lifetime_sparsity)
    print("Lifetime sparsity = ", mean_lifetime)
    my_experiment.results["lifetime_sparsity"] = str(mean_lifetime)

    dead_neurons = float(np.sum((lifetime_sparsity == 0).astype(int))) / len(lifetime_sparsity)
    print("Dead neurons percentage = ", dead_neurons)
    my_experiment.results["dead_neurons"] = str(dead_neurons)

    plt.hist(lifetime_sparsity)
    plt.savefig(my_experiment.path + "histogram.pdf", format="pdf")
    my_experiment.store_json()
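# Self-contained restatement of the two sparsity statistics computed above, provided
# as a reading aid; it assumes rep is the stacked array of binarized feature maps
# with shape [N, H, W]. The script above keeps its own inline computation.
import numpy as np

def sparsity_stats(rep):
    flat = rep.reshape(len(rep), -1)
    instance_sparsity = float(flat.mean(1).mean())  # mean fraction of active units per example
    lifetime = rep.mean(0).flatten()                # per-unit firing frequency across examples
    dead_fraction = float((lifetime == 0).mean())   # share of units that never fire
    return instance_sparsity, float(lifetime.mean()), dead_fraction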
def main(args):
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    np.random.seed(args.seed)

    my_experiment = experiment(args.name, args, "../results/")
    dataset = df.DatasetFactory.get_dataset(args.dataset)
    if args.dataset == "CIFAR100":
        args.classes = list(range(50))

    if args.dataset == "omniglot":
        iterator = torch.utils.data.DataLoader(
            utils.remove_classes_omni(dataset, list(range(963))),
            batch_size=256, shuffle=True, num_workers=1)
    else:
        iterator = torch.utils.data.DataLoader(
            utils.remove_classes(dataset, args.classes),
            batch_size=256, shuffle=True, num_workers=1)

    logger.info(str(args))
    config = mf.ModelFactory.get_model("na", args.dataset)
    maml = learner.Learner(config).to(device)
    opt = torch.optim.Adam(maml.parameters(), lr=args.lr)

    for e in range(args.epoch):
        correct = 0
        for img, y in iterator:
            if e == 20:
                opt = torch.optim.Adam(maml.parameters(), lr=0.00001)
                logger.info("Changing LR from %f to %f", 0.0001, 0.00001)
            img = img.to(device)
            y = y.to(device)
            pred = maml(img)
            feature = F.relu(maml(img, feature=True))
            avg_feature = feature.mean(0)

            beta = args.beta
            beta_hat = avg_feature
            loss_rec = ((beta / (beta_hat + 0.0001)) - torch.log(beta / (beta_hat + 0.0001)) - 1)
            loss_rec = loss_rec * (beta_hat > beta).float()
            loss_sparse = loss_rec
            if args.l1:
                loss_sparse = feature.mean(0)
            loss_sparse = loss_sparse.mean()

            opt.zero_grad()
            loss = F.cross_entropy(pred, y)
            loss_sparse.backward(retain_graph=True)
            loss.backward()
            opt.step()
            correct += (pred.argmax(1) == y).sum().float() / len(y)
        logger.info("Accuracy at epoch %d = %s", e, str(correct / len(iterator)))

    torch.save(maml, my_experiment.path + "model.net")
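# Sketch of the sparsity penalty used above, pulled out as a standalone function.
# With x = beta / (beta_hat + eps), the per-unit term x - log(x) - 1 is non-negative
# and is zero exactly when the mean activation beta_hat matches the target beta; the
# mask keeps the penalty only where beta_hat exceeds the target. Names here are
# illustrative, not part of the training script's API.
import torch

def sparsity_penalty(feature_mean, beta, eps=1e-4):
    ratio = beta / (feature_mean + eps)
    per_unit = ratio - torch.log(ratio) - 1
    return (per_unit * (feature_mean > beta).float()).mean()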
def main(args):
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    my_experiment = experiment(args.name, args, "/data5/jlindsey/continual/results", args.commit)
    writer = SummaryWriter(my_experiment.path + "tensorboard")
    logger = logging.getLogger('experiment')
    logger.setLevel(logging.INFO)

    total_clases = 10
    frozen_layers = []
    for temp in range(args.rln * 2):
        frozen_layers.append("vars." + str(temp))
    logger.info("Frozen layers = %s", " ".join(frozen_layers))

    final_results_all = []
    total_clases = [10, 50, 75, 100, 150, 200]
    if args.twentyclass:
        total_clases = [20, 50]
    if args.fiveclass:
        total_clases = [5]

    for tot_class in total_clases:
        avg_perf = 0.0
        lr_list = [0.03]
        for aoo in range(0, args.runs):
            keep = np.random.choice(list(range(200)), tot_class, replace=False)
            print('keep', keep)
            if args.dataset == "omniglot":
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=True, background=False), keep)
                print('lenbefore', len(dataset.data))
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter_omni(dataset, False, classes=total_clases),
                    batch_size=1, shuffle=args.iid, num_workers=2)
                print("LEN", len(iterator_sorted), len(dataset.data))
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=not args.test, background=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=1)
            elif args.dataset == "CIFAR100":
                keep = np.random.choice(list(range(50, 100)), tot_class)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=True), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter(dataset, False, classes=tot_class),
                    batch_size=16, shuffle=args.iid, num_workers=2)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=False, num_workers=1)
            # sampler = ts.MNISTSampler(list(range(0, total_clases)), dataset)

            if torch.cuda.is_available():
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            results_mem_size = {}
            for mem_size in [args.memory]:
                max_acc = -10
                max_lr = -10
                for lr in lr_list:
                    print(lr)
                    maml = torch.load(args.model, map_location='cpu')
                    if args.scratch:
                        config = mf.ModelFactory.get_model("na", args.dataset)
                        maml = learner.Learner(config)
                        # maml = MetaLearingClassification(args, config).to(device).net
                    maml = maml.to(device)

                    for name, param in maml.named_parameters():
                        param.learn = True
                    for name, param in maml.named_parameters():
                        if name in frozen_layers:
                            # logger.info("Freezing name %s", str(name))
                            param.learn = False
                        else:
                            if args.reset:
                                w = nn.Parameter(torch.ones_like(param))
                                if len(w.shape) > 1:
                                    torch.nn.init.kaiming_normal_(w)
                                else:
                                    w = nn.Parameter(torch.zeros_like(param))
                                param.data = w
                                param.learn = True

                    frozen_layers = []
                    for temp in range(args.rln * 2):
                        frozen_layers.append("vars." + str(temp))

                    '''
                    torch.nn.init.kaiming_normal_(maml.parameters()[-2])
                    w = nn.Parameter(torch.zeros_like(maml.parameters()[-1]))
                    maml.parameters()[-1].data = w

                    for n, a in maml.named_parameters():
                        n = n.replace(".", "_")
                        if n == "vars_14":
                            w = nn.Parameter(torch.ones_like(a))
                            torch.nn.init.kaiming_normal_(w)
                            a.data = w
                        if n == "vars_15":
                            w = nn.Parameter(torch.zeros_like(a))
                            a.data = w
                    '''

                    correct = 0
                    for img, target in iterator:
                        with torch.no_grad():
                            img = img.to(device)
                            target = target.to(device)
                            logits_q = maml(img, vars=None, bn_training=False, feature=False)
                            pred_q = (logits_q).argmax(dim=1)
                            correct += torch.eq(pred_q, target).sum().item() / len(img)
                    logger.info("Pre-epoch accuracy %s", str(correct / len(iterator)))

                    filter_list = ["vars.0", "vars.1", "vars.2", "vars.3", "vars.4", "vars.5"]
                    logger.info("Filter list = %s", ",".join(filter_list))
                    list_of_names = list(
                        map(lambda x: x[1],
                            list(filter(lambda x: x[0] not in filter_list, maml.named_parameters()))))
                    list_of_params = list(filter(lambda x: x.learn, maml.parameters()))
                    list_of_names = list(filter(lambda x: x[1].learn, maml.named_parameters()))
                    if args.scratch or args.no_freeze:
                        print("Empty filter list")
                        list_of_params = maml.parameters()
                    # for x in list_of_names: logger.info("Unfrozen layer = %s", str(x[0]))

                    opt = torch.optim.Adam(list_of_params, lr=lr)
                    fast_weights = maml.vars
                    if args.randomize_plastic_weights:
                        maml.randomize_plastic_weights()
                    if args.zero_plastic_weights:
                        maml.zero_plastic_weights()

                    for iter in range(0, args.epoch):
                        iter_count = 0
                        imgs = []
                        ys = []
                        for img, y in iterator_sorted:
                            if iter_count % 15 >= args.shots:
                                iter_count += 1
                                continue
                            iter_count += 1
                            img = img.to(device)
                            y = y.to(device)
                            imgs.append(img)
                            ys.append(y)
                            if not args.batch_learning:
                                pred = maml(img, vars=fast_weights)
                                opt.zero_grad()
                                loss = F.cross_entropy(pred, y)
                                grad = torch.autograd.grad(loss, fast_weights)
                                if args.plastic_update:
                                    fast_weights = list(
                                        map(lambda p: p[1] - p[0] * p[2] if p[1].learn else p[1],
                                            zip(grad, fast_weights, maml.vars_plasticity)))
                                else:
                                    fast_weights = list(
                                        map(lambda p: p[1] - args.update_lr * p[0] if p[1].learn else p[1],
                                            zip(grad, fast_weights)))
                                for params_old, params_new in zip(maml.parameters(), fast_weights):
                                    params_new.learn = params_old.learn
                        if args.batch_learning:
                            y = torch.cat(ys, 0)
                            img = torch.cat(imgs, 0)
                            pred = maml(img, vars=fast_weights)
                            opt.zero_grad()
                            loss = F.cross_entropy(pred, y)
                            grad = torch.autograd.grad(loss, fast_weights)
                            if args.plastic_update:
                                fast_weights = list(
                                    map(lambda p: p[1] - p[0] * p[2] if p[1].learn else p[1],
                                        zip(grad, fast_weights, maml.vars_plasticity)))
                            else:
                                fast_weights = list(
                                    map(lambda p: p[1] - args.update_lr * p[0] if p[1].learn else p[1],
                                        zip(grad, fast_weights)))
                            for params_old, params_new in zip(maml.parameters(), fast_weights):
                                params_new.learn = params_old.learn

                    logger.info("Result after one epoch for LR = %f", lr)
                    correct = 0
                    for img, target in iterator:
                        img = img.to(device)
                        target = target.to(device)
                        logits_q = maml(img, vars=fast_weights, bn_training=False, feature=False)
                        pred_q = (logits_q).argmax(dim=1)
                        correct += torch.eq(pred_q, target).sum().item() / len(img)
                    logger.info(str(correct / len(iterator)))

                    if (correct / len(iterator) > max_acc):
                        max_acc = correct / len(iterator)
                        max_lr = lr

                lr_list = [max_lr]
                results_mem_size[mem_size] = (max_acc, max_lr)
                avg_perf += max_acc / args.runs
                print('avg perf', avg_perf * args.runs / (1 + aoo))
                logger.info("Final Max Result = %s", str(max_acc))
                writer.add_scalar('/finetune/best_' + str(aoo), max_acc, tot_class)

            final_results_all.append((tot_class, results_mem_size))
            print("A= ", results_mem_size)
            logger.info("Final results = %s", str(results_mem_size))
            my_experiment.results["Final Results"] = final_results_all
            my_experiment.store_json()
            np.save('evals/final_results_' + args.orig_name + '.npy', final_results_all)
            print("FINAL RESULTS = ", final_results_all)
    writer.close()
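# A minimal restatement of the fast-weight ("plastic") inner update used in the
# fine-tuning loop above, isolated for readability. Assumptions: each entry of
# fast_weights carries a boolean .learn attribute (set earlier in that script), and
# plasticity, when given, is a per-parameter tensor like maml.vars_plasticity.
def inner_update(fast_weights, grads, update_lr, plasticity=None):
    if plasticity is not None:
        new_weights = [w - g * p if w.learn else w
                       for g, w, p in zip(grads, fast_weights, plasticity)]
    else:
        new_weights = [w - update_lr * g if w.learn else w
                       for g, w in zip(grads, fast_weights)]
    # Carry the .learn flags over so the next step can skip frozen parameters again.
    for old, new in zip(fast_weights, new_weights):
        new.learn = old.learn
    return new_weights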
def main():
    p = params.Parser()
    total_seeds = len(p.parse_known_args()[0].seed)
    rank = p.parse_known_args()[0].rank
    all_args = vars(p.parse_known_args()[0])
    print("All args = ", all_args)
    args = utils.get_run(vars(p.parse_known_args()[0]), rank)

    utils.set_seed(args['seed'])
    my_experiment = experiment(args['name'], args, "../results/", commit_changes=False, rank=0, seed=1)

    gpu_to_use = rank % args["gpus"]
    if torch.cuda.is_available():
        device = torch.device('cuda:' + str(gpu_to_use))
        logger.info("Using gpu : %s", 'cuda:' + str(gpu_to_use))
    else:
        device = torch.device('cpu')

    dataset = df.DatasetFactory.get_dataset(args['dataset'], background=True, train=True,
                                            path=args["path"], all=True)
    iterator = torch.utils.data.DataLoader(dataset, batch_size=256, shuffle=True, num_workers=0)

    logger.info(str(args))
    config = mf.ModelFactory.get_model("na", args["dataset"])
    maml = learner.Learner(config).to(device)
    for k, v in maml.named_parameters():
        print(k, v.requires_grad)

    opt = torch.optim.Adam(maml.parameters(), lr=args["lr"])
    for e in range(args["epoch"]):
        correct = 0
        for img, y in tqdm(iterator):
            img = img.to(device)
            y = y.to(device)
            pred = maml(img)
            opt.zero_grad()
            loss = F.cross_entropy(pred, y.long())
            loss.backward()
            opt.step()
            correct += (pred.argmax(1) == y).sum().float() / len(y)
        logger.info("Accuracy at epoch %d = %s", e, str(correct / len(iterator)))

    torch.save(maml, my_experiment.path + "model.net")
def main(args):
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    np.random.seed(args.seed)

    my_experiment = experiment(args.name, args, "./results/")
    args.classes = list(range(64))
    # args.traj_classes = list(range(int(64 / 2), 963))

    dataset = imgnet.MiniImagenet(args.dataset_path, mode='train')
    dataset_test = imgnet.MiniImagenet(args.dataset_path, mode='test')

    # Iterators used for evaluation
    iterator_test = torch.utils.data.DataLoader(dataset_test, batch_size=5, shuffle=True, num_workers=1)
    iterator = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=True, num_workers=1)

    # logger.info(str(args))
    config = mf.ModelFactory.get_model("na", args.dataset)
    maml = learner.Learner(config).to(device)
    opt = torch.optim.Adam(maml.parameters(), lr=args.lr)

    for e in range(args.epoch):
        correct = 0
        for img, y in tqdm(iterator):
            if e == 50:
                opt = torch.optim.Adam(maml.parameters(), lr=0.00001)
                logger.info("Changing LR from %f to %f", 0.0001, 0.00001)
            img = img.to(device)
            y = y.to(device)
            pred = maml(img)
            feature = maml(img, feature=True)
            loss_rep = torch.abs(feature).sum()
            opt.zero_grad()
            loss = F.cross_entropy(pred, y)
            # loss_rep.backward(retain_graph=True)
            # logger.info("L1 norm = %s", str(loss_rep.item()))
            loss.backward()
            opt.step()
            correct += (pred.argmax(1) == y).sum().float() / len(y)
        logger.info("Accuracy at epoch %d = %s", e, str(correct / len(iterator)))

        # correct = 0
        # with torch.no_grad():
        #     for img, y in tqdm(iterator_test):
        #         img = img.to(device)
        #         y = y.to(device)
        #         pred = maml(img)
        #         feature = maml(img, feature=True)
        #         loss_rep = torch.abs(feature).sum()
        #         correct += (pred.argmax(1) == y).sum().float() / len(y)
        # logger.info("Accuracy Test at epoch %d = %s", e, str(correct / len(iterator_test)))

    torch.save(maml, my_experiment.path + "baseline_pretraining_imagenet.net")
def main(args):
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    np.random.seed(args.seed)

    my_experiment = experiment(args.name, args, "../results/")
    dataset = df.DatasetFactory.get_dataset(args.dataset)
    dataset_test = df.DatasetFactory.get_dataset(args.dataset, train=False)
    if args.dataset == "CUB":
        args.classes = list(range(100))
    if args.dataset == "CIFAR100":
        args.classes = list(range(50))

    if args.dataset == "omniglot":
        iterator = torch.utils.data.DataLoader(
            utils.remove_classes_omni(dataset, list(range(963))),
            batch_size=256, shuffle=True, num_workers=1)
        iterator_test = torch.utils.data.DataLoader(
            utils.remove_classes_omni(dataset_test, list(range(963))),
            batch_size=256, shuffle=True, num_workers=1)
    else:
        iterator = torch.utils.data.DataLoader(
            utils.remove_classes(dataset, args.classes),
            batch_size=12, shuffle=True, num_workers=1)
        iterator_test = torch.utils.data.DataLoader(
            utils.remove_classes(dataset_test, args.classes),
            batch_size=12, shuffle=True, num_workers=1)

    logger.info(str(args))
    config = mf.ModelFactory.get_model("na", args.dataset)
    maml = learner.Learner(config).to(device)
    opt = torch.optim.Adam(maml.parameters(), lr=args.lr)

    for e in range(args.epoch):
        correct = 0
        for img, y in tqdm(iterator):
            if e == 50:
                opt = torch.optim.Adam(maml.parameters(), lr=0.00001)
                logger.info("Changing LR from %f to %f", 0.0001, 0.00001)
            img = img.to(device)
            y = y.to(device)
            pred = maml(img)
            feature = maml(img, feature=True)
            loss_rep = torch.abs(feature).sum()
            opt.zero_grad()
            loss = F.cross_entropy(pred, y)
            # loss_rep.backward(retain_graph=True)
            # logger.info("L1 norm = %s", str(loss_rep.item()))
            loss.backward()
            opt.step()
            correct += (pred.argmax(1) == y).sum().float() / len(y)
        logger.info("Accuracy at epoch %d = %s", e, str(correct / len(iterator)))

        correct = 0
        with torch.no_grad():
            for img, y in tqdm(iterator_test):
                img = img.to(device)
                y = y.to(device)
                pred = maml(img)
                feature = maml(img, feature=True)
                loss_rep = torch.abs(feature).sum()
                correct += (pred.argmax(1) == y).sum().float() / len(y)
        logger.info("Accuracy Test at epoch %d = %s", e, str(correct / len(iterator_test)))

    torch.save(maml, my_experiment.path + "model.net")
def load_model(self, args, config, context_config, device="cpu"):
    self.net = Learner.Learner(config, context_config, device=device)
def main(args):
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    my_experiment = experiment(args.name, args, "../results/", args.commit)
    writer = SummaryWriter(my_experiment.path + "tensorboard")
    logger = logging.getLogger('experiment')
    logger.setLevel(logging.INFO)

    total_clases = 10
    frozen_layers = []
    for temp in range(args.rln * 2):
        frozen_layers.append("vars." + str(temp))
    logger.info("Frozen layers = %s", " ".join(frozen_layers))
    # for v in range(6):
    #     frozen_layers.append("vars_bn.{0}".format(v))

    final_results_all = []
    temp_result = []
    total_clases = args.schedule

    for tot_class in total_clases:
        lr_list = [0.001, 0.0006, 0.0004, 0.00035, 0.0003, 0.00025, 0.0002,
                   0.00015, 0.0001, 0.00009, 0.00008, 0.00006, 0.00003, 0.00001]
        lr_all = []
        for lr_search in range(10):
            keep = np.random.choice(list(range(650)), tot_class, replace=False)
            dataset = utils.remove_classes_omni(
                df.DatasetFactory.get_dataset("omniglot", train=True, background=False,
                                              path=args.dataset_path), keep)
            iterator_sorted = torch.utils.data.DataLoader(
                utils.iterator_sorter_omni(dataset, False, classes=total_clases),
                batch_size=1, shuffle=args.iid, num_workers=2)
            dataset = utils.remove_classes_omni(
                df.DatasetFactory.get_dataset("omniglot", train=not args.test, background=False,
                                              path=args.dataset_path), keep)
            iterator = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)
            print(args)

            if torch.cuda.is_available():
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            results_mem_size = {}
            for mem_size in [args.memory]:
                max_acc = -10
                max_lr = -10
                for lr in lr_list:
                    print(lr)
                    maml = torch.load(args.model, map_location='cpu')
                    if args.scratch:
                        config = mf.ModelFactory.get_model("OML", args.dataset)
                        maml = learner.Learner(config)
                        # maml = MetaLearingClassification(args, config).to(device).net
                    maml = maml.to(device)

                    for name, param in maml.named_parameters():
                        param.learn = True
                    for name, param in maml.named_parameters():
                        if name in frozen_layers:
                            param.learn = False
                        else:
                            if args.reset:
                                w = nn.Parameter(torch.ones_like(param))
                                if len(w.shape) > 1:
                                    torch.nn.init.kaiming_normal_(w)
                                else:
                                    w = nn.Parameter(torch.zeros_like(param))
                                param.data = w
                                param.learn = True

                    frozen_layers = []
                    for temp in range(args.rln * 2):
                        frozen_layers.append("vars." + str(temp))

                    torch.nn.init.kaiming_normal_(maml.parameters()[-2])
                    w = nn.Parameter(torch.zeros_like(maml.parameters()[-1]))
                    maml.parameters()[-1].data = w

                    if args.neuromodulation:
                        weights2reset = ["vars_26"]
                        biases2reset = ["vars_27"]
                    else:
                        weights2reset = ["vars_14"]
                        biases2reset = ["vars_15"]

                    for n, a in maml.named_parameters():
                        n = n.replace(".", "_")
                        if n in weights2reset:
                            w = nn.Parameter(torch.ones_like(a)).to(device)
                            torch.nn.init.kaiming_normal_(w)
                            a.data = w
                        if n in biases2reset:
                            w = nn.Parameter(torch.zeros_like(a)).to(device)
                            a.data = w

                    filter_list = ["vars.{0}".format(v) for v in range(6)]
                    logger.info("Filter list = %s", ",".join(filter_list))
                    list_of_names = list(
                        map(lambda x: x[1],
                            list(filter(lambda x: x[0] not in filter_list, maml.named_parameters()))))
                    list_of_params = list(filter(lambda x: x.learn, maml.parameters()))
                    list_of_names = list(filter(lambda x: x[1].learn, maml.named_parameters()))
                    if args.scratch or args.no_freeze:
                        print("Empty filter list")
                        list_of_params = maml.parameters()
                    for x in list_of_names:
                        logger.info("Unfrozen layer = %s", str(x[0]))

                    opt = torch.optim.Adam(list_of_params, lr=lr)
                    for _ in range(0, args.epoch):
                        for img, y in iterator_sorted:
                            img = img.to(device)
                            y = y.to(device)
                            pred = maml(img)
                            opt.zero_grad()
                            loss = F.cross_entropy(pred, y)
                            loss.backward()
                            opt.step()

                    logger.info("Result after one epoch for LR = %f", lr)
                    correct = 0
                    for img, target in iterator:
                        img = img.to(device)
                        target = target.to(device)
                        logits_q = maml(img, vars=None, bn_training=False, feature=False)
                        pred_q = (logits_q).argmax(dim=1)
                        correct += torch.eq(pred_q, target).sum().item() / len(img)
                    logger.info(str(correct / len(iterator)))

                    if (correct / len(iterator) > max_acc):
                        max_acc = correct / len(iterator)
                        max_lr = lr

                lr_all.append(max_lr)
                results_mem_size[mem_size] = (max_acc, max_lr)
                logger.info("Final Max Result = %s", str(max_acc))
                writer.add_scalar('/finetune/best_' + str(lr_search), max_acc, tot_class)

            temp_result.append((tot_class, results_mem_size))
            print("A= ", results_mem_size)
            logger.info("Temp Results = %s", str(results_mem_size))
            my_experiment.results["Temp Results"] = temp_result
            my_experiment.store_json()
            print("LR RESULTS = ", temp_result)

        from scipy import stats
        best_lr = float(stats.mode(lr_all)[0][0])
        logger.info("BEST LR %s= ", str(best_lr))

        for aoo in range(args.runs):
            keep = np.random.choice(list(range(650)), tot_class, replace=False)
            if args.dataset == "omniglot":
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=True, background=False), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter_omni(dataset, False, classes=total_clases),
                    batch_size=1, shuffle=args.iid, num_workers=2)
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=not args.test, background=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)
            elif args.dataset == "CIFAR100":
                keep = np.random.choice(list(range(50, 100)), tot_class)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=True), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter(dataset, False, classes=tot_class),
                    batch_size=16, shuffle=args.iid, num_workers=2)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=False, num_workers=1)
            print(args)

            if torch.cuda.is_available():
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            results_mem_size = {}
            for mem_size in [args.memory]:
                max_acc = -10
                max_lr = -10
                lr = best_lr
                maml = torch.load(args.model, map_location='cpu')
                if args.scratch:
                    config = mf.ModelFactory.get_model("MRCL", args.dataset)
                    maml = learner.Learner(config)
                maml = maml.to(device)

                for name, param in maml.named_parameters():
                    param.learn = True
                for name, param in maml.named_parameters():
                    if name in frozen_layers:
                        param.learn = False
                    else:
                        if args.reset:
                            w = nn.Parameter(torch.ones_like(param))
                            if len(w.shape) > 1:
                                torch.nn.init.kaiming_normal_(w)
                            else:
                                w = nn.Parameter(torch.zeros_like(param))
                            param.data = w
                            param.learn = True

                frozen_layers = []
                for temp in range(args.rln * 2):
                    frozen_layers.append("vars." + str(temp))

                torch.nn.init.kaiming_normal_(maml.parameters()[-2])
                w = nn.Parameter(torch.zeros_like(maml.parameters()[-1]))
                maml.parameters()[-1].data = w

                if args.neuromodulation:
                    weights2reset = ["vars_26"]
                    biases2reset = ["vars_27"]
                else:
                    weights2reset = ["vars_14"]
                    biases2reset = ["vars_15"]

                for n, a in maml.named_parameters():
                    n = n.replace(".", "_")
                    if n in weights2reset:
                        w = nn.Parameter(torch.ones_like(a)).to(device)
                        torch.nn.init.kaiming_normal_(w)
                        a.data = w
                    if n in biases2reset:
                        w = nn.Parameter(torch.zeros_like(a)).to(device)
                        a.data = w

                correct = 0
                for img, target in iterator:
                    with torch.no_grad():
                        img = img.to(device)
                        target = target.to(device)
                        logits_q = maml(img, vars=None, bn_training=False, feature=False)
                        pred_q = (logits_q).argmax(dim=1)
                        correct += torch.eq(pred_q, target).sum().item() / len(img)
                logger.info("Pre-epoch accuracy %s", str(correct / len(iterator)))

                filter_list = ["vars.{0}".format(v) for v in range(6)]
                logger.info("Filter list = %s", ",".join(filter_list))
                list_of_names = list(
                    map(lambda x: x[1],
                        list(filter(lambda x: x[0] not in filter_list, maml.named_parameters()))))
                list_of_params = list(filter(lambda x: x.learn, maml.parameters()))
                list_of_names = list(filter(lambda x: x[1].learn, maml.named_parameters()))
                if args.scratch or args.no_freeze:
                    print("Empty filter list")
                    list_of_params = maml.parameters()
                for x in list_of_names:
                    logger.info("Unfrozen layer = %s", str(x[0]))

                opt = torch.optim.Adam(list_of_params, lr=lr)
                for _ in range(0, args.epoch):
                    for img, y in iterator_sorted:
                        img = img.to(device)
                        y = y.to(device)
                        pred = maml(img)
                        opt.zero_grad()
                        loss = F.cross_entropy(pred, y)
                        loss.backward()
                        opt.step()

                logger.info("Result after one epoch for LR = %f", lr)
                correct = 0
                for img, target in iterator:
                    img = img.to(device)
                    target = target.to(device)
                    logits_q = maml(img, vars=None, bn_training=False, feature=False)
                    pred_q = (logits_q).argmax(dim=1)
                    correct += torch.eq(pred_q, target).sum().item() / len(img)
                logger.info(str(correct / len(iterator)))

                if (correct / len(iterator) > max_acc):
                    max_acc = correct / len(iterator)
                    max_lr = lr

                lr_list = [max_lr]
                results_mem_size[mem_size] = (max_acc, max_lr)
                logger.info("Final Max Result = %s", str(max_acc))
                writer.add_scalar('/finetune/best_' + str(aoo), max_acc, tot_class)

            final_results_all.append((tot_class, results_mem_size))
            print("A= ", results_mem_size)
            logger.info("Final results = %s", str(results_mem_size))
            my_experiment.results["Final Results"] = final_results_all
            my_experiment.store_json()
            print("FINAL RESULTS = ", final_results_all)
    writer.close()
def main(args):
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    my_experiment = experiment(args.name, args, "../results/", args.commit)
    writer = SummaryWriter(my_experiment.path + "tensorboard")
    logger = logging.getLogger('experiment')
    logger.setLevel(logging.INFO)

    total_clases = 10
    frozen_layers = []
    for temp in range(args.rln * 2):
        frozen_layers.append("vars." + str(temp))
    logger.info("Frozen layers = %s", " ".join(frozen_layers))

    final_results_all = []
    total_clases = [10, 50, 75, 100, 150, 200]
    for tot_class in total_clases:
        lr_list = [0.03, 0.01, 0.003, 0.001, 0.0003, 0.0001, 0.00003, 0.00001]
        for aoo in range(0, 20):
            keep = np.random.choice(list(range(200)), tot_class)
            if args.dataset == "omniglot":
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=True, background=False), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter_omni(dataset, False, classes=total_clases),
                    batch_size=1, shuffle=args.iid, num_workers=2)
                dataset = utils.remove_classes_omni(
                    df.DatasetFactory.get_dataset("omniglot", train=not args.test, background=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=False, num_workers=1)
            elif args.dataset == "CIFAR100":
                keep = np.random.choice(list(range(50, 100)), tot_class)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=True), keep)
                iterator_sorted = torch.utils.data.DataLoader(
                    utils.iterator_sorter(dataset, False, classes=tot_class),
                    batch_size=16, shuffle=args.iid, num_workers=2)
                dataset = utils.remove_classes(df.DatasetFactory.get_dataset(args.dataset, train=False), keep)
                iterator = torch.utils.data.DataLoader(dataset, batch_size=128, shuffle=False, num_workers=1)
            # sampler = ts.MNISTSampler(list(range(0, total_clases)), dataset)

            if torch.cuda.is_available():
                device = torch.device('cuda')
            else:
                device = torch.device('cpu')

            results_mem_size = {}
            for mem_size in [args.memory]:
                max_acc = -10
                max_lr = -10
                for lr in lr_list:
                    print(lr)
                    maml = torch.load(args.model, map_location='cpu')
                    if args.scratch:
                        config = mf.ModelFactory.get_model("na", args.dataset)
                        maml = learner.Learner(config)
                        # maml = MetaLearingClassification(args, config).to(device).net
                    maml = maml.to(device)

                    for name, param in maml.named_parameters():
                        param.learn = True
                    for name, param in maml.named_parameters():
                        if name in frozen_layers:
                            # logger.info("Freezing name %s", str(name))
                            param.learn = False
                        else:
                            if args.reset:
                                w = nn.Parameter(torch.ones_like(param))
                                if len(w.shape) > 1:
                                    torch.nn.init.kaiming_normal_(w)
                                else:
                                    w = nn.Parameter(torch.zeros_like(param))
                                param.data = w
                                param.learn = True

                    frozen_layers = []
                    for temp in range(args.rln * 2):
                        frozen_layers.append("vars." + str(temp))

                    torch.nn.init.kaiming_normal_(maml.parameters()[-2])
                    w = nn.Parameter(torch.zeros_like(maml.parameters()[-1]))
                    maml.parameters()[-1].data = w

                    for n, a in maml.named_parameters():
                        n = n.replace(".", "_")
                        if n == "vars_14":
                            w = nn.Parameter(torch.ones_like(a))
                            torch.nn.init.kaiming_normal_(w)
                            a.data = w
                        if n == "vars_15":
                            w = nn.Parameter(torch.zeros_like(a))
                            a.data = w

                    correct = 0
                    for img, target in iterator:
                        with torch.no_grad():
                            img = img.to(device)
                            target = target.to(device)
                            logits_q = maml(img, vars=None, bn_training=False, feature=False)
                            pred_q = (logits_q).argmax(dim=1)
                            correct += torch.eq(pred_q, target).sum().item() / len(img)
                    logger.info("Pre-epoch accuracy %s", str(correct / len(iterator)))

                    filter_list = ["vars.0", "vars.1", "vars.2", "vars.3", "vars.4", "vars.5"]
                    logger.info("Filter list = %s", ",".join(filter_list))
                    list_of_names = list(
                        map(lambda x: x[1],
                            list(filter(lambda x: x[0] not in filter_list, maml.named_parameters()))))
                    list_of_params = list(filter(lambda x: x.learn, maml.parameters()))
                    list_of_names = list(filter(lambda x: x[1].learn, maml.named_parameters()))
                    if args.scratch or args.no_freeze:
                        print("Empty filter list")
                        list_of_params = maml.parameters()
                    # for x in list_of_names: logger.info("Unfrozen layer = %s", str(x[0]))

                    opt = torch.optim.Adam(list_of_params, lr=lr)

                    import module.replay as rep
                    res_sampler = rep.ReservoirSampler(mem_size)
                    for _ in range(0, args.epoch):
                        for img, y in iterator_sorted:
                            if mem_size > 0:
                                res_sampler.update_buffer(zip(img, y))
                                res_sampler.update_observations(len(img))
                                img = img.to(device)
                                y = y.to(device)
                                img2, y2 = res_sampler.sample_buffer(16)
                                img2 = img2.to(device)
                                y2 = y2.to(device)
                                img = torch.cat([img, img2], dim=0)
                                y = torch.cat([y, y2], dim=0)
                            else:
                                img = img.to(device)
                                y = y.to(device)
                            pred = maml(img)
                            opt.zero_grad()
                            loss = F.cross_entropy(pred, y)
                            loss.backward()
                            opt.step()

                    logger.info("Result after one epoch for LR = %f", lr)
                    correct = 0
                    for img, target in iterator:
                        img = img.to(device)
                        target = target.to(device)
                        logits_q = maml(img, vars=None, bn_training=False, feature=False)
                        pred_q = (logits_q).argmax(dim=1)
                        correct += torch.eq(pred_q, target).sum().item() / len(img)
                    logger.info(str(correct / len(iterator)))

                    if (correct / len(iterator) > max_acc):
                        max_acc = correct / len(iterator)
                        max_lr = lr

                lr_list = [max_lr]
                results_mem_size[mem_size] = (max_acc, max_lr)
                logger.info("Final Max Result = %s", str(max_acc))
                writer.add_scalar('/finetune/best_' + str(aoo), max_acc, tot_class)

            final_results_all.append((tot_class, results_mem_size))
            print("A= ", results_mem_size)
            logger.info("Final results = %s", str(results_mem_size))
            my_experiment.results["Final Results"] = final_results_all
            my_experiment.store_json()
            print("FINAL RESULTS = ", final_results_all)
    writer.close()
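# The replay branch above relies on module.replay.ReservoirSampler. As a point of
# reference only, here is a hypothetical, minimal reservoir sampler (algorithm R);
# it is NOT the repo's implementation, and its interface differs (sample_buffer here
# returns a plain list of items rather than stacked image/label tensors).
import random

class SimpleReservoir:
    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []
        self.observed = 0

    def update_buffer(self, items):
        # Each streamed item ends up in the buffer with probability capacity / observed.
        for item in items:
            self.observed += 1
            if len(self.buffer) < self.capacity:
                self.buffer.append(item)
            else:
                j = random.randint(0, self.observed - 1)
                if j < self.capacity:
                    self.buffer[j] = item

    def sample_buffer(self, n):
        return random.sample(self.buffer, min(n, len(self.buffer)))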
def main():
    p = params.Parser()
    total_seeds = len(p.parse_known_args()[0].seed)
    _args = p.parse_args()
    rank = _args.rank
    print("All args = ", _args)
    args = utils.get_run(vars(_args), rank)

    utils.set_seed(args["seed"])
    if args["log_root"]:
        log_root = osp.join("./results", args["log_root"]) + "/"
    else:
        log_root = osp.join("./results/")
    my_experiment = experiment(args["name"], args, log_root, commit_changes=False,
                               rank=0, seed=args["seed"])
    writer = SummaryWriter(my_experiment.path + "tensorboard")

    gpu_to_use = rank % args["gpus"]
    if torch.cuda.is_available():
        device = torch.device("cuda:" + str(gpu_to_use))
        logger.info("Using gpu : %s", "cuda:" + str(gpu_to_use))
    else:
        device = torch.device("cpu")

    print("Train dataset")
    dataset = df.DatasetFactory.get_dataset(
        args["dataset"], background=True, train=True, path=args["path"], all=True,
        resize=args["resize"], augment=args["augment"], prefetch_gpu=args["prefetch_gpu"])
    print("Val dataset")
    val_dataset = df.DatasetFactory.get_dataset(
        args["dataset"], background=True, train=True, path=args["path"], all=True,
        resize=args["resize"], prefetch_gpu=args["prefetch_gpu"])

    # Split the first 15 images of each of the 664 training classes into the train
    # subset and the remainder into the validation subset.
    train_labels = np.arange(664)
    class_labels = np.array(np.asarray(torch.as_tensor(dataset.targets, device="cpu")))
    labels_mapping = {tl: (class_labels == tl).astype(int).nonzero()[0] for tl in train_labels}
    train_indices = [tl[:15] for tl in labels_mapping.values()]
    val_indices = [tl[15:] for tl in labels_mapping.values()]
    train_indices = [i for sublist in train_indices for i in sublist]
    val_indices = [i for sublist in val_indices for i in sublist]

    trainset = torch.utils.data.Subset(dataset, train_indices)
    print("Total samples:", len(class_labels))
    print("Train samples:", len(train_indices))
    print("Val samples:", len(val_indices))
    valset = torch.utils.data.Subset(val_dataset, val_indices)

    train_iterator = torch.utils.data.DataLoader(
        trainset, batch_size=64, shuffle=True, num_workers=0, drop_last=True)
    val_iterator = torch.utils.data.DataLoader(
        valset, batch_size=256, shuffle=True, num_workers=0, drop_last=False)

    logger.info("Args:")
    logger.info(str(vars(_args)))
    logger.info(str(args))

    config = mf.ModelFactory.get_model("na", args["dataset"], resize=args["resize"])
    maml = learner.Learner(config).to(device)
    for k, v in maml.named_parameters():
        print(k, v.requires_grad)

    # opt = torch.optim.Adam(maml.parameters(), lr=args["lr"])
    opt = torch.optim.SGD(maml.parameters(), lr=args["lr"], momentum=0.9, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(opt, milestones=_args.schedule, gamma=0.1)

    best_val_acc = 0
    histories = {
        "train": {"acc": [], "loss": [], "step": []},
        "val": {"acc": [], "loss": [], "step": []},
    }
    for e in range(args["epoch"]):
        correct = 0
        total_loss = 0.0
        maml.train()
        for img, y in tqdm(train_iterator):
            img = img.to(device)
            y = y.to(device)
            pred = maml(img)
            opt.zero_grad()
            loss = F.cross_entropy(pred, y.long())
            loss.backward()
            opt.step()
            correct += (pred.argmax(1) == y).float().mean()
            total_loss += loss
        correct = correct.item()
        total_loss = total_loss.item()
        scheduler.step()

        val_correct = 0
        val_total_loss = 0.0
        maml.eval()
        for img, y in tqdm(val_iterator):
            img = img.to(device)
            y = y.to(device)
            with torch.no_grad():
                pred = maml(img)
                loss = F.cross_entropy(pred, y.long())
            val_correct += (pred.argmax(1) == y).sum().float()
            val_total_loss += loss * y.size(0)
        val_correct = val_correct.item()
        val_total_loss = val_total_loss.item()

        val_acc = val_correct / len(val_indices)
        val_loss = val_total_loss / len(val_indices)
        train_correct = correct / len(train_iterator)
        train_loss = total_loss / len(train_iterator)

        logger.info("Accuracy at epoch %d = %s", e, str(train_correct))
        logger.info("Loss at epoch %d = %s", e, str(train_loss))
        logger.info("Val Accuracy at epoch %d = %s", e, str(val_acc))
        logger.info("Val Loss at epoch %d = %s", e, str(val_loss))

        histories["train"]["acc"].append(train_correct)
        histories["train"]["loss"].append(train_loss)
        histories["val"]["acc"].append(val_acc)
        histories["val"]["loss"].append(val_loss)
        histories["train"]["step"].append(e + 1)
        histories["val"]["step"].append(e + 1)

        writer.add_scalar("/train/accuracy", train_correct, e + 1)
        writer.add_scalar("/train/loss", train_loss, e + 1)
        writer.add_scalar("/val/accuracy", val_acc, e + 1)
        writer.add_scalar("/val/loss", val_loss, e + 1)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            logger.info(f"\nNew best validation accuracy: {str(best_val_acc)}\n")
            torch.save(maml, my_experiment.path + "model_best.net")

        with open(my_experiment.path + "results.json", "w") as f:
            json.dump(histories, f)
        torch.save(maml, my_experiment.path + "last_model.net")