def run_one_side(model, optimizer, preds_left, pt_batch, way, shot, query, n_cluster, dataset, right_encoder_side):
    """Run one alternation step: train the 'right' view on the 'left' view's cluster labels.

    encoder_side should be 'v1' or 'v2'. It should match whichever view is
    'right' here.

    Returns:
        (loss, preds_right, tst_preds_right): episode loss, cluster
        assignments for the training split, and assignments for the test split.
    """
    # Episodes are sampled using the *left* view's predictions as pseudo-labels.
    episode_sampler = CategoriesSampler(preds_left, pt_batch, way, shot + query)
    episodes = [
        [dataset[dataset.trn_idx[i]] for i in batch_indices]
        for batch_indices in episode_sampler
    ]
    total_loss = 0
    total_loss += do_pass(episodes, shot, way, query, (model, optimizer),
                          encoder=right_encoder_side)

    # Re-embed the training split with the freshly updated right encoder and
    # cluster it, seeding k-means from centroids induced by the left labels.
    train_embeddings, _ = transform(dataset, dataset.trn_idx, model,
                                    encoder=right_encoder_side)
    seed_centroids = calc_centroids(train_embeddings, preds_left, n_cluster)
    clusterer = sklearn.cluster.KMeans(n_clusters=n_cluster, init=seed_centroids,
                                       max_iter=10, verbose=0)
    preds_right = clusterer.fit_predict(train_embeddings)

    # Assign test-split items to the same clusters.
    test_embeddings, _ = transform(dataset, dataset.tst_idx, model,
                                   encoder=right_encoder_side)
    tst_preds_right = clusterer.predict(test_embeddings)

    return total_loss, preds_right, tst_preds_right
def __init__(self, args):
    """Set up the MAML meta-trainer: run directory, datasets/samplers, model, optimizer.

    Args:
        args: parsed command-line namespace; mutated here to add `save_path`.
    """
    # Set the folder to save the records and checkpoints.
    # makedirs(exist_ok=True) replaces the racy exists()/mkdir() pairs of the
    # original (another process could create the dir between the two calls).
    log_base_dir = './logs/'
    os.makedirs(log_base_dir, exist_ok=True)
    meta_base_dir = osp.join(log_base_dir, 'meta')
    os.makedirs(meta_base_dir, exist_ok=True)
    # Encode the experiment's hyper-parameters into the run directory name.
    save_path1 = '_'.join([args.dataset, args.model_type, 'maml'])
    save_path2 = ('shot' + str(args.shot) + '_way' + str(args.way)
                  + '_query' + str(args.train_query) + '_lr' + str(args.meta_lr)
                  + '_batch' + str(args.num_batch) + '_maxepoch' + str(args.max_epoch)
                  + '_baselr' + str(args.base_lr) + '_updatestep' + str(args.update_step)
                  + '_' + args.meta_label)
    # osp.join instead of manual '/' concatenation.
    args.save_path = osp.join(meta_base_dir, save_path1 + '_' + save_path2)
    ensure_path(args.save_path)
    self.args = args

    # Episodic samplers for train/val. The DataLoaders are intentionally left
    # as None here (the eager versions are kept commented out) — presumably
    # they are constructed lazily elsewhere; verify against the caller.
    self.trainset = Dataset('train', self.args, train_aug=True)
    self.train_sampler = CategoriesSampler(
        self.trainset.label, self.args.num_batch, self.args.way,
        self.args.shot + self.args.train_query)
    #self.train_loader = DataLoader(dataset=self.trainset, batch_sampler=self.train_sampler, num_workers=8, pin_memory=True)
    self.train_loader = None

    self.valset = Dataset('val', self.args)
    self.val_sampler = CategoriesSampler(
        self.valset.label, self.args.val_batch, self.args.way,
        self.args.shot + self.args.val_query)
    #self.val_loader = DataLoader(dataset=self.valset, batch_sampler=self.val_sampler, num_workers=8, pin_memory=True)
    self.val_loader = None

    self.model = MetaLearner(self.args).to(self.args.device)
    ##self.model.encoder.load_state_dict(torch.load(self.args.pre_load_path))
    self.model = torch.nn.DataParallel(self.model)
    print(self.model)
    self.optimizer = optim.SGD(self.model.parameters(), lr=self.args.meta_lr,
                               momentum=0.9, weight_decay=args.weight_decay)  # or adam
if __name__ == '__main__':
    # Evaluation script: episodic test of a ProtoNet-style model on MiniImageNet.
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', default='0')
    parser.add_argument('--load', default='./save/proto-1/max-acc.pth')
    parser.add_argument('--batch', type=int, default=2000)
    parser.add_argument('--way', type=int, default=5)
    parser.add_argument('--shot', type=int, default=1)
    parser.add_argument('--query', type=int, default=30)
    parser.add_argument('--folds', type=int, default=2)
    args = parser.parse_args()
    pprint(vars(args))

    set_gpu(args.gpu)

    dataset = MiniImageNet('test')
    sampler = CategoriesSampler(dataset.label, args.batch, args.way,
                                args.folds * args.shot + args.query)
    loader = DataLoader(dataset, batch_sampler=sampler,
                        num_workers=8, pin_memory=True)

    model = Convnet().cuda()
    model.load_state_dict(torch.load(args.load))
    model.eval()

    ave_acc = Averager()

    # BUG FIX: this parser defines --way, not --train-way, so the original
    # `args.train_way` raised AttributeError at runtime. Use args.way.
    s_label = torch.arange(args.way).repeat(args.shot).view(args.shot * args.way)
    # NOTE(review): 20 one-hot slots hard-coded — presumably the test-split
    # class count; confirm against the dataset.
    s_onehot = torch.zeros(s_label.size(0), 20)
    s_onehot = s_onehot.scatter_(1, s_label.unsqueeze(dim=1), 1).cuda()

    for i, batch in enumerate(loader, 1):
        data, _ = [_.cuda() for _ in batch]
        k = args.way * args.shot
# Episode hyper-parameters for meta-train / meta-test.
parser.add_argument('--shot', type=int, default=1)
parser.add_argument('--query', type=int, default=15)
parser.add_argument('--train-way', type=int, default=5)
parser.add_argument('--test-way', type=int, default=5)
parser.add_argument('--save-path', default='./save/proto-1')
parser.add_argument('--gpu', default='0')
parser.add_argument('--base_model', default='resnet18')
parser.add_argument('--use_CTM', type=int, default=1)
args = parser.parse_args()
pprint(vars(args))

set_gpu(args.gpu)
ensure_path(args.save_path)

# 100 training episodes per epoch, each with train_way classes and
# shot support + query examples per class.
trainset = MiniImageNet('train')
train_sampler = CategoriesSampler(trainset.label, 100, args.train_way, args.shot + args.query)
train_loader = DataLoader(dataset=trainset, batch_sampler=train_sampler, num_workers=8, pin_memory=True)

# 400 validation episodes per epoch with test_way classes.
valset = MiniImageNet('val')
val_sampler = CategoriesSampler(valset.label, 400, args.test_way, args.shot + args.query)
val_loader = DataLoader(dataset=valset, batch_sampler=val_sampler, num_workers=8, pin_memory=True)

# model = Convnet().cuda()
# model = CTM_apadter(model, args).cuda()
def main(argv):
    """Entry point for EPIC-Kitchens TSN experiments.

    Depending on the config: builds per-algorithm training dataloaders
    (ERM/MTGA, IRM per-participant, or FSL episodic verb/noun loaders), then
    either trains, runs prediction on seen/unseen splits, or extracts features.
    """
    # Read arguments passed
    (opts, args) = parser.parse_args(argv)
    # Reading config
    cfg = config(opts.config, debugging=False, additionalText="training_ERM_seen_resnet18")
    # Use CUDA
    # os.environ['CUDA_VISIBLE_DEVICES'] = 1
    use_cuda = torch.cuda.is_available()
    # If the manual seed is not yet choosen
    if cfg.manualSeed == None:
        cfg.manualSeed = 1
    # Set seed for reproducibility for CPU and GPU randomizaton process
    random.seed(cfg.manualSeed)
    torch.manual_seed(cfg.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(cfg.manualSeed)

    dataloader_train = None
    if hasattr(cfg, "train_mode"):
        # Preprocessing (transformation) instantiation for training groupwise
        transformation_train = torchvision.transforms.Compose([
            transforms.GroupMultiScaleCrop(224, [1, 0.875, 0.75, 0.66]),
            transforms.GroupRandomHorizontalFlip(is_flow=False),
            transforms.Stack(),  # concatenation of images
            transforms.ToTorchFormatTensor(),  # to torch
            transforms.GroupNormalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225]),  # Normalization
        ])
        if cfg.algo == "ERM" or cfg.algo == "MTGA":
            # Loading training Dataset with N segment for TSN
            EPICdata_train = EPIC(
                mode=cfg.train_mode,
                cfg=cfg,
                transforms=transformation_train,
            )
            # Creating validation dataloader
            # batch size = 16, num_workers = 8 are best fit for 12 Gb GPU and >= 16 Gb RAM
            dataloader_train = DataLoader(
                EPICdata_train,
                batch_size=cfg.train_batch_size,
                shuffle=True,
                num_workers=cfg.num_worker_train,
                pin_memory=True,
            )
        elif cfg.algo == "IRM":
            # IRM treats each participant as an environment: one loader per
            # participant_id that actually has data.
            df = pd.read_csv(cfg.anno_path)
            p_ids = list(set(df["participant_id"].tolist()))
            dataloader_train = []
            for p_id in p_ids:
                tmp_dataset = EPIC(
                    mode=cfg.train_mode,
                    cfg=cfg,
                    transforms=transformation_train,
                    participant_id=p_id,
                )
                if tmp_dataset.haveData:
                    dataloader_train.append(
                        DataLoader(
                            tmp_dataset,
                            batch_size=cfg.train_batch_size,
                            shuffle=True,
                            num_workers=cfg.num_worker_train,
                            pin_memory=True,
                        ))
        elif cfg.algo == "FSL":
            # Few-shot setting: separate episodic loaders keyed by task.
            dataloader_train = {}
            # Loading training Dataset with N segment for TSN
            EPICdata_train_verb = EPIC(mode=cfg.train_mode, cfg=cfg, transforms=transformation_train)
            sampler = CategoriesSampler(EPICdata_train_verb.verb_label, 200, cfg.way, cfg.shot + cfg.query)
            dataloader_train["verb"] = DataLoader(
                dataset=EPICdata_train_verb,
                batch_sampler=sampler,
                num_workers=cfg.num_worker_train,
                pin_memory=True,
            )
            EPICdata_train_noun = EPIC(mode=cfg.train_mode, cfg=cfg, transforms=transformation_train)
            sampler = CategoriesSampler(EPICdata_train_noun.noun_label, 200, cfg.way, cfg.shot + cfg.query)
            dataloader_train["noun"] = DataLoader(
                dataset=EPICdata_train_noun,
                batch_sampler=sampler,
                num_workers=cfg.num_worker_train,
                pin_memory=True,
            )

    dataloader_val = None
    if hasattr(cfg, "val_mode") and hasattr(cfg, "train_mode"):
        # Preprocessing (transformation) instantiation for validation groupwise
        transformation_val = torchvision.transforms.Compose([
            transforms.GroupOverSample(
                224, 256),  # group sampling from images using multiple crops
            transforms.Stack(),  # concatenation of images
            transforms.ToTorchFormatTensor(),  # to torch
            transforms.GroupNormalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225]),  # Normalization
        ])
        # Loading validation Dataset with N segment for TSN
        EPICdata_val = EPIC(
            mode=cfg.val_mode,
            cfg=cfg,
            transforms=transformation_val,
        )
        # Creating validation dataloader
        dataloader_val = DataLoader(
            EPICdata_val,
            batch_size=cfg.val_batch_size,
            shuffle=False,
            num_workers=cfg.num_worker_val,
            pin_memory=True,
        )

    # Loading Models (Resnet50)
    model = EPICModel(config=cfg)

    if not cfg.feature_extraction:
        if hasattr(cfg, "train_mode"):
            # --- Training path ---
            policies = model.get_optim_policies()
            # for group in policies:
            #     print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            #         group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))
            # Optimizer
            # initial lr = 0.01
            # momentum = 0.9
            # weight_decay = 5e-4
            optimizer = torch.optim.SGD(policies,
                                        lr=cfg.lr,
                                        momentum=cfg.momentum,
                                        weight_decay=cfg.weight_decay)
            # Loss function (CrossEntropy)
            # IRM needs per-sample losses (reduction="none") for its penalty term.
            if cfg.algo == "IRM":
                criterion = torch.nn.CrossEntropyLoss(reduction="none")
            elif cfg.algo == "ERM" or cfg.algo == "MTGA":
                criterion = torch.nn.CrossEntropyLoss()
            elif cfg.algo == "FSL":
                criterion = torch.nn.CrossEntropyLoss()
            # If multiple GPUs are available (and bridged)
            # if torch.cuda.device_count() > 1:
            #     print("Let's use", torch.cuda.device_count(), "GPUs!")
            #     model = torch.nn.DataParallel(model)
            # Convert model and loss function to GPU if available for faster computation
            if use_cuda:
                model = model.cuda()
                criterion = criterion.cuda()
            # Loading Trainer
            experiment = Experiment(
                cfg=cfg,
                model=model,
                loss=criterion,
                optimizer=optimizer,
                use_cuda=use_cuda,
                data_train=dataloader_train,
                data_val=dataloader_val,
                debugging=False,
            )
            # Train the model
            experiment.train()
        else:
            # --- Prediction path (no train_mode in config) ---
            # Load Model Checkpoint
            checkpoint = torch.load(cfg.checkpoint_filename_final)
            model.load_state_dict(checkpoint["model_state_dict"])
            if use_cuda:
                model = model.cuda()
            transformation = torchvision.transforms.Compose([
                transforms.GroupOverSample(
                    224, 256),  # group sampling from images using multiple crops
                transforms.Stack(),  # concatenation of images
                transforms.ToTorchFormatTensor(),  # to torch
                transforms.GroupNormalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225]),  # Normalization
            ])
            # Loading Predictor
            experiment = Experiment(cfg=cfg, model=model, use_cuda=use_cuda, debugging=False)
            filenames = ["seen.json", "unseen.json"]
            for filename in filenames:
                # test_mode is the filename stem ("seen" / "unseen").
                EPICdata = EPIC(
                    mode=cfg.val_mode,
                    cfg=cfg,
                    transforms=transformation,
                    test_mode=filename[:-5],
                )
                data_loader = torch.utils.data.DataLoader(EPICdata,
                                                          batch_size=8,
                                                          shuffle=False,
                                                          num_workers=4,
                                                          pin_memory=True)
                experiment.data_val = data_loader
                experiment.predict(filename)
    else:
        # --- Feature-extraction path ---
        # Load Model Checkpoint
        checkpoint = torch.load(cfg.checkpoint_filename_final)
        model.load_state_dict(checkpoint["model_state_dict"])
        if use_cuda:
            model = model.cuda()
        model.eval()
        transformation = torchvision.transforms.Compose([
            transforms.GroupOverSample(
                224, 256),  # group sampling from images using multiple crops
            transforms.Stack(),  # concatenation of images
            transforms.ToTorchFormatTensor(),  # to torch
            transforms.GroupNormalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225]),  # Normalization
        ])
        # Loading Predictor
        experiment = Experiment(cfg=cfg, model=model, use_cuda=use_cuda, debugging=False)
        with torch.no_grad():
            modes = ["train-unseen", "val-unseen"]
            for mode in modes:
                # NOTE(review): rows are [2048 feature dims + verb_id + noun_id]
                # judging by the appends below; the first np.empty row is
                # uninitialized garbage that stays in the saved array — confirm
                # whether downstream consumers skip row 0.
                data = np.empty((1, 2050))
                EPICdata = EPIC(
                    mode=mode,
                    cfg=cfg,
                    transforms=transformation,
                )
                data_loader = torch.utils.data.DataLoader(EPICdata,
                                                          batch_size=1,
                                                          shuffle=False,
                                                          num_workers=0,
                                                          pin_memory=True)
                for i, sample_batch in enumerate(data_loader):
                    output = experiment.extract_features(sample_batch)
                    verb_ann = sample_batch["verb_id"].data.item()
                    noun_ann = sample_batch["noun_id"].data.item()
                    # Average features over crops, then append both labels.
                    out = np.append(np.mean(output, 0), verb_ann)
                    out = np.append(out, noun_ann)
                    data = np.concatenate((data, np.expand_dims(out, 0)), 0)
                np.save(mode, data)
def eval(self):
    """The function for the meta-eval phase.

    Loads the training log and the best (or explicitly given) checkpoint,
    runs episodic meta-test batches, and writes accuracy + confidence
    interval back into a per-index test log.
    """
    # Load the logs. trlog.json was written with json.dump (see bottom of
    # this method) and JSON is a strict subset of YAML, so safe_load is
    # sufficient; it also avoids the unsafe/deprecated bare yaml.load.
    with open(osp.join(self.args.save_path, 'trlog.json'), 'r') as f:
        trlog = yaml.safe_load(f)

    # Load meta-test set
    test_set = Dataset('test', self.args, train_aug=False)
    sampler = CategoriesSampler(test_set.label, self.args.test_batch,
                                self.args.way,
                                self.args.shot + self.args.val_query)
    loader = DataLoader(test_set, batch_sampler=sampler,
                        num_workers=self.args.num_work, pin_memory=True)
    test_data = self.inf_get(loader)

    # Load model for meta-test phase
    if self.args.eval_weights is not None:
        self.model.load_state_dict(
            torch.load(self.args.eval_weights)['params'])
    else:
        self.model.load_state_dict(
            torch.load(osp.join(self.args.save_path, 'max_acc' + '.pth'))['params'])
    # NOTE(review): model.eval() was deliberately left commented out in the
    # original (marked with "?????") — presumably because MAML-style
    # inner-loop adaptation needs train-mode statistics; confirm.
    #self.model.eval()

    # Set accuracy averager
    ave_acc = Averager()
    acc_log = []

    # Generate episode labels: [0..way) repeated per shot / per query sample.
    label_shot = torch.arange(self.args.way).repeat(self.args.shot).to(
        self.args.device).type(torch.long)
    label_query = torch.arange(self.args.way).repeat(
        self.args.train_query).to(self.args.device).type(torch.long)

    for i in tqdm.tqdm(range(self.args.test_batch // self.args.meta_batch)):
        # Stack meta_batch episodes into one forward pass.
        data_list = []
        label_shot_list = []
        for _ in range(self.args.meta_batch):
            data_list.append(test_data.__next__().to(self.args.device))
            label_shot_list.append(label_shot)
        data_list = torch.stack(data_list, dim=0)
        label_shot_list = torch.stack(label_shot_list, dim=0)
        out = self.model(data_list, label_shot_list).detach()
        for inner_id in range(self.args.meta_batch):
            cur_acc = count_acc(out[inner_id], label_query)
            acc_log.append(cur_acc)
            ave_acc.add(cur_acc)

    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented replacement.
    acc_np = np.array(acc_log, dtype=float)
    m, pm = compute_confidence_interval(acc_np)
    trlog['test_acc'] = [m, pm]
    cur_test_save_name = 'trlog_test_' + str(self.args.index) + '.json'
    with open(osp.join(self.args.save_path, cur_test_save_name), 'w') as f:
        json.dump(trlog, f)
    print('Val Best Epoch {}, Acc {:.4f}, Test Acc {:.4f}'.format(
        trlog['max_acc_epoch'], trlog['max_acc'], ave_acc.item()))
    print('Test Acc {:.4f} + {:.4f}'.format(m, pm))
args = parser.parse_args() pprint(vars(args)) set_gpu(args.gpu) ensure_path(args.save_path) writer = SummaryWriter() # noise_sample = torch.distributions.normal(loc=0, scale=.02) noise = torch.distributions.Normal(loc=0, scale=.02) # trainset = MiniImageNet('train') # train_sampler = CategoriesSampler(trainset.label, 100, # args.train_way, 2*args.shot + args.query) # train_loader = DataLoader(dataset=trainset, batch_sampler=train_sampler, # num_workers=args.num_workers, pin_memory=True) ssdata = SSMiniImageNet() ss_sampler = CategoriesSampler(ssdata.slabel, 100, args.train_way, 2 * args.shot + args.query) ss_loader = DataLoader(dataset=ssdata, batch_sampler=ss_sampler, num_workers=args.num_workers, pin_memory=True) valset = MiniImageNet('val') val_sampler = CategoriesSampler(valset.label, 400, args.test_way, args.shot + args.query) val_loader = DataLoader(dataset=valset, batch_sampler=val_sampler, num_workers=args.num_workers, pin_memory=True) model = Convnet().cuda() if args.load is not 'na':
parser.add_argument('--max-epoch', type=int, default=200)
parser.add_argument('--save-epoch', type=int, default=20)
parser.add_argument('--shot', type=int, default=1)
parser.add_argument('--query', type=int, default=15)
parser.add_argument('--train-way', type=int, default=30)
parser.add_argument('--test-way', type=int, default=5)
parser.add_argument('--save-path', default='./save/proto-1')
parser.add_argument('--gpu', default='0')
parser.add_argument('--folds', type=int, default=2)
args = parser.parse_args()
pprint(vars(args))

#set_gpu(args.gpu)
ensure_path(args.save_path)

# NOTE(review): args.batch, args.way and args.num_workers are not defined by
# the add_argument calls visible here — presumably they are registered on
# `parser` earlier in this file; verify, otherwise this crashes at parse time.
# Test episodes draw folds*shot support samples so they can be cross-folded.
testset = MiniImageNet('test')
test_sampler = CategoriesSampler(testset.label, args.batch, args.way,
                                 args.folds * args.shot + args.query)
test_loader = DataLoader(testset, batch_sampler=test_sampler,
                         num_workers=args.num_workers, pin_memory=True)

# 100 training episodes per epoch with the (wider) train_way.
trainset = MiniImageNet('train')
train_sampler = CategoriesSampler(trainset.label, 100, args.train_way,
                                  args.shot + args.query)
train_loader = DataLoader(dataset=trainset, batch_sampler=train_sampler,
                          num_workers=args.num_workers, pin_memory=True)

# 400 validation episodes per epoch with test_way.
valset = MiniImageNet('val')
val_sampler = CategoriesSampler(valset.label, 400, args.test_way,
                                args.shot + args.query)
val_loader = DataLoader(dataset=valset, batch_sampler=val_sampler,
                        num_workers=8, pin_memory=True)
    # (continuation of an add_argument call opened before this chunk)
    default='./save/subspace-5w5sdiscriminative/max-acc.pth')
parser.add_argument(
    '--data-path',
    default='/scratch1/sim314/flush1/miniimagenet/ctm_images/')
parser.add_argument('--gpu', default='0')
#parser.add_argument('--subspace-dim', type=int, default=4)
parser.add_argument('--lamb', type=float, default=5)
args = parser.parse_args()
# Subspace dimensionality is tied to the support size (shot - 1).
args.subspace_dim = args.shot - 1
pprint(vars(args))
set_gpu(args.gpu)

testset = MiniImageNet('test', args.data_path)
test_sampler = CategoriesSampler(testset.label, 600, args.test_way,
                                 args.shot + args.query)
val_loader = DataLoader(dataset=testset, batch_sampler=test_sampler,
                        num_workers=8, pin_memory=True)

model = ConvNet().cuda()
model.load_state_dict(torch.load(args.save_path))
# NOTE(review): for shot == 1, subspace_dim is 0 at this point and is only
# reset to 1 *after* Subspace_Projection has been constructed with
# num_dim=0 — confirm whether Subspace_Projection reads num_dim lazily.
projection_pro = Subspace_Projection(num_dim=args.subspace_dim)
if args.shot == 1:
    # 1-shot episodes duplicate the support sample to get a 2-point subspace.
    shot_num = 2
    args.subspace_dim = 1
else:
    shot_num = args.shot
def main():
    """Few-shot NER on OntoNotes: build vocab + episodic samplers, train an
    LSTM encoder with prototypical episodes, track best-val test accuracy."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data-path', type=str, default='data/ontonotes/')
    parser.add_argument('--shot', type=int, default=10)
    parser.add_argument('--query', type=int, default=15)
    parser.add_argument('--train-way', type=int, default=10)
    parser.add_argument('--test_way', type=int, default=2)
    parser.add_argument('--glove-path', type=str, default='./data/glove.6B.100d.txt')
    parser.add_argument('--store-glove', action='store_true',
                        help="store all pretrained Glove vectors")
    parser.add_argument('--save-path', type=str, default='./save/proto-1')
    parser.add_argument('--test-class', type=str, default='I-PERSON')
    parser.add_argument('--val-class', type=str, default='I-FAC')
    args = parser.parse_args()
    pprint(vars(args))

    ensure_path(args.save_path)

    fname = args.data_path
    train = read_data(fname + "onto.train.bio")
    dev = read_data(fname + "onto.dev.bio")
    test = read_data(fname + "onto.test.bio")

    # Build token/tag vocabularies over all three splits.
    id_to_token = [PAD, UNK]
    token_to_id = {PAD: 0, UNK: 1}
    id_to_tag = ["O"]
    tag_to_id = {"O": 0}
    for tokens, tags in train + dev + test:
        for token in tokens:
            token = simplify_token(token)
            '''for char in token:
                if char not in char_to_id:
                    char_to_id[char] = len(char_to_id)
                    id_to_char.append(char)'''
            token = token.lower(
            )  # use lowercased tokens but original characters
            if token not in token_to_id:
                token_to_id[token] = len(token_to_id)
                id_to_token.append(token)
        for tag in tags:
            if tag not in tag_to_id:
                tag_to_id[tag] = len(tag_to_id)
                id_to_tag.append(tag)
    print(tag_to_id)

    #Fill in the val classes and test classes
    trainset = SeqDataset(train, token_to_id, id_to_token, tag_to_id, id_to_tag,
                          'train', args.test_class, args.val_class)
    print(set(trainset.data_label))
    train_sampler = CategoriesSampler(trainset.data_label, trainset.data_sent_id,
                                      N_TRAIN_BATCHES, args.train_way, args.shot, args.query)
    valset = SeqDataset(dev, token_to_id, id_to_token, tag_to_id, id_to_tag,
                        'dev', args.test_class, args.val_class)
    val_sampler = CategoriesSampler(valset.data_label, valset.data_sent_id,
                                    N_VAL_BATCHES, args.test_way, args.shot, args.query, True)
    print(set(valset.data_label))
    # Val/test episodes are materialized once (fixed for all epochs);
    # the sampler yields (item_indices, counter_indices) pairs.
    val_batches = [[valset[idx] for idx in indices] for indices, _ in val_sampler]
    val_counter = [[idx for idx in indices] for _, indices in val_sampler]
    testset = SeqDataset(test, token_to_id, id_to_token, tag_to_id, id_to_tag,
                         'test', args.test_class, args.val_class)
    test_sampler = CategoriesSampler(testset.data_label, testset.data_sent_id,
                                     N_TEST_BATCHES, args.test_way, args.shot, args.query, True)
    test_batches = [[testset[idx] for idx in indices] for indices, _ in test_sampler]
    test_counter = [[idx for idx in indices] for _, indices in test_sampler]
    print(set(testset.data_label))
    #id_to_token, token_to_id = trainset.id_to_token, trainset.token_to_id

    # Load GloVe vectors; optionally extend the vocab with every GloVe word.
    pretrained = {}
    word_emb_size = 0
    for line in open(args.glove_path):
        parts = line.strip().split()
        word = parts[0]
        vector = [float(v) for v in parts[1:]]
        pretrained[word] = vector
        word_emb_size = len(vector)
        if args.store_glove and word not in token_to_id:
            token_to_id[word] = len(token_to_id)
            id_to_token.append(word)
    pretrained_list = []
    scale = np.sqrt(3.0 / word_emb_size)
    for word in id_to_token:
        #apply lower() because all glove vectors are for lowercase words
        if word.lower() in pretrained:
            pretrained_list.append(np.array(pretrained[word.lower()]))
        else:
            # OOV words get a uniform random vector at GloVe scale.
            random_vector = np.random.uniform(-scale, scale, [word_emb_size])
            pretrained_list.append(random_vector)

    model = Encoder(pretrained_list, LSTM_HIDDEN, LSTM_LAYER, DROPOUT_RATE)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Training log written out at the end.
    trlog = {}
    trlog['args'] = vars(args)
    trlog['train_loss'] = []
    trlog['train_acc'] = []
    trlog['val_acc'] = []
    trlog['max_acc'] = 0.0

    timer = Timer()  # NOTE(review): created but never read in this function
    expressions = (model, optimizer)
    test_acc = 0.
    for epoch in range(1, EPOCHS + 1):
        model.train()
        # Training episodes are re-drawn from the sampler each epoch.
        train_batches = [[trainset[idx] for idx in indices] for indices, _ in train_sampler]
        train_counter = [[idx for idx in indices] for _, indices in train_sampler]
        #exit()
        # NOTE(review): the train call passes (True, False, test_class) where
        # the eval calls pass (False, False/True, id_to_token, id_to_tag,
        # test_class) — do_pass apparently takes *args; confirm its signature.
        trn_loss, trn_acc = do_pass(train_batches, train_counter, args.shot,
                                    args.train_way, args.query, expressions,
                                    True, False, args.test_class)
        trn_loss = trn_loss.item()
        trn_acc = trn_acc.item()
        model.eval()
        _, val_acc = do_pass(val_batches, val_counter, args.shot, args.test_way,
                             args.query, expressions, False, False,
                             id_to_token, id_to_tag, args.test_class)
        val_acc = val_acc.item()
        print('epoch {}, train, loss={:.4f} acc={:.4f}, val acc={:.4f}'.format(
            epoch, trn_loss, trn_acc, val_acc))
        if val_acc > trlog['max_acc']:
            # New best on val: evaluate on test and checkpoint.
            trlog['max_acc'] = val_acc
            _, tst_acc = do_pass(test_batches, test_counter, args.shot,
                                 args.test_way, args.query, expressions,
                                 False, True, id_to_token, id_to_tag, args.test_class)
            test_acc = tst_acc.item()
            torch.save(
                {
                    'pretrained_list': pretrained_list,
                    'token_to_id': token_to_id,
                    'model_state_dict': model.state_dict()
                }, os.path.join(args.save_path, 'best.model'))
        trlog['train_loss'].append(trn_loss)
        trlog['train_acc'].append(trn_acc)
        trlog['val_acc'].append(val_acc)
        trlog['test_acc'] = test_acc
    print('Final results for test class\t' + args.test_class +
          ' \t validation class\t' + args.val_class)
    print('Final results, val acc={:.4f}, test acc={:.4f}'.format(
        trlog['max_acc'], test_acc))
    torch.save(trlog, os.path.join(args.save_path, 'trlog'))
def main():
    """Multi-view intent clustering: optionally pretrain (AE/QT), then
    alternate prototypical training between views v1/v2, each view's k-means
    assignments serving as pseudo-labels for the other."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--data-path', type=str, default='./data/airlines_processed.csv')
    parser.add_argument('--glove-path', type=str, default='./data/glove.840B.300d.txt')
    parser.add_argument('--pre-model', type=str, choices=['ae', 'qt'], default='qt')
    parser.add_argument('--pre-epoch', type=int, default=0)
    parser.add_argument('--pt-batch', type=int, default=100)
    parser.add_argument('--model-path', type=str, help='path of pretrained model to load')
    parser.add_argument('--way', type=int, default=5)
    parser.add_argument('--num-epochs', type=int, default=100)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--save-model-path', type=str)
    parser.add_argument('--view1-col', type=str, default='view1')
    parser.add_argument('--view2-col', type=str, default='view2')
    parser.add_argument('--label-col', type=str, default='tag')
    args = parser.parse_args()

    np.random.seed(args.seed)

    print('loading dataset')
    dataset = Dataset(args.data_path, view1_col=args.view1_col,
                      view2_col=args.view2_col, label_col=args.label_col)
    # Label 0 is reserved (filtered out below), hence the -1.
    n_cluster = len(dataset.id_to_label) - 1
    print ("num of class = %d" %n_cluster)

    if args.model_path is not None:
        id_to_token, token_to_id, vocab_size, word_emb_size, model = multiview_encoders.load_model(args.model_path)
        print('loaded model')
    else:
        id_to_token, token_to_id, vocab_size, word_emb_size, model = multiview_encoders.create_model_from_embeddings(
            args.glove_path, dataset.id_to_token, dataset.token_to_id)
        print('created randomly initialized model')
    print('vocab_size', vocab_size)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    expressions = (model, optimizer)

    # --- Optional pretraining (autoencoder or quick-thought) ---
    pre_acc, pre_state = 0., None
    pretrain_method = {
        'ae': pretrain.pretrain_ae,
        'qt': pretrain.pretrain_qt,
    }[args.pre_model]
    for epoch in range(1, args.pre_epoch + 1):
        model.train()
        perm_idx = np.random.permutation(dataset.trn_idx)
        trn_loss, _ = pretrain_method(dataset, perm_idx, expressions, train=True)
        model.eval()
        _, tst_acc = pretrain_method(dataset, dataset.tst_idx, expressions, train=False)
        if tst_acc > pre_acc:
            # Keep the best-so-far weights for later restoration.
            pre_state = copy.deepcopy(model.state_dict())
            pre_acc = tst_acc
        print('{} epoch {}, train_loss={:.4f} test_acc={:.4f}'.format(
            datetime.datetime.now(), epoch, trn_loss, tst_acc))
        if args.save_model_path is not None:
            save_model_path = f'{args.save_model_path}_pre_e{epoch}.dat'
            state = {
                'model_state': model.state_dict(),
                'id_to_token': dataset.id_to_token,
                'word_emb_size': word_emb_size
            }
            with open(expand(save_model_path), 'wb') as f:
                torch.save(state, f)
            print('saved model to ', save_model_path)
            save_model_path = f'{args.save_model_path}_pre_best_e{epoch}.dat'
            state = {
                'model_state': pre_state,
                'id_to_token': dataset.id_to_token,
                'word_emb_size': word_emb_size
            }
            with open(expand(save_model_path), 'wb') as f:
                torch.save(state, f)
            print('saved model to ', save_model_path)
    if args.pre_epoch > 0:
        # load best state
        model.load_state_dict(pre_state)
        print('loaded best state')
        # deepcopy pretrained views into v1 and/or view2
        {
            'ae': pretrain.after_pretrain_ae,
            'qt': pretrain.after_pretrain_qt,
        }[args.pre_model](model)
        # reinitialiate optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
        expressions = (model, optimizer)
        print('applied post-pretraining')

    # --- Initial clustering of view-1 embeddings ---
    kmeans = sklearn.cluster.KMeans(n_clusters=n_cluster, max_iter=300,
                                    verbose=0, random_state=0)
    latent_z1s, golds = transform(dataset, dataset.trn_idx, model, encoder='v1')
    pred1s = kmeans.fit_predict(latent_z1s)
    # Evaluate only against labeled items (gold label 0 means unlabeled).
    lgolds, lpreds = [], []
    for g, p in zip(golds, list(pred1s)):
        if g > 0:
            lgolds.append(g)
            lpreds.append(p)
    prec, rec, f1 = cluster_metrics.calc_prec_rec_f1(
        gnd_assignments=torch.LongTensor(lgolds).to(device),
        pred_assignments=torch.LongTensor(lpreds).to(device))
    acc = cluster_metrics.calc_ACC(torch.LongTensor(lpreds).to(device),
                                   torch.LongTensor(lgolds).to(device))
    silhouette, davies_bouldin = sklearn.metrics.silhouette_score(
        latent_z1s, pred1s, metric='euclidean'), sklearn.metrics.davies_bouldin_score(latent_z1s, pred1s)
    print('{} pretrain: eval prec={:.4f} rec={:.4f} f1={:.4f} acc={:.4f} sil={:.4f}, db={:.4f}'.format(
        datetime.datetime.now(), prec, rec, f1, acc, silhouette, davies_bouldin))

    # --- Alternating multi-view training ---
    perm_idx = dataset.trn_idx
    # BUG FIX: the original initialized a misspelled `preds2_perm_idx`, so
    # with --num-epochs < 1 the save block below raised NameError on
    # `pred2s_perm_idx`. Initialize the name that is actually used.
    pred2s, centroids1, centroids2, pred1s_perm_idx, pred2s_perm_idx = None, None, None, None, None
    for epoch in range(1, args.num_epochs + 1):
        trn_loss = 0.
        shot, way, query = 5, args.way, 15

        # Train v2 on episodes pseudo-labeled by v1's clusters.
        sampler1 = CategoriesSampler(pred1s, args.pt_batch, way, shot+query)
        train1_batches = [[dataset[perm_idx[idx]] for idx in indices] for indices in sampler1]
        trn_loss += do_pass(train1_batches, shot, way, query, expressions, encoder='v2')

        # Re-cluster v2 embeddings, seeded from v1-induced centroids.
        latent_z2s, _ = transform(dataset, perm_idx, model, encoder='v2')
        centroids2 = calc_centroids(latent_z2s, pred1s, n_cluster)
        kmeans2 = sklearn.cluster.KMeans(n_clusters=n_cluster, init=centroids2,
                                         max_iter=10, verbose=0)
        pred2s = kmeans2.fit_predict(latent_z2s)
        pred2s_perm_idx = perm_idx.copy()
        tst_latent_z2s, _ = transform(dataset, dataset.tst_idx, model, encoder='v2')
        tst_pred2s = kmeans2.predict(tst_latent_z2s)

        # Train v1 on episodes pseudo-labeled by v2's clusters.
        sampler2 = CategoriesSampler(pred2s, args.pt_batch, way, shot+query)
        train2_batches = [[dataset[perm_idx[idx]] for idx in indices] for indices in sampler2]
        trn_loss += do_pass(train2_batches, shot, way, query, expressions, encoder='v1')

        # Re-cluster v1 embeddings over a fresh permutation.
        perm_idx = np.random.permutation(dataset.trn_idx)
        latent_z1s, golds = transform(dataset, perm_idx, model, encoder='v1')
        centroids1 = calc_centroids(latent_z1s, pred2s, n_cluster)
        kmeans1 = sklearn.cluster.KMeans(n_clusters=n_cluster, init=centroids1,
                                         max_iter=10, verbose=0)
        pred1s = kmeans1.fit_predict(latent_z1s)
        pred1s_perm_idx = perm_idx.copy()
        tst_latent_z1s, _ = transform(dataset, dataset.tst_idx, model, encoder='v1')
        tst_pred1s = kmeans1.predict(tst_latent_z1s)

        # Cross-view agreement on the test split.
        f1 = cluster_metrics.calc_f1(
            gnd_assignments=torch.LongTensor(tst_pred1s).to(device),
            pred_assignments=torch.LongTensor(tst_pred2s).to(device))
        acc = cluster_metrics.calc_ACC(torch.LongTensor(tst_pred2s).to(device),
                                       torch.LongTensor(tst_pred1s).to(device))
        print('TEST f1={:.4f} acc={:.4f}'.format(f1, acc))

        # Supervised metrics against gold labels (labeled items only).
        lgolds, lpreds = [], []
        for g, p in zip(golds, list(pred1s)):
            if g > 0:
                lgolds.append(g)
                lpreds.append(p)
        prec, rec, f1 = cluster_metrics.calc_prec_rec_f1(
            gnd_assignments=torch.LongTensor(lgolds).to(device),
            pred_assignments=torch.LongTensor(lpreds).to(device))
        acc = cluster_metrics.calc_ACC(torch.LongTensor(lpreds).to(device),
                                       torch.LongTensor(lgolds).to(device))
        silhouette, davies_bouldin = sklearn.metrics.silhouette_score(
            latent_z1s, pred1s, metric='euclidean'), sklearn.metrics.davies_bouldin_score(latent_z1s, pred1s)
        print('{} epoch {}, eval prec={:.4f} rec={:.4f} f1={:.4f} acc={:.4f} sil={:.4f}, db={:.4f}'.format(
            datetime.datetime.now(), epoch, prec, rec, f1, acc, silhouette, davies_bouldin))

    if args.save_model_path is not None:
        pred1s = torch.from_numpy(pred1s)
        if pred2s is not None:
            pred2s = torch.from_numpy(pred2s)
        state = {
            'model_state': model.state_dict(),
            'id_to_token': dataset.id_to_token,
            'word_emb_size': word_emb_size,
            'v1_assignments': pred1s,
            'v2_assignments': pred2s,
            'pred1s_perm_idx': pred1s_perm_idx,
            'pred2s_perm_idx': pred2s_perm_idx
        }
        with open(expand(args.save_model_path), 'wb') as f:
            torch.save(state, f)
        print('saved model to ', args.save_model_path)
if __name__ == '__main__':
    # Evaluation script for the CTM-adapted ProtoNet on MiniImageNet.
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', default='0')
    parser.add_argument('--load', default='./save/proto-1/max-acc.pth')
    parser.add_argument('--batch', type=int, default=2000)
    parser.add_argument('--test-way', type=int, default=5)
    parser.add_argument('--train-way', type=int, default=5)
    parser.add_argument('--shot', type=int, default=1)
    parser.add_argument('--query', type=int, default=30)
    args = parser.parse_args()
    pprint(vars(args))

    set_gpu(args.gpu)

    dataset = MiniImageNet('test')
    # BUG FIX: the original passed DataLoader kwargs (num_workers, pin_memory)
    # straight into CategoriesSampler and never defined `loader`, which is
    # iterated below (NameError). Reconstructed per the sibling scripts'
    # sampler-then-loader pattern.
    sampler = CategoriesSampler(dataset.label, args.batch, args.test_way,
                                args.shot + args.query)
    loader = DataLoader(dataset, batch_sampler=sampler,
                        num_workers=8, pin_memory=True)

    model = Convnet().cuda()
    model = CTM_apadter(model, args).cuda()
    model.load_state_dict(torch.load(args.load))
    model.eval()

    ave_acc = Averager()

    for i, batch in enumerate(loader, 1):
        data, labels = [_.cuda() for _ in batch]
        # Query labels: [0..test_way) repeated per query sample.
        label = torch.arange(args.test_way).repeat(args.query)
        label = label.type(torch.cuda.LongTensor)
        logits = model(data)
parser.add_argument('--gpu', default='0')
parser.add_argument('--load', default='./save/proto-1/max-acc.pth')
parser.add_argument('--batch', type=int, default=2000)
parser.add_argument('--test-way', type=int, default=5)
parser.add_argument('--train-way', type=int, default=5)
parser.add_argument('--shot', type=int, default=1)
parser.add_argument('--query', type=int, default=30)
parser.add_argument('--base_model', default='resnet18')
parser.add_argument('--use_CTM', type = int, default=1)
args = parser.parse_args()
pprint(vars(args))

set_gpu(args.gpu)

# args.batch test episodes with test_way classes, shot + query per class.
dataset = MiniImageNet('test')
sampler = CategoriesSampler(dataset.label, args.batch, args.test_way,
                            args.shot + args.query)
loader = DataLoader(dataset, batch_sampler=sampler,
                    num_workers=8, pin_memory=True)

# model = Convnet().cuda()
# model = CTM_apadter(model, args).cuda()
# create_model builds the backbone (+ CTM module when use_CTM) from args.
model = create_model(args)
model.load_state_dict(torch.load(args.load))
model.eval()

ave_acc = Averager()

for i, batch in enumerate(loader, 1):
    data, _ = [_.cuda() for _ in batch]
    # Query labels: [0..test_way) repeated per query sample.
    label = torch.arange(args.test_way).repeat(args.query)
def main(args): mean, std = (0.1307,), (0.3081,) if not os.path.exists(args.save_path): os.makedirs(args.save_path) train_trans = T.Compose((T.RandomHorizontalFlip(0.5), T.ToTensor(), T.Normalize(mean=mean, std=std))) test_trans = T.Compose((T.ToTensor(), T.Normalize(mean=mean, std=std))) set_seed_pytorch(args.epoch + args.layer_num * 100) model = Pse_Inv_Lmser(class_num=args.class_num, layer_num=args.layer_num, reflect_num=args.reflect_num, channel=args.channel) if args.dataset == "MNIST": trainset = torchvision.datasets.MNIST(root="./data/MNIST/", train=True, download=True, transform=train_trans) testset = torchvision.datasets.MNIST(root="./data/MNIST/", train=False, download=True, transform=test_trans) elif args.dataset == "F-MNIST": trainset = torchvision.datasets.FashionMNIST(root="./data/F-MNIST/", train=True, download=True, transform=train_trans) testset = torchvision.datasets.FashionMNIST(root="./data/F-MNIST/", train=False, download=True, transform=test_trans) else: raise RuntimeError("Invalid dataset name!") if args.use_small_samples: sampler = CategoriesSampler(trainset.targets, args.num_batch, args.way, args.n_per_batch) train_loader = DataLoader(trainset, batch_sampler=sampler, pin_memory=True) else: train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.bs, shuffle=True) test_loader = torch.utils.data.DataLoader(testset, batch_size=args.bs, shuffle=False) # params = [] model.to(args.device) # for i in range(args.layer_num): # model.fc[i].to(args.device) # model.dec_fc[i].to(args.device) # params.append({"params": model.fc[i].parameters()}) # params.append({"params": model.dec_fc[i].parameters()}) optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) train_list = [] test_list = [] log_lr_st = math.log10(args.lr) lr_epoch = torch.logspace(log_lr_st, log_lr_st - 1, steps=args.epoch) for epoch in range(args.epoch): print(f"=========epoch{epoch}===============") set_seed_pytorch(args.epoch + args.layer_num * 10 + epoch * 100 + 
args.reflect_num * 1000) for param_group in optimizer.param_groups: param_group['lr'] = lr_epoch[epoch] train_loss = train(args, model, train_loader, optimizer) model.eval() set_seed_pytorch(args.epoch + args.layer_num * 20 + epoch * 200 + args.reflect_num * 2000) test_loss = test(args, model, test_loader) model.train() train_list.append(train_loss) test_list.append(test_loss) draw(args, train_list, test_list, args.save_path) if epoch % 10 == 0 or epoch == args.epoch - 1: torch.save(model, args.save_path + f"model_layer-{args.layer_num}_reflect-{args.reflect_num}_channel-{args.channel}_lr-{args.lr}_epoch-{epoch}.pkl") print("finished training") # weight analysis with torch.no_grad(): print("weight analysis start:") for i in range(args.layer_num): w1 = model.fc[i].weight.to('cpu').numpy() w2 = model.dec_fc[i].weight.to('cpu').numpy() print(f"weight analysis: fc[{i}].weight[] and dec_fc[{i}].weight:") weight_analysis(w1, w2)