def load_static(args):
    """Load the pre-trained IRNet semantic parser and its static resources.

    Builds the model on the selected device, restores its weights from
    ``args.model_to_load`` (forced onto CPU via ``map_location``), and loads
    the spaCy tokenizer plus two ConceptNet pickles used at inference time.

    Returns a tuple:
        (args, grammar, model, nlp, tokenizer,
         related_to_concept, is_a_concept, schemas_raw, schemas_dict)
    """
    device, n_gpu = setup_device()
    # Seed before any model construction so initialization is reproducible.
    set_seed_everywhere(args.seed, n_gpu)
    schemas_raw, schemas_dict = spider_utils.load_schema(args.data_dir)
    grammar = semQL.Grammar()
    model = IRNet(args, device, grammar)
    model.to(device)
    # load the pre-trained parameters
    model.load_state_dict(
        torch.load(args.model_to_load, map_location=torch.device('cpu')))
    # Inference only — disable dropout/batchnorm updates.
    model.eval()
    print("Load pre-trained model from '{}'".format(args.model_to_load))
    nlp = English()
    tokenizer = nlp.Defaults.create_tokenizer(nlp)
    # ConceptNet lookup tables used for schema linking.
    # NOTE(review): pickles are trusted local artifacts; do not point
    # args.conceptNet at untrusted data (pickle.load executes arbitrary code).
    with open(os.path.join(args.conceptNet, 'english_RelatedTo.pkl'), 'rb') as f:
        related_to_concept = pickle.load(f)
    with open(os.path.join(args.conceptNet, 'english_IsA.pkl'), 'rb') as f:
        is_a_concept = pickle.load(f)
    return args, grammar, model, nlp, tokenizer, related_to_concept, is_a_concept, schemas_raw, schemas_dict
def _remove_spaces(sentence): s = sentence.strip().split() s = " ".join(s) return s def _find_nums(question): nums = re.findall('\d*\.?\d+', question) return nums if __name__ == '__main__': args = read_arguments_manual_inference() device, n_gpu = setup_device() set_seed_everywhere(args.seed, n_gpu) schemas_raw, schemas_dict = spider_utils.load_schema(args.data_dir) grammar = semQL.Grammar() model = IRNet(args, device, grammar) model.to(device) # load the pre-trained parameters model.load_state_dict(torch.load(args.model_to_load)) # to use cpu instead of gpu , uncomment this code # model.load_state_dict(torch.load(args.model_to_load,map_location=torch.device('cpu'))) model.eval() print("Load pre-trained model from '{}'".format(args.model_to_load))
def train(wandb_track, experiment_name, epochs, task, gpu_num=0, pretrained='', margin=0.4, losstype='deepcca'):
    """Train joint embedding networks.

    Args:
        wandb_track: 1 to enable Weights & Biases logging, anything else to disable.
        experiment_name: used for the results directory and the wandb project.
        epochs: number of training epochs (coerced to int; use 0 for the pure-CCA path).
        task: 'cifar10', 'mnist' or 'uw' — selects data loaders and network pair.
        gpu_num: GPU index (coerced to int).
        pretrained: 'pretrained' to warm-start from previously saved model states.
        margin: kept for interface compatibility; unused by the 'deepcca'/'CCA' paths.
        losstype: 'deepcca' trains with the Deep CCA loss; 'CCA' fits a classical
            sklearn CCA transform after the (possibly empty) training loop.

    Side effects: writes hyperparameter logs, per-epoch model states, loss pickles
    and plots under the experiment's train_results/ directory.
    """
    epochs = int(epochs)
    gpu_num = int(gpu_num)
    margin = float(margin)
    # Setup the results and device.
    results_dir = setup_dirs(experiment_name)
    if not os.path.exists(results_dir + 'train_results/'):
        os.makedirs(results_dir + 'train_results/')
    train_results_dir = results_dir + 'train_results/'
    device = setup_device(gpu_num)
    #### Hyperparameters #####
    # Initialize wandb only when tracking is requested (import kept local on purpose).
    if wandb_track == 1:
        import wandb
        wandb.init(project=experiment_name)
        config = wandb.config
        config.epochs = epochs
    with open(results_dir + 'hyperparams_train.txt', 'w') as f:
        f.write('Command used to run: python ')
        f.write(' '.join(sys.argv))
        f.write('\n')
        f.write('device in use: ' + str(device))
        f.write('\n')
        f.write('--experiment_name ' + str(experiment_name))
        f.write('\n')
        f.write('--epochs ' + str(epochs))
        f.write('\n')
    # Setup data loaders and models.
    if task == 'cifar10':
        train_loader, test_loader = cifar10_loaders()
        model_A = CIFAREmbeddingNet()
        model_B = CIFAREmbeddingNet()
    elif task == 'mnist':
        train_loader, test_loader = mnist_loaders()
        model_A = MNISTEmbeddingNet()
        model_B = MNISTEmbeddingNet()
    elif task == 'uw':
        uw_data = 'bert'
        train_loader, test_loader = uw_loaders(uw_data)
        if uw_data == 'bert':
            model_A = RowNet(3072, embed_dim=1024)  # Language.
            model_B = RowNet(4096, embed_dim=1024)  # Vision.
    # Finish model setup.
    # FIX: the 'Starting from scratch' message used to print unconditionally,
    # even when pretrained weights had just been loaded — it belongs in `else`.
    if pretrained == 'pretrained':
        # If we want to load pretrained models to continue training.
        print('Starting from pretrained networks.')
        model_A.load_state_dict(torch.load(train_results_dir + 'model_A_state.pt'))
        model_B.load_state_dict(torch.load(train_results_dir + 'model_B_state.pt'))
    else:
        print('Starting from scratch to train networks.')
    model_A.to(device)
    model_B.to(device)
    # Initialize the optimizers and loss function.
    optimizer_A = torch.optim.Adam(model_A.parameters(), lr=0.00001)
    optimizer_B = torch.optim.Adam(model_B.parameters(), lr=0.00001)

    # Add learning rate scheduling. LambdaLR multiplies the base lr by this factor.
    def lr_lambda(e):
        if e < 50:
            return 0.001
        elif e < 100:
            return 0.0001
        else:
            return 0.00001

    scheduler_A = torch.optim.lr_scheduler.LambdaLR(optimizer_A, lr_lambda)
    scheduler_B = torch.optim.lr_scheduler.LambdaLR(optimizer_B, lr_lambda)
    # Track batch losses.
    loss_hist = []
    # Put models into training mode.
    model_A.train()
    model_B.train()
    # Train.
    if wandb_track == 1:
        wandb.watch(model_A, log="all")
        wandb.watch(model_B, log="all")
    epoch_list = []  # in order to save epoch in a pickle file
    loss_list = []  # in order to save loss in a pickle file
    for epoch in tqdm(range(epochs)):
        epoch_loss = 0.0
        counter = 0
        for data in train_loader:
            data_a = data[0].to(device)
            data_b = data[1].to(device)
            #label = data[2]
            # Zero the parameter gradients.
            optimizer_A.zero_grad()
            optimizer_B.zero_grad()
            # Forward.
            if losstype == 'deepcca':
                # Based on Galen Andrew's Deep CCA.
                # data_a is from domain A, and data_b is the paired data from domain B.
                embedding_a = model_A(data_a)
                embedding_b = model_B(data_b)
                loss = deepcca(embedding_a, embedding_b, device, use_all_singular_values=True, outdim_size=128)
            # Backward.
            loss.backward()
            # Update.
            optimizer_A.step()
            optimizer_B.step()
            # Save batch loss. Since we are minimizing -corr the loss is negative.
            loss_hist.append(-1 * loss.item())
            epoch_loss += embedding_a.shape[0] * loss.item()
            # Reporting progress every 64 batches.
            counter += 1
            if counter % 64 == 0:
                print('epoch:', epoch, 'loss:', loss.item())
            if wandb_track == 1:
                wandb.log({"epoch": epoch, "loss": loss})
        # Save network state at each epoch.
        torch.save(model_A.state_dict(), train_results_dir + 'model_A_state.pt')
        torch.save(model_B.state_dict(), train_results_dir + 'model_B_state.pt')
        # Since the batch size is 1: len(train_loader) == counter.
        print('*********** epoch is finished ***********')
        epoch_loss = -1 * epoch_loss
        print('epoch: ', epoch, 'loss(correlation): ', (epoch_loss) / counter)
        epoch_list.append(epoch + 1)
        loss_list.append(epoch_loss / counter)
        # Use a context manager so the pickle file handle is closed promptly
        # (the original passed a bare open() and leaked the handle).
        with open(train_results_dir + 'epoch_loss.pkl', "wb") as pkl_f:
            pickle.dump([epoch_list, loss_list], pkl_f)
        Visualize(train_results_dir + 'epoch_loss.pkl', 'Correlation History', True, 'epoch', 'Correlation (log scale)', None, 'log', None, (14, 7), train_results_dir + 'Figures/')
        # Update learning rate schedulers.
        scheduler_A.step()
        scheduler_B.step()
    # Plot and save batch loss history (every 10th batch).
    with open(train_results_dir + 'epoch_corr.pkl', "wb") as pkl_f:
        pickle.dump([loss_hist[::10]], pkl_f)
    Visualize(train_results_dir + 'epoch_corr.pkl', 'Correlation Batch', False, 'Batch', 'Correlation (log scale)', None, 'log', None, (14, 7), train_results_dir + 'Figures/')
    #### Learn the transformations for CCA ####
    if losstype == "CCA":
        a_base = []
        b_base = []
        no_model = True
        if no_model:
            # Without using a model: use raw data without featurization.
            for data in train_loader:
                x = data[0].to(device)
                y = data[1].to(device)
                if task == 'uw':
                    a_base.append(x)
                    b_base.append(y)
                else:
                    a_base.append(x.cpu().detach().numpy())
                    b_base.append(y.cpu().detach().numpy())
        else:
            import torchvision.models as models
            # Either use these models, or use trained models with triplet loss.
            res18_model = models.resnet18(pretrained=True)
            # Change the first layer of ResNet to accept images with 1 channel instead of 3.
            res18_model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
            # Select the desired layers (drop the classifier head and pooling).
            model_A = torch.nn.Sequential(*list(res18_model.children())[:-2])
            model_B = torch.nn.Sequential(*list(res18_model.children())[:-2])
            model_A.eval()
            model_B.eval()
            for data in train_loader:
                x = data[0].to(device)  # Domain A
                y = data[1].to(device)  # Domain B
                a_base.append(model_A(x).cpu().detach().numpy())
                b_base.append(model_B(y).cpu().detach().numpy())
        # Concatenate predictions.
        a_base = np.concatenate(a_base, axis=0)
        b_base = np.concatenate(b_base, axis=0)
        a_base = np.squeeze(a_base)
        b_base = np.squeeze(b_base)
        if no_model:
            # Flatten each (H, W) sample into a vector for sklearn's CCA.
            new_a_base = []
            new_b_base = []
            for i in range(len(a_base)):
                new_a_base.append(a_base[i, :, :].flatten())
                new_b_base.append(b_base[i, :, :].flatten())
            new_a_base = np.asarray(new_a_base)
            new_b_base = np.asarray(new_b_base)
            a_base = new_a_base
            b_base = new_b_base
            print('Finished reshaping data, the shape is:', new_a_base.shape)
        from sklearn.cross_decomposition import CCA
        from joblib import dump
        components = 128
        cca = CCA(n_components=components)
        cca.max_iter = 5000
        cca.fit(a_base, b_base)
        dump(cca, 'Learned_CCA.joblib')
    #### End of CCA fit to find the transformations ####
    print('Training Done!')
def test(experiment_name, task, gpu_num=0, pretrained='', margin=0.4, losstype='deepcca'):
    """Evaluate trained joint embedding networks and produce report artifacts.

    Restores model_A/model_B from the experiment's train_results/, embeds the
    train split (to calibrate a distance threshold) and the test split, then
    runs object identification and retrieval tasks, prints MRR/KNN/Corr/F1
    metrics, and saves ROC, distance-correlation, histogram and UMAP plots
    under test_results/.

    Args:
        experiment_name: experiment directory name.
        task: 'cifar10', 'mnist' or 'uw'.
        gpu_num: GPU index (coerced to int).
        pretrained, margin, losstype: kept for interface compatibility; unused here.
    """
    cosined = False
    embed_dim = 1024
    gpu_num = int(gpu_num)
    margin = float(margin)
    # Setup the results and device.
    results_dir = setup_dirs(experiment_name)
    if not os.path.exists(results_dir + 'test_results/'):
        os.makedirs(results_dir + 'test_results/')
    test_results_dir = results_dir + 'test_results/'
    device = setup_device(gpu_num)
    #### Hyperparameters #####
    #Initialize wandb
    #import wandb
    #wandb.init(project=experiment_name)
    #config = wandb.config
    with open(results_dir + 'hyperparams_test.txt', 'w') as f:
        f.write('Command used to run: python ')
        f.write(' '.join(sys.argv))
        f.write('\n')
        f.write('device in use: ' + str(device))
        f.write('\n')
        f.write('--experiment_name ' + str(experiment_name))
        f.write('\n')
    # Setup data loaders and models based on task.
    if task == 'cifar10':
        train_loader, test_loader = cifar10_loaders()
        model_A = CIFAREmbeddingNet()
        model_B = CIFAREmbeddingNet()
    elif task == 'mnist':
        train_loader, test_loader = mnist_loaders()
        model_A = MNISTEmbeddingNet()
        model_B = MNISTEmbeddingNet()
    elif task == 'uw':
        uw_data = 'bert'
        train_loader, test_loader = uw_loaders(uw_data)
        if uw_data == 'bert':
            model_A = RowNet(3072, embed_dim=1024)  # Language.
            model_B = RowNet(4096, embed_dim=1024)  # Vision.
    # Finish model setup.
    model_A.load_state_dict(torch.load(results_dir + 'train_results/model_A_state.pt'))
    model_B.load_state_dict(torch.load(results_dir + 'train_results/model_B_state.pt'))
    model_A.to(device)
    model_B.to(device)
    # Put models into evaluation mode.
    model_A.eval()
    model_B.eval()
    # For UW data: use train data to calibrate the distance threshold.
    a_train = []
    b_train = []
    # Load cached embeddings when available to be faster.
    a_train = load_embeddings(test_results_dir + 'lang_embeds_train.npy')
    b_train = load_embeddings(test_results_dir + 'img_embeds_train.npy')
    # Iterate through the train data.
    if a_train is None or b_train is None:
        a_train = []
        b_train = []
        print("Computing embeddings for train data to calculate threshhold for distance")
        for data in train_loader:
            anchor_data = data[0].to(device)
            positive_data = data[1].to(device)
            label = data[2]
            a_train.append(model_A(anchor_data.to(device)).cpu().detach().numpy())
            b_train.append(model_B(positive_data.to(device)).cpu().detach().numpy())
        print("Finished Computing embeddings for train data")
        # Cache embeddings for subsequent runs.
        save_embeddings(test_results_dir + 'lang_embeds_train.npy', a_train)
        save_embeddings(test_results_dir + 'img_embeds_train.npy', b_train)
    a_train = np.concatenate(a_train, axis=0)
    b_train = np.concatenate(b_train, axis=0)
    # Test data.
    # For accumulating predictions to check embedding visually using test set.
    # a is embeddings from domain A, b is embeddings from domain B, ys is their labels.
    a = []
    b = []
    ys = []
    instance_data = []
    # Load cached embeddings when available to be faster.
    a = load_embeddings(test_results_dir + 'lang_embeds.npy')
    b = load_embeddings(test_results_dir + 'img_embeds.npy')
    # FIX: this flag used to be assigned only inside the `if` below, so the
    # cached-embeddings path raised NameError when it was read in the loop.
    compute_test_embeddings = False
    if a is None or b is None:
        compute_test_embeddings = True
        a = []
        b = []
    # Iterate through the test data. Labels and instance names are always
    # collected; embeddings are recomputed only on a cache miss.
    print("computing embeddings for test data")
    for data in test_loader:
        language_data, vision_data, object_name, instance_name = data
        language_data = language_data.to(device)
        vision_data = vision_data.to(device)
        instance_data.extend(instance_name)
        if compute_test_embeddings:
            a.append(model_A(language_data).cpu().detach().numpy())  # Language.
            b.append(model_B(vision_data).cpu().detach().numpy())  # Vision.
        ys.extend(object_name)
    print("finished computing embeddings for test data")
    # Convert string labels to ints.
    labelencoder = LabelEncoder()
    labelencoder.fit(ys)
    ys = labelencoder.transform(ys)
    # Cache embeddings for subsequent runs.
    save_embeddings(test_results_dir + 'lang_embeds.npy', a)
    save_embeddings(test_results_dir + 'img_embeds.npy', b)
    # Concatenate predictions.
    a = np.concatenate(a, axis=0)
    b = np.concatenate(b, axis=0)
    ab = np.concatenate((a, b), axis=0)
    ground_truth, predicted, distance = object_identification_task_classifier(a, b, ys, a_train, b_train, lamb_std=1, cosine=cosined)
    #### Retrieval task by giving an image and finding the closest word descriptions ####
    ground_truth_word, predicted_word, distance_word = object_identification_task_classifier(b, a, ys, b_train, a_train, lamb_std=1, cosine=cosined)
    with open('retrieval_non_pro.csv', mode='w') as retrieval_non_pro:
        csv_file_writer = csv.writer(retrieval_non_pro, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_file_writer.writerow(['image', 'language', 'predicted', 'ground truth'])
        for i in range(50):
            csv_file_writer.writerow([instance_data[0], instance_data[i], predicted_word[0][i], ground_truth_word[0][i]])
    # Per-query precision/recall/F1: 'micro' over all pairs, 'binary' for positives only.
    precisions = []
    recalls = []
    f1s = []
    precisions_pos = []
    recalls_pos = []
    f1s_pos = []
    #print(classification_report(oit_res[i], 1/np.arange(1,len(oit_res[i])+1) > 0.01))
    for i in range(len(ground_truth)):
        p, r, f, s = precision_recall_fscore_support(ground_truth[i], predicted[i], warn_for=(), average='micro')
        precisions.append(p)
        recalls.append(r)
        f1s.append(f)
        p, r, f, s = precision_recall_fscore_support(ground_truth[i], predicted[i], warn_for=(), average='binary')
        precisions_pos.append(p)
        recalls_pos.append(r)
        f1s_pos.append(f)
    print('\n ')
    print(experiment_name + '_' + str(embed_dim))
    print('MRR, KNN, Corr, Mean F1, Mean F1 (pos only)')
    print('%.3g & %.3g & %.3g & %.3g & %.3g' % (mean_reciprocal_rank(a, b, ys, cosine=cosined), knn(a, b, ys, k=5, cosine=cosined), corr_between(a, b, cosine=cosined), np.mean(f1s), np.mean(f1s_pos)))
    # One translucent ROC curve per query, overlaid.
    plt.figure(figsize=(14, 7))
    for i in range(len(ground_truth)):
        fpr, tpr, thres = roc_curve(ground_truth[i], [1 - e for e in distance[i]], drop_intermediate=True)
        plt.plot(fpr, tpr, alpha=0.08, color='r')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.savefig(test_results_dir + '_' + str(embed_dim) + '_ROC.svg')
    # Pick a pair, plot distance in A vs distance in B. Should be correlated.
    a_dists = []
    b_dists = []
    for _ in range(3000):
        i1 = random.randrange(len(a))
        i2 = random.randrange(len(a))
        a_dists.append(euclidean(a[i1], a[i2]))
        b_dists.append(euclidean(b[i1], b[i2]))
        # a_dists.append(cosine(a[i1], a[i2]))
        # b_dists.append(cosine(b[i1], b[i2]))
    # Plot.
    plt.figure(figsize=(14, 14))
    #plt.title('Check Distance Correlation Between Domains')
    plt.xlim([0, 3])
    plt.ylim([0, 3])
    # plt.xlim([0,max(a_dists)])
    # plt.ylim([0,max(b_dists)])
    plt.xlabel('Distance in Language Domain')
    plt.ylabel('Distance in Vision Domain')
    #plt.plot(a_dists_norm[0],b_dists_norm[0],'.')
    #plt.plot(np.arange(0,2)/20,np.arange(0,2)/20,'k-',lw=3)
    plt.plot(a_dists, b_dists, 'o', alpha=0.5)
    plt.plot(np.arange(0, 600), np.arange(0, 600), 'k--', lw=3, alpha=0.5)
    #plt.text(-0.001, -0.01, 'Corr: %.3f'%(pearsonr(a_dists,b_dists)[0]), fontsize=20)
    plt.savefig(test_results_dir + '_' + str(embed_dim) + '_CORR.svg')
    # Inspect embedding distances.
    clas = 5  # Base class.
    i_clas = [i for i in range(len(ys)) if ys[i].item() == clas]
    # Shuffled copy of the same class indices for within-class pairings.
    i_clas_2 = np.random.choice(i_clas, len(i_clas), replace=False)
    clas_ref = 4  # Comparison class.
    i_clas_ref = [i for i in range(len(ys)) if ys[i].item() == clas_ref]
    ac = np.array([a[i] for i in i_clas])
    bc = np.array([b[i] for i in i_clas])
    ac2 = np.array([a[i] for i in i_clas_2])
    bc2 = np.array([b[i] for i in i_clas_2])
    ac_ref = np.array([a[i] for i in i_clas_ref])
    aa_diff_ref = norm(ac[:min(len(ac), len(ac_ref))] - ac_ref[:min(len(ac), len(ac_ref))], ord=2, axis=1)
    ab_diff = norm(ac - bc2, ord=2, axis=1)
    aa_diff = norm(ac - ac2, ord=2, axis=1)
    bb_diff = norm(bc - bc2, ord=2, axis=1)
    # aa_diff_ref = [cosine(ac[:min(len(ac),len(ac_ref))][i],ac_ref[:min(len(ac),len(ac_ref))][i]) for i in range(len(ac[:min(len(ac),len(ac_ref))]))]
    # ab_diff = [cosine(ac[i],bc2[i]) for i in range(len(ac))]
    # aa_diff = [cosine(ac[i],ac2[i]) for i in range(len(ac))]
    # bb_diff = [cosine(bc[i],bc2[i]) for i in range(len(ac))]
    bins = np.linspace(0, 0.1, 100)
    plt.figure(figsize=(14, 7))
    plt.hist(ab_diff, bins, alpha=0.5, label='between embeddings')
    plt.hist(aa_diff, bins, alpha=0.5, label='within embedding A')
    plt.hist(bb_diff, bins, alpha=0.5, label='within embedding B')
    plt.hist(aa_diff_ref, bins, alpha=0.5, label='embedding A, from class ' + str(clas_ref))
    plt.title('Embedding Distances - Class: ' + str(clas))
    plt.xlabel('L2 Distance')
    plt.ylabel('Count')
    plt.legend()
    #labelencoder.classes_
    classes_to_keep = [36, 6, 9, 46, 15, 47, 50, 22, 26, 28]
    print(labelencoder.inverse_transform(classes_to_keep))
    # ab stacks language then vision embeddings, so index labels modulo len(ys).
    ab_norm = [e for i, e in enumerate(ab) if ys[i % len(ys)] in classes_to_keep]
    ys_norm = [e for e in ys if e in classes_to_keep]
    color_index = {list(set(ys_norm))[i]: i for i in range(len(set(ys_norm)))}
    #set(ys_norm)
    markers = ["o", "v", "^", "s", "*", "+", "x", "D", "h", "4"]
    marker_index = {list(set(ys_norm))[i]: markers[i] for i in range(len(set(ys_norm)))}
    embedding = umap.UMAP(n_components=2).fit_transform(ab_norm)  # metric='cosine'
    # Plot UMAP embedding of embeddings for all classes.
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
    mid = len(ys_norm)  # First half of `embedding` is language, second half vision.
    ax1.set_title('Language UMAP')
    for e in list(set(ys_norm)):
        x1 = [embedding[:mid, 0][i] for i in range(len(ys_norm)) if ys_norm[i] == e]
        x2 = [embedding[:mid, 1][i] for i in range(len(ys_norm)) if ys_norm[i] == e]
        ax1.scatter(x1, x2, marker=marker_index[int(e)], alpha=0.5, c=[sns.color_palette("colorblind", 10)[color_index[int(e)]]], label=labelencoder.inverse_transform([int(e)])[0])
    ax1.set_xlim([min(embedding[:, 0]) - 4, max(embedding[:, 0]) + 4])
    ax1.set_ylim([min(embedding[:, 1]) - 4, max(embedding[:, 1]) + 4])
    ax1.grid(True)
    ax1.legend(loc='upper center', bbox_to_anchor=(1.1, -0.08), fancybox=True, shadow=True, ncol=5)
    ax2.set_title('Vision UMAP')
    for e in list(set(ys_norm)):
        x1 = [embedding[mid::, 0][i] for i in range(len(ys_norm)) if ys_norm[i] == e]
        x2 = [embedding[mid::, 1][i] for i in range(len(ys_norm)) if ys_norm[i] == e]
        ax2.scatter(x1, x2, marker=marker_index[int(e)], alpha=0.5, c=[sns.color_palette("colorblind", 10)[color_index[int(e)]]])
    ax2.set_xlim([min(embedding[:, 0]) - 4, max(embedding[:, 0]) + 4])
    ax2.set_ylim([min(embedding[:, 1]) - 4, max(embedding[:, 1]) + 4])
    ax2.grid(True)
    plt.savefig(test_results_dir + '_' + str(embed_dim) + '_UMAP_wl.svg', bbox_inches='tight')
def main():
    """Build, optionally warm-start, and train a VisionTransformer end to end."""
    config = get_train_config()

    # Device selection (possibly multiple GPUs).
    device, device_ids = setup_device(config.n_gpu)

    # Tensorboard writer shared by both metric trackers.
    tb_writer = TensorboardWriter(config.summary_dir, config.tensorboard)

    # Trackers for train/validation loss and top-1/top-5 accuracy.
    metric_names = ['loss', 'acc1', 'acc5']
    train_metrics = MetricTracker(*metric_names, writer=tb_writer)
    valid_metrics = MetricTracker(*metric_names, writer=tb_writer)

    # Build the model from config.
    print("create model")
    model = VisionTransformer(
        image_size=(config.image_size, config.image_size),
        patch_size=(config.patch_size, config.patch_size),
        emb_dim=config.emb_dim,
        mlp_dim=config.mlp_dim,
        num_heads=config.num_heads,
        num_layers=config.num_layers,
        num_classes=config.num_classes,
        attn_dropout_rate=config.attn_dropout_rate,
        dropout_rate=config.dropout_rate)

    # Optionally restore pretrained weights; drop the classifier head when the
    # number of classes differs from the checkpoint's.
    if config.checkpoint_path:
        state_dict = load_checkpoint(config.checkpoint_path)
        head_mismatch = config.num_classes != state_dict['classifier.weight'].size(0)
        if head_mismatch:
            del state_dict['classifier.weight']
            del state_dict['classifier.bias']
            print("re-initialize fc layer")
            model.load_state_dict(state_dict, strict=False)
        else:
            model.load_state_dict(state_dict)
        print("Load pretrained weights from {}".format(config.checkpoint_path))

    # Move to device and wrap for multi-GPU when needed.
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # Dataloaders are resolved by name from the configured dataset.
    print("create dataloaders")
    train_dataloader = eval("{}DataLoader".format(config.dataset))(
        data_dir=os.path.join(config.data_dir, config.dataset),
        image_size=config.image_size,
        batch_size=config.batch_size,
        num_workers=config.num_workers,
        split='train')
    valid_dataloader = eval("{}DataLoader".format(config.dataset))(
        data_dir=os.path.join(config.data_dir, config.dataset),
        image_size=config.image_size,
        batch_size=config.batch_size,
        num_workers=config.num_workers,
        split='val')

    # Loss, optimizer and one-cycle LR schedule.
    print("create criterion and optimizer")
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(
        params=model.parameters(),
        lr=config.lr,
        weight_decay=config.wd,
        momentum=0.9)
    lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer,
        max_lr=config.lr,
        pct_start=config.warmup_steps / config.train_steps,
        total_steps=config.train_steps)

    # Training loop: train_steps is a step budget, converted into whole epochs.
    print("start training")
    best_acc = 0.0
    epochs = config.train_steps // len(train_dataloader)
    for epoch in range(1, epochs + 1):
        log = {'epoch': epoch}

        # One training pass.
        model.train()
        log.update(train_epoch(epoch, model, train_dataloader, criterion,
                               optimizer, lr_scheduler, train_metrics, device))

        # One validation pass; prefix its metrics with 'val_'.
        model.eval()
        val_result = valid_epoch(epoch, model, valid_dataloader, criterion,
                                 valid_metrics, device)
        log.update(**{'val_' + k: v for k, v in val_result.items()})

        # Track the best validation top-1 accuracy.
        is_best = log['val_acc1'] > best_acc
        if is_best:
            best_acc = log['val_acc1']

        # Checkpoint every epoch, flagging the best one.
        save_model(config.checkpoint_dir, epoch, model, optimizer, lr_scheduler,
                   device_ids, is_best)

        # Report this epoch's metrics.
        for key, value in log.items():
            print(' {:15s}: {}'.format(str(key), value))
def main(args):
    """Train a few-shot meta-learning model and evaluate the best checkpoint on test.

    Builds backbone/classifier/mixer from args, trains for args.epochs with
    periodic validation (every args.eval_freq epochs), keeps the best model on
    CPU, checkpoints it, and finally evaluates it on the test split. All
    progress is persisted as JSON in the experiment's log directory.
    """
    since = time.time()
    print(args)
    #setup the directory to save the experiment log and trained models
    log_dir = utils.setup_savedir(prefix=args.saveprefix, basedir=args.saveroot, args=args, append_args=args.saveargs)
    #save args
    utils.save_args(log_dir, args)
    #setup gpu
    device = utils.setup_device(args.gpu)
    #setup dataset and dataloaders
    dataset_dict = setup_dataset(args)
    dataloader_dict = setup_dataloader(args, dataset_dict)
    #setup backbone cnn
    backbone = setup_backbone(args.backbone, pretrained=args.backbone_pretrained)
    #setup fewshot classification
    feature_dim = 64 if args.backbone == "conv4" else 512
    num_train_classes = dataset_dict["train"].num_classes
    classifier = setup_classifier(args.classifier, feature_dim, num_train_classes)
    #setup data augmentation model
    mixer = setup_image_mixer(args.mixer, pretrained=args.fusion_pretrained)
    #setup meta-learning model
    model = MetaModel(feature=backbone, classifier=classifier, mixer=mixer)
    #resume model if needed
    if args.resume is not None:
        model = utils.resume_model(model, args.resume, state_dict_key="model")
    #setup loss
    criterion = torch.nn.CrossEntropyLoss()
    #setup optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)
    if args.resume_optimizer is not None:
        optimizer = utils.resume_model(optimizer, args.resume_optimizer, state_dict_key="optimizer")
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.steps, gamma=args.step_facter)
    #main training
    log = {}
    log["git"] = utils.check_gitstatus()
    log["timestamp"] = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    log["train"] = []
    log["val"] = []
    log_save_path = os.path.join(log_dir, "log.json")
    utils.save_json(log, log_save_path)
    valacc = 0
    best_val_acc = 0
    bestmodel = model
    for epoch in range(args.epochs):
        print("epoch: %d --start from 0 and end at %d" % (epoch, args.epochs - 1))
        loss, acc = train_one_epoch(dataloader_dict["train"], model, criterion, optimizer, accuracy=accuracy, device=device, print_freq=args.print_freq, random_seed=epoch)
        log["train"].append({'epoch': epoch, "loss": loss, "acc": acc})
        if epoch % args.eval_freq == 0:
            valloss, valacc, conf = evaluate(dataloader_dict["val"], model, criterion, accuracy=accuracy, device=device)
            log["val"].append({'epoch': epoch, "loss": valloss, "acc": valacc, "95conf": conf})
            #if this is the best model so far, keep it on cpu and save it
            if valacc > best_val_acc:
                best_val_acc = valacc
                bestmodel = deepcopy(model)
                bestmodel.cpu()
                save_path = os.path.join(log_dir, "bestmodel.pth")
                utils.save_checkpoint(save_path, bestmodel, key="model")
                save_path = os.path.join(log_dir, "bestmodel_optimizer.pth")
                utils.save_checkpoint(save_path, optimizer, key="optimizer")
                log["best_epoch"] = epoch
                log["best_acc"] = best_val_acc
            utils.save_json(log, log_save_path)
        # FIX: step the scheduler AFTER this epoch's optimizer updates
        # (PyTorch >= 1.1 contract); stepping at the top of the loop made the
        # milestones fire one epoch early and skipped the initial lr for epoch 0.
        lr_scheduler.step()
    #use the best model to evaluate on test set
    loss, acc, conf = evaluate(dataloader_dict["test"], bestmodel, criterion, accuracy=accuracy, static_augmentations=args.augmentations, device=device)
    log["test"] = {"loss": loss, "acc": acc, "95conf": conf}
    time_elapsed = time.time() - since
    log["time_elapsed"] = time_elapsed
    #save the final log
    utils.save_json(log, log_save_path)
im.save(save_path) print("saved", save_path) if __name__ == '__main__': #fix seed for reproducibility np.random.seed(123) torch.manual_seed(123) random.seed(123) args = argparse_setup() max_iter = 500 num_gen = 30 save_gen = 10 truc = 0.4 device = utils.setup_device(args.gpu) savedir = os.path.join(args.saveroot, args.dataset + args.save_suffix) #setup dataset as pandas data frame dataset = getattr(__import__("datasets.%s" % args.dataset), args.dataset) dataset_root = "./data/%s" % args.dataset if args.dataset_root is not None: dataset_root = args.dataset_root df_dict = dataset.setup_df(dataset_root) dataset_df = pd.concat(df_dict.values()).sort_values("path") transform = transforms.Compose([ transforms.Resize(146), transforms.CenterCrop((128, 128)), transforms.ToTensor(), ])
def main(args):
    """Train an image classifier with plateau-based LR decay and early stopping.

    Seeds and makes the run deterministic, trains for up to args.epochs with
    per-epoch validation, decays the LR via ReduceLROnPlateau on validation
    loss, stops early once the LR drops below args.lr_min, then evaluates the
    best validation model on the test split. Progress is saved as JSON.
    """
    since = time.time()
    print(args)
    #set seed
    args.seed = utils.setup_seed(args.seed)
    utils.make_deterministic(args.seed)
    #setup the directory to save the experiment log and trained models
    log_dir = utils.setup_savedir(prefix=args.saveprefix, basedir=args.saveroot, args=args, append_args=args.saveargs)
    #save args
    utils.save_args(log_dir, args)
    #setup device
    device = utils.setup_device(args.gpu)
    #setup dataset and dataloaders
    dataset_dict = setup_dataset(args)
    dataloader_dict = setup_dataloader(args, dataset_dict)
    #setup backbone cnn
    num_classes = dataset_dict["train"].num_classes
    model = setup_backbone(args.backbone, pretrained=args.backbone_pretrained, num_classes=num_classes)
    #resume model if needed
    if args.resume is not None:
        model = utils.resume_model(model, args.resume, state_dict_key="model")
    #setup loss
    criterion = torch.nn.CrossEntropyLoss().to(device)
    if args.loss_balanced:
        print("using balanced loss")
        #if this optin is true, weight the loss inversely proportional to class frequency
        weight = torch.FloatTensor(dataset_dict["train"].inverse_label_freq)
        criterion = torch.nn.CrossEntropyLoss(weight=weight).to(device)
    #setup optimizer
    if args.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError()
    if args.resume_optimizer is not None:
        optimizer = utils.resume_model(optimizer, args.resume_optimizer, state_dict_key="optimizer")
    # Decay LR when validation loss stops improving for args.patience epochs.
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=args.patience, factor=args.step_facter, verbose=True)
    #main training
    log = {}
    log["git"] = utils.check_gitstatus()
    log["timestamp"] = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    log["train"] = []
    log["val"] = []
    log["lr"] = []
    log_save_path = os.path.join(log_dir, "log.json")
    utils.save_json(log, log_save_path)
    valacc = 0
    best_val_acc = 0
    # Fall back to the initial model if no epoch ever improves validation accuracy.
    bestmodel = model
    for epoch in range(args.epochs):
        print("epoch: %d --start from 0 and at most end at %d" % (epoch, args.epochs - 1))
        loss, acc = train_one_epoch(dataloader_dict["train"], model, criterion, optimizer, accuracy=accuracy, device=device, print_freq=args.print_freq)
        log["train"].append({'epoch': epoch, "loss": loss, "acc": acc})
        valloss, valacc = evaluate(dataloader_dict["val"], model, criterion, accuracy=accuracy, device=device)
        log["val"].append({'epoch': epoch, "loss": valloss, "acc": valacc})
        # Plateau scheduler steps on the monitored validation loss.
        lr_scheduler.step(valloss)
        #if this is the best model so far, keep it on cpu and save it
        if valacc > best_val_acc:
            best_val_acc = valacc
            log["best_epoch"] = epoch
            log["best_acc"] = best_val_acc
            # deepcopy so later training epochs don't mutate the kept weights.
            bestmodel = deepcopy(model)
            bestmodel.cpu()
            if args.savemodel:
                save_path = os.path.join(log_dir, "bestmodel.pth")
                utils.save_checkpoint(save_path, bestmodel, key="model")
                save_path = os.path.join(log_dir, "bestmodel_optimizer.pth")
                utils.save_checkpoint(save_path, optimizer, key="optimizer")
        utils.save_json(log, log_save_path)
        # Early-stop once the (largest) LR across param groups decays below lr_min.
        max_lr_now = max([group['lr'] for group in optimizer.param_groups])
        log["lr"].append(max_lr_now)
        if max_lr_now < args.lr_min:
            break
    #use the best model to evaluate on test set
    print("test started")
    loss, acc = evaluate(dataloader_dict["test"], bestmodel, criterion, accuracy=accuracy, device=device)
    log["test"] = {"loss": loss, "acc": acc}
    time_elapsed = time.time() - since
    log["time_elapsed"] = time_elapsed
    #save the final log
    utils.save_json(log, log_save_path)
for (dirpath, dirnames, filenames) in os.walk("tests/"): for f in filenames: if not f.endswith(".py"): continue p = dirpath + '/' + f spec = importlib.util.spec_from_file_location('module.name', p) m = importlib.util.module_from_spec(spec) spec.loader.exec_module(m) attrs = set(dir(m)) - set(dir(PerfTest)) for cname in attrs: c = getattr(m, cname) if inspect.isclass(c) and issubclass(c, PerfTest.PerfTest): tests.append(c()) for s in sections: setup_device(config, s) for t in tests: if t.__class__.__name__ in disabled_tests: print("Skipping {}".format(t.__class__.__name__)) continue if len(args.tests) and t.name not in args.tests: continue print("Running {}".format(t.__class__.__name__)) run_test(args, session, config, s, t) if args.testonly: today = datetime.date.today() week_ago = today - datetime.timedelta(days=7) for s in sections: print(f"{s} test results") for t in tests:
def main():
    """Evaluate a VisionTransformer checkpoint on the validation split, reporting Acc@1/Acc@5."""
    config = get_eval_config()

    # Device selection (possibly multiple GPUs).
    device, device_ids = setup_device(config.n_gpu)

    # Build the model from config.
    model = VisionTransformer(
        image_size=(config.image_size, config.image_size),
        patch_size=(config.patch_size, config.patch_size),
        emb_dim=config.emb_dim,
        mlp_dim=config.mlp_dim,
        num_heads=config.num_heads,
        num_layers=config.num_layers,
        num_classes=config.num_classes,
        attn_dropout_rate=config.attn_dropout_rate,
        dropout_rate=config.dropout_rate)

    # Restore weights when a checkpoint is configured.
    if config.checkpoint_path:
        state_dict = load_checkpoint(config.checkpoint_path)
        model.load_state_dict(state_dict)
        print("Load pretrained weights from {}".format(config.checkpoint_path))

    # Move to device and wrap for multi-GPU when needed.
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)

    # The dataloader class is resolved by name from the configured dataset.
    data_loader = eval("{}DataLoader".format(config.dataset))(
        data_dir=os.path.join(config.data_dir, config.dataset),
        image_size=config.image_size,
        batch_size=config.batch_size,
        num_workers=config.num_workers,
        split='val')
    num_batches = len(data_loader)

    # Run the whole validation split without gradients.
    print("Starting evaluation")
    top1_history = []
    top5_history = []
    model.eval()
    with torch.no_grad():
        progress = tqdm(enumerate(data_loader), total=num_batches)
        for batch_idx, (images, labels) in progress:
            progress.set_description("Batch {:05d}/{:05d}".format(batch_idx, num_batches))
            images = images.to(device)
            labels = labels.to(device)
            logits = model(images)
            top1, top5 = accuracy(logits, labels, topk=(1, 5))
            top1_history.append(top1.item())
            top5_history.append(top5.item())
            progress.set_postfix(acc1=top1.item(), acc5=top5.item())

    # Report mean accuracies over all batches.
    print(
        "Evaluation of model {:s} on dataset {:s}, Acc@1: {:.4f}, Acc@5: {:.4f}"
        .format(config.model_arch, config.dataset, np.mean(top1_history),
                np.mean(top5_history)))