def main():
    """Evaluate the best checkpoint of run `args.load` on the CUB-200 test set."""
    # read back the configuration the run was trained with
    config_file = '../../log/' + args.load + '/train_config.json'
    with open(config_file) as fi:
        config = json.load(fi)
        print(" ".join("\033[96m{}\033[0m: {},".format(k, v)
                       for k, v in config.items()))

    # deterministic eval-time preprocessing: resize then center crop
    test_transforms = transforms.Compose([
        transforms.Resize(size=448),
        transforms.CenterCrop(size=448),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])

    # test split and its loader (no shuffling, keep every sample)
    test_data = CUB200(root='../../data/cub200',
                       train=False,
                       transform=test_transforms)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=config['batch_size'],
                                              shuffle=False,
                                              num_workers=config['workers'],
                                              pin_memory=False,
                                              drop_last=False)

    # build the backbone; checkpoint was saved from a DataParallel wrapper
    arch = config['arch']
    if arch == 'resnet101':
        backbone = ResNet101(num_classes, num_parts=config['nparts'])
    elif arch == 'resnet50':
        backbone = ResNet50(num_classes, num_parts=config['nparts'])
    else:
        raise (RuntimeError(
            "Only support resnet50 or resnet101 for architecture!"))
    model = nn.DataParallel(backbone).cuda()

    # restore the best checkpoint and switch to eval mode
    resume = '../../checkpoints/' + args.load + '_best.pth.tar'
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume)
    model.load_state_dict(checkpoint['state_dict'], strict=True)
    model.eval()

    # run the evaluation pass
    acc = test(test_loader, model)

    # report the result
    print('Testing finished...')
    print('Best accuracy on test set is: %.4f.' % acc)
def main(args, logger):
    """Build the requested ResNet, optionally restore a checkpoint, and evaluate.

    Args:
        args: parsed CLI namespace (dataset, model_name, stem, cuda,
            pretrained_model, checkpoint_dir, ...).
        logger: logger that records checkpoint-restore information.

    Raises:
        ValueError: if `args.dataset` or `args.model_name` is unsupported
            (previously these cases left `num_classes`/`model` unbound and
            failed later with a confusing NameError).
    """
    train_loader, test_loader = load_data(args)

    # map dataset name -> number of output classes; fail fast on unknowns
    if args.dataset == 'CIFAR10':
        num_classes = 10
    elif args.dataset == 'CIFAR100':
        num_classes = 100
    elif args.dataset == 'IMAGENET':
        num_classes = 1000
    else:
        raise ValueError('Unknown dataset: {0}'.format(args.dataset))

    print('img_size: {}, num_classes: {}, stem: {}'.format(
        args.img_size, num_classes, args.stem))

    # instantiate the selected architecture
    if args.model_name == 'ResNet26':
        print('Model Name: {0}'.format(args.model_name))
        model = ResNet26(num_classes=num_classes, stem=args.stem,
                         dataset=args.dataset)
    elif args.model_name == 'ResNet38':
        print('Model Name: {0}'.format(args.model_name))
        model = ResNet38(num_classes=num_classes, stem=args.stem,
                         dataset=args.dataset)
    elif args.model_name == 'ResNet50':
        print('Model Name: {0}'.format(args.model_name))
        model = ResNet50(num_classes=num_classes, stem=args.stem,
                         dataset=args.dataset)
    else:
        raise ValueError('Unknown model: {0}'.format(args.model_name))

    if args.pretrained_model:
        # restore weights and bookkeeping from the best checkpoint
        filename = 'best_model_' + str(args.dataset) + '_' + str(
            args.model_name) + '_' + str(args.stem) + '_ckpt.tar'
        print('filename :: ', filename)
        file_path = os.path.join(args.checkpoint_dir, filename)
        checkpoint = torch.load(file_path)

        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
        best_acc = checkpoint['best_acc']
        model_parameters = checkpoint['parameters']
        print('Load model, Parameters: {0}, Start_epoch: {1}, Acc: {2}'.format(
            model_parameters, start_epoch, best_acc))
        logger.info(
            'Load model, Parameters: {0}, Start_epoch: {1}, Acc: {2}'.format(
                model_parameters, start_epoch, best_acc))
    else:
        start_epoch = 1
        best_acc = 0.0

    if args.cuda:
        # multi-GPU wrap, then move to GPU
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model = model.cuda()

    # NOTE: `eval` here is a project-level evaluation routine that shadows the
    # builtin; the call is kept as-is because callers rely on it.
    eval(model, test_loader, args)
def train_keras():
    """ Distributed strategy with Keras API """
    epochs = 2
    strategy = tf.distribute.MirroredStrategy()
    # per-replica batch of 32, scaled by the number of replicas in sync
    global_batch_size = 32 * strategy.num_replicas_in_sync
    train_dataset = create_dataset(global_batch_size)

    # model creation and compilation must happen under the strategy scope
    with strategy.scope():
        model = ResNet50(input_shape=(224, 224, 3), num_classes=1000)
        model.compile(optimizer=tf.keras.optimizers.Adam(),
                      loss=tf.keras.losses.CategoricalCrossentropy(),
                      metrics=['accuracy'])
        model.fit(train_dataset, epochs=epochs)
def train_multiway(tag, classes, num_epochs=200, resume=False):
    """Train a ResNet50 multiway classifier over `classes`, checkpointing under `tag`."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # checkpoints for this run live under checkpoints/<tag>
    directory = f"checkpoints/{tag}"
    if not os.path.isdir(directory):
        os.makedirs(directory)

    # Data
    print('==> Preparing data...')
    loaders = MultiwaySubDatasets(classes)
    trainloader, testloader = loaders

    # Create model, criterion and optimizer
    print(f'==> Building model...')
    net = ResNet50(len(classes))  # one output per requested class

    _train(num_epochs, resume, net, device, directory, trainloader, testloader)
def train_custom():
    """ Distributed strategy with custom training loop """
    epochs = 2
    strategy = tf.distribute.MirroredStrategy()
    # per-replica batch of 32, scaled to all replicas in sync
    global_batch_size = strategy.num_replicas_in_sync * 32
    train_dataset = create_dataset(global_batch_size)
    train_distribute_dataset = strategy.experimental_distribute_dataset(
        train_dataset)
    with strategy.scope():
        # variables (model/optimizer) must be created under the strategy scope
        model = ResNet50(input_shape=(224, 224, 3), num_classes=1000)
        # Reduction.NONE: per-example losses; reduction happens via
        # strategy.reduce in the epoch loop below
        loss_object = tf.keras.losses.CategoricalCrossentropy(
            reduction=tf.keras.losses.Reduction.NONE)
        optimizer = tf.keras.optimizers.Adam()

        def train_step(inputs):
            # one forward/backward pass on a single replica's shard
            images, labels = inputs
            with tf.GradientTape() as tape:
                outputs = model(images, training=True)
                loss = loss_object(labels, outputs)
            grads = tape.gradient(target=loss,
                                  sources=model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            return loss

        @tf.function
        def distributed_train_epoch(dataset):
            # run train_step on every replica; print per-step wall time
            t0 = tf.timestamp()
            for one_batch in dataset:
                per_replica_loss = strategy.experimental_run_v2(
                    train_step, args=(one_batch, ))
                # NOTE(review): the reduced loss is computed but not captured
                # or logged — presumably only the timing output matters here
                strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_loss,
                                axis=None)
                delta_t = tf.strings.as_string((tf.timestamp() - t0) * 1000,
                                               precision=1)
                tf.print(delta_t, 'ms/step')
                t0 = tf.timestamp()

        for i in range(0, epochs):
            distributed_train_epoch(train_distribute_dataset)
def main():
    """Load a saved model and predict the class of one grayscale image.

    CLI:
        -i  image file to classify
        -m  directory containing the saved model state dict
    """
    # Parse arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        dest="image_file_to_predict",
        type=str,
        help="Image file to predict",
        required=True)
    parser.add_argument(
        "-m",
        dest="saved_model_path",
        type=str,
        help="Path to saved model",
        required=True)
    args = parser.parse_args()

    # Load image. cv2.imread returns None instead of raising on failure,
    # which previously crashed later in cv2.resize with a cryptic error.
    img = cv2.imread(args.image_file_to_predict, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(
            "Could not read image: {}".format(args.image_file_to_predict))

    # Pre-process: resize to 28x28 and normalize (MNIST statistics).
    img = cv2.resize(img, (28, 28))
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize((0.1307,), (0.3081,)),
    ])
    input_tensor = transform(img).unsqueeze(0)  # add batch dimension

    # Create device.
    if torch.cuda.is_available():
        device_type = "cuda"
        print("Predict on GPU.")
    else:
        device_type = "cpu"
        print("Predict on CPU.")
    device = torch.device(device_type)

    # Load model.
    model_path = "{}/{}".format(args.saved_model_path, SAVED_MODEL_NAME)
    model = ResNet50()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)

    # Predict (no gradients needed at inference time).
    model.eval()
    input_tensor = input_tensor.to(device)
    with torch.no_grad():
        output = model(input_tensor)
    pred = output.argmax(1, keepdim=True)
    print("Predict: {}".format(pred.cpu()[0][0]))
def predict(model_path, batch_size):
    """Restore a checkpoint and score the whole test set.

    Returns a 1-D array with the positive-class score of every test image.
    """
    Y_hat, model_params = ResNet50()
    X = model_params['input']
    saver = tf.train.Saver()
    test_gen = DataLabTest('./datasets/test_set/').generator()

    scores = []
    with tf.Session() as sess:
        saver.restore(sess, model_path)
        # 12500 test images -> this many generator batches
        n_batches = 12500 // batch_size + 1
        for _ in range(n_batches):
            y = sess.run(Y_hat, feed_dict={X: next(test_gen)})
            print(y.shape, end=' ')
            scores.append(y[:, 1])  # keep only the positive-class column
    print(len(scores))
    scores = np.concatenate(scores)
    print(scores.shape)
    return scores
def main() -> None:
    """Interactively pair query images against a dataset of encodings.

    Builds (or loads) encodings for the NegativePeopleDataset, then loops
    reading query indices from stdin until non-numeric input quits.
    """
    args = parseArguments()

    #load the dataset class
    print("loading NegativePeopleDataset.")
    dataset = NegativePeopleDataset(args["nImgs"], nTestDir=args["sets"])

    #create model
    model = None
    if args["model"] == "resNet50":
        model = ResNet50()
    elif args["model"] == "googleNet":
        model = GoogleNet()

    if dataset is not None and model is not None:
        #create or load the encodings dictionary
        encodingsFile = ''.join([
            args["encOut"], "/", args["model"], "_negativePeople_s",
            str(args["sets"]), "-n",
            str(args["nImgs"]), ".pkl"
        ])
        if args["createEncoding"]:
            print("[INFO] Creating encodings...")
            encodings = encodeDataset(model, dataset, encodingsFile)
        else:
            print("[INFO] Loading encodings...")
            # use a context manager so the file handle is not leaked;
            # NOTE: pickle is only safe on trusted, locally-produced files
            with open(encodingsFile, "rb") as enc_fh:
                encodings = pickle.loads(enc_fh.read())

        #pairing people to the space of encodings
        while True:
            q = 0
            i = input("\nquery number (error will quit):")
            if i != "":
                try:
                    q = int(i)
                except ValueError:  # was a bare except: — catch only bad input
                    print("The input is not a number...")
                    break
            imgBaseName = ''.join(
                [args["imgOut"], args["model"], "_negativePeople_"])
            pairPeople(model, dataset, encodings, q, imgBaseName)
def get_model():
    """Instantiate the network selected by `args.model` with `args.dropout`.

    Returns:
        The requested ResNet/VGG model instance.

    Raises:
        ValueError: if `args.model` names an unsupported architecture.
            (The original `raise 'Model Not found'` raises a string, which is
            itself a TypeError in Python 3.)
    """
    # dispatch tables instead of a long if/elif chain
    resnets = {
        'ResNet18': ResNet18,
        'ResNet34': ResNet34,
        'ResNet50': ResNet50,
        'ResNet101': ResNet101,
        'ResNet152': ResNet152,
    }
    if args.model in resnets:
        return resnets[args.model](p_dropout=args.dropout)
    if args.model in ('VGG11', 'VGG13', 'VGG16', 'VGG19'):
        return VGG(args.model, p_dropout=args.dropout)
    raise ValueError('Model Not found')
def main():
    """ Script entrypoint """
    started = datetime.now()
    header = ["Start Time", "End Time", "Duration (s)"]
    row = [started.strftime(DEFAULT_DATE_TIME_FORMAT)]

    dnn = ResNet50()

    # show class indices
    print('****************')
    for cls, idx in dnn.train_batches.class_indices.items():
        print('Class #{} = {}'.format(idx, cls))
    print('****************')

    print(dnn.model.summary())

    dnn.train(started,
              epochs=dnn.num_epochs,
              batch_size=dnn.batch_size,
              training=dnn.train_batches,
              validation=dnn.valid_batches)

    # persist trained weights (full model + weights-only + architecture JSON)
    dnn.model.save(dnn.file_weights + 'old')
    dnn.model.save_weights(dnn.file_weights)
    with open(dnn.file_architecture, 'w') as f:
        f.write(dnn.model.to_json())

    # record run timing
    finished = datetime.now()
    row.append(finished.strftime(DEFAULT_DATE_TIME_FORMAT))
    row.append(str(get_difference_in_seconds(started, finished)))

    append_row_to_csv(complete_run_timing_file, header)
    append_row_to_csv(complete_run_timing_file, row)
def train(): Y_hat, model_params = ResNet50() #Y_hat = tf.sigmoid(Z) X = model_params['input'] Y_true = tf.placeholder(dtype=tf.float32, shape=[None, 2]) Z = model_params['out']['Z'] loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z, labels=Y_true)) train_step = tf.train.AdamOptimizer(1e-3).minimize(loss) saver = tf.train.Saver() with tf.Session() as sess: try: train_gen = DataLabTrain('./datasets/train_set/').generator() sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) ix = 0 for X_true, Y_true_ in train_gen: ix += 1 if ix % 10 == 0: l, _ = sess.run([loss, train_step], feed_dict={X:X_true, Y_true:Y_true_}) #acc = np.mean(y.astype('int32') == Y_true_.astype('int32')) print('epoch: ' + str(ix) + ' loss: ' + str(l)) else: sess.run([train_step], feed_dict={X: X_true, Y_true: Y_true_}) if ix % 500 == 0: path = './models/model' + (str(ix)) os.makedirs(path) saver.save(sess, path + '/model.ckpt') if ix == 5000: break finally: sess.close()
def train_binary(tag,
                 positive_class,
                 negative_classes=None,
                 num_epochs=200,
                 resume=False):
    """Train a binary ResNet50 classifier for `positive_class` vs. the negatives."""
    print(f"Training {positive_class} classifier...")
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # checkpoints for this classifier live under checkpoints/<tag>/<positive_class>
    directory = f"checkpoints/{tag}/{positive_class}"
    if not os.path.isdir(directory):
        os.makedirs(directory)

    # Data
    print('==> Preparing data...')
    loaders = BinaryDatasets([positive_class], negative_classes)
    trainloader, testloader = loaders

    # Create model, criterion and optimizer
    print(f'==> Building model...')
    net = ResNet50(2)  # two outputs: positive vs. negative

    _train(num_epochs, resume, net, device, directory, trainloader, testloader)
def main(args):
    """Train (or, with --evaluation, just evaluate) a ResNet50 per `args`."""
    train_loader, test_loader = load_data(args)

    model = ResNet50()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          weight_decay=args.weight_decay,
                          momentum=args.momentum)

    # checkpoint directory + optional resume
    if not os.path.isdir('checkpoints'):
        os.mkdir('checkpoints')
    if args.checkpoints is None:
        start_epoch = 1
    else:
        state = torch.load(os.path.join('checkpoints', args.checkpoints))
        model.load_state_dict(state['model_state_dict'])
        optimizer.load_state_dict(state['optimizer_state_dict'])
        start_epoch = state['global_epoch']

    if args.cuda:
        model = model.cuda()

    if args.evaluation:
        # evaluation-only mode: single pass over the test set
        _eval(start_epoch, test_loader, model, args)
        return

    criterion = nn.CrossEntropyLoss()
    # warm-restart cosine schedule, stepped once per epoch
    lr_scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=10, T_mult=2, eta_min=0.0001)

    global_acc = 0.
    for epoch in range(start_epoch, args.epochs + 1):
        _train(epoch, train_loader, model, optimizer, criterion, args)
        epoch_acc = _eval(epoch, test_loader, model, args)
        # checkpoint only when the test accuracy improves
        if global_acc < epoch_acc:
            global_acc = epoch_acc
            save_checkpoint(epoch_acc, model, optimizer, args, epoch)
        lr_scheduler.step()
        print('Current Learning Rate: {}'.format(lr_scheduler.get_last_lr()))
def trainer(
    train_set: torch.utils.data.Dataset,
    test_set: torch.utils.data.Dataset,
    size_dict: Dict[int, int],
    model: str = "ResNet50",
    device: torch.device = torch.device("cpu"),
    batch_size: int = 500,
    num_epochs: int = 10,
    learning_rate: float = 0.001,
    weight_decay: float = 0,
    dropout: float = 0,
) -> float:
    """
    Get the best test accuracy after training for `num_epochs` epochs.
    """
    # data loaders
    train_loader = DataLoader(
        dataset=train_set,
        batch_size=batch_size,
        drop_last=True,
        shuffle=True,
        pin_memory=True,
    )
    test_loader = DataLoader(dataset=test_set,
                             batch_size=batch_size,
                             shuffle=False,
                             pin_memory=True)

    # instantiate the requested architecture, loss and optimizer
    num_classes = len(size_dict)
    if model == "ResNet50":
        model = ResNet50(num_classes, dropout).to(device)
    elif model == "ResNet18":
        model = ResNet18(num_classes, dropout).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params=model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)

    # densely re-encode class ids so the one-hot dimension equals num_classes
    class_encoding = {
        cid: idx for idx, (cid, _) in enumerate(size_dict.items())
    }

    best_acc = 0
    for _ in range(num_epochs):
        # ---- training pass ----
        model.train()
        for batch_imgs, batch_labels in train_loader:
            batch_labels = batch_labels.apply_(class_encoding.__getitem__)
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)

            # forward
            out = model(batch_imgs)
            loss = criterion(out, batch_labels)

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # ---- evaluation pass ----
        model.eval()
        accuracies = []
        with torch.no_grad():
            for batch_imgs, batch_labels in test_loader:
                batch_labels = batch_labels.apply_(class_encoding.__getitem__)
                batch_imgs = batch_imgs.to(device)
                batch_labels = batch_labels.to(device)
                accuracies.append(accuracy(model(batch_imgs), batch_labels))

        epoch_acc = sum(accuracies) / len(accuracies)
        best_acc = max(best_acc, epoch_acc)

    return float(round(best_acc, 4))
def main():
    """Visualize part attentions/assignments for UBIPr verification pairs.

    Loads the trained single-GPU model for run `args.load`, runs 100 test
    pairs through it, and writes per-pair inputs, attention maps, assignment
    maps and an assembled explanation image under ../../visualization/.
    The large triple-quoted blocks below are dead code from an earlier
    two-image (A/B) variant of this pipeline, kept for reference.
    """
    # load the config file
    config_file = '../../log/' + args.load + '/train_config.json'
    with open(config_file) as fi:
        config = json.load(fi)
        print(" ".join("\033[96m{}\033[0m: {},".format(k, v)
                       for k, v in config.items()))

    # define data transformation (no crop)
    test_transforms = transforms.Compose([
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])

    # wrap to dataset
    #test_data = UBIPr_Identification("pairs_to_explain_identification", split='train', transform=test_transforms)
    test_data = UBIPr_Verification("pairs_to_explain_verification",
                                   split='train',
                                   transform=test_transforms)

    # wrap to dataloader (batch_size must stay 1 for this visualization code)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=False,
                                              drop_last=False)
    test_loader_iter = iter(test_loader)

    # define the figure layout
    fig_rows = 5
    fig_cols = 5
    f_assign, axarr_assign = plt.subplots(fig_rows, fig_cols,
                                          figsize=(fig_cols * 2, fig_rows * 2))
    f_assign.subplots_adjust(wspace=0, hspace=0)

    # load the model in eval mode
    # with batch size = 1, we only support single GPU visaulization
    if config['arch'] == 'resnet101':
        model = ResNet101(num_classes, num_parts=config['nparts']).cuda()
    elif config['arch'] == 'resnet50':
        model = ResNet50(num_classes, num_parts=config['nparts']).cuda()
    else:
        raise (RuntimeError(
            "Only support resnet50 or resnet101 for architecture!"))

    # load model
    resume = '../../checkpoints/' + args.load + '_best.pth.tar'
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume)

    # remove the module prefix (checkpoint was saved from nn.DataParallel)
    new_state_dict = OrderedDict()
    for k, v in checkpoint['state_dict'].items():
        name = k[7:]  # remove `module.`
        new_state_dict[name] = v

    model.load_state_dict(new_state_dict, strict=True)
    model.eval()

    with torch.no_grad():
        # the visualization code
        current_id = 0
        for i in range(100):
            t0 = time.time()

            # inference the model
            img_batch, ground_truth, _ = next(test_loader_iter)
            input = img_batch.cuda()  # NOTE: shadows the `input` builtin
            target = ground_truth.cuda()
            #image_A = img_batch[0][0].cuda()
            #image_B = img_batch[1][0].cuda()
            '''image_A_labels = img_labels[0][0].cuda()
            image_B_labels = img_labels[1][0].cuda()'''
            current_id += 1
            with torch.no_grad():
                print("Visualizing %dth image..." % current_id)
                #output_list_A, att_list_A, assign_A = model(torch.reshape(image_A, [1, 3, 256, 256]))
                #output_list_B, att_list_B, assign_B = model(torch.reshape(image_B, [1, 3, 256, 256]))
                output_list, att_list, assign = model(input)

            # define root for saving results and make directories correspondingly
            root = os.path.join('../../visualization', args.load,
                                str(current_id))
            os.makedirs(root, exist_ok=True)
            '''os.makedirs(os.path.join(root, 'attentions_A'), exist_ok=True)
            os.makedirs(os.path.join(root, 'attentions_B'), exist_ok=True)'''
            os.makedirs(os.path.join(root, 'attentions'), exist_ok=True)
            if (not JUST_CARE_ABOUT_THE_SCORES):
                '''os.makedirs(os.path.join(root, 'assignments_A'), exist_ok=True)
                os.makedirs(os.path.join(root, 'assignments_B'), exist_ok=True)'''
                os.makedirs(os.path.join(root, 'assignments'), exist_ok=True)

            # denormalize the image and save the input
            '''save_input = transforms.Normalize(mean=(0, 0, 0),std=(1/0.229, 1/0.224, 1/0.225))(torch.reshape(image_A, [1, 3, 256, 256]).data[0].cpu())
            save_input = transforms.Normalize(mean=(-0.485, -0.456, -0.406),std=(1, 1, 1))(save_input)
            save_input = torch.nn.functional.interpolate(save_input.unsqueeze(0), size=(256, 256), mode='bilinear', align_corners=False).squeeze(0)
            img = torchvision.transforms.ToPILImage()(save_input)
            img.save(os.path.join(root, 'input_A.png'))
            save_input = transforms.Normalize(mean=(0, 0, 0),std=(1/0.229, 1/0.224, 1/0.225))(torch.reshape(image_B, [1, 3, 256, 256]).data[0].cpu())
            save_input = transforms.Normalize(mean=(-0.485, -0.456, -0.406),std=(1, 1, 1))(save_input)
            save_input = torch.nn.functional.interpolate(save_input.unsqueeze(0), size=(256, 256), mode='bilinear', align_corners=False).squeeze(0)
            img = torchvision.transforms.ToPILImage()(save_input)
            img.save(os.path.join(root, 'input_B.png'))'''

            # denormalize the image and save the input
            save_input = transforms.Normalize(
                mean=(0, 0, 0),
                std=(1 / 0.229, 1 / 0.224, 1 / 0.225))(input.data[0].cpu())
            save_input = transforms.Normalize(mean=(-0.485, -0.456, -0.406),
                                              std=(1, 1, 1))(save_input)
            save_input = torch.nn.functional.interpolate(
                save_input.unsqueeze(0),
                size=(256, 256),
                mode='bilinear',
                align_corners=False).squeeze(0)
            img = torchvision.transforms.ToPILImage()(save_input)
            img.save(os.path.join(root, 'input.png'))

            # save the labels and pred as list
            '''label_A = list(torch.reshape(image_A_labels, [1, image_A_labels.shape[0]]).data[0].cpu().numpy())
            assert (len(label_A) == num_classes)
            prediction_A = []
            highest_predicted_class_A = (0.0, 0, 0)
            for k in range(num_classes):
                current_pred = torch.sigmoid(output_list_A[k]).squeeze().data.item()
                if(current_pred > highest_predicted_class_A[0]):
                    highest_predicted_class_A = (current_pred, UBIPR_CLASSES[k], k)
                prediction_A.append(current_pred)
            label_B = list(torch.reshape(image_B_labels, [1, image_B_labels.shape[0]]).data[0].cpu().numpy())
            prediction_B = []
            highest_predicted_class_B = (0.0, 0, 0)
            for k in range(num_classes):
                current_pred = torch.sigmoid(output_list_B[k]).squeeze().data.item()
                if(current_pred > highest_predicted_class_B[0]):
                    highest_predicted_class_B = (current_pred, UBIPR_CLASSES[k], k)
                prediction_B.append(current_pred)'''

            # save the labels and pred as list
            label = list(target.data[0].cpu().numpy())
            prediction = []
            assert (len(label) == num_classes)
            highest_predicted_class = (0.0, 0, 0)
            for k in range(num_classes):
                current_pred = torch.sigmoid(
                    output_list[k]).squeeze().data.item()
                #current_pred = int(current_score > 0.5)
                # track the top-scoring attribute alongside the raw scores
                if (current_pred > highest_predicted_class[0]):
                    highest_predicted_class = (current_pred, UBIPR_CLASSES[k],
                                               k)
                prediction.append(current_pred)

            # write the labels and pred
            '''if(not JUST_CARE_ABOUT_THE_SCORES):
                with open(os.path.join(root, 'prediction_A.txt'), 'w') as pred_log:
                    for k in range(num_classes):
                        pred_log.write('%s pred: %f, label: %d\n' % (UBIPR_CLASSES[k], prediction_A[k], label_A[k]))
                with open(os.path.join(root, 'prediction_B.txt'), 'w') as pred_log:
                    for k in range(num_classes):
                        pred_log.write('%s pred: %f, label: %d\n' % (UBIPR_CLASSES[k], prediction_B[k], label_B[k]))
            # upsample the assignment and transform the attention correspondingly
            assign_A_reshaped = torch.nn.functional.interpolate(assign_A.data.cpu(), size=(256, 256), mode='bilinear', align_corners=False)
            assign_B_reshaped = torch.nn.functional.interpolate(assign_B.data.cpu(), size=(256, 256), mode='bilinear', align_corners=False)'''

            # write the labels and pred
            with open(os.path.join(root, 'prediction.txt'), 'w') as pred_log:
                for k in range(num_classes):
                    pred_log.write('%s pred: %f, label: %d\n' %
                                   (UBIPR_CLASSES[k], prediction[k], label[k]))

            # upsample the assignment and transform the attention correspondingly
            assign_reshaped = torch.nn.functional.interpolate(
                assign.data.cpu(),
                size=(256, 256),
                mode='bilinear',
                align_corners=False)

            # visualize the attention
            '''for k in range(num_classes):
                #if(k != highest_predicted_class[2]): continue
                # attention vector for kth attribute
                att = att_list_A[k].view(1, config['nparts'], 1, 1).data.cpu()
                # multiply the assignment with the attention vector
                assign_att = assign_A_reshaped * att
                # sum along the part dimension to calculate the spatial attention map
                attmap_hw = torch.sum(assign_att, dim=1).squeeze(0).numpy()
                # normalize the attention map and merge it onto the input
                img = cv2.imread(os.path.join(root, 'input_A.png'))
                mask_A = attmap_hw / attmap_hw.max()
                # save the attention map for image A
                np.save(os.path.join(root, 'attention_map_A.npy'), mask_A)
                img_float = img.astype(float) / 255.
                show_att_on_image(img_float, mask_A, os.path.join(root, 'attentions_A', UBIPR_CLASSES[k]+'.png'))
            # generate the one-channel hard assignment via argmax
            _, assign = torch.max(assign_A_reshaped, 1)
            # colorize and save the assignment
            if(not JUST_CARE_ABOUT_THE_SCORES):
                plot_assignment(root, assign.squeeze(0).numpy(), config['nparts'], "A")
                # collect the assignment for the final image array
                color_assignment_name = os.path.join(root, 'assignment_A.png')
                color_assignment = mpimg.imread(color_assignment_name)
                #axarr_assign[j, col_id].imshow(color_assignment)
                #axarr_assign[j, col_id].axis('off')
            # plot the assignment for each dictionary vector
            if(not JUST_CARE_ABOUT_THE_SCORES):
                for i in range(config['nparts']):
                    img = torch.nn.functional.interpolate(assign_A_reshaped.data[:, i].cpu().unsqueeze(0), size=(256, 256), mode='bilinear', align_corners=False)
                    img = torchvision.transforms.ToPILImage()(img.squeeze(0))
                    img.save(os.path.join(root, 'assignments_A', 'part_'+str(i)+'.png'))
            # visualize the attention
            for k in range(num_classes):
                #if(k != highest_predicted_class[2]): continue
                # attention vector for kth attribute
                att = att_list_B[k].view(1, config['nparts'], 1, 1).data.cpu()
                # multiply the assignment with the attention vector
                assign_att = assign_B_reshaped * att
                # sum along the part dimension to calculate the spatial attention map
                attmap_hw = torch.sum(assign_att, dim=1).squeeze(0).numpy()
                # normalize the attention map and merge it onto the input
                img = cv2.imread(os.path.join(root, 'input_B.png'))
                mask_B = attmap_hw / attmap_hw.max()
                # save the attention map for image B
                np.save(os.path.join(root, 'attention_map_B.npy'), mask_B)
                img_float = img.astype(float) / 255.
                show_att_on_image(img_float, mask_B, os.path.join(root, 'attentions_B', UBIPR_CLASSES[k]+'.png'))
            # generate the one-channel hard assignment via argmax
            _, assign = torch.max(assign_B_reshaped, 1)
            # colorize and save the assignment
            if(not JUST_CARE_ABOUT_THE_SCORES):
                plot_assignment(root, assign.squeeze(0).numpy(), config['nparts'], "B")
                # collect the assignment for the final image array
                color_assignment_name = os.path.join(root, 'assignment_B.png')
                color_assignment = mpimg.imread(color_assignment_name)
                #axarr_assign[j, col_id].imshow(color_assignment)
                #axarr_assign[j, col_id].axis('off')
            # plot the assignment for each dictionary vector
            if(not JUST_CARE_ABOUT_THE_SCORES):
                for i in range(config['nparts']):
                    img = torch.nn.functional.interpolate(assign_B_reshaped.data[:, i].cpu().unsqueeze(0), size=(256, 256), mode='bilinear', align_corners=False)
                    img = torchvision.transforms.ToPILImage()(img.squeeze(0))
                    img.save(os.path.join(root, 'assignments_B', 'part_'+str(i)+'.png'))'''

            # visualize the attention (only attribute k == 0 is rendered here)
            for k in range(num_classes):
                if (k != 0):
                    continue

                # attention vector for kth attribute
                att = att_list[k].view(1, config['nparts'], 1, 1).data.cpu()

                # multiply the assignment with the attention vector
                assign_att = assign_reshaped * att

                # sum along the part dimension to calculate the spatial attention map
                attmap_hw = torch.sum(assign_att, dim=1).squeeze(0).numpy()

                # normalize the attention map and merge it onto the input
                img = cv2.imread(os.path.join(root, 'input.png'))
                mask = attmap_hw / attmap_hw.max()

                # save the attention map
                np.save(os.path.join(root, 'attention_map.npy'), mask)

                img_float = img.astype(float) / 255.
                show_att_on_image(
                    img_float, mask,
                    os.path.join(root, 'attentions',
                                 UBIPR_CLASSES[k] + '.png'))

            # generate the one-channel hard assignment via argmax
            # (note: rebinds `assign`, the soft model output, to the hard map)
            _, assign = torch.max(assign_reshaped, 1)

            # colorize and save the assignment
            if (not JUST_CARE_ABOUT_THE_SCORES):
                plot_assignment(root,
                                assign.squeeze(0).numpy(), config['nparts'],
                                None)

                # collect the assignment for the final image array
                color_assignment_name = os.path.join(root, 'assignment.png')
                color_assignment = mpimg.imread(color_assignment_name)
                #axarr_assign[j, col_id].imshow(color_assignment)
                #axarr_assign[j, col_id].axis('off')

            # plot the assignment for each dictionary vector
            if (not JUST_CARE_ABOUT_THE_SCORES):
                for i in range(config['nparts']):
                    img = torch.nn.functional.interpolate(
                        assign_reshaped.data[:, i].cpu().unsqueeze(0),
                        size=(256, 256),
                        mode='bilinear',
                        align_corners=False)
                    img = torchvision.transforms.ToPILImage()(img.squeeze(0))
                    img.save(
                        os.path.join(root, 'assignments',
                                     'part_' + str(i) + '.png'))

            # --------------------------------------------------------------------------------------------------------------------------------
            # build the final explanation
            # --------------------------------------------------------------------------------------------------------------------------------
            '''difference_mask_1 = np.asarray(Image.open(os.path.join(root, 'attentions_A') + "/" + ground_truth[0][0] + ".png").convert("RGBA"))
            difference_mask_2 = np.asarray(Image.open(os.path.join(root, 'attentions_B') + "/" + ground_truth[1][0] + ".png").convert("RGBA"))
            image_A = np.asarray(Image.open(os.path.join(root, 'input_A.png')).convert("RGBA").resize((127, 127), Image.LANCZOS))
            image_B = np.asarray(Image.open(os.path.join(root, 'input_B.png')).convert("RGBA").resize((127, 127), Image.LANCZOS))
            assemble_explanation(image_A, image_B, difference_mask_2, difference_mask_1, 0.0, "I", os.path.join(root, 'explanation.png'))
            if(JUST_CARE_ABOUT_THE_SCORES):
                rmtree(os.path.join(root, 'attentions_A'))
                rmtree(os.path.join(root, 'attentions_B'))
            elapsed_time = time.time() - t0
            print("[INFO] ELAPSED TIME: %.2fs\n" % (elapsed_time))
            with open("times_by_parts.txt", "a") as file:
                file.write(str(elapsed_time) + "\n")'''

            # the saved attention image holds both pair members side by side;
            # crop the two halves (rows 64:192, left/right 128-column blocks)
            # — assumed layout, TODO confirm against assemble_explanation
            difference_mask_aux = np.asarray(
                Image.open(os.path.join(root, 'attentions') +
                           "/I.png").convert("RGBA"))
            difference_mask_1 = difference_mask_aux[64:64 + 128, :128, :]
            difference_mask_2 = difference_mask_aux[64:64 + 128, 128:, :]
            input_aux = np.asarray(
                Image.open(os.path.join(root, 'input.png')).convert("RGBA"))
            image_A = np.asarray(
                Image.fromarray(input_aux[64:64 + 128, :128, :].astype(
                    np.uint8)).resize((127, 127),
                                      Image.LANCZOS).convert("RGBA"))
            image_B = np.asarray(
                Image.fromarray(input_aux[64:64 + 128, 128:, :].astype(
                    np.uint8)).resize((127, 127),
                                      Image.LANCZOS).convert("RGBA"))
            assemble_explanation(image_A, image_B, difference_mask_2,
                                 difference_mask_1, 0.0, "I",
                                 os.path.join(root, 'explanation.png'))
            if (JUST_CARE_ABOUT_THE_SCORES):
                rmtree(os.path.join(root, 'attentions'))

            elapsed_time = time.time() - t0
            print("[INFO] ELAPSED TIME: %.2fs\n" % (elapsed_time))
            with open("times_by_parts.txt", "a") as file:
                file.write(str(elapsed_time) + "\n")

    # save the array version
    os.makedirs('../../visualization/collected', exist_ok=True)
    f_assign.savefig(
        os.path.join('../../visualization/collected', args.load + '.png'))

    print('Visualization finished!')
def main():
    """Visualize per-attribute attentions and part assignments on CelebA.

    Loads the trained model for run `args.load`, pushes a 5x5 grid of test
    images through it, and writes per-image inputs, attention maps,
    assignment maps and predictions under ../visualization/.
    """
    # load the config file
    config_file = '../log/'+ args.load +'/train_config.json'
    with open(config_file) as fi:
        config = json.load(fi)
        print(" ".join("\033[96m{}\033[0m: {},".format(k, v) for k, v in config.items()))

    # define data transformation (no crop)
    test_transforms = transforms.Compose([
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    ])

    # define test dataset and loader
    if config['split'] == 'accuracy':
        dataset = CelebA('../data/celeba', split='test', align=True,
                         percentage=None, transform=test_transforms, resize=(256, 256))
    elif config['split'] == 'interpretability':
        dataset = CelebA('../data/celeba', split='test', align=False,
                         percentage=0.3, transform=test_transforms, resize=(256, 256))
    else:
        raise(RuntimeError("Please choose either \'accuracy\' or \'interpretability\' for data split."))

    # batch_size must stay 1: the code below indexes sample 0 directly
    test_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=True, num_workers=1, pin_memory=False)

    # create a dataloader iter instance
    test_loader_iter = iter(test_loader)

    # define the figure layout
    fig_rows = 5
    fig_cols = 5
    f_assign, axarr_assign = plt.subplots(fig_rows, fig_cols, figsize=(fig_cols*2,fig_rows*2))
    f_assign.subplots_adjust(wspace=0, hspace=0)

    # load the model in eval mode
    # with batch size = 1, we only support single GPU visaulization
    if config['arch'] == 'resnet101':
        model = ResNet101(num_classes, num_parts=config['nparts']).cuda()
    elif config['arch'] == 'resnet50':
        model = ResNet50(num_classes, num_parts=config['nparts']).cuda()
    else:
        raise(RuntimeError("Only support resnet50 or resnet101 for architecture!"))

    # load model
    resume = '../checkpoints/'+args.load+'_best.pth.tar'
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume)

    # remove the module prefix (checkpoint was saved from nn.DataParallel)
    new_state_dict = OrderedDict()
    for k, v in checkpoint['state_dict'].items():
        name = k[7:] # remove `module.`
        new_state_dict[name] = v

    model.load_state_dict(new_state_dict, strict=True)
    model.eval()

    with torch.no_grad():
        # the visualization code
        current_id = 0
        for col_id in range(fig_cols):
            for j in range(fig_rows):
                # inference the model
                input, target, _ = next(test_loader_iter)  # NOTE: shadows the `input` builtin
                input = input.cuda()
                target = target.cuda()
                current_id += 1
                with torch.no_grad():
                    print("Visualizing %dth image..." % current_id)
                    output_list, att_list, assign = model(input)

                # define root for saving results and make directories correspondingly
                root = os.path.join('../visualization', args.load, str(current_id))
                os.makedirs(root, exist_ok=True)
                os.makedirs(os.path.join(root, 'attentions'), exist_ok=True)
                os.makedirs(os.path.join(root, 'assignments'), exist_ok=True)

                # denormalize the image and save the input
                save_input = transforms.Normalize(mean=(0, 0, 0),std=(1/0.229, 1/0.224, 1/0.225))(input.data[0].cpu())
                save_input = transforms.Normalize(mean=(-0.485, -0.456, -0.406),std=(1, 1, 1))(save_input)
                save_input = torch.nn.functional.interpolate(save_input.unsqueeze(0), size=(256, 256), mode='bilinear', align_corners=False).squeeze(0)
                img = torchvision.transforms.ToPILImage()(save_input)
                img.save(os.path.join(root, 'input.png'))

                # save the labels and pred as list
                label = list(target.data[0].cpu().numpy())
                prediction = []
                assert (len(label) == num_classes)
                for k in range(num_classes):
                    # threshold each attribute's sigmoid score at 0.5
                    current_score = torch.sigmoid(output_list[k]).squeeze().data.item()
                    current_pred = int(current_score > 0.5)
                    prediction.append(current_pred)

                # write the labels and pred
                with open(os.path.join(root, 'prediction.txt'), 'w') as pred_log:
                    for k in range(num_classes):
                        pred_log.write('%s pred: %d, label: %d\n' % (celeba_attr[k], prediction[k], label[k]))

                # upsample the assignment and transform the attention correspondingly
                assign_reshaped = torch.nn.functional.interpolate(assign.data.cpu(), size=(256, 256), mode='bilinear', align_corners=False)

                # visualize the attention
                for k in range(num_classes):
                    # attention vector for kth attribute
                    att = att_list[k].view(1, config['nparts'], 1, 1).data.cpu()

                    # multiply the assignment with the attention vector
                    assign_att = assign_reshaped * att

                    # sum along the part dimension to calculate the spatial attention map
                    attmap_hw = torch.sum(assign_att, dim=1).squeeze(0).numpy()

                    # normalize the attention map and merge it onto the input
                    img = cv2.imread(os.path.join(root, 'input.png'))
                    mask = attmap_hw / attmap_hw.max()
                    img_float = img.astype(float) / 255.
                    show_att_on_image(img_float, mask, os.path.join(root, 'attentions', celeba_attr[k]+'.png'))

                # generate the one-channel hard assignment via argmax
                # (note: rebinds `assign`, the soft model output, to the hard map)
                _, assign = torch.max(assign_reshaped, 1)

                # colorize and save the assignment
                plot_assignment(root, assign.squeeze(0).numpy(), config['nparts'])

                # collect the assignment for the final image array
                color_assignment_name = os.path.join(root, 'assignment.png')
                color_assignment = mpimg.imread(color_assignment_name)
                axarr_assign[j, col_id].imshow(color_assignment)
                axarr_assign[j, col_id].axis('off')

                # plot the assignment for each dictionary vector
                for i in range(config['nparts']):
                    img = torch.nn.functional.interpolate(assign_reshaped.data[:, i].cpu().unsqueeze(0), size=(256, 256), mode='bilinear', align_corners=False)
                    img = torchvision.transforms.ToPILImage()(img.squeeze(0))
                    img.save(os.path.join(root, 'assignments', 'part_'+str(i)+'.png'))

    # save the array version
    os.makedirs('../visualization/collected', exist_ok=True)
    f_assign.savefig(os.path.join('../visualization/collected', args.load+'.png'))

    print('Visualization finished!')
def trainer(
    train_set: np.ndarray,
    test_set: np.ndarray,
    size_dict: Dict[int, int],
    model: str = "ResNet50",
    batch_size: int = 500,
    num_epochs: int = 10,
    learning_rate: float = 0.001,
    weight_decay: float = 0,
    dropout: float = 0,
) -> float:
    """
    Train the requested model and return the best test accuracy seen
    after any epoch.

    Args:
        train_set: (images, labels) arrays for the training split.
        test_set: (images, labels) arrays for the test split.
        size_dict: mapping from raw class id to class size; its keys define
            the class set and hence the one-hot output dimension.
        model: architecture name; only "ResNet50" is supported.
        batch_size: mini-batch size for both splits.
        num_epochs: number of passes over the training data.
        learning_rate: Adam learning rate.
        weight_decay: L2 penalty coefficient (0 disables the penalty).
        dropout: dropout rate forwarded to the model constructor.

    Returns:
        Best epoch-level test accuracy, rounded to 4 decimal places.

    Raises:
        ValueError: if `model` names an unsupported architecture.
    """
    # create dataloaders; only the training split is reshuffled each epoch
    train_loader = tf.data.Dataset.from_tensor_slices(train_set)
    test_loader = tf.data.Dataset.from_tensor_slices(test_set)
    train_loader = train_loader.shuffle(
        buffer_size=train_set[1].shape[0], reshuffle_each_iteration=True
    ).batch(batch_size)
    test_loader = test_loader.shuffle(
        buffer_size=test_set[1].shape[0], reshuffle_each_iteration=False
    ).batch(batch_size)

    # set model and optimizer; fail fast on an unknown architecture instead
    # of crashing later when the bare string would be called like a model
    num_classes = len(size_dict)
    if model == "ResNet50":
        model = ResNet50(num_classes, dropout)
    else:
        raise ValueError("Unsupported model architecture: {!r}".format(model))
    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

    # here class encoding is necessary since we need the dimension
    # of one-hot encoding identical to the number of classes
    class_encoding = {class_id: i for i, (class_id, _) in enumerate(size_dict.items())}
    # hoisted out of the loops: build the vectorized lookup once and reuse
    # it for every batch instead of re-wrapping a lambda per batch
    encode_labels = np.vectorize(class_encoding.__getitem__)

    # start training
    best_acc = 0
    for epoch in range(num_epochs):
        for images, labels in train_loader:
            labels = encode_labels(labels)
            with tf.GradientTape() as g:
                # forward pass: task loss plus optional L2 penalty
                preds = model(images, training=True)
                loss = cross_entropy_loss(preds, labels)
                l2_loss = weight_decay * tf.add_n(
                    [tf.nn.l2_loss(v) for v in model.trainable_variables]
                )
                loss += l2_loss
            # backward pass
            grad = g.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grad, model.trainable_variables))

        # test after each epoch; best accuracy over all epochs is returned
        accuracies = []
        for images, labels in test_loader:
            labels = encode_labels(labels)
            preds = model(images, training=False)
            accuracies.append(accuracy(preds, labels))
        epoch_acc = sum(accuracies) / len(accuracies)
        best_acc = max(best_acc, epoch_acc)

    return float(round(best_acc, 4))
def main():
    # read the training configuration stored alongside the log
    cfg_path = '../log/' + args.load + '/train_config.json'
    with open(cfg_path) as f:
        cfg = json.load(f)
    print(" ".join(f"\033[96m{k}\033[0m: {v}," for k, v in cfg.items()))

    # preprocessing applied to every test image
    eval_tf = transforms.Compose([
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])

    # pick the CelebA test variant matching the training objective
    split = cfg['split']
    if split == 'accuracy':
        dataset = CelebA('../data/celeba', split='test', align=True,
                         percentage=None, transform=eval_tf,
                         resize=(256, 256))
    elif split == 'interpretability':
        dataset = CelebA('../data/celeba', split='test', align=False,
                         percentage=0.3, transform=eval_tf,
                         resize=(256, 256))
    else:
        raise (RuntimeError(
            "Please choose either \'accuracy\' or \'interpretability\' for data split."
        ))

    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=cfg['batch_size'],
                                         shuffle=False,
                                         num_workers=6,
                                         pin_memory=False,
                                         drop_last=False)

    # build the requested backbone and wrap it for multi-GPU evaluation
    arch = cfg['arch']
    if arch == 'resnet101':
        backbone = ResNet101(num_classes, num_parts=cfg['nparts'])
    elif arch == 'resnet50':
        backbone = ResNet50(num_classes, num_parts=cfg['nparts'])
    else:
        raise (RuntimeError(
            "Only support resnet50 or resnet101 for architecture!"))
    model = nn.DataParallel(backbone).cuda()

    # restore the best checkpoint and switch to inference mode
    ckpt_path = '../checkpoints/' + args.load + '_best.pth.tar'
    print("=> loading checkpoint '{}'".format(ckpt_path))
    ckpt = torch.load(ckpt_path)
    model.load_state_dict(ckpt['state_dict'], strict=True)
    model.eval()

    # run the evaluation pass
    acc_per_attr, acc = test(loader, model)

    # report per-attribute accuracy followed by the mean
    print('Testing finished...')
    print('Per-attribute accuracy:')
    print(
        '==========================================================================='
    )
    for k in range(num_classes):
        print('\033[96m%s\033[0m: %.4f' % (celeba_attr[k], acc_per_attr[k].avg))
    print(
        '==========================================================================='
    )
    print('Best average accuracy on test set is: %.4f.' % acc)
def main():
    """Train the part-discovery model on CelebA, validating each epoch and
    tracking the best checkpoint (uses module-level args/paths/writer)."""
    global best_acc

    # create model by architecture and load the pretrained ImageNet weights
    # (strict=False: part-discovery heads have no pretrained counterpart)
    print("=> creating model...")
    if args['arch'] == 'resnet101':
        model = ResNet101(args['num_classes'], args['nparts'])
        model.load_state_dict(models.resnet101(pretrained=True).state_dict(), strict=False)
    elif args['arch'] == 'resnet50':
        model = ResNet50(args['num_classes'], args['nparts'])
        model.load_state_dict(models.resnet50(pretrained=True).state_dict(), strict=False)
    else:
        raise(RuntimeError("Only support ResNet50 or ResNet101!"))
    model = torch.nn.DataParallel(model).cuda()

    # optionally resume from a checkpoint
    start_epoch = 0
    if args['resume'] != '':
        if os.path.isfile(args['resume']):
            print("=> loading checkpoint '{}'".format(args['resume']))
            checkpoint = torch.load(args['resume'])
            start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args['resume'], checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args['resume']))

    # data augmentation (train gets flip + jitter; val is deterministic)
    train_transforms = transforms.Compose([
        transforms.Resize(size=(256, 256)),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    ])
    val_transforms = transforms.Compose([
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    ])

    # wrap to dataset; the two split modes use different alignment/percentage
    if args['split'] == 'accuracy':
        train_data = CelebA(data_dir, split='train_full', align=True,
                            percentage=None, transform=train_transforms, resize=(256, 256))
        val_data = CelebA(data_dir, split='val', align=True,
                          percentage=None, transform=val_transforms, resize=(256, 256))
    elif args['split'] == 'interpretability':
        train_data = CelebA(data_dir, split='train', align=False,
                            percentage=0.3, transform=train_transforms, resize=(256, 256))
        val_data = CelebA(data_dir, split='val',
                          align=False, percentage=0.3, transform=val_transforms, resize=(256, 256))
    else:
        raise(RuntimeError("Please choose either \'accuracy\' or \'interpretability\' for data split."))

    # wrap to dataloader
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=args['batch_size'], shuffle=True,
        num_workers=args['workers'], pin_memory=False, drop_last=True)
    val_loader = torch.utils.data.DataLoader(
        val_data, batch_size=args['batch_size'], shuffle=False,
        num_workers=args['workers'], pin_memory=True)

    # define loss function (criterion): multi-label attributes -> BCE on logits
    criterion = torch.nn.BCEWithLogitsLoss().cuda()

    # fix/finetune several layers: partition parameters by top-level layer
    # name (second dotted component under DataParallel's "module." prefix)
    fixed_layers = args['fixed']
    finetune_layers = args['finetune']
    finetune_parameters = []
    scratch_parameters = []
    for name, p in model.named_parameters():
        layer_name = name.split('.')[1]
        if layer_name not in fixed_layers:
            if layer_name in finetune_layers:
                finetune_parameters.append(p)
            else:
                scratch_parameters.append(p)
        else:
            p.requires_grad = False

    # define the optimizer according to different param groups:
    # scratch layers train 20x faster than finetuned (pretrained) ones
    optimizer = torch.optim.SGD([{'params': scratch_parameters, 'lr':20*args['lr']},
                                 {'params': finetune_parameters, 'lr':args['lr']},
                                 ], weight_decay=args['weight_decay'], momentum=0.9)

    # define the MultiStep learning rate scheduler (decay 10x at epoch 5)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5], gamma=0.1)

    # load the optimizer/scheduler from the checkpoint if needed
    # (reuses `checkpoint` loaded above under the same condition)
    if args['resume'] != '':
        if os.path.isfile(args['resume']):
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            scheduler.load_state_dict(checkpoint['scheduler'])

    # training part
    for epoch in range(start_epoch, args['epochs']):
        # training
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on val set
        acc_per_attr, acc = validate(val_loader, model, criterion, epoch)
        # LR scheduler
        scheduler.step()
        # remember best acc and save checkpoint
        is_best = acc > best_acc
        if is_best:
            best_acc = acc
            best_per_attr = acc_per_attr
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'best_acc': best_acc,
            'scheduler': scheduler.state_dict(),
        }, is_best, os.path.join(check_dir, args['save']))
        # print current best acc
        print('Current best average accuracy is: %.4f' % best_acc)

    # print the overall best acc and close the writer
    print('Training finished...')
    with open(os.path.join(log_dir, "acc_per_attr.txt"), 'w') as logfile:
        for k in range(args['num_classes']):
            logfile.write('%s: %.4f\n' % (celeba_attr[k], best_per_attr[k].avg))
    print('Per-attribute accuracy on val set has been written to acc_per_attr.txt under the log folder')
    print('Best average accuracy on val set is: %.4f.' % best_acc)
    writer.close()
# NOTE(review): this chunk begins with the tail of a Dataset __getitem__
# method whose `def` lies outside this view, followed by top-level training
# setup; indentation restored for readability.
        # load the image from disk and convert OpenCV's BGR layout to RGB
        img = cv2.imread(self.imgList[index])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transform(img)
        # label = torch.zeros((1, OUT_DIM))
        # build the float32 label tensor from `l` (presumably the parsed
        # annotation for this index — TODO confirm against the full class)
        label = torch.from_numpy(np.array(l).astype(np.float32))
        return img, label.squeeze()


######################### train #########################
# build the backbone from locally stored ImageNet weights, then wrap it
# in the project's ResNet50 head with OUT_DIM outputs
resnet = models.resnet50()
resnet.load_state_dict(torch.load('./resnet50-19c8e357.pth'))
net = ResNet50(resnet, OUT_DIM)
# best-effort warm start: resume from the latest saved model if present
try:
    net.load_state_dict(torch.load('models/latestModel.pth'))
    log.info('load pre trained model')
except:
    log.info('pre trained model isn\'t exist')
# ensure the checkpoint directory exists (ignore "already exists" errors)
try:
    os.mkdir('models')
except:
    pass
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = net.to(device)
def train(args):
    """Train ResNet50 on CIFAR-10 with a FLOP-based compute cost added to
    the cross-entropy loss (cost gathered via forward hooks)."""
    transform = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    trainset = torchvision.datasets.CIFAR10(root='./data',
                                            train=True,
                                            download=False,
                                            transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=2)
    print('Dataset loaded.')
    model = ResNet50(num_classes=10)
    print('Model built.')
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model).cuda()
        print('Model data parallel to cuda.')
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model = model.to(device)
    # register_layers presumably installs forward hooks that populate the
    # module-level FLOP_list during a forward pass — TODO confirm
    handles = register_layers(model)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    model.train()
    epoch_num = args.epoch
    for epoch in range(epoch_num):
        running_loss = 0.0
        for i, data in enumerate(tqdm(trainloader)):
            inputs, labels = data
            # NOTE(review): unconditional .cuda() assumes a GPU even on the
            # CPU fallback path above — would fail on a CPU-only machine
            inputs, labels = inputs.cuda(), labels.cuda()
            optimizer.zero_grad()
            outputs = model(inputs)
            # hooks are removed after the first forward pass, so FLOP_list
            # (and hence compute_cost) stays at its first-batch value
            for handle in handles:
                handle.remove()
            handles = []
            compute_cost = sum(FLOP_list) * args.alpha
            # add the (constant w.r.t. parameters) compute cost to the loss
            loss = criterion(outputs, labels) + torch.Tensor([compute_cost]).cuda()
            #loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # periodic running-loss report every `checkpoint` batches
            if i % args.checkpoint == (args.checkpoint - 1):
                print("epoch-%d sample-%d running_loss: %.3f" %
                      (epoch + 1, i + 1, running_loss / args.checkpoint))
                running_loss = 0.0
def main():
    """Evaluate an ontology-aware event classifier on a test set and report
    per-node metrics (global metrics live under root node Q1190554)."""
    args = parse_args()
    level = logging.INFO
    if args.debug:
        level = logging.DEBUG
    logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s",
                        datefmt="%d-%m-%Y %H:%M:%S",
                        level=level)
    # load cfg (YAML, resolved relative to the cfg file's directory)
    if os.path.exists(args.cfg):
        with open(args.cfg) as f:
            cfg = yaml.load(f, Loader=yaml.FullLoader)
        logging.debug(cfg)
    else:
        logging.error(f"Cannot find cfg file: {args.cfg}")
        return 0
    # load ontology
    OntReader = OntologyReader(graph_file=os.path.join(
        os.path.dirname(args.cfg), cfg["graph"]),
                               weighting_scheme=cfg["weighting_scheme"],
                               leaf_node_weight=cfg["leaf_node_weight"])
    # init torch; scale batch size by GPU count when CUDA is available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        batch_size = torch.cuda.device_count() * args.batch_size
    else:
        batch_size = args.batch_size
    # build model and load checkpoint; output dimension depends on whether
    # the model predicts ontology nodes or only leaf classes
    if cfg["model_type"] == "ontology":
        weights = OntReader.get_node_weights(cfg["redundancy_removal"])
        num_classes = len(weights)
    else:  # cfg["model_type"] == "classification"
        num_classes = OntReader.num_leafs
    if torch.cuda.device_count() == 0:
        logging.info(f"Test on CPU with batch_size {batch_size}")
    else:
        logging.info(
            f"Test on {torch.cuda.device_count()} GPU(s) with batch_size {batch_size}"
        )
    model = ResNet50(num_classes=num_classes,
                     model_type=cfg["model_type"],
                     redundancy_removal=cfg["redundancy_removal"])
    model.to(device)
    if torch.cuda.device_count() > 1:
        logging.info(f"Found {torch.cuda.device_count()} GPUs")
        model = nn.DataParallel(model)
    # NOTE(review): eval() is set before load(); looks intentional since
    # load() happens before any forward pass — confirm model.load semantics
    model.eval()
    model.load(device=device,
               path=os.path.join(os.path.dirname(args.cfg),
                                 cfg["model_checkpoint"]))
    # Init testing dataset
    infer_dataset = EventDataset(image_dir=args.image_dir,
                                 testset_path=args.testset)
    infer_dataloader = DataLoader(infer_dataset,
                                  batch_size=batch_size,
                                  num_workers=8)
    # predict event classes for images
    sample_predictions = get_sample_predictions(
        infer_dataloader=infer_dataloader,
        OntReader=OntReader,
        model=model,
        device=device,
        s2l_strategy=args.s2l_strategy)
    # calculate result for all nodes in the ontology
    logging.info("Calculate results ...")
    node_results = get_test_results(sample_predictions=sample_predictions,
                                    OntReader=OntReader)
    # print final results (global results are stored in the root node
    # occurrence (Q1190554))
    if "Q1190554" not in node_results:
        logging.warning("No results written ...")
        return 0
    print_results(node_results["Q1190554"]["metrics"],
                  node_results["Q1190554"]["num_test_images"])
    # write results for each node (one JSON object per line, sorted by
    # descending number of test images)
    if args.output:
        if not os.path.exists(os.path.dirname(args.output)):
            os.makedirs(os.path.dirname(args.output))
        result_list = []
        for val in node_results.values():
            # calculate mean result: normalize each metric by image count
            for metric, result in val["metrics"].items():
                val["metrics"][metric] = result / val["num_test_images"]
            result_list.append(val)
        result_list = sorted(result_list,
                             key=lambda x: x["num_test_images"],
                             reverse=True)
        with open(args.output, "w") as jsonfile:
            for result in result_list:
                jsonfile.write(json.dumps(result) + "\n")
        logging.info(f"Results written to: {args.output}")
    return 0
def main(args, logger):
    """Train or test an (attention-augmented) ResNet: supports checkpoint
    resume, warmup + cosine-annealing LR, early stopping, and best-model
    checkpointing via a FileWriter logger."""
    train_loader, valid_loader, test_loader = load_data(args)
    # TODO : THIS WILL NOT WORK OUT FOR OTHER DATASETS DUE TO THE WAY WE SPLIT WITH SUBSET FLAG IN CIFAR100
    num_classes = len(train_loader.dataset.dataset.dataset.classes)
    # if args.dataset == 'CIFAR10':
    #     num_classes = 10
    # elif args.dataset == 'CIFAR100':
    #     num_classes = 100
    # elif args.dataset == 'TinyImageNet':
    #     num_classes = 200
    print('dataset: {}, num_classes: {}'.format(args.dataset, num_classes))
    # select architecture; model stays None for unknown model names
    model = None
    print('ARGS: ', args)
    if args.model_name == 'ResNet26':
        print('Model Name: {0}'.format(args.model_name))
        model = ResNet26(num_classes=num_classes, args=args)
    elif args.model_name == 'ResNet38':
        print('Model Name: {0}'.format(args.model_name))
        model = ResNet38(num_classes=num_classes, all_attention=args.all_attention)
    elif args.model_name == 'ResNet50':
        print('Model Name: {0}'.format(args.model_name))
        model = ResNet50(num_classes=num_classes, all_attention=args.all_attention)
    if args.use_adam:
        optimizer = optim.Adam(model.parameters(), lr=args.adam_lr)
        # Try altering initial settings of Adam later.
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay,
                              nesterov=True)
    # T_max == -1 means "anneal over the full training run"
    if args.T_max == -1:
        args.T_max = args.epochs
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=args.T_max, eta_min=args.eta_min)
    start_epoch = 1
    best_acc = 0.0
    best_epoch = 1
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if args.pretrained_model or args.test:
        # restore model/optimizer/scheduler state and report model cost
        filename = args.xpid + '_model_' + str(args.dataset) + '_' + str(args.model_name) + '_ckpt.tar'
        print('filename :: ', filename)
        map_location = 'cuda' if args.cuda else None
        checkpoint = torch.load(filename, map_location=map_location)
        model = nn.DataParallel(model)
        model.load_state_dict(checkpoint['state_dict'])
        print('MADE IT')
        model = model.to(device)
        # profile FLOPs/params on a small dummy batch (32x32 input assumed)
        dummy_input = torch.randn((2, 3, 32, 32))
        dummy_input = dummy_input.to(device)
        macs, params = profile(copy.deepcopy(model.module),
                               inputs=(dummy_input,),
                               custom_ops={Bottleneck: count_bootleneck},
                               verbose=True)
        print('FLOPS : {}, PARAMS : {}'.format(macs, params))
        max_spans = []
        if args.adaptive_span:
            #print out max z value per layer as a list
            for layer in model.module.layers:
                for block in layer:
                    max_span = block.conv2[0].adaptive_mask.get_current_max_size()
                    max_spans.append(max_span)
            print('MAX SPANS: ', max_spans)
        optimizer.load_state_dict(checkpoint['optimizer'])
        # reset to this learning rate given
        optimizer.param_groups[0]['lr'] = args.lr
        scheduler.load_state_dict(checkpoint['scheduler'])
        start_epoch = checkpoint['epoch']
        best_acc = checkpoint['best_acc']
        best_epoch = start_epoch
        model_parameters = checkpoint['parameters']
        print('Load model, Parameters: {0}, Start_epoch: {1}, Acc: {2}'.format(model_parameters, start_epoch, best_acc))
        #logger.info('Load model, Parameters: {0}, Start_epoch: {1}, Acc: {2}'.format(model_parameters, start_epoch, best_acc))
    if args.test:
        #Compute test accuracy
        test_acc = eval(model, test_loader, args, is_valid=False, device=device)
        print('TEST ACCURACY: ', test_acc)
        return
    # fresh runs still need the DataParallel wrap + device move done above
    # only on the pretrained path
    if not args.pretrained_model:
        model = nn.DataParallel(model)
        model = model.to(device)
    print("Number of model parameters: ", get_model_parameters(model))
    #logger.info("Number of model parameters: {0}".format(get_model_parameters(model)))
    filename = args.xpid + '_model_' + str(args.dataset) + '_' + str(args.model_name) + '_ckpt.tar'
    plogger = file_writer.FileWriter(
        xpid=args.xpid,
        rootdir=os.path.dirname(os.path.abspath(__file__))
    )
    print('will save model as filename :: ', filename)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(start_epoch, args.epochs + 1):
        # LR schedule: either manual (no annealing), linear warmup, cosine
        # annealing after start_scheduler, or the step-based fallback
        if args.all_attention or args.attention_conv or args.force_cosine_annealing:
            if args.no_annealing:
                optimizer.param_groups[0]['lr'] = args.lr
            elif epoch < args.warmup_epochs:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = args.lr * (epoch + 1) / args.warmup_epochs
            elif epoch >= args.start_scheduler:
                scheduler.step()
        else:
            adjust_learning_rate(optimizer, epoch, args)
        learning_rate = optimizer.param_groups[0]['lr']
        # learning_rate = [x['lr'] for x in optimizer.param_groups]
        print('Updated lr: ', learning_rate)
        start_time = time.time()
        train(model, train_loader, optimizer, criterion, epoch, args, logger, device)
        print('Epoch took: ', time.time()-start_time)
        eval_acc = eval(model, valid_loader, args, is_valid=True, device=device)
        to_log = dict(accuracy=eval_acc, learning_rate=learning_rate)
        plogger.log(to_log)
        is_best = eval_acc > best_acc
        best_acc = max(eval_acc, best_acc)
        if is_best:
            best_epoch = epoch
        elif epoch - best_epoch > int(args.epochs * 0.2):
            # early stopping: no improvement for 20% of total epochs
            print('EARLY STOPPING')
            break
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        parameters = get_model_parameters(model)
        if is_best:
            print('Saving best model')
            state = {
                'epoch': epoch,
                'arch': args.model_name,
                'state_dict': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'parameters': parameters,
            }
            torch.save(state, filename)
    plogger.close()
def main():
    """Dispatch on args.mode: train from scratch, iteratively prune, test a
    pruned checkpoint, or benchmark it through ONNX + TensorRT."""
    train_loader, test_loader = get_dataloader()
    if args.mode == "train":
        args.round = 0
        model = ResNet50(num_classes=10)
        train_model(model, train_loader, test_loader)
    elif args.mode == "prune":
        # resume the model saved by the previous pruning round
        previous_ckpt = "./checkpoints/resnet50-round%d.pth" % (args.round - 1)
        print("Pruning round %d, load model from %s" % (args.round, previous_ckpt))
        model = torch.load(previous_ckpt)
        prune_model(model)
        print(model)
        params = sum([np.prod(p.size()) for p in model.parameters()])
        print("Number of Parameters: %.1fM" % (params / 1e6))
        # fine-tune the pruned model
        train_model(model, train_loader, test_loader)
    elif args.mode == "test":
        ckpt = "./checkpoints/resnet50-round%d.pth" % (args.round)
        print("Load model from %s" % (ckpt))
        model = torch.load(ckpt)
        params = sum([np.prod(p.size()) for p in model.parameters()])
        print("Number of Parameters: %.1fM" % (params / 1e6))
        acc = eval(model, test_loader)
        print("Acc=%.4f\n" % (acc))
    elif args.mode == "tensorrt":
        ckpt = "./checkpoints/resnet50-round%d.pth" % (args.round)
        print("Load model from %s" % (ckpt))
        model = torch.load(ckpt)
        params = sum([np.prod(p.size()) for p in model.parameters()])
        print("Number of Parameters: %.1fM" % (params / 1e6))
        # export to ONNX, simplify the graph, then build a TensorRT engine
        # by shelling out to onnx2trt
        torch_in = torch.ones((1, 3, 32, 32)).cuda()
        torch.onnx.export(
            model,
            torch_in,
            "./checkpoints/model_onnx.onnx",
            verbose=False,
            opset_version=12,
        )
        onnx_model = onnx.load("./checkpoints/model_onnx.onnx")
        model_simp, check = simplify(onnx_model)
        onnx.save(model_simp, "./checkpoints/model_onnx.onnx")
        cmd = (
            "onnx2trt " + "./checkpoints/model_onnx.onnx"
            + " -o " + "./checkpoints/tensorrt_engine.engine"
            + " -b " + "1"
            + " -w " + str(1024 * 1024 * 1024)
            + " -d 32"
        )
        os.system(cmd)
        trt_model = TRT_Engine("./checkpoints/tensorrt_engine.engine",
                               max_batch_size=1)
        # time repeated single-batch inference
        num_iter = 2000
        total_time_list = []
        with torch.no_grad():
            for i in range(num_iter):
                start = time.time()
                trt_model(torch_in)
                total_time_list.append(time.time() - start)
        # the first 100 iterations are treated as warm-up and excluded
        print(
            "total FPS -> avg:{}, max:{}, min:{}".format(
                1 / (sum(total_time_list[100:]) / (num_iter - 100)),
                1 / (max(total_time_list[100:])),
                1 / (min(total_time_list[100:])),
            )
        )
def main():
    """Evaluate part-discovery interpretability on CUB-200.

    Loads the trained model named by ``args.load``, converts part
    assignments to center coordinates on both data splits, then fits one
    linear regressor per landmark (on standardized coordinates, fitting
    split only) and reports the mean L2 distance on the test split as a
    percentage. Landmarks with near-zero mask values are excluded.
    """
    # load the config file
    config_file = '../../log/' + args.load + '/train_config.json'
    with open(config_file) as fi:
        config = json.load(fi)
    print(" ".join("\033[96m{}\033[0m: {},".format(k, v) for k, v in config.items()))

    # define data transformation (no crop)
    data_transforms = transforms.Compose([
        transforms.Resize(size=(448)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])

    # define dataset and loader
    fit_data = CUB200(root='../../data/cub200', train=True,
                      transform=data_transforms, resize=448)
    eval_data = CUB200(root='../../data/cub200', train=False,
                       transform=data_transforms, resize=448)
    fit_loader = torch.utils.data.DataLoader(fit_data, batch_size=1,
                                             shuffle=False, num_workers=1,
                                             pin_memory=False, drop_last=False)
    eval_loader = torch.utils.data.DataLoader(eval_data, batch_size=1,
                                              shuffle=False, num_workers=1,
                                              pin_memory=False, drop_last=False)

    # load the model in eval mode
    if config['arch'] == 'resnet101':
        model = nn.DataParallel(
            ResNet101(num_classes, num_parts=config['nparts'])).cuda()
    elif config['arch'] == 'resnet50':
        model = nn.DataParallel(
            ResNet50(num_classes, num_parts=config['nparts'])).cuda()
    else:
        raise (RuntimeError(
            "Only support resnet50 or resnet101 for architecture!"))

    resume = '../../checkpoints/' + args.load + '_best.pth.tar'
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume)
    model.load_state_dict(checkpoint['state_dict'], strict=True)
    model.eval()

    # convert the assignment to centers for both splits
    print('Evaluating the model for the whole data split...')
    fit_centers, fit_annos, fit_masks = create_centers(fit_loader, model,
                                                       config['nparts'])
    eval_centers, eval_annos, eval_masks = create_centers(eval_loader, model,
                                                          config['nparts'])

    # fit the linear regressor with sklearn
    # normalized assignment center coordinates -> normalized landmark coordinate annotations
    print('=> fitting and evaluating the regressor')
    error = 0
    n_valid_samples = 0

    # hoisted out of the per-landmark loop: the tensor -> float64 numpy
    # conversions are loop-invariant, so perform each exactly once
    fit_masks_np = fit_masks.cpu().numpy().astype(np.float64)
    eval_masks_np = eval_masks.cpu().numpy().astype(np.float64)
    fit_centers_np = fit_centers.cpu().numpy().astype(np.float64)
    fit_annos_np = fit_annos.cpu().numpy().astype(np.float64)
    eval_centers_np = eval_centers.cpu().numpy().astype(np.float64)
    eval_annos_np = eval_annos.cpu().numpy().astype(np.float64)

    # different landmarks have different masks
    for i in range(num_landmarks):
        # get the valid indices for the current landmark
        fit_selection = (abs(fit_masks_np[:, i * 2]) > 1e-5)
        eval_selection = (abs(eval_masks_np[:, i * 2]) > 1e-5)

        # select the current landmark's (x, y) columns for both splits
        fit_landmark = fit_annos_np[:, i * 2:i * 2 + 2]
        eval_landmark = eval_annos_np[:, i * 2:i * 2 + 2]

        # remove invalid indices
        fit_centers_sel = fit_centers_np[fit_selection]
        fit_landmark = fit_landmark[fit_selection]
        eval_centers_sel = eval_centers_np[eval_selection]
        eval_landmark = eval_landmark[eval_selection]
        eval_data_size = eval_centers_sel.shape[0]

        # data standardization, fitted on the fitting split only
        scaler_centers = StandardScaler()
        scaler_landmarks = StandardScaler()
        scaler_centers.fit(fit_centers_sel)
        scaler_landmarks.fit(fit_landmark)

        # standardize the fitting split
        fit_centers_std = scaler_centers.transform(fit_centers_sel)
        fit_annos_std = scaler_landmarks.transform(fit_landmark)

        # define regressor without intercept and fit it
        regressor = LinearRegression(fit_intercept=False)
        regressor.fit(fit_centers_std, fit_annos_std)

        # standardize the centers on the evaluation split and regress
        eval_centers_std = scaler_centers.transform(eval_centers_sel)
        eval_pred_std = regressor.predict(eval_centers_std)

        # unstandardize the prediction with StandardScaler for landmarks
        eval_pred = scaler_landmarks.inverse_transform(eval_pred_std)

        # accumulate the sample-weighted L2 error for this landmark
        eval_pred = eval_pred.reshape((eval_data_size, 1, 2))
        eval_target = eval_landmark.reshape((eval_data_size, 1, 2))
        error += L2_distance(eval_pred, eval_target) * eval_data_size
        n_valid_samples += eval_data_size

    # average over all valid samples, reported as a percentage
    error = error * 100 / n_valid_samples
    print('Mean L2 Distance on the test set is %.2f%%.' % error)
    print('Evaluation finished for model \'' + args.load + '\'.')
def main():
    """Evaluate landmark interpretability on CelebA: regress landmark
    annotations from part-assignment centers with a linear model and
    report mean L2 error normalized by eye distance."""
    # load the config file
    config_file = '../log/'+ args.load +'/train_config.json'
    with open(config_file) as fi:
        config = json.load(fi)
    print(" ".join("\033[96m{}\033[0m: {},".format(k, v) for k, v in config.items()))
    # test transform
    data_transforms = transforms.Compose([
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    ])
    # define dataset and loader (landmark evaluation only makes sense for
    # the interpretability split)
    assert config['split'] == 'interpretability'
    fit_data = CelebA('../data/celeba', split='fit', align=False, percentage=0.3,
                      transform=data_transforms, resize=(256, 256))
    eval_data = CelebA('../data/celeba', split='eval', align=False, percentage=0.3,
                       transform=data_transforms, resize=(256, 256))
    fit_loader = torch.utils.data.DataLoader(
        fit_data, batch_size=config['batch_size'], shuffle=False,
        num_workers=6, pin_memory=False, drop_last=False)
    eval_loader = torch.utils.data.DataLoader(
        eval_data, batch_size=config['batch_size'], shuffle=False,
        num_workers=6, pin_memory=False, drop_last=False)
    # load the model in eval mode
    if config['arch'] == 'resnet101':
        model = nn.DataParallel(ResNet101(num_classes, num_parts=config['nparts'])).cuda()
    elif config['arch'] == 'resnet50':
        model = nn.DataParallel(ResNet50(num_classes, num_parts=config['nparts'])).cuda()
    else:
        raise(RuntimeError("Only support resnet50 or resnet101 for architecture!"))
    resume = '../checkpoints/'+args.load+'_best.pth.tar'
    print("=> loading checkpoint '{}'".format(resume))
    checkpoint = torch.load(resume)
    model.load_state_dict(checkpoint['state_dict'], strict=True)
    model.eval()
    # convert the assignment to centers for both splits
    print('Evaluating the model for the whole data split...')
    fit_centers, fit_annos, fit_eyedists = create_centers(
        fit_loader, model, config['nparts'])
    eval_centers, eval_annos, eval_eyedists = create_centers(
        eval_loader, model, config['nparts'])
    eval_data_size = eval_centers.shape[0]
    # normalize the centers to make sure every face image has unit eye distance
    fit_centers, fit_annos = fit_centers / fit_eyedists, fit_annos / fit_eyedists
    eval_centers, eval_annos = eval_centers / eval_eyedists, eval_annos / eval_eyedists
    # fit the linear regressor with sklearn
    # normalized assignment center coordinates -> normalized landmark coordinate annotations
    print('=> fitting and evaluating the regressor')
    # convert tensors to numpy (64 bit double)
    fit_centers_np = fit_centers.cpu().numpy().astype(np.float64)
    fit_annos_np = fit_annos.cpu().numpy().astype(np.float64)
    eval_centers_np = eval_centers.cpu().numpy().astype(np.float64)
    eval_annos_np = eval_annos.cpu().numpy().astype(np.float64)
    # data standardization
    scaler_centers = StandardScaler()
    scaler_landmarks = StandardScaler()
    # fit the StandardScaler with the fitting split only
    scaler_centers.fit(fit_centers_np)
    scaler_landmarks.fit(fit_annos_np)
    # stardardize the fitting split
    fit_centers_std = scaler_centers.transform(fit_centers_np)
    fit_annos_std = scaler_landmarks.transform(fit_annos_np)
    # define regressor without intercept and fit it
    regressor = LinearRegression(fit_intercept=False)
    regressor.fit(fit_centers_std, fit_annos_std)
    # standardize the centers on the evaluation split
    eval_centers_std = scaler_centers.transform(eval_centers_np)
    # regress the landmarks on the evaluation split
    eval_pred_std = regressor.predict(eval_centers_std)
    # unstandardize the prediction with StandardScaler for landmarks
    eval_pred = scaler_landmarks.inverse_transform(eval_pred_std)
    # calculate the error over (sample, landmark, xy) triples
    eval_pred = eval_pred.reshape((eval_data_size, num_landmarks, 2))
    eval_annos = eval_annos_np.reshape((eval_data_size, num_landmarks, 2))
    error = L2_distance(eval_pred, eval_annos) * 100
    print('Mean L2 Distance on the test set is %.2f%%.' % error)
    print('Evaluation finished for model \''+args.load+'\'.')
def ensemble_eval():
    """Evaluate an ensemble of per-class binary classifiers as one multi-way
    classifier: each classifier scores every image, the highest-scoring
    class wins, and misclassifications are bucketed for error analysis.

    Returns:
        Overall ensemble accuracy in percent.
    """
    classes = cfg.CLASSES_TO_RUN
    # _, testloader = MultiwayDatasets()
    _, testloader = MultiwaySubDatasets(classes)
    net = ResNet50(2)
    net.to("cuda")
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True
    net.eval()
    softmax = torch.nn.Softmax(1)
    n_images = len(testloader.dataset)
    # scores[i, c]: classifier c's score for image i; gt[i]: true class index
    scores = torch.zeros((n_images, len(classes)))
    gt = torch.zeros((n_images))
    # get target results (dataloader order is assumed fixed across passes)
    for batch_idx, (_, targets) in enumerate(testloader):
        gt[batch_idx * len(targets):(batch_idx + 1) * len(targets)] = targets
    # get results from each classifier by reloading its checkpoint into
    # the shared network
    for i, class_ in tqdm(enumerate(classes)):
        ckpt = torch.load(f"checkpoints/10way/{class_}/last_ckpt.pth")
        net.load_state_dict(ckpt['net'])
        scores = predict_with_one_net(net, i, scores, testloader, softmax)
    # consolidate results: the class with the highest score wins
    _, predicted = scores.max(1)
    correct = predicted.eq(gt).sum().item()
    accuracy = 100. * correct / n_images
    # ERROR ANALYSIS
    actual_classes = torch.zeros((len(classes)))
    misclassified_classes = torch.zeros((len(classes)))
    counts = [0, 0, 0, 0]
    with open("results.txt", "a") as f:
        for i in range(len(predicted)):
            actual_class = int(gt[i])
            predicted_class = predicted[i]
            actual_class_score = scores[i, actual_class]
            predicted_class_score = scores[i, predicted_class]
            if gt[i] != predicted[i]:
                f.write(
                    f"actual: {actual_class} ({actual_class_score:.4f}), predicted: {predicted_class} ({predicted_class_score:.4f})\n"
                )
                actual_classes[int(gt[i])] += 1
                misclassified_classes[predicted[i]] += 1
                # Actual class classifier predicted wrongly (<0.5) and predicted class classifier predicted wrongly (>0.5)
                if actual_class_score < 0.5 and predicted_class_score > 0.5:
                    counts[0] += 1
                # Actual class classifier predicted correctly (>0.5), but predicted class classifier predicted wrongly and was more confident (>>0.5, closer to 1)
                elif actual_class_score > 0.5 and predicted_class_score > 0.5:
                    counts[1] += 1
                # Actual class classifier predicted wrongly (<<0.5) and predicted class classifier predicted correctly but was a bit more confident (<0.5, closer to 0.5)
                elif actual_class_score < 0.5 and predicted_class_score < 0.5:
                    counts[2] += 1
            else:
                # All classifiers return a negative score, but class is still predicted correctly (because every class predicted negatively)
                if actual_class_score < 0.5 and predicted_class_score < 0.5:
                    counts[3] += 1
    print(counts)
    print(actual_classes)
    print(misclassified_classes)
    return accuracy
def final(modelPath, csvName): model = ResNet50() pickledir = '/tmp/analyze_csv/' if not os.path.exists(os.path.dirname(pickledir)): try: os.makedirs(os.path.dirname(pickledir)) except OSError as exc: # Guard again raise modelPath = modelPath slicesPathTest = '/tmp/SlicesPrivateTest' model.load_weights(modelPath) finalanalyze = pickledir + csvName analyze_dict = {} filenames = os.listdir(slicesPathTest) sliceSize = 128 batchsize = 16 filenames = [ filename for filename in filenames if filename.endswith('.png') ] temp = '' x_batch = [] y_batch = [] count_batch = 0 tempresult = np.zeros([10]) filenames = sorted(filenames) for idx, filename in enumerate(reversed(filenames)): images = [] imgData = getImageData(slicesPathTest + "/" + filename, sliceSize) images.append(imgData) images = np.asarray(images) if filename.split('_')[0] == temp: if count_batch < batchsize: count_batch += 1 x_batch.append(imgData) else: x_batch = np.array(x_batch) result = model.predict(x_batch) result = np.sum(result, axis=0) result = np.reshape(result, (10)) tempresult += result count_batch = 0 x_batch = [] else: if (temp != ''): if count_batch != 0: x_batch = np.array(x_batch) result = model.predict(x_batch) result = np.sum(result, axis=0) result = np.reshape(result, (10)) tempresult += result count_batch = 0 x_batch = [] idx = np.argmax(tempresult) + 1 analyze_dict[temp] = tempresult print(temp) temp = filename.split('_')[0] tempresult = np.zeros([10]) x_batch.append(imgData) count_batch += 1 x_batch = np.array(x_batch) result = model.predict(x_batch) result = np.sum(result, axis=0) result = np.reshape(result, (10)) tempresult += result idx = np.argmax(tempresult) + 1 analyze_dict[temp] = tempresult with open(finalanalyze + '.pickle', 'wb') as handle: pickle.dump(analyze_dict, handle, protocol=pickle.HIGHEST_PROTOCOL) print(modelPath)
################## config ################ device = torch.device("cuda:7") date = time.strftime("%m-%d", time.localtime()) #date = "03-14" model_path = "/home/lxd/checkpoints/" + date model_name = sys.argv[1] if model_name == "vgg16": model = Vgg16Net() elif model_name == "mobile": model = MobileNet() elif model_name == "alexnet": model = AlexNet() elif model_name == "res50": model = ResNet50() elif model_name == "res34": model = ResNet34() elif model_name == "vgg11": model = Vgg11Net() else: print("Moddel Wrong") model.to(device) # train/test loss_name = sys.argv[2] batch = sys.argv[3] model.eval() model.load_state_dict( torch.load("/home/lxd/checkpoints/{}/{}_{}_VeRI_{}.pt".format(
def main(args, logger):
    """Train and evaluate a stemmed ResNet variant, checkpointing the best model.

    Builds the model selected by ``args.model_name`` for the dataset selected
    by ``args.dataset``, optionally resumes from a pretrained checkpoint, then
    runs the train/eval loop for ``args.epochs`` epochs, logging accuracy to
    TensorBoard and saving a checkpoint (flagged when it is the new best).

    Args:
        args: parsed CLI namespace (dataset, model_name, stem, img_size,
            pretrained_model, cuda, debug, lr, momentum, weight_decay, epochs).
        logger: logging.Logger used for persistent progress messages.

    Raises:
        ValueError: if ``args.dataset`` or ``args.model_name`` is unsupported
            (previously these fell through and crashed later with NameError).
    """
    train_loader, test_loader = load_data(args)

    if args.dataset == 'CIFAR10':
        num_classes = 10
    elif args.dataset == 'CIFAR100':
        num_classes = 100
    elif args.dataset == 'IMAGENET':
        num_classes = 1000
    else:
        # Fail fast instead of hitting NameError on num_classes below.
        raise ValueError('Unsupported dataset: {0}'.format(args.dataset))

    print('img_size: {}, num_classes: {}, stem: {}'.format(
        args.img_size, num_classes, args.stem))

    model_factories = {
        'ResNet26': ResNet26,
        'ResNet38': ResNet38,
        'ResNet50': ResNet50,
    }
    if args.model_name not in model_factories:
        # Fail fast instead of hitting NameError on model below.
        raise ValueError('Unsupported model: {0}'.format(args.model_name))
    print('Model Name: {0}'.format(args.model_name))
    model = model_factories[args.model_name](num_classes=num_classes,
                                             stem=args.stem)

    if args.pretrained_model:
        filename = 'best_model_' + str(args.dataset) + '_' + str(
            args.model_name) + '_' + str(args.stem) + '_ckpt.tar'
        print('filename :: ', filename)
        file_path = os.path.join('./checkpoint', filename)
        checkpoint = torch.load(file_path)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
        best_acc = checkpoint['best_acc']
        model_parameters = checkpoint['parameters']
        # Same message to stdout and the persistent log.
        resume_msg = 'Load model, Parameters: {0}, Start_epoch: {1}, Acc: {2}'.format(
            model_parameters, start_epoch, best_acc)
        print(resume_msg)
        logger.info(resume_msg)
    else:
        start_epoch = 1
        best_acc = 0.0

    if args.cuda:
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model = model.cuda()

    if args.debug:
        filename = 'debug'
    else:
        filename = 'model_' + str(args.dataset) + '_' + str(
            args.model_name) + '_' + str(args.stem) + '_ckpt.tar'
    print('filename :: ', filename)

    # Hoisted: get_model_parameters was previously called twice for one log line.
    n_parameters = get_model_parameters(model)
    print("Number of model parameters: ", n_parameters)
    logger.info("Number of model parameters: {0}".format(n_parameters))

    if not os.path.exists('./logs'):
        os.makedirs('./logs')
    writer = SummaryWriter(log_dir='./logs/{}-{}'.format(
        filename, datetime.datetime.now().strftime('%Y%m%d_%H%M%S')))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    for epoch in range(start_epoch, args.epochs + 1):
        train_acc = train(model, train_loader, optimizer, criterion, epoch,
                          args, logger)
        writer.add_scalar('train/acc', train_acc, global_step=epoch)
        # NOTE: `eval` here is the project's evaluation function, shadowing the builtin.
        eval_acc = eval(model, test_loader, args)
        writer.add_scalar('test/acc', eval_acc, global_step=epoch)

        is_best = eval_acc > best_acc
        best_acc = max(eval_acc, best_acc)

        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')

        parameters = get_model_parameters(model)
        # Unwrap DataParallel so checkpoint keys stay consistent with the
        # single-GPU case (deduplicates the formerly copy-pasted call).
        if torch.cuda.device_count() > 1:
            state_dict = model.module.state_dict()
        else:
            state_dict = model.state_dict()
        save_checkpoint(
            {
                'epoch': epoch,
                'arch': args.model_name,
                'state_dict': state_dict,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
                'parameters': parameters,
            }, is_best, filename)