def main():
    args = get_args()
    dataset = get_dataset(args.dataset)
    data = dataset.data
    tb_writer = SummaryWriter()
    tb_writer.iteration = 0
    device = (torch.device("cuda:" + str(args.device))
              if torch.cuda.is_available() and args.device != 'cpu'
              else torch.device("cpu"))
    model = get_model(dataset.data.num_features, dataset.num_classes)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Plain (features, label) loader over the training nodes only.
    train_loader = DataLoader(
        [(x_, y_) for i, (x_, y_) in enumerate(zip(data.x, data.y))
         if data.train_mask[i]],
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=0,
        pin_memory=True,
    )

    if args.enable_clearml_logger:
        tags = [
            f'Dataset: {args.dataset}',
            f'Pruning method: {args.pruning_method}',
            f'Architecture: {args.gnn}',
        ]
        pruning_param_name = ('num_minhash_funcs'
                              if args.pruning_method == 'minhash_lsh'
                              else 'random_pruning_prob')
        pruning_param = (args.num_minhash_funcs
                         if args.pruning_method == 'minhash_lsh'
                         else args.random_pruning_prob)
        tags.append(f'{pruning_param_name}: {pruning_param}')
        clearml_logger = get_clearml_logger(project_name="GNN_pruning",
                                            task_name=get_time_str(),
                                            tags=tags)

    for epoch in range(1, args.epochs + 1):
        loss, acc, f1 = train(epoch, dataset, train_loader, model, device,
                              optimizer, tb_writer)
        print(f'Epoch {epoch:02d}, Loss: {loss:.4f}, Approx. Train: {f1:.4f}')
        train_acc, val_acc, test_acc = test(dataset, model, device)
        print(f'Train ACC: {train_acc:.4f}, Val ACC: {val_acc:.4f}, '
              f'Test ACC: {test_acc:.4f}')
        tb_writer.add_scalars('Accuracy', {
            'train': train_acc,
            'Validation': val_acc,
            'Test': test_acc
        }, epoch)
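# Hypothetical sketch of the train() helper called in the loop above; the
# project's real implementation may differ. Assumptions: the model accepts
# plain feature batches (matching the (x, y) loader built in main()), and
# micro-averaged F1 from sklearn stands in for the reported f1.
import torch
import torch.nn.functional as F
from sklearn.metrics import f1_score

def train_sketch(epoch, dataset, train_loader, model, device, optimizer,
                 tb_writer):
    model.train()
    total_loss = total_correct = total_examples = 0
    all_preds, all_labels = [], []
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = F.cross_entropy(out, y)
        loss.backward()
        optimizer.step()

        preds = out.argmax(dim=-1)
        total_loss += loss.item() * y.size(0)
        total_correct += int((preds == y).sum())
        total_examples += y.size(0)
        all_preds.append(preds.cpu())
        all_labels.append(y.cpu())
        tb_writer.add_scalar('Loss/train', loss.item(), tb_writer.iteration)
        tb_writer.iteration += 1

    f1 = f1_score(torch.cat(all_labels), torch.cat(all_preds),
                  average='micro')
    return total_loss / total_examples, total_correct / total_examples, f1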
def register_logging_files(args):
    tb_writer = None
    best_results_file = None
    log_file = None
    if args.exps_dir is not None:
        exps_dir = (Path(args.exps_dir) / 'pyg_with_pruning'
                    / args.dataset / args.pruning_method)
        if args.pruning_method == 'random':
            exps_dir = exps_dir / str(args.random_pruning_prob)
        elif args.pruning_method == 'minhash_lsh':
            exps_dir = exps_dir / str(args.num_minhash_funcs)
        exps_dir = exps_dir / get_time_str()
        best_results_file = exps_dir / 'best_results.txt'
        log_file = exps_dir / 'log.log'
        tensorboard_dir = exps_dir / 'tensorboard'
        tensorboard_dir.mkdir(parents=True, exist_ok=True)
        tb_writer = SummaryWriter(log_dir=tensorboard_dir)
        tb_writer.iteration = 0

    register_logger(log_file=log_file, stdout=True)
    log_command()
    log_args_description(args)

    clearml_task = None
    if args.enable_clearml_logger:
        tags = [
            f'Dataset: {args.dataset}',
            f'Pruning method: {args.pruning_method}',
            f'Architecture: {args.gnn}',
        ]
        pruning_param_name = ('num_minhash_funcs'
                              if 'minhash_lsh' in args.pruning_method
                              else 'random_pruning_prob')
        pruning_param = (args.num_minhash_funcs
                         if 'minhash_lsh' in args.pruning_method
                         else args.random_pruning_prob)
        tags.append(f'{pruning_param_name}: {pruning_param}')
        if pruning_param_name == 'num_minhash_funcs':
            tags.append(f'Sparsity: {args.sparsity}')
            tags.append(f'Complement: {args.complement}')
        clearml_task = get_clearml_logger(
            f"GNN_{args.dataset}_{args.target}_{args.gnn}",
            task_name=get_time_str(),
            tags=tags)

    return tb_writer, best_results_file, log_file, clearml_task
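# One plausible implementation of the get_time_str() helper used above for
# run directories and task names (an illustrative assumption, not
# necessarily the project's version).
from datetime import datetime

def get_time_str():
    # e.g. '2021_07_14_16_32_05' -- filesystem-safe and sorts chronologically
    return datetime.now().strftime('%Y_%m_%d_%H_%M_%S')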
# Full training
net = modelos.min2019()
net = net.to(device)
# optimizer = optim.Adam(net.parameters(), lr=0.0005)
optimizer = optim.SGD(net.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])
# loss_fn = torch.nn.BCELoss()
loss_fn = torch.nn.CrossEntropyLoss()
metrics = {'fps': training.BatchTimer(), 'acc': training.accuracy}

# Train
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

print('\n\nInitial')
print('-' * 10)
net.eval()
training.pass_epoch(net, loss_fn, val_loader, batch_metrics=metrics,
                    show_running=True, device=device, writer=writer)

for epoch in range(epochs):
    print('\nEpoch {}/{}'.format(epoch + 1, epochs))
    print('-' * 10)
    # Assumed epoch body, mirroring the train/validate pattern of the
    # train() method elsewhere in this file.
    net.train()
    training.pass_epoch(net, loss_fn, train_loader, optimizer, scheduler,
                        batch_metrics=metrics, show_running=True,
                        device=device, writer=writer)
    net.eval()
    training.pass_epoch(net, loss_fn, val_loader, batch_metrics=metrics,
                        show_running=True, device=device, writer=writer)

writer.close()
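# Rough equivalents of the two batch metrics used above, shown only to make
# the 'fps' and 'acc' keys concrete; the actual implementations live in
# facenet_pytorch.models.utils.training and may differ in detail.
import time
import torch

def accuracy_sketch(logits, y):
    # Fraction of argmax predictions that match the labels.
    return (logits.argmax(dim=1) == y).float().mean()

class BatchTimerSketch:
    # Throughput between consecutive calls, in samples per second.
    def __init__(self):
        self.start = time.time()

    def __call__(self, y_pred, y):
        elapsed = time.time() - self.start
        self.start = time.time()
        return torch.tensor(len(y_pred) / max(elapsed, 1e-9))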
def train(self, save_model=True):
    batch_size = 32
    epochs = 100
    workers = 0 if os.name == 'nt' else 8

    optimizer = optim.Adam(self.model.parameters(), lr=0.001)
    scheduler = MultiStepLR(optimizer, [5, 10])

    dataset = self.get_train_dataset()
    img_inds = np.arange(len(dataset))
    np.random.shuffle(img_inds)
    train_inds = img_inds[:int(0.8 * len(img_inds))]
    val_inds = img_inds[int(0.8 * len(img_inds)):]
    train_loader = DataLoader(dataset,
                              num_workers=workers,
                              batch_size=batch_size,
                              sampler=SubsetRandomSampler(train_inds))
    val_loader = DataLoader(dataset,
                            num_workers=workers,
                            batch_size=batch_size,
                            sampler=SubsetRandomSampler(val_inds))

    loss_fn = torch.nn.CrossEntropyLoss()
    metrics = {'fps': training.BatchTimer(), 'acc': training.accuracy}

    writer = SummaryWriter()
    writer.iteration, writer.interval = 0, 10

    print('\n\nInitial')
    print('-' * 10)
    self.model.eval()
    training.pass_epoch(self.model, loss_fn, val_loader,
                        batch_metrics=metrics, show_running=True,
                        writer=writer)

    for epoch in tqdm(range(epochs)):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)
        self.model.train()
        training.pass_epoch(self.model, loss_fn, train_loader, optimizer,
                            scheduler, batch_metrics=metrics,
                            show_running=True, writer=writer)
        self.model.eval()
        training.pass_epoch(self.model, loss_fn, val_loader,
                            batch_metrics=metrics, show_running=True,
                            writer=writer)

    writer.close()
    if save_model:
        self.save_model()
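# Illustrative variant, not the original code: the 80/20 split in train()
# above reshuffles differently on every run because it mutates the global
# NumPy RNG. Seeding a dedicated generator makes the train/val partition
# reproducible across runs.
import numpy as np

def split_indices(n, train_frac=0.8, seed=42):
    rng = np.random.RandomState(seed)
    inds = rng.permutation(n)
    split = int(train_frac * n)
    return inds[:split], inds[split:]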
def train_model(db_id):
    start_epoch = 0
    batch_size = 32
    epochs = 5
    workers = 2

    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        np.float32,
        transforms.ToTensor(),
        fixed_image_standardization
    ])

    images, num_classes = get_dataset(db_id)
    dataset = MyCustomDataset(images, train_transform)
    train_loader = DataLoader(
        dataset,
        num_workers=workers,
        batch_size=batch_size
    )

    model = InceptionResnetV1(
        classify=True,
        num_classes=num_classes
    ).to(device)

    # Resume from a saved checkpoint when one exists for this database id.
    checkpoint_path, checkpoint_file, label_dict = get_saved_model(db_id)
    if checkpoint_path is not None and os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_file)
        model.load_state_dict(checkpoint['net'])
        start_epoch = checkpoint['epoch']
    else:
        checkpoint_path = "./checkpoint"

    optimizer = optim.SGD(model.parameters(), lr=0.1)
    scheduler = MultiStepLR(optimizer, [60, 120, 180])
    loss_fn = torch.nn.CrossEntropyLoss()
    metrics = {
        'fps': training.BatchTimer(),
        'acc': training.accuracy
    }

    writer = SummaryWriter(log_dir=None, comment='', purge_step=None,
                           max_queue=10, flush_secs=600,
                           filename_suffix='face_rec_log_')
    writer.iteration, writer.interval = 1, 10

    checkpoint_save_name = 'face_rec_test'
    ckp_dir = checkpoint_path
    ckp_name = ''
    # Note: start_epoch is loaded above but the loop always starts from 0,
    # and with epochs=5 the periodic save below never fires.
    for epoch in range(epochs):
        training.pass_epoch(
            model, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=False, device=device,
            writer=writer
        )
        # Save an intermediate checkpoint every 50 epochs.
        if (epoch + 1) % 50 == 0:
            print('Saving..')
            state = {
                'net': model.state_dict(),
                'epoch': epoch,
                'is_final': 0
            }
            ckp_name = checkpoint_save_name + '_' + str(epoch + 1)
            os.makedirs(ckp_dir, exist_ok=True)
            torch.save(state, ckp_dir + '/' + ckp_name + '.pth')

    writer.close()

    # Always save and register a final checkpoint.
    state = {
        'net': model.state_dict(),
        'epoch': epochs,
        'is_final': 1
    }
    ckp_name = checkpoint_save_name + '_final'
    os.makedirs(ckp_dir, exist_ok=True)
    save_path = ckp_dir + '/' + ckp_name + '.pth'
    torch.save(state, save_path)
    update_model(db_id, save_path)
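# train_model() reads 'epoch' into start_epoch but then iterates
# range(epochs) from zero. A resume-aware variant (illustrative sketch,
# assuming the same checkpoint dict layout as above) would pick up where
# the previous run left off:
import os
import torch

def resume_epoch_range(checkpoint_file, total_epochs):
    start = 0
    if checkpoint_file is not None and os.path.exists(checkpoint_file):
        start = torch.load(checkpoint_file).get('epoch', 0)
    return range(start, total_epochs)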
def main():
    args = get_args()
    dataset = get_dataset(args.dataset)
    data = dataset.data
    tb_writer = SummaryWriter()
    tb_writer.iteration = 0
    device = (torch.device("cuda:" + str(args.device))
              if torch.cuda.is_available() and args.device != 'cpu'
              else torch.device("cpu"))
    model = get_model(dataset.data.num_features, dataset.num_classes,
                      args.gnn)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    old_edge_count = data.edge_index.shape[1]
    # Pass the whole graph to the pruning mechanism; treat it as one sample.
    pruning_params, pruning_ratio = prune_dataset(
        [data], args, random=np.random.RandomState(0), pruning_params=None)
    edge_count = data.edge_index.shape[1]
    print(f"Old number of edges: {old_edge_count}. New one: {edge_count}. "
          f"Change: {(old_edge_count - edge_count) / old_edge_count * 100:.2f}%")

    train_loader = NeighborSampler(
        data.edge_index,
        node_idx=data.train_mask,
        # sizes=[-1, -1],  # citations
        sizes=[-1, 10],  # reddit
        # sizes=[25, 10],  # default
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=12)
    subgraph_loader = NeighborSampler(data.edge_index,
                                      node_idx=None,
                                      sizes=[-1],
                                      batch_size=1024,
                                      shuffle=False,
                                      num_workers=12)

    clearml_task = None
    if args.enable_clearml_logger:
        tags = [
            f'Dataset: {args.dataset}',
            f'Pruning method: {args.pruning_method}',
            f'Architecture: {args.gnn}',
        ]
        pruning_param_name = ('num_minhash_funcs'
                              if 'minhash_lsh' in args.pruning_method
                              else 'random_pruning_prob')
        pruning_param = (args.num_minhash_funcs
                         if 'minhash_lsh' in args.pruning_method
                         else args.random_pruning_prob)
        tags.append(f'{pruning_param_name}: {pruning_param}')
        if pruning_param_name == 'num_minhash_funcs':
            tags.append(f'Sparsity: {args.sparsity}')
            tags.append(f'Complement: {args.complement}')
        clearml_task = get_clearml_logger(
            project_name=f"GNN_{args.dataset}_{args.gnn}",
            task_name=get_time_str(),
            tags=tags)

    train_times = []
    val_times = []
    max_train_acc = 0
    max_val_acc = 0
    max_test_acc = 0
    for epoch in range(1, args.epochs + 1):
        loss, acc, f1, avg_time_train = train(epoch, dataset, train_loader,
                                              model, device, optimizer,
                                              tb_writer)
        train_times.append(avg_time_train)
        print(f'Epoch {epoch:02d}, Loss: {loss:.4f}, Approx. Train: {f1:.4f}')
        train_acc, val_acc, test_acc, avg_time_test = test(
            dataset, subgraph_loader, model, device)
        val_times.append(avg_time_test)
        print(f'Train ACC: {train_acc:.4f}, Val ACC: {val_acc:.4f}, '
              f'Test ACC: {test_acc:.4f}')
        tb_writer.add_scalars('Accuracy', {
            'train': train_acc,
            'Validation': val_acc,
            'Test': test_acc
        }, epoch)
        max_train_acc = max(max_train_acc, train_acc)
        max_val_acc = max(max_val_acc, val_acc)
        max_test_acc = max(max_test_acc, test_acc)

    tb_writer.add_scalar('time/train', np.mean(train_times))
    tb_writer.add_scalar('time/val', np.mean(val_times))

    # Guard the summary upload: clearml_task is None when the ClearML
    # logger is disabled.
    if clearml_task is not None:
        experiment_logs = dict()
        experiment_logs = clearml_task.connect(experiment_logs)
        experiment_logs['time/train'] = np.mean(train_times)
        experiment_logs['time/val'] = np.mean(val_times)
        experiment_logs['keep edges'] = pruning_ratio
        experiment_logs['max train accuracy'] = max_train_acc
        experiment_logs['max val accuracy'] = max_val_acc
        experiment_logs['max test accuracy'] = max_test_acc
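# Hypothetical sketch of the random branch of prune_dataset(), shown only to
# make the edge-count bookkeeping above concrete; the project's function
# also implements minhash-LSH pruning and returns (pruning_params,
# keep_ratio). The drop-probability semantics below are an assumption.
import numpy as np
import torch

def random_prune_edges(data, drop_prob, rng):
    num_edges = data.edge_index.shape[1]
    keep = rng.rand(num_edges) >= drop_prob  # True for edges we keep
    data.edge_index = data.edge_index[:, torch.from_numpy(keep)]
    return float(keep.mean())  # fraction of edges kept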