def main(index, args):
    alphabet = alphabet_factory()
    train_dataset, test_dataset = split_dataset(args, alphabet)
    collate_fn = collate_factory(model_length_function)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        # pin_memory=True,
        shuffle=True,
        collate_fn=collate_fn,
        drop_last=True)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        num_workers=args.num_workers,
        batch_size=args.batch_size,
        shuffle=False,
        collate_fn=collate_fn,
        drop_last=True)

    # Get loss function, optimizer, and model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    in_features = args.n_mfcc * (2 * args.n_context + 1)
    model = build_deepspeech(in_features=in_features, num_classes=len(alphabet))
    model = model.to(device)
    logging.info("Number of parameters: %s", count_parameters(model))

    optimizer = get_optimizer(args, model.parameters())
    criterion = nn.CTCLoss(blank=alphabet.mapping[alphabet.char_blank])
    decoder = GreedyDecoder()

    train_eval_fn(args.num_epochs, train_loader, test_loader, optimizer, model,
                  criterion, device, decoder, alphabet, args.checkpoint,
                  args.log_steps)
def run():
    # Parameters
    n_epochs = 20
    batch_size = 128
    lr = 0.001

    # Loading dataset
    train_dataset = TrainSignLanguageDataset()
    train_dataset, val_dataset = split_dataset(train_dataset, factor=0.9)
    test_dataset = TestSignLanguageDataset()

    # Creating model
    model = HandyNet(25)
    # model = GoogleNet(25)

    # Creating learning modules
    model_parameters = filter(lambda x: x.requires_grad, model.parameters())
    optimizer = optim.Adam(model_parameters, lr=lr)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5,
                                                     patience=2, min_lr=0.00001,
                                                     verbose=True)

    train(model, train_dataset, val_dataset, test_dataset, n_epochs, batch_size,
          optimizer, criterion, scheduler=scheduler, real_world_test=True)
def __init__(self, args):
    self.args = args

    # Study Case
    self.sys_name = args.sys_name

    # Dataset Parameters
    self.dataset = load_dataset(args)
    self.dim_z = self.dataset.dim_z
    self.dim_t = self.dataset.dim_t
    self.L = self.dataset.L
    self.M = self.dataset.M
    self.dt = self.dataset.dt
    self.t_vec = self.dataset.t_vec
    self.train_trajectories, self.test_trajectories = split_dataset(
        args.train_percent, self.dataset.total_trajectories)
    if args.dset_norm:
        self.mean, self.std = self.dataset.get_statistics(self.train_trajectories)
    else:
        self.mean, self.std = 0, 1

    # Training Parameters
    self.max_epoch = args.max_epoch
    self.lambda_d = args.lambda_d

    # Net Parameters
    self.dim_in = self.dim_z
    self.dim_out = 2 * (self.dim_z)**2
    self.SPNN = StructurePreservingNet(self.dim_in, self.dim_out,
                                       args.hidden_vec, args.activation).float()
    if args.train == False:
        # Load pretrained net
        load_name = 'net_' + self.sys_name + '.pt'
        load_path = os.path.join(args.dset_dir, load_name)
        self.SPNN.load_state_dict(torch.load(load_path))
    else:
        self.SPNN.weight_init(args.net_init)

    self.optim = optim.Adam(self.SPNN.parameters(), lr=args.lr,
                            weight_decay=args.lambda_r)
    self.scheduler = optim.lr_scheduler.MultiStepLR(self.optim,
                                                    milestones=args.miles,
                                                    gamma=args.gamma)

    # Load/Save options
    self.output_dir = args.output_dir
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir, exist_ok=True)
    self.save_plots = args.save_plots
def train_network(alpha=0.01,
                  batch_size=256,
                  epsilon=1e-08,
                  hidden_layer_sizes=(300, ),
                  learning_rate='adaptive',
                  max_iter=500,
                  test_size=0.2,
                  random_state=9):
    return pipe \
        | prepare_dataset() \
        | split_dataset(test_size=test_size, random_state=random_state) \
        | inspect_wrapper(inspect_split_dataset) \
        | train(alpha=alpha,
                batch_size=batch_size,
                epsilon=epsilon,
                hidden_layer_sizes=hidden_layer_sizes,
                learning_rate=learning_rate,
                max_iter=max_iter)
def main():
    if len(sys.argv) < 4:
        print('Usage : python siamese_nn.py graphfile no_nodes no_features')
        exit()
    graph, no_nodes, no_features = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])
    dataset, graph_features, edge_index, features = generate_dataset(
        graph, no_nodes, no_features, siamese=1)
    num_features = len(graph_features[0][1])

    # these parameters can be changed
    size_emb = 64
    batch_size = 64
    val_split = .2

    train_dataloader, val_dataloader = split_dataset(dataset, batch_size, val_split)
    model = SiameseNN(num_features, size_emb).cuda()
    model = train_model(model, train_dataloader, val_dataloader)
    test_model(model, val_dataloader)
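# NOTE: split_dataset is imported elsewhere and not shown in these snippets. Below is
# a minimal sketch of what a helper with this (dataset, batch_size, val_split)
# signature could look like; the random_split-based implementation is an assumption,
# not the original helper.
def split_dataset(dataset, batch_size, val_split):
    """Split a torch Dataset into train/val DataLoaders (hypothetical sketch)."""
    import torch
    val_size = int(len(dataset) * val_split)
    train_size = len(dataset) - val_size
    train_set, val_set = torch.utils.data.random_split(dataset, [train_size, val_size])
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    return train_loader, val_loader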
def main():
    if len(sys.argv) < 4:
        print('Usage : python gcn.py graphfile no_nodes no_features')
        exit()
    graph, no_nodes, no_features = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])
    dataset, graph_features, edge_index, gat_features = generate_dataset(
        graph, no_nodes, no_features)
    num_features = len(graph_features[0][1])

    # these parameters can be changed
    size_emb = 64
    batch_size = 64
    val_split = .2

    train_dataloader, val_dataloader = split_dataset(dataset, batch_size, val_split)
    data = Data(x=torch.tensor(gat_features).float().cuda(),
                edge_index=edge_index.cuda(),
                num_nodes=no_nodes)
    model = GAT(num_features, size_emb).cuda()
    model = train_model(model, train_dataloader, val_dataloader, data)
    test_model(model, val_dataloader, data)
def __init__(self, args, x_trunc):
    self.x_trunc = x_trunc.detach()

    # Study Case
    self.sys_name = args.sys_name

    # Dataset Parameters
    self.dataset = load_dataset(args)
    self.dt = self.dataset.dt
    self.dim_t = self.dataset.dim_t
    self.train_snaps, self.test_snaps = split_dataset(self.dim_t - 1)

    # Training Parameters
    self.max_epoch = args.max_epoch_SPNN
    self.lambda_d = args.lambda_d_SPNN

    # Net Parameters
    _, self.dim_in = x_trunc.shape
    self.dim_out = int(self.dim_in * (self.dim_in + 2))
    self.SPNN = StructurePreservingNN(self.dim_in, self.dim_out,
                                      args.hidden_vec_SPNN,
                                      args.activation_SPNN).float()
    if args.train_SPNN == False:
        # Load pretrained nets
        load_name = 'SPNN_' + self.sys_name + '.pt'
        load_path = os.path.join(args.dset_dir, load_name)
        self.SPNN.load_state_dict(torch.load(load_path))
    else:
        self.SPNN.weight_init(args.init_SPNN)

    self.optim = optim.Adam(self.SPNN.parameters(), lr=args.lr_SPNN,
                            weight_decay=args.lambda_r_SPNN)
    self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
        self.optim, milestones=args.miles_SPNN, gamma=args.gamma_SPNN)

    # Load/Save options
    self.output_dir = args.output_dir
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir, exist_ok=True)
    self.save_plots = args.save_plots
def __init__(self, args):
    # Study Case
    self.sys_name = args.sys_name

    # Dataset Parameters
    self.dset_dir = args.dset_dir
    self.dataset = load_dataset(args)
    self.dt = self.dataset.dt
    self.dim_t = self.dataset.dim_t
    self.train_snaps, self.test_snaps = split_dataset(self.dim_t)

    # Training Parameters
    self.max_epoch = args.max_epoch_SAE
    self.lambda_r = args.lambda_r_SAE

    # Net Parameters
    if self.sys_name == 'viscoelastic':
        self.SAE = SparseAutoEncoder(args.layer_vec_SAE,
                                     args.activation_SAE).float()
    elif self.sys_name == 'rolling_tire':
        self.SAE = StackedSparseAutoEncoder(args.layer_vec_SAE_q,
                                            args.layer_vec_SAE_v,
                                            args.layer_vec_SAE_sigma,
                                            args.activation_SAE).float()

    self.optim = optim.Adam(self.SAE.parameters(), lr=args.lr_SAE,
                            weight_decay=1e-4)
    self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
        self.optim, milestones=args.miles_SAE, gamma=args.gamma_SAE)

    # Load/Save options
    if args.train_SAE == False:
        # Load pretrained nets
        load_name = 'SAE_' + self.sys_name + '.pt'
        load_path = os.path.join(self.dset_dir, load_name)
        self.SAE.load_state_dict(torch.load(load_path))

    self.output_dir = args.output_dir
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir, exist_ok=True)
    self.save_plots = args.save_plots
def train(data_dir, csv_path, splits_path, output_dir, target='pa', nb_epoch=100,
          lr=(1e-4, ), batch_size=1, optim='adam', dropout=None,
          min_patients_per_label=50, seed=666, data_augmentation=True,
          model_type='hemis', architecture='densenet121', misc=None):
    assert target in ['pa', 'l', 'joint']

    torch.manual_seed(seed)
    np.random.seed(seed)

    output_dir = output_dir.format(seed)
    splits_path = splits_path.format(seed)
    logger.info(f"Training mode: {target}")

    if not exists(output_dir):
        os.makedirs(output_dir)
    if not exists(splits_path):
        split_dataset(csv_path, splits_path, seed=seed)

    # Find device
    logger.info(f'Device that will be used is: {DEVICE}')

    # Load data
    val_transfo = [Normalize(), ToTensor()]
    if data_augmentation:
        train_transfo = [Normalize(), ToPILImage()]
        if 'rotation' in misc.transforms:
            train_transfo.append(RandomRotation(degrees=misc.rotation_degrees))
        if 'translation' in misc.transforms:
            train_transfo.append(RandomTranslate(translate=misc.translate))
        train_transfo.append(ToTensor())
        if 'noise' in misc.transforms:
            train_transfo.append(GaussianNoise())
    else:
        train_transfo = val_transfo

    dset_args = {
        'datadir': data_dir,
        'csvpath': csv_path,
        'splitpath': splits_path,
        'max_label_weight': misc.max_label_weight,
        'min_patients_per_label': min_patients_per_label,
        'flat_dir': misc.flatdir
    }
    loader_args = {
        'batch_size': batch_size,
        'shuffle': True,
        'num_workers': misc.threads,
        'pin_memory': True
    }

    trainset = PCXRayDataset(transform=Compose(train_transfo), **dset_args)
    valset = PCXRayDataset(transform=Compose(val_transfo), dataset='val', **dset_args)
    trainloader = DataLoader(trainset, **loader_args)
    valloader = DataLoader(valset, **loader_args)

    n_pts = f"{len(trainset)} train,"
    if misc.use_extended:
        ext_args = dset_args.copy()
        ext_args['splitpath'] = None
        ext_args['csvpath'] = misc.csv_path_ext
        extset = PCXRayDataset(transform=Compose(train_transfo), mode='pa_only',
                               use_labels=trainset.labels, **ext_args)
        extset.labels_count = trainset.labels_count
        extset.labels_weights = trainset.labels_weights
        extloader = DataLoader(extset, **loader_args)
        n_pts += f" {len(extset)} ext_train,"

    logger.info(f"Number of patients: {n_pts} {len(valset)} valid.")
    logger.info(f"Predicting {len(trainset.labels)} labels: \n{trainset.labels}")
    logger.info(trainset.labels_weights)

    # Load model
    model = create_model(model_type, num_classes=trainset.nb_labels, target=target,
                         architecture=architecture, dropout=dropout, otherargs=misc)
    model.to(DEVICE)
    logger.info(f'Created {model_type} model')

    evaluator = ModelEvaluator(output_dir=output_dir, target=target, logger=logger)

    criterion = nn.BCEWithLogitsLoss(pos_weight=trainset.labels_weights.to(DEVICE))
    loss_weights = [1.0] + list(misc.loss_weights)

    if len(misc.mt_task_prob) == 1:
        _mt_task_prob = misc.mt_task_prob[0]
        task_prob = [1 - _mt_task_prob, _mt_task_prob / 2., _mt_task_prob / 2.]
    else:
        _pa_prob, _l_prob = misc.mt_task_prob
        _jt_prob = 1 - (_pa_prob + _l_prob)
        task_prob = [_jt_prob, _pa_prob, _l_prob]

    if model_type in ['singletask', 'multitask', 'dualnet'] and len(lr) > 1:
        # each branch has custom learning rate
        optim_params = [{
            'params': model.frontal_model.parameters(),
            'lr': lr[0]
        }, {
            'params': model.lateral_model.parameters(),
            'lr': lr[1]
        }, {
            'params': model.classifier.parameters(),
            'lr': lr[2]
        }]
    else:
        # one lr for all
        optim_params = [{'params': model.parameters(), 'lr': lr[0]}]

    if misc.learn_loss_coeffs:
        temperature = torch.ones(size=(3, ), requires_grad=True,
                                 device=DEVICE).float()
        temperature_lr = lr[-1] if len(lr) > 3 else lr[0]
        optim_params.append({'params': temperature, 'lr': temperature_lr})

    # Optimizer
    optimizer, scheduler = create_opt_and_sched(optim=optim, params=optim_params,
                                                lr=lr[0], other_args=misc)

    start_epoch = 1
    # Resume training if possible
    latest_ckpt_file = join(output_dir, f'{target}-latest.tar')
    if isfile(latest_ckpt_file):
        checkpoint = torch.load(latest_ckpt_file)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        del checkpoint
        evaluator.load_saved()
        start_epoch = int(evaluator.eval_df.epoch.iloc[-1])
        logger.info(f"Resumed at epoch {start_epoch}")

    # Training loop
    for epoch in range(start_epoch, nb_epoch + 1):  # loop over the dataset multiple times
        model.train()
        running_loss = torch.zeros(1, requires_grad=False,
                                   dtype=torch.float).to(DEVICE)
        train_preds, train_true = [], []
        for i, data in enumerate(trainloader, 0):
            if target == 'joint':
                *images, label = data['PA'].to(DEVICE), data['L'].to(DEVICE), \
                    data['encoded_labels'].to(DEVICE)
                if model_type == 'stacked':
                    images = torch.cat(images, dim=1)
            else:
                images, label = data[target.upper()].to(DEVICE), \
                    data['encoded_labels'].to(DEVICE)

            # Forward
            output = model(images)
            optimizer.zero_grad()

            if model_type == 'multitask':
                # order of returned logits is joint, frontal, lateral
                if misc.learn_loss_coeffs:
                    loss_weights = temperature.pow(-2)
                all_task_losses, weighted_task_losses = [], []
                for idx, _logit in enumerate(output):
                    task_loss = criterion(_logit, label)
                    all_task_losses.append(task_loss)
                    weighted_task_losses.append(task_loss * loss_weights[idx])
                losses_dict = {
                    0: sum(weighted_task_losses),
                    1: all_task_losses[1],
                    2: all_task_losses[2]
                }
                select = np.random.choice([0, 1, 2], p=task_prob)
                loss = losses_dict[select]
                # mixing this temp seems bad
                if misc.learn_loss_coeffs:
                    loss += temperature.log().sum()
                output = output[0]
            else:
                loss = criterion(output, label)

            # Backward
            loss.backward()
            optimizer.step()

            # Save predictions
            train_preds.append(torch.sigmoid(output).detach().cpu().numpy())
            train_true.append(label.detach().cpu().numpy())

            # print statistics
            running_loss += loss.detach()
            print_every = max(1, len(trainset) // (20 * batch_size))
            if (i + 1) % print_every == 0:
                running_loss = running_loss.cpu().detach().numpy().squeeze() / print_every
                logger.info('[{0}, {1:5}] loss: {2:.5f}'.format(epoch, i + 1, running_loss))
                evaluator.store_dict['train_loss'].append(running_loss)
                running_loss = torch.zeros(1, requires_grad=False).to(DEVICE)

        del output, images, data

        if misc.use_extended:
            # Train with only PA images from extended dataset
            for i, data in enumerate(extloader, 0):
                if target == 'joint':
                    *images, label = data['PA'].to(DEVICE), data['L'].to(DEVICE), \
                        data['encoded_labels'].to(DEVICE)
                else:
                    images, label = data[target.upper()].to(DEVICE), \
                        data['encoded_labels'].to(DEVICE)

                # Forward
                output = model(images)
                optimizer.zero_grad()

                if model_type == 'multitask':
                    # only use PA loss
                    output = output[1]

                loss = criterion(output, label)

                # Backward
                loss.backward()
                optimizer.step()

                # Save predictions
                train_preds.append(torch.sigmoid(output).detach().cpu().numpy())
                train_true.append(label.detach().cpu().numpy())

                # print statistics
                running_loss += loss.detach()
                print_every = max(1, len(trainset) // (20 * batch_size))
                if (i + 1) % print_every == 0:
                    running_loss = running_loss.cpu().detach().numpy().squeeze() / print_every
                    logger.info('[{0}, {1:5}] Extended dataset loss: {2:.5f}'.format(
                        epoch, i + 1, running_loss))
                    evaluator.store_dict['train_loss'].append(running_loss)
                    running_loss = torch.zeros(1, requires_grad=False).to(DEVICE)

            del output, images, data

        train_preds = np.vstack(train_preds)
        train_true = np.vstack(train_true)

        model.eval()
        val_true, val_preds, val_runloss = get_model_preds(
            model, dataloader=valloader, loss_fn=criterion, target=target,
            model_type=model_type, vote_at_test=misc.vote_at_test)
        val_runloss /= (len(valset) / batch_size)
        logger.info(f'Epoch {epoch} - Val loss = {val_runloss:.5f}')

        val_auc, _ = evaluator.evaluate_and_save(val_true, val_preds, epoch=epoch,
                                                 train_true=train_true,
                                                 train_preds=train_preds,
                                                 runloss=val_runloss)

        if 'reduce' in misc.sched:
            scheduler.step(metrics=val_auc, epoch=epoch)
        else:
            scheduler.step(epoch=epoch)

        _states = {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict()
        }
        torch.save(_states, latest_ckpt_file)
        torch.save(model.state_dict(),
                   join(output_dir, '{}-e{}.pt'.format(target, epoch)))

        # Remove all batches weights
        weights_files = glob(join(output_dir, '{}-e{}-i*.pt'.format(target, epoch)))
        for file in weights_files:
            os.remove(file)
def test(data_dir, csv_path, splits_path, output_dir, target='pa', batch_size=1,
         pretrained=False, min_patients_per_label=100, seed=666, model_type='hemis',
         architecture='densenet121', misc=None):
    assert target in ['pa', 'l', 'joint']

    print(f"\n\nTesting seed {seed}")
    torch.manual_seed(seed)
    np.random.seed(seed)

    extra = misc.extra
    name = output_dir.split("/")[-1].format('') + extra
    output_dir = output_dir.format(seed)
    splits_path = splits_path.format(seed)
    if not exists(splits_path):
        split_dataset(csv_path, splits_path)

    resultsfile = join(output_dir, '..', 'auc-test.csv')
    if not isfile(resultsfile):
        columns = ['expt', 'seed', 'accuracy', 'auc', 'auc_weighted', 'prc',
                   'prc_weighted']
        test_metrics_df = pd.DataFrame(columns=columns)
    else:
        test_metrics_df = pd.read_csv(resultsfile)

    # Save predictions
    savepreds = {}
    saveauc = {}
    predsdir = join(output_dir, '..', 'test_outs')
    predsfile = join(predsdir, f'preds-{name}{extra}_{seed}-{target}.npz')
    aucfile = join(predsdir, f'auc-{name}{extra}_{seed}-{target}.npz')
    if isfile(predsfile):
        print(f'Loading {predsfile}')
        _arr = np.load(predsfile, allow_pickle=True)
        savepreds = {k: _arr[k] for k in _arr.keys()}
    if isfile(aucfile):
        print(f'Loading {aucfile}')
        _arr = np.load(aucfile, allow_pickle=True)
        saveauc = {k: _arr[k] for k in _arr.keys()}

    print("Test mode: {}".format(target))
    print('Device that will be used is: {0}'.format(DEVICE))

    # Load data
    preprocessing = Compose([Normalize(), ToTensor()])
    testset = PCXRayDataset(data_dir, csv_path, splits_path,
                            transform=preprocessing, dataset='test',
                            pretrained=pretrained,
                            min_patients_per_label=min_patients_per_label)
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False,
                            num_workers=2)
    print("{0} patients in test set.".format(len(testset)))

    # Find best weights
    metricsdf = pd.read_csv(join(output_dir, f'{target}-metrics.csv'))
    best_epoch = int(metricsdf.idxmax()['auc'])
    weights_file = join(output_dir, '{}-e{}.pt'.format(target, best_epoch))

    # Create model and load best weights
    model = create_model(model_type, num_classes=testset.nb_labels, target=target,
                         architecture=architecture, dropout=0.0, otherargs=misc)
    try:
        model.load_state_dict(torch.load(weights_file))
    except RuntimeError:
        # Issue in loading weights if trained on multiple GPUs
        state_dict = torch.load(weights_file, map_location='cpu')
        for key in list(state_dict.keys()):
            if 'conv' in key or 'classifier' in key:
                if '.0.' in key:
                    new_key = key.replace(".0.", '.')
                    state_dict[new_key] = state_dict[key]
                    del state_dict[key]
        model.load_state_dict(state_dict)
    model.to(DEVICE)
    model.eval()

    # if misc.test_multi:
    #     y_true, y_preds, _ = get_model_preds(model, dataloader=testloader, target=target, model_type=model_type,
    #                                          vote_at_test=misc.vote_at_test, progress_bar=True)
    #     metrics, per_label_auc, per_label_prc = get_metrics(y_true, y_preds)
    #     row = {'expt': name, 'seed': seed, **metrics}
    #     print(row)
    #
    #     test_metrics_df = test_metrics_df.append(row, ignore_index=True)
    #     test_metrics_df.to_csv(resultsfile, index=False)
    #
    #     savepreds = {'y_true': y_true, 'y_preds': y_preds, 'meta': row}
    #     saveauc = {'auc': per_label_auc, 'prc': per_label_prc, 'meta': row}

    for view in misc.test_on:
        print(f"Testing on only {view}")
        if view == 'pa':
            model.test_only_one = 0
        elif view == 'l':
            model.test_only_one = 1
        else:
            model.test_only_one = None

        y_true, y_preds, _ = get_model_preds(model, dataloader=testloader,
                                             target=target, test_on=view,
                                             model_type=model_type,
                                             vote_at_test=misc.vote_at_test,
                                             progress_bar=True)
        metrics, per_label_auc, per_label_prc = get_metrics(y_true, y_preds)
        row = {'expt': name + f'{view}_view', 'seed': seed, **metrics}
        print(row)

        test_metrics_df = test_metrics_df.append(row, ignore_index=True)
        savepreds[f'y_true_{view}_view'] = y_true
        savepreds[f'y_preds_{view}_view'] = y_preds
        savepreds[f'meta_{view}_view'] = row
        saveauc[f'auc_{view}_view'] = per_label_auc
        saveauc[f'prc_{view}_view'] = per_label_prc
        saveauc[f'meta_{view}_view'] = row

    test_metrics_df.to_csv(resultsfile, index=False)
    np.savez(join(predsdir, f'preds-{name}{extra}_{seed}-{target}'), **savepreds)
    np.savez(join(predsdir, f'auc-{name}{extra}_{seed}-{target}'), **saveauc)
def _main_xla(index, args):
    import torch_xla
    import torch_xla.core.xla_model as xm
    import torch_xla.debug.metrics as met
    import torch_xla.distributed.parallel_loader as pl

    alphabet = alphabet_factory()
    train_dataset, test_dataset = split_dataset(args, alphabet)
    collate_fn = collate_factory(model_length_function)

    if xm.xrt_world_size() > 1:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset,
            num_replicas=xm.xrt_world_size(),
            rank=xm.get_ordinal(),
            shuffle=True)
    else:
        train_sampler = torch.utils.data.RandomSampler(train_dataset)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               sampler=train_sampler,
                                               num_workers=args.num_workers,
                                               collate_fn=collate_fn,
                                               drop_last=True)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.num_workers,
                                              collate_fn=collate_fn,
                                              drop_last=True)

    # Scale learning rate to world size
    lr = args.learning_rate * xm.xrt_world_size()

    # Get loss function, optimizer, and model
    device = xm.xla_device()
    in_features = args.n_mfcc * (2 * args.n_context + 1)  # same as in main()
    model = build_deepspeech(in_features=in_features, num_classes=len(alphabet))
    model = model.to(device)

    optimizer = get_optimizer(args, model.parameters())
    criterion = nn.CTCLoss(blank=alphabet.mapping[alphabet.char_blank])
    decoder = GreedyDecoder()

    train_device_loader = pl.MpDeviceLoader(train_loader, device)
    test_device_loader = pl.MpDeviceLoader(test_loader, device)

    class XLAProxyOptimizer:
        """
        XLA proxy optimizer for compatibility with torch.Optimizer.
        xm.optimizer_step() also performs the gradient reduction across replicas.
        """

        def __init__(self, optimizer):
            self.optimizer = optimizer

        def zero_grad(self):
            self.optimizer.zero_grad()

        def step(self):
            xm.optimizer_step(self.optimizer)

    optimizer = XLAProxyOptimizer(optimizer)
    train_eval_fn(args.num_epochs, train_device_loader, test_device_loader,
                  optimizer, model, criterion, device, decoder, alphabet,
                  args.checkpoint)
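# A minimal sketch (not part of the original snippet) of how _main_xla(index, args)
# could be launched on multiple TPU cores: its signature follows the torch_xla
# multiprocessing convention, where the process index is passed first. `parse_args`
# is a hypothetical helper standing in for whatever argument parsing the original
# script uses, and nprocs=8 is a placeholder for a TPU v3-8 host.
if __name__ == '__main__':
    import torch_xla.distributed.xla_multiprocessing as xmp

    args = parse_args()  # hypothetical: returns the argparse Namespace used above
    xmp.spawn(_main_xla, args=(args,), nprocs=8, start_method='fork')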
opt_list = ['FOFS', 'AROW', 'SGD']
ds_list = ['relathe', 'pcmac', 'basehock', 'ccat', 'aut', 'real-sim']
ds_list = ['news', 'rcv1', 'url']
ds_list = ['synthetic_100_10K_100K', 'synthetic_200_20K_100K']

fold_num = 5
const_eta_search = '0.03125:2.0:32'
eta_search = '0.25:2.0:256'
delta_search = '0.03125:2.0:32'
r_search = '0.25:2.0:256'
delta_ofs_search = '0.0003125:2:0.32'

for dt in ds_list:
    split_list = dataset.split_dataset(dt, fold_num)

    # train model
    for opt in opt_list:
        print '----------------------------------------------'
        print 'Cross validation on %s' % dt + ' with %s' % opt
        print '----------------------------------------------'

        cmd = exe_path.cv_script + ' {0} {1} {2} '.format(dt, opt, fold_num)
        if 'AROW' in opt:
            cmd += ' {0} {1}'.format('-r', r_search)
        elif 'Ada' in opt:
            cmd += ' {0} {1}'.format('-delta', delta_search)
            cmd += ' {0} {1}'.format('-eta', const_eta_search)
        elif opt == 'OFSGD':
            cmd += ' {0} {1}'.format('-delta', delta_ofs_search)
def learn(exp_name, dataset, model=None, optimizer=None, loss=None, rng_seed=1234,
          num_epochs=10, split=(0.7, 0.2, 0.1), bsz=64):
    if model is None:
        in_size = dataset[0][0].numel()
        if isinstance(dataset[0][1], (int, long, float, complex)):
            out_size = 1
        else:
            out_size = dataset[0][1].numel()
        model = get_model(in_size, out_size)
    model = Network(model)

    if loss is None:
        if isinstance(dataset[0][1], (int, long, float, complex)):
            reg = True
        else:
            reg = False
        loss = get_loss(regression=reg)

    if optimizer is None:
        optimizer = get_optimizer(model)

    opt_hyperparams = optimizer.param_groups[0]
    opt_hyperparams = {k: opt_hyperparams[k] for k in opt_hyperparams
                       if not k == 'params'}

    exp = ro.Experiment(exp_name, {
        'model': str(model),
        'optimizer': str(optimizer),
        'opt_hyperparams': opt_hyperparams,
        'loss': str(loss),
        'rng_seed': rng_seed,
        'num_epochs': num_epochs,
        'bsz': bsz,
        'split': split,
    })

    th.manual_seed(rng_seed)
    np.random.seed(rng_seed)
    if args.cuda:
        th.cuda.manual_seed(rng_seed)
        model.cuda()

    print('Splitting dataset in ' + str(split[0]) + ' train, ' +
          str(split[1]) + ' Validation, ' + str(split[2]) + ' Test')
    dataset = split_dataset(dataset, split[0], split[1], split[2])

    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    train_loader = th.utils.data.DataLoader(dataset, batch_size=bsz, shuffle=True, **kwargs)
    dataset.use_valid()
    valid_loader = th.utils.data.DataLoader(dataset, batch_size=bsz, shuffle=True, **kwargs)
    dataset.use_test()
    test_loader = th.utils.data.DataLoader(dataset, batch_size=bsz, shuffle=True, **kwargs)

    train_errors = []
    valid_errors = []

    # Start training
    for epoch in range(num_epochs):
        print('\n\n', '-' * 20, ' Epoch ', epoch, ' ', '_' * 20)
        dataset.use_train()
        train_errors.append(train(train_loader, model, loss, optimizer))
        print('Training error: ', train_errors[-1])
        dataset.use_valid()
        valid_errors.append(test(valid_loader, model, loss))
        print('Validation error: ', valid_errors[-1])

    # Benchmark on Test
    dataset.use_test()
    test_error = test(test_loader, model, loss)
    print('Final Test Error: ', test_error)

    # Save experiment result
    exp.add_result(test_error, {
        'train_errors': train_errors,
        'valid_errors': valid_errors,
    })

    # Plot Results
    if not os.path.exists('./results'):
        os.mkdir('./results')
    p = Plot('Convergence')
    x = np.arange(0, len(train_errors), 1)
    p.plot(x, np.array(train_errors), label='Train')
    p.plot(x, np.array(valid_errors), label='Validation')
    p.set_axis('Epoch', 'Loss')
    b = Plot('Final Error')
    b.bar(x=[train_errors[-1], valid_errors[-1], test_error],
          labels=['Train', 'Validation', 'Test'])
    cont = Container(1, 2, title=exp_name)
    cont.set_plot(0, 0, p)
    cont.set_plot(0, 1, b)
    cont.save('./results/' + exp_name + '.pdf')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

# Check device
use_cuda = False
if use_cuda and not torch.cuda.is_available():
    raise Exception("No GPU found, please run without --cuda")
device = torch.device("cuda" if use_cuda else "cpu")

# Split the test/training dataset
dataset = split_dataset(
    path="/neurospin/radiomics_pub/workspace/metastasis_dl/data/dataset.tsv",
    dataloader=LoadDataset,
    batch_size=10,
    inputs=["t1"],
    # outputs=["mask"],
    label="label",
    number_of_folds=1,
    verbose=0)
# print(dataset["test"].dataset.shape)
# for batch_data in dataset["test"]:
#     print(batch_data["inputs"].shape, batch_data["labels"].shape)
#     if batch_data["outputs"] is not None:
#         print(batch_data["outputs"].shape)

# Create network
model = Net(nb_voxels_at_layer2=44268).to(device)

# Objective function is cross-entropy
criterion = nn.CrossEntropyLoss()
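# A minimal sketch (not from the original script) of how the pieces above could be
# wired into a training loop. The "train" fold key, the Adam optimizer, and the
# epoch count are assumptions; the "inputs"/"labels" batch keys mirror the
# commented-out inspection code above.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
model.train()
for epoch in range(10):
    for batch_data in dataset["train"]:
        inputs = batch_data["inputs"].to(device)
        labels = batch_data["labels"].to(device)
        optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()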
def train_model(model, X, Y, alphabet, multi_task, args, val_x=None, val_y=None):
    startDate = datetime.now()
    if args.track_name is not None:
        out = os.path.join(get_config().get("folder", "results_prefix"),
                           startDate.strftime('%Y-%m-%d'), args.track_name)
    else:
        out = os.path.join(get_config().get("folder", "results_prefix"),
                           startDate.strftime('%Y-%m-%d'),
                           startDate.strftime("%H-%M-%S"))

    # alphabet contains the 0 character for '', however we ignore that one
    a_size = len(alphabet) - 1
    dataset = make_dataset(X, Y, a_size, args.embed)

    # Make train, test and val set
    train, test, val = split_dataset(dataset)
    print len(train), len(test), len(val)
    logs = {'train': len(train), 'test': len(test), 'val': len(val)}

    # We make use of the multi process iteration such that the one-hot-encoding is
    # done on a separate thread from the GPU controller
    train_iter = I.MultiprocessIterator(train, args.batch_size)
    test_iter = I.MultiprocessIterator(test, args.batch_size, shuffle=False)
    val_iter = I.MultiprocessIterator(val, args.batch_size, shuffle=False, repeat=False)

    date = time.strftime("%Y-%m-%d_%H-%M-%S")
    fn_a = 'loss_%s.png' % date
    fn_a_veg = 'loss_%s_veg.png' % date
    fn_a_cus = 'loss_%s_cuisine.png' % date
    fn_b = 'lr_%s.png' % date
    fn_c = 'acc_%s.png' % date
    fn_c_veg = 'acc_%s_veg.png' % date
    fn_c_cus = 'acc_%s_cus.png' % date
    loss_r = E.PlotReport(['validation/main/loss', 'main/loss'], 'epoch', file_name=fn_a)
    loss_r_veg = E.PlotReport(['validation/main/loss/veg', 'main/loss/veg'], 'epoch',
                              file_name=fn_a_veg)
    loss_r_cus = E.PlotReport(['validation/main/loss/cuisine', 'main/loss/cuisine'],
                              'epoch', file_name=fn_a_cus)
    lr_r = E.PlotReport(['lr'], 'epoch', file_name=fn_b)
    acc_r = E.PlotReport(['validation/main/accuracy', 'main/accuracy'], 'epoch',
                         file_name=fn_c)
    acc_r_cus = E.PlotReport(['validation/main/accuracy/cuisine',
                              'main/accuracy/cuisine'], 'epoch', file_name=fn_c_cus)
    acc_r_veg = E.PlotReport(['validation/main/accuracy/veg', 'main/accuracy/veg'],
                             'epoch', file_name=fn_c_veg)

    if not multi_task:
        classifier = L.Classifier(model)
    else:
        tasks = [
            {'name': 'cuisine',
             'factor': args.cus_factor,
             'loss_fun': F.loss.softmax_cross_entropy.softmax_cross_entropy,
             'acc_fun': F.evaluation.accuracy.Accuracy(ignore_label=-1)},
            {'name': 'veg',
             'factor': 1. - args.veg_factor,
             'loss_fun': F.loss.softmax_cross_entropy.softmax_cross_entropy,
             'acc_fun': F.evaluation.accuracy.Accuracy(ignore_label=-1)}
        ]
        classifier = MultiTaskClassifier(model, tasks)

    classifier.compute_accuracy = args.acc_train  # only probabilities for training
    if args.gpu >= 0:
        classifier.to_gpu(args.gpu)

    print "Model initialized, epochs: %d" % args.epochs

    optimizer = O.MomentumSGD(lr=args.lr, momentum=args.momentum)
    optimizer.setup(classifier)

    # prepare model for evaluation
    eval_model = classifier.copy()
    eval_model.compute_accuracy = args.acc_val
    eval_model.predictor.train = False

    updater = T.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = T.Trainer(updater, (args.epochs, 'epoch'), out)

    val_interval = (10 if args.test else 1000), 'iteration'
    log_interval = (10 if args.test else 1000), 'iteration'

    if args.no_validation:
        trainer.extend(E.Evaluator(val_iter, eval_model, device=args.gpu))

    if args.val_ingredients is not None and args.val_chars is not None:
        X, Y, alphabet = get_dataset(args.val_chars, args.case_sensitive,
                                     args.ds_parts,
                                     max_recipe_size=args.val_ingredients,
                                     test_mode=args.test)
        dataset = make_dataset(X, Y, a_size, args.embed)
        # Make train, test and val set
        train_2, test_2, val_2 = split_dataset(dataset)
        val2_iter = I.MultiprocessIterator(val_2, args.batch_size, shuffle=False,
                                           repeat=False)
        eval2_model = classifier.copy()
        eval2_model.compute_accuracy = args.acc_val
        eval2_model.predictor.train = False
        eval2_model.predictor.recipe_size = args.val_ingredients
        eval2_model.predictor.ingredient.width = args.val_chars
        ev2 = E.Evaluator(val2_iter, eval2_model, device=args.gpu)
        ev2.default_name = 'validation_2nd'
        trainer.extend(ev2)
        fn_d = 'acc_2nd_%s.png' % date
        acc_2nd = E.PlotReport(['validation_2nd/main/accuracy'], 'epoch', file_name=fn_d)
        trainer.extend(acc_2nd)

    trainer.extend(E.dump_graph('main/loss'))
    trainer.extend(E.ExponentialShift('lr', 0.5), trigger=(7, 'epoch'))
    # trainer.extend(E.snapshot(), trigger=val_interval)
    if args.take_model_snapshot:
        trainer.extend(E.snapshot_object(model, 'model'), trigger=(args.epochs, 'epoch'))

    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(E.LogReport(trigger=log_interval))
    trainer.extend(E.observe_lr(), trigger=log_interval)
    trainer.extend(E.PrintReport([
        'epoch', 'iteration', 'main/loss',
        'validation/main/loss', 'validation_2nd/main/loss',
        'validation/main/accuracy/veg', 'validation/main/accuracy/cuisine',
        'lr', 'main/loss/cuisine', 'main/loss/veg'
    ]), trigger=log_interval)
    trainer.extend(loss_r)
    trainer.extend(lr_r)
    trainer.extend(acc_r)
    trainer.extend(loss_r_veg)
    trainer.extend(loss_r_cus)
    trainer.extend(acc_r_veg)
    trainer.extend(acc_r_cus)
    trainer.extend(T.extensions.ProgressBar(update_interval=10))

    start = time.time()
    trainer.run()
    duration = time.time() - start
    re_s = len(train) * args.epochs / duration
    logs.update({'duration': duration, 'epochs': args.epochs,
                 'recipes': len(train), 'recipes/s': re_s, 'recipes/m': re_s * 60})
    with open(os.path.join(out, 'log.json'), 'w') as io:
        json.dump(logs, io)
import IOHelper
from classifierQualityReport import make_quality_report
from dataset import split_dataset
from logisticRegressionClassifier import LogisticRegressionClassifier

dataset = IOHelper.pickle_deserialize('dataset')
train, test = split_dataset(dataset, 0.8)

cls = LogisticRegressionClassifier(train)

# save to files with the .cls extension
IOHelper.pickle_serialize(cls, 'test.cls')
load_cls = IOHelper.pickle_deserialize('test.cls')

report = make_quality_report(load_cls, test)
print(report)
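# For reference, a minimal sketch of what a (dataset, train_fraction) split helper
# like the one imported from `dataset` above could look like. The list-of-samples
# input format, the shuffling, and the fixed seed are assumptions, not the original
# code, so the name is kept distinct from the real split_dataset.
import random

def split_dataset_sketch(samples, train_fraction=0.8, seed=0):
    """Shuffle a list of samples and split it into train/test parts (hypothetical)."""
    samples = list(samples)
    random.Random(seed).shuffle(samples)
    cut = int(len(samples) * train_fraction)
    return samples[:cut], samples[cut:]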
        callbacks=[checkpoint_callback, early_stopping]
    )
else:
    train_loader = DataLoader(train_dataset, batch_size=16, num_workers=10, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=8, num_workers=10, shuffle=False)
    trainer = Trainer(
        gpus=1,
        num_sanity_val_steps=-1,
        deterministic=True,
        max_epochs=config.epochs,
        callbacks=[checkpoint_callback, early_stopping]
    )
trainer.fit(model, train_loader, val_loader)


if __name__ == '__main__':
    class config:
        seed = 42
        epochs = 15
        data_dir = 'data/'
        save_dir = 'save/dk'
        device = 'gpu'

    seed_everything()
    split_dataset('data/dirty_mnist_2nd_answer.csv')
    model_train(0)
    model_train(1)
    model_train(2)
    model_train(3)
    model_train(4)
ds_list = ['pcmac', 'a9a', 'MNIST', 'aut']  # ,'rcv1','url'
ds_list = ['a1a', 'a3a', 'a5a']
ds_list = ['news']

fold_num = 5
const_eta_search = '0.03125:2.0:32'
eta_search = '0.25:2.0:256'
delta_search = '0.03125:2.0:32'
# delta_search = '0.03125:2.0:32'
r_search = '0.25:2.0:256'
# r_search = delta_search
delta_ofs_search = '0.0003125:2:0.32'

for dt in ds_list:
    split_list = dataset.split_dataset(dt, fold_num)

    # train model
    for opt in opt_list:
        print '----------------------------------------------'
        print 'Cross validation on %s' % dt + ' with %s' % opt
        print '----------------------------------------------'

        cmd = exe_path.cv_script + ' {0} {1} {2} '.format(dt, opt, fold_num)
        if 'AROW' in opt:
            cmd += ' {0} {1}'.format('-r', r_search)
        elif 'Ada' in opt:
            cmd += ' {0} {1}'.format('-delta', delta_search)
            cmd += ' {0} {1}'.format('-eta', const_eta_search)
        elif opt == 'OFSGD':
            cmd += ' {0} {1}'.format('-delta', delta_ofs_search)
def main(args):
    # set device used -----------------------------------------------
    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # print dataset information ------------------------------------
    # record_dataset(args)
    # count_dataset(args)

    # prepare dataset --------------------------------------------
    unlabeled_dir, labeled_dir, val_dir = split_dataset(args)
    unlabeled_set = Probe_Dataset(unlabeled_dir, args)
    labeled_set = Probe_Dataset(labeled_dir, args)
    val_set = Probe_Dataset(val_dir, args)
    args.n_weights = torch.tensor(labeled_set.labelweights).float().to(args.device)
    args.log_string("Weights for classes:{}".format(args.n_weights))

    if args.over_sample:
        unlabeled_set = ConcatDataset([AugmentDataset(args, 'unlabel'), unlabeled_set])
        labeled_set = ConcatDataset([AugmentDataset(args, 'label'), labeled_set])
        # labeled_set = AugmentDataset(args, 'label')

    try:
        labeled_loader = DataLoader(labeled_set, batch_size=args.batch_size,
                                    shuffle=True, num_workers=args.num_workers)
        val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=True,
                                num_workers=args.num_workers)
        unlabeled_loader = DataLoader(unlabeled_set, batch_size=args.batch_size,
                                      shuffle=True, num_workers=args.num_workers)
    except:
        print("Empty unlabel_set")

    args.log_string("The number of unlabeled data is %d" % len(unlabeled_set))
    args.log_string("The number of labeled data is %d" % len(labeled_set))
    args.log_string("The number of validation data is %d" % len(val_set))

    # initialization -----------------------------------------------------
    model, ema_model, optimizer, criterion, start_epoch, writer = initialization(args)

    global_epoch = 0
    best_epoch = 0
    best_dice = 0

    for epoch in range(start_epoch, args.epoch):
        args.log_string('**** Epoch %d (%d/%s) ****' %
                        (global_epoch + 1, epoch + 1, args.epoch))

        # adjust hyper parameters ---------------------------------------------------------
        lr = max(args.learning_rate * (args.lr_decay**(epoch // args.step_size)),
                 args.lr_clip)
        writer.add_scalar('misc/lr', lr, epoch)
        args.log_string('Learning rate:%f' % lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        # train --------------------------------------------------------------
        if args.all_label:
            train_mean_teacher(args, global_epoch, labeled_loader, labeled_loader,
                               model, ema_model, optimizer, criterion, writer)
        else:
            train_mean_teacher(args, global_epoch, labeled_loader, unlabeled_loader,
                               model, ema_model, optimizer, criterion, writer)

        if epoch % 5 == 0:
            savepath = str(args.log_dir) + '/model.pth'
            args.log_string('Saving at %s' % savepath)
            state = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }
            if not args.baseline:
                state['ema_model_state_dict'] = ema_model.state_dict()
            torch.save(state, savepath)

        # validate student model ------------------------------------------------------------
        val_result = validate(args, global_epoch, val_loader, model, optimizer,
                              criterion, writer, is_ema=False)
        args.log_string('Student model result -----------------------------------------------')
        args.log_string('Val mean loss %s:' % (val_result[2]))
        args.log_string('Val class dice %s:' % (val_result[1]))
        args.log_string('Val mean dice %s:' % (val_result[0]))

        # validate teacher model ------------------------------------------------------------
        if not args.baseline:
            ema_val_result = validate(args, global_epoch, val_loader, ema_model,
                                      optimizer, criterion, writer, is_ema=True)
            args.log_string('Teacher model result -----------------------------------------------')
            args.log_string('Ema val mean loss %s:' % (ema_val_result[2]))
            args.log_string('Ema val class dice %s:' % (ema_val_result[1]))
            args.log_string('Ema val mean dice %s:' % (ema_val_result[0]))
            if ema_val_result[0] > val_result[0]:
                val_result = ema_val_result

        args.log_string('Epoch result -----------------------------------------------')
        args.log_string('Epoch class dice %s:' % (val_result[1]))
        args.log_string('Epoch mean dice %s:' % (val_result[0]))

        if val_result[0] > best_dice:
            best_dice = val_result[0]
            best_metric = val_result[1]
            best_epoch = epoch
            savepath = str(args.log_dir) + '/best_model.pth'
            args.log_string('Saving at %s' % savepath)
            state = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }
            if not args.baseline:
                state['ema_model_state_dict'] = ema_model.state_dict()
            torch.save(state, savepath)

        args.log_string('Current best result -----------------------------------------------')
        args.log_string('Best Epoch, Dice and Result: %d, %f, %s' %
                        (best_epoch, best_dice, best_metric))
        global_epoch += 1

    return best_dice, best_metric