def main(num_epochs, buffer_size, batch_size, datasets_path=None,
         output_resolution=512, max_load_resolution=512, num_classes=2,
         num_gpu=None, use_tpu=False):
    physical_gpus = tf.config.experimental.list_physical_devices('GPU')
    if num_gpu is None:
        num_gpu = len(physical_gpus)
    for gpu in physical_gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices("GPU")

    try:
        # TPU detection
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver() if use_tpu else None
    except ValueError:
        tpu = None

    # Select appropriate distribution strategy
    if use_tpu and tpu:
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
        tf.get_logger().info('Running on TPU %s', tpu.cluster_spec().as_dict()['worker'])
    elif len(logical_gpus) > 1:
        strategy = tf.distribute.MirroredStrategy(
            devices=['/gpu:{}'.format(i) for i in range(num_gpu)])
        tf.get_logger().info('Running on multiple GPUs.')
    elif len(logical_gpus) == 1:
        strategy = tf.distribute.get_strategy()
        tf.get_logger().info('Running on single GPU.')
    else:
        strategy = tf.distribute.get_strategy()
        tf.get_logger().info('Running on single CPU.')

    tf.get_logger().info('Number of devices: {}'.format(strategy.num_replicas_in_sync))
    tf.get_logger().info('num_classes: {}'.format(num_classes))
    tf.get_logger().info('batch_size: {}'.format(batch_size))
    tf.get_logger().info('output_resolution: {}'.format(output_resolution))

    checkpoint_path = "training/cp-{epoch:04d}-{step:04d}.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)

    dataset_loader = DatasetLoader(
        buffer_size=buffer_size,
        batch_size=batch_size * strategy.num_replicas_in_sync,
        output_resolution=output_resolution,
        max_load_resolution=max_load_resolution)
    train_dataset, test_dataset, train_num_datasets, test_num_datasets = dataset_loader.load(
        datasets_path=datasets_path, train_dir_name="train", test_dir_name="test")
    tf.get_logger().info("train_num_datasets:{}".format(train_num_datasets))
    tf.get_logger().info("test_num_datasets:{}".format(test_num_datasets))

    with strategy.scope():
        model = Model(output_resolution=output_resolution, num_classes=num_classes)
        train_len = tf.data.experimental.cardinality(train_dataset)
        train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
        test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)
        trainer = Train(batch_size=batch_size,
                        strategy=strategy,
                        num_epochs=num_epochs,
                        model=model,
                        train_num_datasets=train_num_datasets,
                        test_num_datasets=test_num_datasets,
                        checkpoint_path=checkpoint_path,
                        train_len=train_len,
                        num_classes=num_classes,
                        num_gpu=num_gpu,
                        checkpoint_dir=checkpoint_dir)
        trainer.custom_loop(train_dist_dataset, test_dist_dataset, strategy)
def preprocess_data():
    save_path = './new_features.csv'
    survival_data_save_path = './new_survival_data.csv'

    data_loader = DatasetLoader()
    features = data_loader.load_cell_features(DATASET_ROOT_DIR, save_path=save_path)
    print(features.shape)
    print(data_loader.patient_id_list)

    survival_data = data_loader.load_survial_data(DATASET_ROOT_DIR,
                                                  save_path=survival_data_save_path)
    print(survival_data.shape)
def test_disjoint_samples_train_5_tasks(task, dataset, ind_task):
    path = "./Archives/Data/Tasks/{}/{}_5_train.pt".format(dataset, task)
    data = torch.load(path)

    data_set = DatasetLoader(data, current_task=0, transform=None,
                             load_images=False, path=None)
    data_set.set_task(ind_task)

    folder = "./Samples/5_tasks/"
    if not os.path.exists(folder):
        os.makedirs(folder)

    path_out = os.path.join(folder,
                            "{}_{}_task_{}.png".format(dataset, task, ind_task))

    if task == "permutations":
        permutations = torch.load(
            "../Archives/Data/Tasks/{}/ind_permutations_5_train.pt".format(dataset))
        data_set.visualize_reordered(path_out, number=100, shape=[28, 28, 1],
                                     permutations=permutations)
    else:
        data_set.visualize_sample(path_out, number=100, shape=[28, 28, 1])
def test_DataLoader_init_label_size(get_fake_dataset):
    """
    Test that the dictionary of labels has the expected size.
    :param get_fake_dataset:
    :return:
    """
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset)
    if not len(dataset.labels) == dataset_size:
        raise AssertionError("Test failed")
def test_DataLoader_init_label_is_dict(get_fake_dataset):
    """
    Test that the dictionary of labels is really a dictionary.
    :param get_fake_dataset:
    :return:
    """
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset)
    if not isinstance(dataset.labels, dict):
        raise AssertionError("Test failed")
def test_disjoint_samples_disjoint_classes_permutations(ind_task, dataset):
    index_permutation = 2  # empirically chosen
    permutation = torch.load("permutation_classes.t")[index_permutation]

    name = ''
    for i in range(10):
        name += str(int(permutation[i]))

    path = "./Archives/Data/Tasks/{}/disjoint_{}_10_train.pt".format(dataset, name)
    data = torch.load(path)

    data_set = DatasetLoader(data, current_task=0, transform=None,
                             load_images=False, path=None)
    data_set.set_task(ind_task)

    folder = "./Samples/disjoint_classes_permutations/"
    if not os.path.exists(folder):
        os.makedirs(folder)

    path_out = os.path.join(folder,
                            "dijsoint_classes_permutations_{}.png".format(ind_task))
    data_set.visualize_sample(path_out, number=100, shape=[28, 28, 1])
def test_DataLoader_with_torch(get_fake_dataset):
    """
    Test if the dataset can be used with torch.utils.data.DataLoader.
    :param get_fake_dataset:
    :return:
    """
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset)
    train_loader = data.DataLoader(dataset, batch_size=10, shuffle=True,
                                   num_workers=6)
    for _, (_, _) in enumerate(train_loader):
        break
def test_DataLoader_with_torch_loader(get_fake_dataset):
    """
    Test that the dataset, used through torch.utils.data.DataLoader, yields
    batches and labels of the expected tensor types.
    :param get_fake_dataset:
    :return:
    """
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset)
    train_loader = data.DataLoader(dataset, batch_size=10, shuffle=True,
                                   num_workers=6)
    for _, (batch, label) in enumerate(train_loader):
        if not isinstance(label, torch.LongTensor):
            raise AssertionError("Test failed")
        if not isinstance(batch, torch.FloatTensor):
            raise AssertionError("Test failed")
        break
def test_disjoint_samples_train_10_tasks(dataset, ind_task):
    path = "./Archives/Data/Tasks/{}/disjoint_10_train.pt".format(dataset)
    data = torch.load(path)

    data_set = DatasetLoader(data, current_task=0, transform=None,
                             load_images=False, path=None)
    data_set.set_task(ind_task)

    folder = "./Samples/disjoint_10_tasks/"
    if not os.path.exists(folder):
        os.makedirs(folder)

    path_out = os.path.join(folder, "{}_task_{}.png".format(dataset, ind_task))
    data_set.visualize_sample(path_out, number=100, shape=[28, 28, 1])
def test_DataLoader_init_current_task(get_fake_dataset, init_current_task):
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset, current_task=init_current_task)
    if not dataset.current_task == init_current_task:
        raise AssertionError("Test failed")
def main():
    print(torch.cuda.is_available())
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    lossgradient = options.lossgradient

    cfg = {}
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    outfile = options.outfile
    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    print("Preparing data reading...")
    datadir = os.path.join(os.path.dirname(__file__), os.path.pardir,
                           'datasets', options.dataset)

    # - load filelist
    with io.open(os.path.join(datadir, 'filelists', 'valid')) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]
    with io.open(os.path.join(datadir, 'filelists', 'test')) as f:
        filelist += [l.rstrip() for l in f if l.rstrip()]

    # - load mean/std
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % options.dataset)

    dataloader = DatasetLoader(options.dataset, options.cache_spectra, datadir,
                               input_type=options.input_type, filelist=filelist)
    mel_spects, labels = dataloader.prepare_batches(sample_rate, frame_len, fps,
                                                    mel_bands, mel_min, mel_max,
                                                    blocklen, batchsize,
                                                    batch_data=False)

    with np.load(meanstd_file) as f:
        mean = f['mean']
        std = f['std']
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)

    mdl = model.CNNModel(input_type='mel_spects_norm', is_zeromean=False,
                         meanstd_file=meanstd_file, device=device)
    mdl.load_state_dict(torch.load(modelfile))
    mdl.to(device)
    mdl.eval()

    if lossgradient != 'None':
        mdl_lossgrad = model.CNNModel(input_type=options.input_type,
                                      is_zeromean=False,
                                      sample_rate=sample_rate,
                                      frame_len=frame_len, fps=fps,
                                      mel_bands=mel_bands, mel_min=mel_min,
                                      mel_max=mel_max, bin_mel_max=bin_mel_max,
                                      meanstd_file=meanstd_file, device=device)
        mdl_lossgrad.load_state_dict(torch.load(lossgradient))
        mdl_lossgrad.to(device)
        mdl_lossgrad.eval()
        criterion = torch.nn.BCELoss()
        loss_grad_val = dataloader.prepare_loss_grad_batches(
            options.loss_grad_save, mel_spects, labels, mdl_lossgrad,
            criterion, blocklen, batchsize, device)

    # run prediction loop
    print("Predicting:")
    predictions = []
    # for spect, g in zip(mel_spects, loss_grad_val):
    c = 0
    for spect in progress(mel_spects, total=len(filelist), desc='File '):
        if lossgradient != 'None':
            g = loss_grad_val[c]
            c += 1
        # naive way: pass excerpts of the size used during training
        # - view spectrogram memory as a 3-tensor of overlapping excerpts
        num_excerpts = len(spect) - blocklen + 1
        excerpts = np.lib.stride_tricks.as_strided(
            spect.astype(floatX),
            shape=(num_excerpts, blocklen, spect.shape[1]),
            strides=(spect.strides[0], spect.strides[0], spect.strides[1]))

        preds = np.zeros((num_excerpts, 1))
        count = 0
        for pos in range(0, num_excerpts, batchsize):
            input_data = np.transpose(
                excerpts[pos:pos + batchsize, :, :, np.newaxis], (0, 3, 1, 2))
            input_data = (input_data - mean) * istd
            if lossgradient != 'None':
                # occlude the time-frequency bins ranked by the loss gradient
                for i in range(input_data.shape[0]):
                    if options.lossgrad_algorithm == 'grad':
                        rank_matrix = np.abs(g[i + pos])
                    elif options.lossgrad_algorithm == 'gradxinp':
                        rank_matrix = np.squeeze(g[i + pos] * input_data[i, :, :, :])
                    elif options.lossgrad_algorithm == 'gradorig':
                        rank_matrix = g[i + pos]
                    if options.ROAR == 1:
                        v = np.argsort(rank_matrix, axis=None)[-cfg['occlude']:]
                    else:
                        v = np.argsort(rank_matrix, axis=None)[:cfg['occlude']]
                    input_data[i, :, v // 80, v % 80] = 0
            else:
                # occlude randomly chosen time-frequency bins
                for i in range(input_data.shape[0]):
                    v = np.random.choice(115 * 80, cfg['occlude'], replace=False)
                    input_data[i, :, v // 80, v % 80] = 0
            count += 1
            preds[pos:pos + batchsize, :] = mdl(
                torch.from_numpy(input_data).to(device)).cpu().detach().numpy()
        predictions.append(preds)

    # save predictions
    print("Saving predictions")
    np.savez(outfile, **{fn: pred for fn, pred in zip(filelist, predictions)})
def main(args):
    # Load train & val data
    train_kwargs = {
        'root_dir': args.root_dir,
        'data_file': args.train_file,
        'corpus_file': args.corpus_file,
        'label_file': args.label_file
    }
    train_data = DatasetLoader(kwargs=train_kwargs, transform=True)
    train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True)

    val_kwargs = {
        'root_dir': args.root_dir,
        'data_file': args.val_file,
        'corpus_file': args.corpus_file,
        'label_file': args.label_file
    }
    val_data = DatasetLoader(kwargs=val_kwargs, transform=True)
    val_loader = DataLoader(val_data, batch_size=1, shuffle=True)

    # Model and optimizer
    model = EGAT(node_feat=len(train_data.corpus),
                 edge_feat=8,
                 nclass=len(CLASSES),
                 nhidden=args.hidden,
                 dropout=args.dropout,
                 alpha=args.alpha,
                 nheads=args.nb_heads)
    model = model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.weight_decay)

    best_loss = 1000
    best_acc = 0.0
    for epoch in range(0, args.epochs):
        model.train()
        train_loss_mean = []
        train_acc_mean = []
        start_time = time.time()
        for in_data in train_loader:
            optimizer.zero_grad()
            output = model(in_data)
            label = in_data['graph_lbl'].to(DEVICE)
            loss_train = F.nll_loss(output, label)
            loss_train.backward()
            optimizer.step()
            acc_train = accuracy(output, label)
            train_loss_mean.append(loss_train.data.item())
            train_acc_mean.append(acc_train)

        print('Epoch: {:04d}'.format(epoch + 1),
              'loss_train: {:.4f}'.format(np.mean(train_loss_mean)),
              'acc_train: {:.4f}'.format(np.mean(train_acc_mean)),
              'time: {:.4f}s'.format(time.time() - start_time))

        if epoch == args.patience:
            model.eval()
            val_loss_mean = []
            val_acc_mean = []
            for in_data in val_loader:
                output = model(in_data)
                label = in_data['graph_lbl']
                loss_val = F.nll_loss(output, label)
                acc_val = accuracy(output, label)
                val_loss_mean.append(loss_val.data.item())
                val_acc_mean.append(acc_val)

            print("*" * 20)
            print('Epoch: {:04d}'.format(epoch + 1),
                  'loss_val: {:.4f}'.format(np.mean(val_loss_mean)),
                  'acc_val: {:.4f}'.format(np.mean(val_acc_mean)))

            if np.mean(val_acc_mean) > best_acc and np.mean(val_loss_mean):
                torch.save(
                    {
                        "state_dict": model.state_dict(),
                        "configs": args,
                        "epoch": epoch,
                        "train_acc": np.mean(train_loss_mean),
                        "val_acc": np.mean(val_loss_mean),
                    },
                    "{0}_epoch_{1}.pt".format(args.save_path, epoch))
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile

    cfg = {}
    print(options.vars)
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__), os.path.pardir,
                           'datasets', options.dataset)
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % options.dataset)

    if options.input_type == 'audio':
        dataloader = DatasetLoader(options.dataset, options.cache_spectra, datadir,
                                   input_type=options.input_type)
        batches = dataloader.prepare_audio_batches(sample_rate, frame_len, fps,
                                                   blocklen, batchsize)
    else:
        dataloader = DatasetLoader(options.dataset, options.cache_spectra, datadir,
                                   input_type=options.input_type)
        batches = dataloader.prepare_batches(sample_rate, frame_len, fps, mel_bands,
                                             mel_min, mel_max, blocklen, batchsize)

    validation_data = DatasetLoader(options.dataset,
                                    '../ismir2015/experiments/mel_data/', datadir,
                                    dataset_split='valid', input_type='mel_spects')
    mel_spects_val, labels_val = validation_data.prepare_batches(
        sample_rate, frame_len, fps, mel_bands, mel_min, mel_max, blocklen,
        batchsize, batch_data=False)

    mdl = model.CNNModel(model_type=options.model_type,
                         input_type=options.input_type, is_zeromean=False,
                         sample_rate=sample_rate, frame_len=frame_len, fps=fps,
                         mel_bands=mel_bands, mel_min=mel_min, mel_max=mel_max,
                         bin_mel_max=bin_mel_max, meanstd_file=meanstd_file,
                         device=device)
    mdl = mdl.to(device)

    # Setting up learning rate and learning rate parameters
    initial_eta = cfg['initial_eta']
    eta_decay = cfg['eta_decay']
    momentum = cfg['momentum']
    eta_decay_every = cfg.get('eta_decay_every', 1)
    eta = initial_eta

    # set up loss
    criterion = torch.nn.BCELoss()

    # set up optimizer
    optimizer = torch.optim.SGD(mdl.parameters(), lr=eta, momentum=momentum,
                                nesterov=True)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=eta_decay_every,
                                                gamma=eta_decay)

    # set up tensorboard writer
    writer = SummaryWriter(os.path.join(modelfile, 'runs'))

    epochs = cfg['epochs']
    epochsize = cfg['epochsize']
    batches = iter(batches)

    # conditions to save model
    best_val_loss = 100000.
    best_val_error = 1.

    for epoch in range(epochs):
        # - Initialize certain parameters that are used to monitor training
        err = 0
        total_norm = 0
        loss_accum = 0
        mdl.train(True)

        # - Compute the L-2 norm of the gradients
        for p in mdl.parameters():
            if p.grad is not None:
                param_norm = p.grad.data.norm(2)
                total_norm += param_norm.item() ** 2
        total_norm = total_norm ** (1. / 2)

        # - Start the training for this epoch
        for batch in progress(range(epochsize), min_delay=0.5,
                              desc='Epoch %d/%d: Batch ' % (epoch + 1, epochs)):
            data = next(batches)
            if options.input_type == 'audio' or options.input_type == 'stft':
                input_data = data[0]
            else:
                input_data = np.transpose(data[0][:, :, :, np.newaxis], (0, 3, 1, 2))
            labels = data[1][:, np.newaxis].astype(np.float32)

            # map labels to make them softer
            if not options.adversarial_training:
                labels = (0.02 + 0.96 * labels)

            optimizer.zero_grad()
            if options.adversarial_training:
                # craft adversarial examples with PGD before the forward pass
                mdl.train(False)
                if options.input_type == 'stft':
                    input_data_adv = attacks.PGD(
                        mdl, torch.from_numpy(input_data).to(device),
                        target=torch.from_numpy(labels).to(device),
                        eps=cfg['eps'], step_size=cfg['eps_iter'],
                        iterations=cfg['nb_iter'], use_best=True,
                        random_start=True, clip_min=0,
                        clip_max=1e8).cpu().detach().numpy()
                else:
                    input_data_adv = attacks.PGD(
                        mdl, torch.from_numpy(input_data).to(device),
                        target=torch.from_numpy(labels).to(device),
                        eps=cfg['eps'], step_size=cfg['eps_iter'],
                        iterations=cfg['nb_iter'], use_best=True,
                        random_start=True).cpu().detach().numpy()
                mdl.train(True)
                optimizer.zero_grad()
                outputs = mdl(torch.from_numpy(input_data_adv).to(device))
            else:
                optimizer.zero_grad()
                outputs = mdl(torch.from_numpy(input_data).to(device))

            loss = criterion(outputs, torch.from_numpy(labels).to(device))
            loss.backward()
            optimizer.step()
            print(loss.item())
            loss_accum += loss.item()

        # - Compute validation loss and error if desired
        if options.validate:
            mdl.input_type = 'mel_spects'
            from eval import evaluate
            mdl.train(False)
            val_loss = 0
            preds = []
            labs = []
            max_len = fps
            num_iter = 0

            for spect, label in zip(mel_spects_val, labels_val):
                num_excerpts = len(spect) - blocklen + 1
                excerpts = np.lib.stride_tricks.as_strided(
                    spect, shape=(num_excerpts, blocklen, spect.shape[1]),
                    strides=(spect.strides[0], spect.strides[0], spect.strides[1]))

                # - Pass mini-batches through the network and concatenate results
                for pos in range(0, num_excerpts, batchsize):
                    input_data = np.transpose(
                        excerpts[pos:pos + batchsize, :, :, np.newaxis], (0, 3, 1, 2))
                    if pos + batchsize > num_excerpts:
                        label_batch = label[pos:num_excerpts,
                                            np.newaxis].astype(np.float32)
                    else:
                        label_batch = label[pos:pos + batchsize,
                                            np.newaxis].astype(np.float32)
                    pred = mdl(torch.from_numpy(input_data).to(device))
                    e = criterion(pred, torch.from_numpy(label_batch).to(device))
                    preds = np.append(preds, pred[:, 0].cpu().detach().numpy())
                    labs = np.append(labs, label_batch)
                    val_loss += e.item()
                    num_iter += 1

            mdl.input_type = options.input_type
            print("Validation loss: %.3f" % (val_loss / num_iter))
            _, results = evaluate(preds, labs)
            print("Validation error: %.3f" % (1 - results['accuracy']))

            if 1 - results['accuracy'] < best_val_error:
                torch.save(mdl.state_dict(), os.path.join(modelfile, 'model.pth'))
                best_val_loss = val_loss / num_iter
                best_val_error = 1 - results['accuracy']
                print('New saved model', best_val_loss, best_val_error)

        # Update the learning rate
        scheduler.step()
        print('Training Loss per epoch', loss_accum / epochsize)

        # - Save parameters for examining
        writer.add_scalar('Training Loss', loss_accum / epochsize, epoch)
        if options.validate:
            writer.add_scalar('Validation loss', val_loss / num_iter, epoch)
            writer.add_scalar('Validation error', 1 - results['accuracy'], epoch)
        writer.add_scalar('Gradient norm', total_norm, epoch)

    if not options.validate:
        torch.save(mdl.state_dict(), os.path.join(modelfile, 'model.pth'))
    with io.open(os.path.join(modelfile, 'model.vars'), 'w') as f:
        f.writelines('%s=%s\n' % kv for kv in cfg.items())
dir_path = "/backtesting_data" file_name = "validation_data_Training_31a3e6e41cef24188ab2121d55be07ab98f7ccaf_2018-05-08_production.db" validation_db = join(dir_path, file_name) transform = Transform(db_file=validation_db, min_start_date='2015-01-01', max_end_date='2017-12-30', training_transformation=True) train_db = DatasetReader(path_to_training_db=validation_db, transform=transform, num_csku_per_query=1000, shuffle_transform=True) dataloader = DatasetLoader(train_db, collate_fn=append_lists, mini_batch_size=100, num_workers=0) st = time.time() for epoch in range(1): print "epoch %d" % epoch for i_batch, sample_batched in enumerate(dataloader): if i_batch % 99 == 0: print i_batch d = 1 end_t = time.time() print i_batch, (end_t - st) st = time.time() print(time.time() - st)
def main():
    # data_dir = './train_data/'
    train_image_dir = './train_data/DUTS/DUTS-TR-Image/'
    train_label_dir = './train_data/DUTS/DUTS-TR-Mask/'
    model_dir = './saved_models/'

    resume_train = True
    saved_model_path = model_dir + 'model.pth'
    validation = True
    save_every = 1

    epoch_num = 100000
    batch_size_train = 16
    batch_size_val = 1
    train_num = 0
    val_num = 0

    if validation:
        val_image_dir = 'test_data/val/images/'
        val_label_dir = 'test_data/val/gts/'
        prediction_dir = './val_results/'
        val_img_name_list = glob.glob(val_image_dir + '*.jpg')
        val_lbl_name_list = glob.glob(val_label_dir + '*.png')
        val_dataset = DatasetLoader(img_name_list=val_img_name_list,
                                    lbl_name_list=val_lbl_name_list,
                                    transform=transforms.Compose(
                                        [Rescale(256), ToTensor()]))
        val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False,
                                    num_workers=4)

    train_img_name_list = glob.glob(train_image_dir + '*.jpg')
    train_lbl_name_list = []
    for img_path in train_img_name_list:
        img_path = img_path.replace('.jpg', '.png')
        img_path = img_path.replace('DUTS-TR-Image', 'DUTS-TR-Mask')
        train_lbl_name_list.append(img_path)

    if len(train_img_name_list) == 0 or len(val_img_name_list) == 0:
        print('0 images found.')
        assert False

    print('Train images: ', len(train_img_name_list))
    print('Train labels: ', len(train_lbl_name_list))
    train_num = len(train_img_name_list)

    dataset = DatasetLoader(img_name_list=train_img_name_list,
                            lbl_name_list=train_lbl_name_list,
                            transform=transforms.Compose([
                                RandomHorizontalFlip(0.5),
                                RandomVerticalFlip(0.5),
                                Rescale(300),
                                RandomCrop(256),
                                ToTensor()
                            ]))
    dataloader = DataLoader(dataset, batch_size=batch_size_train, shuffle=True,
                            num_workers=4)

    model = MYNet(3, 1)
    model.cuda()

    from torchsummary import summary
    summary(model, input_size=(3, 256, 256))

    # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9,
    #                       weight_decay=0.00001, nesterov=False)
    optimizer = optim.Adam(model.parameters(), lr=0.01, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[200000, 350000],
                                               gamma=0.1, last_epoch=-1)
    # scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0001,
    #     max_lr=0.01, step_size_up=8000, mode='triangular2')

    i_num_tot = 0
    loss_output = 0.0
    loss_pre_ref = 0.0
    i_num_epoch = 0
    epoch_init = 0

    if resume_train:
        print('Loading checkpoint: ', saved_model_path)
        checkpoint = torch.load(saved_model_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        epoch_init = checkpoint['epoch'] + 1
        i_num_tot = checkpoint['i_num_tot'] + 1
        i_num_epoch = checkpoint['i_num_epoch']
        loss_output = checkpoint['loss_output']
        # loss_pre_ref = checkpoint['loss_pre_ref']

    log_file = open('logs/log.txt', 'a+')
    log_file.write(str(model) + '\n')
    log_file.close()

    print('Training...')
    _s = time.time()
    for epoch in range(epoch_init, epoch_num):
        model.train()
        print('Epoch {}...'.format(epoch))
        _time_epoch = time.time()
        for i, data in enumerate(dataloader):
            i_num_tot += 1
            i_num_epoch += 1
            inputs, labels = data
            inputs = inputs.cuda()
            labels = labels.cuda()
            optimizer.zero_grad()
            out = model(inputs)
            loss = muti_bce_loss_fusion(out, labels)
            loss[0].backward()
            optimizer.step()
            scheduler.step()
            loss_output += loss[0].item()
            # loss_pre_ref += loss[1].item()
            del out, inputs, labels
        print('Epoch time: {}'.format(time.time() - _time_epoch))

        if epoch % save_every == 0:
            # save the model every X epochs
            state_dic = {
                'epoch': epoch,
                'i_num_tot': i_num_tot,
                'i_num_epoch': i_num_epoch,
                'loss_output': loss_output,
                # 'loss_pre_ref': loss_pre_ref,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
            }
            torch.save(state_dic, model_dir + 'model.pth')

            log = '[epoch: {:d}/{:d}, ite: {:d}] loss_output: {:.6f}, l: {:.6f}\n'.format(
                epoch, epoch_num, i_num_tot, loss_output / i_num_epoch,
                loss[0].item())
            del loss
            loss_output = 0
            loss_pre_ref = 0
            i_num_epoch = 0

            log_file = open('logs/log.txt', 'a+')
            log_file.write(log + '\n')
            log_file.close()
            print(log)

            if validation:
                model.eval()
                # val_i_num_tot = 0
                val_i_num_epoch = 0
                val_loss_output = 0
                # val_loss_pre_ref = 0
                val_log_file = open('logs/log_val.txt', 'a+')
                print('Evaluating...')
                with torch.no_grad():
                    for val_i, val_data in enumerate(val_dataloader):
                        # val_i_num_tot += 1
                        val_i_num_epoch += 1
                        val_inputs, val_labels = val_data
                        val_inputs = val_inputs.cuda()
                        val_labels = val_labels.cuda()
                        val_out = model(val_inputs)
                        val_loss = muti_bce_loss_fusion(val_out, val_labels)
                        val_loss_output += val_loss[0].item()
                        # val_loss_pre_ref += val_loss0.item()
                        pred = val_out[0][:, 0, :, :]
                        pred = normPRED(pred)
                        save_output(val_img_name_list[val_i], pred, prediction_dir)
                        del val_out, val_inputs, val_labels, val_loss
                log_val = '[val: epoch: {:d}, ite: {:d}] loss_output: {:.6f}\n'.format(
                    epoch, i_num_tot, val_loss_output / val_i_num_epoch)
                val_log_file.write(log_val + '\n')
                val_log_file.close()

    _t = 'Training time: ' + str(time.time() - _s) + '\n'
    print(_t)
    log_file = open('logs/log.txt', 'a+')
    log_file.write(_t)
    log_file.close()
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile
    lossgradient = options.lossgradient

    cfg = {}
    print(options.vars)
    print('Model save file:', modelfile)
    print('Lossgrad file:', lossgradient)
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    sample_rate = cfg['sample_rate']
    frame_len = cfg['frame_len']
    fps = cfg['fps']
    mel_bands = cfg['mel_bands']
    mel_min = cfg['mel_min']
    mel_max = cfg['mel_max']
    blocklen = cfg['blocklen']
    batchsize = cfg['batchsize']
    print('Occluded amount:', cfg['occlude'])

    bin_nyquist = frame_len // 2 + 1
    bin_mel_max = bin_nyquist * 2 * mel_max // sample_rate

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__), os.path.pardir,
                           'datasets', options.dataset)
    meanstd_file = os.path.join(os.path.dirname(__file__),
                                '%s_meanstd.npz' % options.dataset)

    dataloader = DatasetLoader(options.dataset, options.cache_spectra, datadir,
                               input_type=options.input_type)
    batches = dataloader.prepare_batches(sample_rate, frame_len, fps, mel_bands,
                                         mel_min, mel_max, blocklen, batchsize)

    validation_data = DatasetLoader(options.dataset,
                                    '../ismir2015/experiments/mel_data/', datadir,
                                    dataset_split='valid', input_type='mel_spects')
    mel_spects_val, labels_val = validation_data.prepare_batches(
        sample_rate, frame_len, fps, mel_bands, mel_min, mel_max, blocklen,
        batchsize, batch_data=False)

    with np.load(meanstd_file) as f:
        mean = f['mean']
        std = f['std']
    mean = mean.astype(floatX)
    istd = np.reciprocal(std).astype(floatX)

    if options.input_type == 'mel_spects':
        mdl = model.CNNModel(input_type='mel_spects_norm', is_zeromean=False,
                             sample_rate=sample_rate, frame_len=frame_len,
                             fps=fps, mel_bands=mel_bands, mel_min=mel_min,
                             mel_max=mel_max, bin_mel_max=bin_mel_max,
                             meanstd_file=meanstd_file, device=device)

    if lossgradient != 'None':
        mdl_lossgrad = model.CNNModel(input_type=options.input_type,
                                      is_zeromean=False, sample_rate=sample_rate,
                                      frame_len=frame_len, fps=fps,
                                      mel_bands=mel_bands, mel_min=mel_min,
                                      mel_max=mel_max, bin_mel_max=bin_mel_max,
                                      meanstd_file=meanstd_file, device=device)
        mdl_lossgrad.load_state_dict(torch.load(lossgradient))
        mdl_lossgrad.to(device)
        mdl_lossgrad.eval()

    mdl = mdl.to(device)

    # Setting up learning rate and learning rate parameters
    initial_eta = cfg['initial_eta']
    eta_decay = cfg['eta_decay']
    momentum = cfg['momentum']
    eta_decay_every = cfg.get('eta_decay_every', 1)
    eta = initial_eta

    # set up loss
    criterion = torch.nn.BCELoss()

    # set up optimizer
    optimizer = torch.optim.SGD(mdl.parameters(), lr=eta, momentum=momentum,
                                nesterov=True)
    # optimizer = torch.optim.Adam(mdl.parameters(), lr=eta, betas=(0.9, 0.999),
    #                              eps=1e-08, weight_decay=0, amsgrad=False)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=eta_decay_every,
                                                gamma=eta_decay)

    # set up tensorboard writer
    writer = SummaryWriter(os.path.join(modelfile, 'runs'))

    epochs = cfg['epochs']
    epochsize = cfg['epochsize']
    batches = iter(batches)

    # conditions to save model
    best_val_loss = 100000.
    best_val_error = 1.

    # loss gradient values for validation data
    loss_grad_val = validation_data.prepare_loss_grad_batches(
        options.loss_grad_save, mel_spects_val, labels_val, mdl_lossgrad,
        criterion, blocklen, batchsize, device)

    for epoch in range(epochs):
        # - Initialize certain parameters that are used to monitor training
        err = 0
        total_norm = 0
        loss_accum = 0
        mdl.train(True)

        # - Compute the L-2 norm of the gradients
        for p in mdl.parameters():
            if p.grad is not None:
                param_norm = p.grad.data.norm(2)
                total_norm += param_norm.item() ** 2
        total_norm = total_norm ** (1. / 2)

        # - Start the training for this epoch
        for batch in progress(range(epochsize), min_delay=0.5,
                              desc='Epoch %d/%d: Batch ' % (epoch + 1, epochs)):
            data = next(batches)
            if options.input_type == 'audio' or options.input_type == 'stft':
                input_data = data[0]
            else:
                input_data = np.transpose(data[0][:, :, :, np.newaxis], (0, 3, 1, 2))
            labels = data[1][:, np.newaxis].astype(np.float32)
            input_data_loss = input_data

            if lossgradient != 'None':
                # compute per-example loss gradients and occlude the top-ranked bins
                g = loss_grad(mdl_lossgrad,
                              torch.from_numpy(input_data_loss).to(device).requires_grad_(True),
                              torch.from_numpy(labels).to(device), criterion)
                g = np.squeeze(g)
                input_data = (input_data - mean) * istd
                for i in range(batchsize):
                    if options.lossgrad_algorithm == 'grad':
                        rank_matrix = np.abs(g[i])
                    elif options.lossgrad_algorithm == 'gradxinp':
                        rank_matrix = np.squeeze(g[i] * input_data[i, :, :, :])
                    elif options.lossgrad_algorithm == 'gradorig':
                        rank_matrix = g[i]
                    v = np.argsort(rank_matrix, axis=None)[-cfg['occlude']:]
                    input_data[i, :, v // 80, v % 80] = 0
            else:
                # occlude randomly chosen time-frequency bins
                for i in range(batchsize):
                    v = np.random.choice(115 * 80, cfg['occlude'], replace=False)
                    input_data[i, :, v // 80, v % 80] = 0

            input_data = input_data.astype(floatX)
            labels = (0.02 + 0.96 * labels)
            optimizer.zero_grad()
            outputs = mdl(torch.from_numpy(input_data).to(device))
            loss = criterion(outputs, torch.from_numpy(labels).to(device))
            loss.backward()
            optimizer.step()
            loss_accum += loss.item()

        # - Compute validation loss and error if desired
        if options.validate:
            from eval import evaluate
            mdl.train(False)
            val_loss = 0
            preds = []
            labs = []
            max_len = fps
            num_iter = 0

            for spect, label, g in zip(mel_spects_val, labels_val, loss_grad_val):
                num_excerpts = len(spect) - blocklen + 1
                excerpts = np.lib.stride_tricks.as_strided(
                    spect, shape=(num_excerpts, blocklen, spect.shape[1]),
                    strides=(spect.strides[0], spect.strides[0], spect.strides[1]))

                # - Pass mini-batches through the network and concatenate results
                for pos in range(0, num_excerpts, batchsize):
                    input_data = np.transpose(
                        excerpts[pos:pos + batchsize, :, :, np.newaxis], (0, 3, 1, 2))
                    if pos + batchsize > num_excerpts:
                        label_batch = label[pos:num_excerpts,
                                            np.newaxis].astype(np.float32)
                    else:
                        label_batch = label[pos:pos + batchsize,
                                            np.newaxis].astype(np.float32)
                    if lossgradient != 'None':
                        input_data = (input_data - mean) * istd
                        for i in range(input_data.shape[0]):
                            if options.lossgrad_algorithm == 'grad':
                                rank_matrix = np.abs(g[i])
                            elif options.lossgrad_algorithm == 'gradxinp':
                                rank_matrix = np.squeeze(g[i] * input_data[i, :, :, :])
                            elif options.lossgrad_algorithm == 'gradorig':
                                rank_matrix = g[i]
                            v = np.argsort(np.abs(rank_matrix), axis=None)[-cfg['occlude']:]
                            input_data[i, :, v // 80, v % 80] = 0
                    else:
                        for i in range(input_data.shape[0]):
                            v = np.random.choice(115 * 80, cfg['occlude'], replace=False)
                            input_data[i, :, v // 80, v % 80] = 0
                    input_data = input_data.astype(floatX)
                    pred = mdl(torch.from_numpy(input_data).to(device))
                    e = criterion(pred, torch.from_numpy(label_batch).to(device))
                    preds = np.append(preds, pred[:, 0].cpu().detach().numpy())
                    labs = np.append(labs, label_batch)
                    val_loss += e.item()
                    num_iter += 1

            print("Validation loss: %.3f" % (val_loss / num_iter))
            _, results = evaluate(preds, labs)
            print("Validation error: %.3f" % (1 - results['accuracy']))

            if 1 - results['accuracy'] < best_val_error:
                torch.save(mdl.state_dict(), os.path.join(modelfile, 'model.pth'))
                best_val_loss = val_loss / num_iter
                best_val_error = 1 - results['accuracy']
                print('New saved model', best_val_loss, best_val_error)

        # Update the learning rate
        scheduler.step()
        print('Training Loss per epoch', loss_accum / epochsize)

        # - Save parameters for examining
        writer.add_scalar('Training Loss', loss_accum / epochsize, epoch)
        if options.validate:
            writer.add_scalar('Validation loss', val_loss / num_iter, epoch)
            writer.add_scalar('Validation error', 1 - results['accuracy'], epoch)
        writer.add_scalar('Gradient norm', total_norm, epoch)

    if not options.validate:
        torch.save(mdl.state_dict(), os.path.join(modelfile, 'model.pth'))
    with io.open(os.path.join(modelfile, 'model.vars'), 'w') as f:
        f.writelines('%s=%s\n' % kv for kv in cfg.items())
def main():
    global args, best_prec1, use_gpu
    args = parser.parse_args()
    use_gpu = torch.cuda.is_available()
    num_classes = 79

    # define state params
    state = {
        'batch_size': args.batch_size,
        'image_size': args.image_size,
        'max_epochs': args.epochs,
        'evaluate': args.evaluate,
        'resume': args.resume,
        'num_classes': num_classes,
        'load': args.load,
        'test': args.test
    }
    state['difficult_examples'] = True
    state['save_model_path'] = args.checkpoint
    state['workers'] = args.workers
    state['epoch_step'] = args.epoch_step
    state['lr'] = args.lr
    state['device_ids'] = args.device_ids
    if args.evaluate:
        state['evaluate'] = True
    if args.test:
        state['test'] = True

    if not args.test:
        # TODO: Make annotation paths more general
        train_dataset = DatasetLoader(
            args.data, img_set='train_symm',
            annotation=os.path.join(
                '/srv/data1/ashishsingh/Half_and_Half_Data/I2L/annotation',
                'symm_trainset_annotation.json'))
        if args.val_hnh:
            val_dataset = DatasetLoader_HNH(
                args.data, img_set='val',
                annotation=os.path.join(
                    '/srv/data1/ashishsingh/Half_and_Half_Data/I2L/annotation',
                    'i2l_valset_annotation.json'))
        else:
            val_dataset = DatasetLoader(
                args.data, img_set='val_coco',
                annotation=os.path.join(
                    '/srv/data1/ashishsingh/Half_and_Half_Data',
                    'valset_complete_metadata.json'))

        print("Initializing model: {}".format(args.arch))
        if args.pretrained:
            model = models.init_model(name=args.arch, num_classes=num_classes,
                                      pretrained='imagenet', use_gpu=use_gpu)
        else:
            model = models.init_model(name=args.arch, num_classes=num_classes,
                                      pretrained=None, use_gpu=use_gpu)
        print("Model size: {:.3f} M".format(count_num_param(model)))

        # define loss function (criterion)
        # criterion = nn.MultiLabelSoftMarginLoss()
        criterion = MultiLabelSoftmaxLoss()
        if args.val_hnh or args.test:
            criterion_val = nn.CrossEntropyLoss()
        else:
            criterion_val = nn.MultiLabelSoftMarginLoss()

        # define optimizer
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

        # Validate using test-like objective
        if args.val_hnh:
            engine = SymmetricMultiLabelHNHEngine(state)
        else:
            engine = SymmetricMultiLabelMAPEngine(state)
        engine.learning(model, criterion, criterion_val, train_dataset,
                        val_dataset, optimizer)
    else:
        test_dataset = DatasetLoader_HNH(
            args.data, img_set='test_cleaned',
            annotation=os.path.join(
                '/srv/data1/ashishsingh/Half_and_Half_Data/I2L/annotation',
                'i2l_testset_annotation.json'))
        print("Initializing model: {}".format(args.arch))
        model = models.init_model(name=args.arch, num_classes=num_classes,
                                  pretrained=None, use_gpu=use_gpu)
        criterion_test = nn.CrossEntropyLoss()
        engine = SymmetricMultiLabelHNHEngine(state)
        engine.test(model, criterion_test, test_dataset)
def test_DataLoader_init(get_fake_dataset):
    fake_dataset = get_fake_dataset
    dataset = DatasetLoader(fake_dataset)
    if not dataset.current_task == 0:
        raise AssertionError("Test failed")
def load_data(self, data_param, verbose=False):
    # load data
    if data_param['dataset'] == 'old':
        data_path = data_param['data_path'] + data_param['phase'] + '/'
    else:
        data_path = data_param['data_path']

    data = DatasetLoader(data_path=data_path,
                         n_sites=data_param['n_sites'],
                         train_size=data_param['train_size'],
                         val_size=data_param['val_size'],
                         test_size=data_param['test_size'])

    # normalization
    data.normalization(x=data_param['normalize_input'],
                       y=data_param['normalize_output'])

    # convert to torch tensor
    data.torch_tensor(device=data_param['device'])

    # print out
    statistics = """
    ==========================================================================
    Action: load data.
    Time: %s
    Task Id: %s
    Training Inputs: %s
    Training Outputs: %s
    Validation Inputs: %s
    Validation Outputs: %s
    Test Inputs: %s
    Test Outputs: %s
    X Scaler: %s
    Y Scaler: %s
    Device: %s
    Status: Successful
    --------------------------------------------------------------------------
    """ % (
        self.str_now(), self.current_hash,
        data.X_train.shape, data.y_train.shape,
        data.X_val.shape, data.y_val.shape,
        data.X_test.shape, data.y_test.shape,
        data.std_scaler_x, data.std_scaler_y,
        data_param['device']
    )

    if verbose:
        print(statistics)

    # write log file
    log_file = open(self.path_log + 'log_' + self.current_hash + '.txt', "a")
    log_file.write(statistics)
    log_file.close()

    return data
flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)") flags.DEFINE_integer("num_epochs", 100, "Number of training epochs") flags.DEFINE_integer( "evaluate_every", 500, "Evaluate model on validation dataset after this many steps") flags.DEFINE_float("lr", 0.001, "Learning rate") FLAGS = flags.FLAGS FLAGS(sys.argv) print("\nParameters:") print("-" * 20) for attr, value in sorted(FLAGS.__flags.items()): print("{}={}".format(attr, value.value)) print("") loader = DatasetLoader(sequence_max_length=FLAGS.sequence_length) def preprocess(): # Load data print("Loading data...") train_data, train_label, test_data, test_label = loader.load_dataset( dataset_path=FLAGS.dataset_path, dataset_type=FLAGS.dataset_type) print("Loading data succees...") # Preprocessing steps can go here return train_data, train_label, test_data, test_label def train(x_train, y_train, x_test, y_test):