def __call__(self, config, seed, device_str):
    # Set random seeds
    set_global_seeds(seed)
    # Create device
    device = torch.device(device_str)
    # Use log dir for current job (run_experiment)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

    # Create datasets for training and testing
    train_dataset = datasets.MNIST('data/', train=True, download=True,
                                   transform=transforms.ToTensor())
    test_dataset = datasets.MNIST('data/', train=False,
                                  transform=transforms.ToTensor())

    # Define GPU-dependent keyword arguments for DataLoader
    if config['cuda']:
        kwargs = {'num_workers': 1, 'pin_memory': True}
    else:
        kwargs = {}

    # Create data loaders for training and testing
    train_loader = DataLoader(train_dataset, batch_size=config['train.batch_size'],
                              shuffle=True, **kwargs)
    test_loader = DataLoader(test_dataset, batch_size=config['eval.batch_size'],
                             shuffle=True, **kwargs)

    # Create the model
    if config['network.type'] == 'VAE':
        model = VAE(config=config)
    elif config['network.type'] == 'ConvVAE':
        model = ConvVAE(config=config)
    else:
        raise ValueError("unknown network.type: {}".format(config['network.type']))
    model = model.to(device)

    # Create optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # Create engine
    engine = Engine(agent=model,
                    runner=None,
                    config=config,
                    device=device,
                    optimizer=optimizer,
                    train_loader=train_loader,
                    test_loader=test_loader)

    # Training and evaluation
    for epoch in range(config['train.num_epoch']):
        train_output = engine.train(n=epoch)
        engine.log_train(train_output, logdir=logdir, epoch=epoch)
        eval_output = engine.eval(n=epoch)
        engine.log_eval(eval_output, logdir=logdir, epoch=epoch)

    return None
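# The snippet above relies on a set_global_seeds helper that is not shown
# here. A minimal sketch of what such a helper typically does (the exact
# behavior in the original codebase is an assumption):
import random
import numpy as np
import torch

def set_global_seeds(seed):
    # Seed every RNG the training code may touch, for reproducibility
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)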
def main():
    train_loader = return_MVTecAD_loader(
        image_dir="./mvtec_anomaly_detection/grid/train/good/",
        batch_size=256,
        train=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    seed = 42
    out_dir = './logs'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    checkpoints_dir = "./checkpoints"
    if not os.path.exists(checkpoints_dir):
        os.mkdir(checkpoints_dir)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    model = VAE(z_dim=512).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
    num_epochs = 500
    for epoch in range(num_epochs):
        loss = train(model=model,
                     train_loader=train_loader,
                     device=device,
                     optimizer=optimizer,
                     epoch=epoch)
        print('epoch [{}/{}], train loss: {:.4f}'.format(
            epoch + 1, num_epochs, loss))
        # Save a checkpoint every 10 epochs
        if (epoch + 1) % 10 == 0:
            torch.save(
                model.state_dict(),
                os.path.join(checkpoints_dir, "{}.pth".format(epoch + 1)))
    test_loader = return_MVTecAD_loader(
        image_dir="./mvtec_anomaly_detection/grid/test/metal_contamination/",
        batch_size=10,
        train=False)
    # train, eval, and EBM are helpers defined elsewhere in this module
    eval(model=model, test_loader=test_loader, device=device)
    EBM(model, test_loader, device)
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    seed = 42
    out_dir = './logs'
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    checkpoints_dir = "./checkpoints"
    if not os.path.exists(checkpoints_dir):
        os.mkdir(checkpoints_dir)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    # Restore the trained model from the final checkpoint
    model = VAE(z_dim=512)
    model.load_state_dict(torch.load("./checkpoints/500.pth"))
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
    test_loader = return_MVTecAD_loader(
        image_dir="./mvtec_anomaly_detection/grid/test/metal_contamination/",
        batch_size=10,
        train=False)
    # eval(model=model, test_loader=test_loader, device=device)
    EBM(model, test_loader, device)
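# The eval/EBM helpers above are defined elsewhere in the repository. A common
# way to score MVTec-AD test images with a trained VAE is a per-pixel
# reconstruction-error heatmap; the sketch below only illustrates that idea
# (the name anomaly_map and the (recon, mu, logvar) return signature are
# assumptions, not the repository's actual implementation):
import torch

def anomaly_map(model, x, device):
    # x: a batch of test images, shape (N, C, H, W)
    model.eval()
    with torch.no_grad():
        x = x.to(device)
        recon, _, _ = model(x)  # assumes model returns (recon, mu, logvar)
        # Per-pixel squared error, averaged over channels -> (N, H, W) heatmap
        return ((x - recon) ** 2).mean(dim=1).cpu()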
def train(network_architecture, training_data, data, lr=0.001, batch_size=8,
          training_epochs=1000, display_step=10):
    print("-" * 15)
    print("Network Training is Started..")
    vae = VAE(network_architecture, learning_rate=lr, batch_size=batch_size)
    # Start the network learning: training cycle
    for epochs in range(training_epochs):
        avg_cost = 0
        # Iterate through the batches
        for batch_num in range(len(training_data[0])):
            # Get features and labels
            features = np.stack(training_data[0][batch_num], axis=0)
            labels = np.stack(training_data[1][batch_num], axis=0)
            # Fit the training batch data
            cost = vae.partial_fit(features)
            # Accumulate the average loss
            avg_cost += cost / data.n_samples * batch_size
        # Display logs every display_step epochs
        if epochs % display_step == 0:
            print("Epochs:{}, cost={}".format(epochs + 1, avg_cost))
    return vae  # return the whole trained VAE object
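# Example call, assuming network_architecture follows the common
# two-layer-encoder/two-layer-decoder convention for this style of TensorFlow
# VAE (the exact keys expected by VAE() are an assumption):
network_architecture = dict(
    n_hidden_recog_1=500,  # encoder layer 1
    n_hidden_recog_2=500,  # encoder layer 2
    n_hidden_gener_1=500,  # decoder layer 1
    n_hidden_gener_2=500,  # decoder layer 2
    n_input=784,           # e.g. flattened 28x28 images
    n_z=20)                # latent dimensionality
# vae = train(network_architecture, training_data, data, batch_size=8)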
def __call__(self, config, seed, device):
    set_global_seeds(seed)
    logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)

    train_loader, test_loader = self.make_dataset(config)
    model = VAE(config=config, device=device)
    model.train_loader = train_loader
    model.test_loader = test_loader
    model.optimizer = optim.Adam(model.parameters(), lr=1e-3)

    engine = Engine(agent=model, runner=None, config=config)

    for epoch in range(config['train.num_epoch']):
        train_output = engine.train(n=epoch)
        engine.log_train(train_output, logdir=logdir, epoch=epoch)
        eval_output = engine.eval(n=epoch)
        engine.log_eval(eval_output, logdir=logdir, epoch=epoch)

    return None
    # Tail of the evaluation helper: encode the batch, classify from the
    # latents, and count correct predictions.
    batch_gpu = batch.to(device)
    z = autoencoder(batch_gpu)["z"]
    batch_gpu["z"] = z
    preds = net(batch_gpu)
    preds_cpu = preds.to('cpu')
    correct = np.count_nonzero((batch["id"] - preds_cpu["id"].argmax(1)) == 0)
    total += len(batch_gpu["id"])
    acc += correct
    return acc / total

# Train VAE
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vae = VAE()
# Use multiple GPUs if available
if torch.cuda.device_count() > 1:
    print("Using", torch.cuda.device_count(), "GPUs")
    vae = nn.DataParallel(vae)
vae.to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=0.001)
losses1 = []
valError = []
for epoch in range(50):
    train_iter = iter(train_loader)
    batch = None
    preds = None
    tensorboard.add_images('Image',
                           tilde_x.view(args.batch_size, 1, 28, 28),
                           global_step=epoch)
    save_image(tilde_x.view(args.batch_size, 1, 28, 28),
               os.path.join(args.save_dir, 'samples%d.jpg' % (epoch)))


if __name__ == '__main__':
    args = config()
    tensorboard = SummaryWriter(log_dir='logs')
    if os.path.isdir(args.save_dir):
        shutil.rmtree(args.save_dir)
    os.makedirs(args.save_dir)
    device = torch.device('cuda:{}'.format(args.gpu))
    model = VAE(network_type=args.network_type, latent_dim=20).to(device)
    opt = optim.Adam(model.parameters(), lr=1e-3)
    train_items = DataLoader(datasets.MNIST(root='./data',
                                            train=True,
                                            download=True,
                                            transform=transforms.ToTensor()),
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=multiprocessing.cpu_count(),
                             pin_memory=True)
    test_items = DataLoader(datasets.MNIST(root='./data',
                                           train=False,
                                           download=True,
                                           transform=transforms.ToTensor()),
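# The training step for a model like the one above typically minimizes the
# negative ELBO: a reconstruction term plus a KL term. A standard PyTorch
# formulation (a sketch, assuming a Bernoulli decoder with outputs in [0, 1]
# and a model that returns (recon_x, mu, logvar)):
import torch
import torch.nn.functional as F

def vae_loss(recon_x, x, mu, logvar):
    # Reconstruction term: binary cross-entropy summed over pixels
    bce = F.binary_cross_entropy(recon_x, x, reduction='sum')
    # KL(q(z|x) || N(0, I)) in closed form for a diagonal Gaussian posterior
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return bce + kld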
cudnn.benchmark = True

# dataset
dataset = ImageFolder(
    root=opt.dataroot,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
)
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=opt.batchSize,
                                         shuffle=True,
                                         num_workers=int(opt.workers))

# model
netVAE = VAE()
criterion = CustomLoss(3e-6)
optimizer = optim.Adam(netVAE.parameters(), lr=0.0001, betas=(0.5, 0.999))
if opt.cuda:
    netVAE.cuda()

# train
min_loss = float('inf')
kld_loss_list, mse_loss_list = [], []
for epoch in range(1, opt.epochs + 1):
    mse_loss, kld_loss, total_loss = 0, 0, 0
    # ImageFolder yields (image, label) pairs; the labels are unused here
    for batch_idx, (in_fig, _) in enumerate(dataloader):
        x = Variable(in_fig)
        if opt.cuda:
            x = x.cuda()
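# CustomLoss is defined elsewhere in this repository; given the mse/kld
# bookkeeping above, it presumably combines a reconstruction term with a
# weighted KL term. A sketch of that pattern (reading the 3e-6 argument as
# the KL weight is an assumption):
import torch
import torch.nn as nn
import torch.nn.functional as F

class CustomLoss(nn.Module):
    def __init__(self, kld_weight):
        super().__init__()
        self.kld_weight = kld_weight

    def forward(self, recon_x, x, mu, logvar):
        # Pixel-wise reconstruction error
        mse = F.mse_loss(recon_x, x, reduction='sum')
        # Closed-form KL divergence for a diagonal Gaussian posterior
        kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return mse + self.kld_weight * kld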
if opt.manualSeed is None:
    opt.manualSeed = random.randint(1, 10000)
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)

# specify the gpu id if using only 1 gpu
if opt.ngpu == 1:
    os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id)
if opt.cuda:
    torch.cuda.manual_seed_all(opt.manualSeed)
cudnn.benchmark = True

# model
netVAE = VAE('test')
if opt.netVAE != '':
    netVAE.load_state_dict(torch.load(opt.netVAE)['state_dict'])
    print('loading model ....')
netVAE.eval()
print(netVAE)
if opt.cuda:
    netVAE.cuda()

print("Saving random generation......")
decoder = netVAE.decoder
# method 1
noise_1 = torch.randn(1024, 5, 5)
noise_2 = torch.randn(1024, 5, 5)
# Create tf dataset
with tf.name_scope('DataPipe'):
    filenames = tf.placeholder_with_default(get_files(FLAGS.data_path),
                                            shape=[None],
                                            name='filenames_tensor')
    dataset = load_and_process_data(filenames,
                                    batch_size=FLAGS.batch_size,
                                    shuffle=FLAGS.shuffle)
    iterator = dataset.make_initializable_iterator()
    input_batch = iterator.get_next()

# Create model
vae = VAE(
    input_batch,
    FLAGS.latent_dim,
    FLAGS.learning_rate,
)

init_vars = [
    tf.local_variables_initializer(),
    tf.global_variables_initializer()
]
saver = tf.train.Saver()

# Training loop
with tf.Session() as sess:
    writer = tf.summary.FileWriter('./logs', sess.graph)
    sess.run(init_vars)
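    # The snippet is truncated here. A TF1 training loop over an
    # initializable iterator usually continues along these lines;
    # FLAGS.num_epochs, vae.train_op, and vae.loss are assumed names,
    # not necessarily this model's actual API:
    for epoch in range(FLAGS.num_epochs):
        sess.run(iterator.initializer)
        while True:
            try:
                _, loss_value = sess.run([vae.train_op, vae.loss])
            except tf.errors.OutOfRangeError:
                break  # the dataset is exhausted for this epoch
        saver.save(sess, './checkpoints/vae', global_step=epoch)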
def run(config_file, fold=0):
    cf = imp.load_source('cf', config_file)
    print('fold:', fold)
    dataset_root = cf.dataset_root_mmsB
    print('train path: {}'.format(dataset_root))

    # ==================================================================================================================
    BATCH_SIZE = cf.BATCH_SIZE
    INPUT_PATCH_SIZE = cf.INPUT_PATCH_SIZE
    num_classes = cf.num_classes
    EXPERIMENT_NAME = cf.EXPERIMENT_NAME
    results_dir = os.path.join(cf.results_dir, "fold%d/" % fold)
    if not os.path.isdir(results_dir):
        os.mkdir(results_dir)
    n_epochs = cf.n_epochs
    lr_decay = cf.lr_decay
    base_lr = cf.base_lr
    n_batches_per_epoch = cf.n_batches_per_epoch  # 100
    n_test_batches = cf.n_test_batches  # 10
    n_feedbacks_per_epoch = cf.n_feedbacks_per_epoch  # 10
    num_workers = cf.num_workers
    workers_seeds = cf.workers_seeds
    print('base lr: {}, lr decay: {}'.format(cf.base_lr, cf.lr_decay))
    # ==================================================================================================================

    # this split is seeded, so it is identical each time
    train_keys, test_keys = get_split(fold)
    print('train_keys:', train_keys)
    print('val_keys:', test_keys)
    train_data = load_dataset(train_keys, root_dir=dataset_root)
    val_data = load_dataset(test_keys, root_dir=dataset_root)

    x_sym = cf.x_sym
    seg_sym = T.tensor4()

    R_mask = VAE(1, x_sym, BATCH_SIZE, 'same', (None, None), 1,
                 lasagne.nonlinearities.leaky_rectify)
    output_layer_for_loss = R_mask
    # draw_to_file(lasagne.layers.get_all_layers(net), os.path.join(results_dir, 'network.png'))

    # No data augmentation for validation
    data_gen_validation = BatchGenerator_2D(val_data, BATCH_SIZE, num_batches=None,
                                            seed=False, PATCH_SIZE=INPUT_PATCH_SIZE)
    data_gen_validation = MultiThreadedAugmenter(
        data_gen_validation,
        ConvertSegToOnehotTransform(range(num_classes), 0, "seg_onehot"),
        1, 2, [0])

    # add some weight decay
    # l2_loss = lasagne.regularization.regularize_network_params(output_layer_for_loss,
    #                                                            lasagne.regularization.l2) * cf.weight_decay

    # the distinction between prediction_train and prediction_test only
    # matters if dropout is enabled
    prediction_train = lasagne.layers.get_output(output_layer_for_loss, x_sym,
                                                 deterministic=False,
                                                 batch_norm_update_averages=False,
                                                 batch_norm_use_averages=False)
    loss_vec = F_loss(prediction_train, seg_sym)
    loss = loss_vec.mean()
    # acc_train = T.mean(T.eq(T.argmax(prediction_train, axis=1), seg_sym.argmax(-1)), dtype=theano.config.floatX)

    prediction_test = lasagne.layers.get_output(output_layer_for_loss, x_sym,
                                                deterministic=True,
                                                batch_norm_update_averages=False,
                                                batch_norm_use_averages=False)
    prediction_test = T.round(prediction_test, mode='half_to_even')
    loss_val = F_loss(prediction_test, seg_sym)
    loss_val = loss_val.mean()
    # acc = T.mean(T.eq(T.argmax(prediction_test, axis=1), seg_sym.argmax(-1)), dtype=theano.config.floatX)

    # the learning rate has to be a shared variable because we decrease it
    # every epoch
    params = lasagne.layers.get_all_params(output_layer_for_loss, trainable=True)
    learning_rate = theano.shared(base_lr)
    updates = lasagne.updates.adam(T.grad(loss, params), params,
                                   learning_rate=learning_rate,
                                   beta1=0.9, beta2=0.999)
    train_fn = theano.function([x_sym, seg_sym], [loss], updates=updates)
    val_fn = theano.function([x_sym, seg_sym], [loss_val])

    dice_scores = None
    data_gen_train = create_data_gen_train(train_data, BATCH_SIZE, num_classes,
                                           num_workers=num_workers,
                                           do_elastic_transform=True,
                                           alpha=(100., 350.), sigma=(14., 17.),
                                           do_rotation=True,
                                           a_x=(0, 2. * np.pi),
                                           a_y=(-0.000001, 0.00001),
                                           a_z=(-0.000001, 0.00001),
                                           do_scale=True, scale_range=(0.7, 1.3),
                                           seeds=workers_seeds)  # new se has no brain mask

    all_training_losses = []
    all_validation_losses = []
    all_validation_accuracies = []
    all_training_accuracies = []
    all_val_dice_scores = []
    epoch = 0
    val_min = 0
    while epoch < n_epochs:
        # relax the augmentation strength as training progresses
        if epoch == 100:
            data_gen_train = create_data_gen_train(train_data, BATCH_SIZE, num_classes,
                                                   num_workers=num_workers,
                                                   do_elastic_transform=True,
                                                   alpha=(0., 250.), sigma=(14., 17.),
                                                   do_rotation=True,
                                                   a_x=(-2 * np.pi, 2 * np.pi),
                                                   a_y=(-0.000001, 0.00001),
                                                   a_z=(-0.000001, 0.00001),
                                                   do_scale=True, scale_range=(0.75, 1.25),
                                                   seeds=workers_seeds)  # new se has no brain mask
        if epoch == 125:
            data_gen_train = create_data_gen_train(train_data, BATCH_SIZE, num_classes,
                                                   num_workers=num_workers,
                                                   do_elastic_transform=True,
                                                   alpha=(0., 150.), sigma=(14., 17.),
                                                   do_rotation=True,
                                                   a_x=(-2 * np.pi, 2 * np.pi),
                                                   a_y=(-0.000001, 0.00001),
                                                   a_z=(-0.000001, 0.00001),
                                                   do_scale=True, scale_range=(0.8, 1.2),
                                                   seeds=workers_seeds)  # new se has no brain mask
        # learning_rate.set_value(np.float32(base_lr * lr_decay ** epoch))
        print("epoch: ", epoch, " learning rate: ", learning_rate.get_value())

        train_loss = 0
        batch_ctr = 0
        for data_dict in data_gen_train:
            # Iterating calls BatchGenerator_2D.__iter__ once and then
            # __next__ on every step, so each step yields a random batch,
            # most probably different from the previous one.
            seg = data_dict["seg_onehot"].astype(np.float32)
            seg = np.argmax(seg, 1)
            seg = seg[:, np.newaxis, ...].astype(np.float32)
            if batch_ctr > (n_batches_per_epoch - 1):
                break
            # the VAE reconstructs the segmentation from itself
            loss = train_fn(seg, seg)  # type: numpy.ndarray
            # print('batch loss:', loss[0])
            train_loss += loss[0].item()
            batch_ctr += 1
        train_loss /= n_batches_per_epoch
        print("training loss average on epoch: ", train_loss)

        # validate every epoch on n_test_batches (here: 10) batches
        val_loss = 0
        valid_batch_ctr = 0
        for data_dict in data_gen_validation:
            seg = data_dict["seg_onehot"].astype(np.float32)
            seg = np.argmax(seg, 1)
            seg = seg[:, np.newaxis, ...].astype(np.float32)
            loss = val_fn(seg, seg)
            val_loss += loss[0].item()
            valid_batch_ctr += 1
            if valid_batch_ctr > (n_test_batches - 1):
                break
        val_loss /= n_test_batches
        print('val_loss:', val_loss)

        with open(os.path.join(results_dir, "%s_Params.pkl" % (EXPERIMENT_NAME)), 'wb') as f:
            cPickle.dump(lasagne.layers.get_all_param_values(output_layer_for_loss), f)

        epoch += 1