def main_fewshot():
    """Run the MNIST few-shot experiment."""
    log_file = os.path.join(ARGS.result_dir, ARGS.experiment_name, "log.txt")
    print("Result dir:", ARGS.result_dir)
    print("Log file:", log_file)

    # Setup logging in base_dir/log.txt
    setup_logging(level=ARGS.log_level, filename=log_file)

    logger.info(" -- MNIST Few Shot Experiment -- Started ")
    tstart = time.time()
    try:
        if not ARGS.cuda:
            # Set number of CPU threads
            torch.set_num_threads(1)

        # Create and run experiment
        experiment = FewShotExperiment(ARGS)
        experiment.run()
    except Exception as e:
        # logger.exception already logs the traceback
        logger.exception("Experiment crashed: %s", e)

    # Measure time
    tstr = time_delta_now(tstart)
    logger.info(" -- MNIST -- Finished, took %s", tstr)
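# `time_delta_now` is used throughout for wall-clock reporting but is not
# defined in this file. A minimal sketch of what it could look like, assuming
# it formats the seconds elapsed since a `time.time()` timestamp as a
# human-readable string (the real helper's format may differ):
import time
from datetime import timedelta


def time_delta_now(t_start: float) -> str:
    """Return the time elapsed since `t_start`, e.g. '0:01:23.456789'."""
    return str(timedelta(seconds=time.time() - t_start))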
def train_multilabel(model, device, train_loader, optimizer, epoch, log_interval=10):
    """
    Train the model for one epoch.

    Args:
        model (nn.Module): Network model.
        device: Device to train on.
        train_loader: Torch data loader for the training set.
        optimizer: Torch optimizer.
        epoch: Current epoch.
        log_interval (int): Number of batches between log outputs.
    """
    model.train()

    # Create clipper
    dist_clipper = DistributionClipper(device)

    n_samples = get_n_samples_from_loader(train_loader)
    loss_fn = nn.BCELoss()

    t_start = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):
        # Send data to correct device
        data, target = data.to(device), target.to(device)

        # Reset gradients
        optimizer.zero_grad()

        # Inference
        output = model(data)

        # Compute loss
        loss = loss_fn(output.sigmoid(), target)

        # Backprop
        loss.backward()
        optimizer.step()

        # Clip distribution values and weights
        model.apply(dist_clipper)

        # Log stuff
        if batch_idx % log_interval == 0:
            logger.info(
                "Train Epoch: {} [{: >5}/{: <5} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch,
                    batch_idx * len(data),
                    n_samples,
                    100.0 * batch_idx / len(train_loader),
                    loss.item(),
                )
            )

    t_delta = time_delta_now(t_start)
    logger.info("Train Epoch: {} took {}".format(epoch, t_delta))
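# `DistributionClipper` is passed to `model.apply(...)`, so it has to be a
# callable taking a module. A sketch under that assumption: clamp leaf
# distribution parameters into a valid range after each step. The attribute
# name `stds` and the bound 1e-4 are illustrative, not taken from the project.
# `get_n_samples_from_loader` is sketched with the obvious semantics.
class DistributionClipper:
    """Clamp distribution parameters to keep them in a valid range."""

    def __init__(self, device, lower_bound: float = 1e-4):
        self.device = device  # kept for signature compatibility
        self.lower_bound = lower_bound

    def __call__(self, module):
        # Clamp standard deviations of Gaussian leaves in-place
        if hasattr(module, "stds"):
            module.stds.data.clamp_(min=self.lower_bound)


def get_n_samples_from_loader(loader) -> int:
    """Total number of samples behind a data loader (assumed semantics)."""
    return len(loader.dataset)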
def main():
    """Run the MNIST experiment."""
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
        [str(x) for x in ARGS.cuda_device_id])
    float_formatter = lambda x: "%.3f" % x
    np.set_printoptions(formatter={"float_kind": float_formatter})

    # Setup logging in base_dir/log.txt
    log_file = os.path.join(ARGS.result_dir, ARGS.experiment_name, "log.txt")
    setup_logging(level=ARGS.log_level, filename=log_file)
    logger.info(" -- MNIST Multilabel -- Started ")
    print("Result dir: ", ARGS.result_dir)
    print("Log file: ", log_file)

    tstart = time.time()
    try:
        if not ARGS.cuda:
            # Set number of CPU threads
            torch.set_num_threads(ARGS.njobs)
        else:
            ARGS.cuda_device_id = ARGS.cuda_device_id[0]

        if ARGS.reuse_base_dir is not None:
            base_dir = ARGS.reuse_base_dir
        else:
            base_dir = generate_run_base_dir(
                suffix="debug",
                experiment="multilabel-mnist",
                result_dir=ARGS.result_dir,
                timestamp=tstart,
            )
        exp_dir = generate_experiment_dir(base_dir, ARGS.net, "test")

        # Save commandline arguments
        save_args(ARGS, exp_dir)

        # Create and run experiment
        run_multilabel_mnist(ARGS, exp_dir)
    except Exception as e:
        # logger.exception already logs the traceback
        logger.exception("Experiment crashed: %s", e)

    # Measure time
    tstr = time_delta_now(tstart)
    logger.info(" -- MNIST -- Finished, took %s", tstr)
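# `save_args`, `generate_run_base_dir` and `generate_experiment_dir` come from
# the project's utilities. A minimal sketch of `save_args`, assuming it
# serializes the argparse namespace as JSON next to the logs (the real helper
# may use a different format or filename):
import json
import os


def save_args(args, exp_dir: str) -> None:
    """Dump the commandline arguments to `exp_dir/args.json`."""
    os.makedirs(exp_dir, exist_ok=True)
    with open(os.path.join(exp_dir, "args.json"), "w") as f:
        json.dump(vars(args), f, indent=2, sort_keys=True)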
def run(main_method, exp_dir, args, cuda_queue):
    """Run the MNIST experiment."""
    # Get cuda device from the multiprocessing queue
    cuda_device_id = cuda_queue.get()
    args.cuda_device_id = cuda_device_id
    print("Starting {} with args \n{}\non device {}.".format(
        main_method.__name__, args, cuda_device_id))
    print("os.environ[CUDA_VISIBLE_DEVICES]=",
          os.environ["CUDA_VISIBLE_DEVICES"])
    float_formatter = lambda x: "%.3f" % x
    np.set_printoptions(formatter={"float_kind": float_formatter})

    # Setup logging in exp_dir/log.txt
    log_file = os.path.join(exp_dir, "log.txt")
    setup_logging(level=args.log_level, filename=log_file)
    logger.info(" -- MNIST Multilabel -- Started ")
    print("Result dir: ", args.result_dir)
    print("Base dir: ", exp_dir)
    print("Log file: ", log_file)

    # Save commandline arguments
    save_args(args, exp_dir)

    tstart = time.time()
    try:
        # Set number of CPU threads
        torch.set_num_threads(args.njobs)

        # Create and run experiment
        main_method(args, exp_dir)
    except Exception as e:
        # logger.exception already logs the traceback
        logger.exception("Experiment crashed: %s", e)

    # Measure time
    tstr = time_delta_now(tstart)
    logger.info(" -- MNIST -- Finished, took %s", tstr)

    # Free up cuda device
    cuda_queue.put(cuda_device_id)
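# `run` blocks on `cuda_queue.get()` until a device id is free, which lets
# several experiment processes share a small pool of GPUs. A hypothetical
# driver for it (the `launch_all` name and `experiments` structure are made
# up for illustration):
import torch.multiprocessing as mp


def launch_all(main_method, experiments, n_gpus=2):
    """Run one process per (exp_dir, args) pair, sharing `n_gpus` devices."""
    ctx = mp.get_context("spawn")
    cuda_queue = ctx.Queue()
    for gpu_id in range(n_gpus):
        cuda_queue.put(gpu_id)  # mark each device as initially free

    procs = []
    for exp_dir, args in experiments:
        p = ctx.Process(target=run, args=(main_method, exp_dir, args, cuda_queue))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()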
def run_torch(n_epochs=100, batch_size=256):
    """Run the torch code.

    Args:
        n_epochs (int, optional): Number of epochs.
        batch_size (int, optional): Batch size.
    """
    from src.spn.rat_spn import RatSpnConstructor
    from torch import optim
    from torch import nn

    assert len(sys.argv) == 2, "Usage: train.mnist cuda/cpu"
    dev = sys.argv[1]

    rg = RatSpnConstructor(in_features=28 * 28, C=10, S=10, I=20, dropout=0.0)
    n_splits = 2
    for _ in range(0, n_splits):
        rg.random_split(2, 1)

    if dev == "cpu":
        device = torch.device("cpu")
        use_cuda = False
    else:
        device = torch.device("cuda:0")
        use_cuda = True
        # Let cuDNN pick the fastest kernels for the fixed input shape
        torch.backends.cudnn.benchmark = True

    print("Using device:", device)

    model = rg.build().to(device)
    model.train()
    print(model)
    print(f"Layer 0: {count_params(model.region_spns[0]._leaf) * n_splits}")
    for i in range(1, len(model.region_spns[0]._inner_layers) + 1):
        print(
            f"Layer {i}: {count_params(model.region_spns[0]._inner_layers[i - 1]) * n_splits}"
        )
    print("Number of pytorch parameters: ", count_params(model))

    # Define optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    train_loader, test_loader = get_mnist_loaders(
        use_cuda, batch_size=batch_size, device=device)

    log_interval = 100

    for epoch in range(n_epochs):
        t_start = time.time()
        running_loss = 0.0
        for batch_idx, (data, target) in enumerate(train_loader):
            # Send data to correct device
            data, target = data.to(device), target.to(device)
            data = data.view(data.shape[0], -1)

            # Reset gradients
            optimizer.zero_grad()

            # Inference
            output = model(data)

            # Compute loss
            loss = loss_fn(output, target)

            # Backprop
            loss.backward()
            optimizer.step()

            # Log stuff
            running_loss += loss.item()
            if batch_idx % log_interval == (log_interval - 1):
                pred = (output.argmax(1).eq(target).sum().cpu().numpy() /
                        data.shape[0] * 100)
                print(
                    "Train Epoch: {} [{: >5}/{: <5} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.0f}%"
                    .format(
                        epoch,
                        batch_idx * len(data),
                        60000,  # size of the MNIST training set
                        100.0 * batch_idx / len(train_loader),
                        running_loss / log_interval,
                        pred,
                    ),
                    end="\r",
                )
                running_loss = 0.0

        t_delta = time_delta_now(t_start)
        print("Train Epoch: {} took {}".format(epoch, t_delta))
        if epoch % 5 == 4:
            print("Evaluating model ...")
            evaluate_model(model, device, train_loader, "Train")
            evaluate_model(model, device, test_loader, "Test")
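# `count_params` and `evaluate_model` are project helpers not shown here.
# Minimal sketches under the usual assumptions (trainable-parameter count;
# plain accuracy over a loader, with inputs flattened as in the training loop
# above) -- the real implementations may report more metrics:
import torch


def count_params(model: torch.nn.Module) -> int:
    """Number of trainable parameters in a module."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


@torch.no_grad()
def evaluate_model(model, device, loader, tag: str) -> None:
    """Print classification accuracy of `model` over `loader`."""
    model.eval()
    correct, total = 0, 0
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        output = model(data.view(data.shape[0], -1))
        correct += output.argmax(1).eq(target).sum().item()
        total += target.shape[0]
    model.train()
    print("{} accuracy: {:.2f}%".format(tag, 100.0 * correct / total))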
            spnneuron=SPNNeuronShallow,
            in_channels=in_channels,
        ).to(device)
    else:
        raise Exception("Invalid network: %s" % tag)

    return model


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    args = parser.parse_args(args=[])
    args.resnet_arch = "resnet18"
    dev = "cuda:0"
    resnet = get_model_by_tag("resnet", torch.device(dev), args, 50 ** 2, 10)
    resnetspn = get_model_by_tag("resnet+spn", torch.device(dev), args, 50 ** 2, 10)
    shallow = get_model_by_tag("spn-shallow", torch.device(dev), args, 50 ** 2, 10)

    x = torch.rand(3, 1, 50, 50).to(torch.device(dev))
    for net, name in [
        (resnet, "resnet"),
        (resnetspn, "resnetspn"),
        (shallow, "shallow"),
    ]:
        print(f"{name}: {count_params(net)}")
        t = time.time()
        net(x)
        print(name, "took", time_delta_now(t))
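# CUDA kernels launch asynchronously, so timing `net(x)` with `time.time()`
# alone can undercount the real forward cost. A more faithful variant of the
# benchmark loop body above, assuming a CUDA device (a sketch, not taken from
# the original code):
import time

import torch


def timed_forward(net, x) -> float:
    """Run one forward pass and return wall-clock seconds, synchronized."""
    if x.is_cuda:
        torch.cuda.synchronize()
    t = time.time()
    with torch.no_grad():
        net(x)
    if x.is_cuda:
        torch.cuda.synchronize()
    return time.time() - t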