예제 #1
0
def main(argv):
    """Train a network to predict primitives using the matrix loss.

    Parses the command line, creates
    ``<output_directory>/<experiment_tag>``, stores the experiment parameters
    there and trains the model for ``args.epochs`` epochs of at most
    ``args.steps_per_epoch`` batches each. The running loss is written to
    ``train.txt`` every 50 batches, a ``model_<epoch>`` checkpoint is saved
    every 5 epochs and ``model_final`` is saved once training ends.

    Arguments
    ---------
        argv: The list of command line arguments, handed verbatim to
              ``parser.parse_args``
    """
    parser = argparse.ArgumentParser(
        description="Train a network to predict primitives")

    parser.add_argument("dataset_directory",
                        help="Path to the directory containing the dataset")

    parser.add_argument("output_directory",
                        help="Save the output files in that directory")

    parser.add_argument(
        "--weight_file",
        default=None,
        help=("The path to a previously trainined model to continue"
              " the training from"))
    parser.add_argument("--continue_from_epoch",
                        default=0,
                        type=int,
                        help="Continue training from epoch (default=0)")

    parser.add_argument("--run_on_gpu", action="store_true", help="Use GPU")

    parser.add_argument("--experiment_tag",
                        default=None,
                        help="Tag that refers to the current experiment")

    parser.add_argument("--cache_size",
                        type=int,
                        default=2000,
                        help="The batch provider cache size")

    parser.add_argument("--seed",
                        type=int,
                        default=27,
                        help="Seed for the PRNG")

    # Register the remaining option groups and parse the command line
    add_nn_parameters(parser)
    add_dataset_parameters(parser)
    add_datatype_parameters(parser)
    add_training_parameters(parser)
    args = parser.parse_args(argv)

    # NOTE: --run_on_gpu is trusted as is, CUDA availability is not checked
    if args.run_on_gpu:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    print("Running code on", device)

    # Check if output directory exists and if it doesn't create it
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    # Create an experiment directory using the experiment_tag
    if args.experiment_tag is None:
        experiment_tag = id_generator(9)
    else:
        experiment_tag = args.experiment_tag

    experiment_directory = os.path.join(args.output_directory, experiment_tag)
    if not os.path.exists(experiment_directory):
        os.makedirs(experiment_directory)

    # Store the parameters for the current experiment in a json file
    save_experiment_params(args, experiment_tag, experiment_directory)
    print("Save experiment statistics in %s" % (experiment_tag, ))

    # The training evolution is stored in train.txt; start a fresh file
    # unless we continue from a previously trained model
    train_stats = os.path.join(experiment_directory, "train.txt")
    train_stats_mode = "w" if args.weight_file is None else "a+"

    # Set the random seed
    np.random.seed(args.seed)
    torch.manual_seed(np.random.randint(np.iinfo(np.int32).max))
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(np.random.randint(np.iinfo(np.int32).max))

    # TODO: M and the output shape are hard-coded (M is also handed to
    # NetworkParameters below, presumably the number of primitives - confirm)
    M = 11
    data_output_shape = (M, 7)

    # Create a factory that returns the appropriate data type based on the
    # input argument
    data_factory = DataFactory(
        args.data_type, tuple([data_input_shape(args), data_output_shape]))

    # Create a dataset instance to generate the samples for training
    training_dataset = get_dataset_type("matrix_loss")(
        (DatasetBuilder().with_dataset(args.dataset_type).build(
            args.dataset_directory)),
        data_factory,
        transform=compose_transformations(args.data_type))

    training_loader = DataLoader(training_dataset,
                                 batch_size=32,
                                 num_workers=4,
                                 pin_memory=True,
                                 drop_last=True,
                                 shuffle=True)

    # Build the model to be used for training
    network_params = NetworkParameters(args.architecture, M, False)
    model = network_params.network(network_params)

    # Move model to the device to be used
    model.to(device)

    # Check whether there is a weight file provided to continue training from
    if args.weight_file is not None:
        model.load_state_dict(torch.load(args.weight_file))
    model.train()

    # Build an optimizer object to compute the gradients of the parameters
    optimizer = optimizer_factory(args, model)

    # Loop over the dataset multiple times. The statistics file is opened in
    # a with-block so that it is closed even if training is interrupted.
    losses = []
    with open(train_stats, train_stats_mode) as train_stats_f:
        train_stats_f.write("epoch loss\n")

        for i in range(args.epochs):
            bar = get_logger("matrix_loss", i + 1, args.epochs,
                             args.steps_per_epoch)

            # j is the 0-based index of the batch within the current epoch
            for j, sample in enumerate(training_loader):
                X, y_target = sample
                X, y_target = X.to(device), y_target.to(device)

                # Train on batch
                batch_loss, metrics, debug_stats = train_on_batch(
                    model,
                    lr_schedule(optimizer, i, args.lr, args.lr_factor,
                                args.lr_epochs), matrix_loss, X, y_target,
                    device)

                # Update the running loss with the number of batches seen so
                # far in this epoch
                bar.loss = moving_average(bar.loss, batch_loss, j)

                # Record in list
                losses.append(bar.loss)

                # Update the file that keeps track of the statistics once
                # every 50 batches
                if (j % 50) == 0:
                    train_stats_f.write("%d %5.8f\n" % (i, bar.loss))
                    train_stats_f.flush()
                bar.next()

                # An epoch lasts at most steps_per_epoch batches
                if j + 1 >= args.steps_per_epoch:
                    break

            # Finish the progress bar and save the model every 5 epochs
            bar.finish()

            if (i % 5) == 0:
                torch.save(
                    model.state_dict(),
                    os.path.join(
                        experiment_directory,
                        "model_%d" % (i + args.continue_from_epoch, )))

    torch.save(model.state_dict(),
               os.path.join(experiment_directory, "model_final"))

    # TODO: print final training stats
    # NOTE(review): the first entry sums every loss after the first epoch but
    # divides by steps_per_epoch; possibly a negative slice was intended
    print([
        sum(losses[args.steps_per_epoch:]) / float(args.steps_per_epoch),
        sum(losses[:args.steps_per_epoch]) / float(args.steps_per_epoch)
    ])
예제 #2
0
def _load_part_samples(tags, device):
    """Load the precomputed part point samples of a batch.

    For every tag that has a ``<tag>.pkl`` file in ``RANDINDEX_SAMPLES_DIR``
    the pickled ``samples`` and ``neighbor_pairs`` entries are read and turned
    into a point tensor and a sparse (N, N) matrix with ones at the neighbor
    pairs, N being the number of point samples.

    Arguments
    ---------
        tags: The tags of the samples in the batch, in batch order
        device: The torch device to move the loaded tensors to

    Returns
    -------
        A tuple (P, part_point_samples, indices_w_parts) with the stacked
        sparse matrices, the stacked point samples and the list of positions
        in the batch that have part samples.

    Raises
    ------
        ValueError: if the ``neighbor_pairs`` of a file do not have 2 rows
    """
    part_point_samples = []
    P = []
    indices_w_parts = []
    for idx_batchwide, tag in enumerate(tags):
        target_part_samples_path = os.path.join(RANDINDEX_SAMPLES_DIR,
                                                "{}.pkl".format(tag))
        # Samples without precomputed part point samples are skipped
        if not os.path.exists(target_part_samples_path):
            continue

        # NOTE: unpickling can execute arbitrary code, so
        # RANDINDEX_SAMPLES_DIR must only contain trusted files
        with open(target_part_samples_path, "rb") as f:
            part_data = pickle.load(f)

        point_samples = torch.Tensor(part_data["samples"]).to(device)
        part_point_samples.append(point_samples)

        N = point_samples.shape[0]
        nonzero_indices = part_data["neighbor_pairs"]
        if nonzero_indices.shape[0] != 2:
            raise ValueError(
                "neighbor_pairs of %s must have 2 rows" % (tag, ))
        val = np.ones(nonzero_indices.shape[1])
        curr_P = torch.sparse_coo_tensor(nonzero_indices, val, (N, N))
        P.append(curr_P.to(device))

        indices_w_parts.append(idx_batchwide)

    # NOTE(review): torch.stack raises when no sample of the batch has part
    # samples and needs every sample to share the same N - confirm that the
    # dataset guarantees both
    return (torch.stack(P, dim=0), torch.stack(part_point_samples, dim=0),
            indices_w_parts)


def main(argv):
    """Train a network to predict primitives using the euclidean dual loss.

    Parses the command line, creates
    ``<output_directory>/<experiment_tag>``, stores the experiment parameters
    there and trains the model for ``args.epochs`` epochs of
    ``args.steps_per_epoch`` batches each. The running losses and regularizer
    values are written to ``train.txt`` after every batch and a
    ``model_<epoch>`` checkpoint is saved after every epoch.

    Arguments
    ---------
        argv: The list of command line arguments, handed verbatim to
              ``parser.parse_args``
    """
    parser = argparse.ArgumentParser(
        description="Train a network to predict primitives")
    parser.add_argument("dataset_directory",
                        help="Path to the directory containing the dataset")
    parser.add_argument("output_directory",
                        help="Save the output files in that directory")

    parser.add_argument(
        "--tsdf_directory",
        default="",
        help="Path to the directory containing the precomputed tsdf files")
    parser.add_argument(
        "--weight_file",
        default=None,
        help=("The path to a previously trainined model to continue"
              " the training from"))
    parser.add_argument("--continue_from_epoch",
                        default=0,
                        type=int,
                        help="Continue training from epoch (default=0)")
    parser.add_argument("--n_primitives",
                        type=int,
                        default=32,
                        help="Number of primitives")
    parser.add_argument(
        "--use_deformations",
        action="store_true",
        help="Use Superquadrics with deformations as the shape configuration")
    parser.add_argument("--train_test_splits_file",
                        default=None,
                        help="Path to the train-test splits file")
    parser.add_argument("--run_on_gpu", action="store_true", help="Use GPU")
    parser.add_argument("--probs_only",
                        action="store_true",
                        help="Optimize only using the probabilities")

    parser.add_argument("--experiment_tag",
                        default=None,
                        help="Tag that refers to the current experiment")

    parser.add_argument("--cache_size",
                        type=int,
                        default=2000,
                        help="The batch provider cache size")

    parser.add_argument("--seed",
                        type=int,
                        default=27,
                        help="Seed for the PRNG")

    add_nn_parameters(parser)
    add_dataset_parameters(parser)
    add_voxelizer_parameters(parser)
    add_training_parameters(parser)
    add_sq_mesh_sampler_parameters(parser)
    add_regularizer_parameters(parser)
    add_gaussian_noise_layer_parameters(parser)
    # Parameters related to the loss function and the loss weights
    add_loss_parameters(parser)
    # Parameters related to loss options
    add_loss_options_parameters(parser)
    args = parser.parse_args(argv)

    # Train on both the train and the val split when a splits file is given
    if args.train_test_splits_file is not None:
        train_test_splits = parse_train_test_splits(
            args.train_test_splits_file, args.model_tags)
        training_tags = np.hstack(
            [train_test_splits["train"], train_test_splits["val"]])
    else:
        training_tags = args.model_tags

    # NOTE: --run_on_gpu is trusted as is, CUDA availability is not checked
    if args.run_on_gpu:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    print("Running code on {}".format(device))

    # Check if output directory exists and if it doesn't create it
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    # Create an experiment directory using the experiment_tag
    if args.experiment_tag is None:
        experiment_tag = id_generator(9)
    else:
        experiment_tag = args.experiment_tag

    experiment_directory = os.path.join(args.output_directory, experiment_tag)
    if not os.path.exists(experiment_directory):
        os.makedirs(experiment_directory)

    # Store the parameters for the current experiment in a json file
    save_experiment_params(args, experiment_tag, experiment_directory)
    print("Save experiment statistics in %s" % (experiment_tag, ))

    # The training evolution is stored in train.txt; start a fresh file
    # unless we continue from a previously trained model
    train_stats = os.path.join(experiment_directory, "train.txt")
    train_stats_mode = "w" if args.weight_file is None else "a+"

    # Set the random seed
    np.random.seed(args.seed)
    torch.manual_seed(np.random.randint(np.iinfo(np.int32).max))
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(np.random.randint(np.iinfo(np.int32).max))

    # Create an object that will sample points in equal distances on the
    # surface of the primitive
    sampler = get_sampler(args.use_cuboids, args.n_points_from_sq_mesh,
                          args.D_eta, args.D_omega)

    # Create a factory that returns the appropriate voxelizer based on the
    # input argument
    voxelizer_factory = VoxelizerFactory(args.voxelizer_factory,
                                         np.array(voxelizer_shape(args)),
                                         args.save_voxels_to)

    # Create a dataset instance to generate the samples for training
    training_dataset = get_dataset_type("euclidean_dual_loss")(
        (DatasetBuilder().with_dataset(args.dataset_type).lru_cache(
            2000).filter_tags(training_tags).build(args.dataset_directory)),
        voxelizer_factory,
        args.n_points_from_mesh,
        transform=compose_transformations(args.voxelizer_factory))
    # Create a batchprovider object to start generating batches
    train_bp = BatchProvider(training_dataset,
                             batch_size=args.batch_size,
                             cache_size=args.cache_size)
    train_bp.ready()

    network_params = NetworkParameters.from_options(args)
    # Build the model to be used for training
    model = network_params.network(network_params)

    # Move model to the device to be used
    model.to(device)
    # Check whether there is a weight file provided to continue training from
    if args.weight_file is not None:
        model.load_state_dict(torch.load(args.weight_file))
    model.train()

    # Build an optimizer object to compute the gradients of the parameters
    optimizer = optimizer_factory(args, model)

    # Loop over the dataset multiple times. The statistics file is opened in
    # a with-block so that it is closed even if training is interrupted.
    pcl_to_prim_losses = []
    prim_to_pcl_losses = []
    losses = []
    with open(train_stats, train_stats_mode) as train_stats_f:
        train_stats_f.write(
            ("epoch loss pcl_to_prim_loss prim_to_pcl_loss "
             "bernoulli_regularizer "
             "entropy_bernoulli_regularizer parsimony_regularizer "
             "overlapping_regularizer sparsity_regularizer\n"))

        for i in range(args.epochs):
            bar = get_logger("euclidean_dual_loss", i + 1, args.epochs,
                             args.steps_per_epoch)
            for b, sample in zip(range(args.steps_per_epoch),
                                 yield_infinite(train_bp)):

                tags, X, y_target = sample
                X, y_target = X.to(device), y_target.to(device)

                # Gather the part point samples of the batch based on `tags`
                P, part_point_samples, indices_w_parts = \
                    _load_part_samples(tags, device)

                # Train on batch
                batch_loss, metrics, debug_stats = train_on_batch_w_parts(
                    model,
                    lr_schedule(optimizer, i, args.lr, args.lr_factor,
                                args.lr_epochs), euclidean_dual_loss, X,
                    y_target, get_regularizer_terms(args, i), sampler,
                    get_loss_options(args), P, part_point_samples,
                    indices_w_parts)

                # Get the regularizer terms
                reg_values = debug_stats["regularizer_terms"]
                sparsity_regularizer = reg_values["sparsity_regularizer"]
                overlapping_regularizer = reg_values[
                    "overlapping_regularizer"]
                parsimony_regularizer = reg_values["parsimony_regularizer"]
                entropy_bernoulli_regularizer = reg_values[
                    "entropy_bernoulli_regularizer"]
                bernoulli_regularizer = reg_values["bernoulli_regularizer"]

                # The losses
                pcl_to_prim_loss = debug_stats["pcl_to_prim_loss"].item()
                prim_to_pcl_loss = debug_stats["prim_to_pcl_loss"].item()
                bar.loss = moving_average(bar.loss, batch_loss, b)
                bar.pcl_to_prim_loss = \
                    moving_average(bar.pcl_to_prim_loss, pcl_to_prim_loss, b)
                bar.prim_to_pcl_loss = \
                    moving_average(bar.prim_to_pcl_loss, prim_to_pcl_loss, b)

                losses.append(bar.loss)
                prim_to_pcl_losses.append(bar.prim_to_pcl_loss)
                pcl_to_prim_losses.append(bar.pcl_to_prim_loss)

                # Running means of the regularizers over the b+1 batches of
                # the current epoch
                bar.bernoulli_regularizer = \
                    (bar.bernoulli_regularizer * b +
                     bernoulli_regularizer) / (b + 1)
                bar.parsimony_regularizer = \
                    (bar.parsimony_regularizer * b +
                     parsimony_regularizer) / (b + 1)
                bar.overlapping_regularizer = \
                    (bar.overlapping_regularizer * b +
                     overlapping_regularizer) / (b + 1)
                bar.entropy_bernoulli_regularizer = \
                    (bar.entropy_bernoulli_regularizer * b +
                     entropy_bernoulli_regularizer) / (b + 1)
                bar.sparsity_regularizer = \
                    (bar.sparsity_regularizer * b +
                     sparsity_regularizer) / (b + 1)

                bar.exp_n_prims = metrics[0].sum(-1).mean()
                # Update the file that keeps track of the statistics
                train_stats_f.write(
                    ("%d %.8f %.8f %.8f %.6f %.6f %.6f %.6f %.6f\n") %
                    (i, bar.loss, bar.pcl_to_prim_loss, bar.prim_to_pcl_loss,
                     bar.bernoulli_regularizer,
                     bar.entropy_bernoulli_regularizer,
                     bar.parsimony_regularizer, bar.overlapping_regularizer,
                     bar.sparsity_regularizer))
                train_stats_f.flush()

                bar.next()
            # Finish the progress bar and save the model after every epoch
            bar.finish()
            # Stop the batch provider
            # NOTE(review): this runs after every epoch, not once at the end;
            # confirm that the provider can be iterated again after stop()
            train_bp.stop()
            torch.save(
                model.state_dict(),
                os.path.join(experiment_directory,
                             "model_%d" % (i + args.continue_from_epoch, )))

    # NOTE(review): the [steps_per_epoch:] entries sum every value after the
    # first epoch but divide by steps_per_epoch; possibly a negative slice
    # was intended
    print([
        sum(losses[args.steps_per_epoch:]) / float(args.steps_per_epoch),
        sum(losses[:args.steps_per_epoch]) / float(args.steps_per_epoch),
        sum(pcl_to_prim_losses[args.steps_per_epoch:]) /
        float(args.steps_per_epoch),
        sum(pcl_to_prim_losses[:args.steps_per_epoch]) /
        float(args.steps_per_epoch),
        sum(prim_to_pcl_losses[args.steps_per_epoch:]) /
        float(args.steps_per_epoch),
        sum(prim_to_pcl_losses[:args.steps_per_epoch]) /
        float(args.steps_per_epoch),
    ])
예제 #3
0
def main(argv):
    """Train the network described by a configuration file.

    The command line in ``argv`` provides the configuration file, the output
    directory and the run options. The experiment parameters are stored in
    ``<output_directory>/<experiment_tag>``, which is also where the
    ``train.txt`` and ``val.txt`` paths handed to the loggers point to and
    where a ``model_<epoch>`` checkpoint is saved every ``save_every``
    epochs. Every ``validation_every`` epochs (except the very first one)
    the network is evaluated on the validation dataloader.

    Arguments
    ---------
        argv: The list of command line arguments, handed verbatim to
              ``parse_args``
    """
    cli = argparse.ArgumentParser(
        description="Train a network to predict primitives")

    # Positional arguments
    cli.add_argument(
        "config_file",
        help="Path to the file that contains the experiment configuration")
    cli.add_argument("output_directory",
                     help="Save the output files in that directory")

    # Options to resume a previous training
    cli.add_argument(
        "--weight_file",
        default=None,
        help=("The path to a previously trained model to continue"
              " the training from"))
    cli.add_argument("--continue_from_epoch",
                     default=0,
                     type=int,
                     help="Continue training from epoch (default=0)")

    # Options of the current run
    cli.add_argument("--run_on_gpu", action="store_true", help="Use GPU")
    cli.add_argument("--probs_only",
                     action="store_true",
                     help="Optimize only using the probabilities")
    cli.add_argument("--experiment_tag",
                     default=None,
                     help="Tag that refers to the current experiment")
    cli.add_argument("--credentials",
                     default=os.path.join(os.path.dirname(__file__),
                                          ".credentials"),
                     help="The credentials file for the Google API")
    cli.add_argument("--cache_size",
                     type=int,
                     default=0,
                     help="The batch provider cache size")
    cli.add_argument(
        "--n_processes",
        type=int,
        default=8,
        help="The numper of processed spawned by the batch provider")
    cli.add_argument("--seed",
                     type=int,
                     default=27,
                     help="Seed for the PRNG")

    add_dataset_parameters(cli)
    options = cli.parse_args(argv)
    set_num_threads(1)

    # The GPU is used whenever it is requested, without an availability check
    device = torch.device("cuda:0" if options.run_on_gpu else "cpu")
    print("Running code on", device)

    def split_sample(sample):
        # The first entry is handed to the network as X, the remaining ones
        # are the targets; a single target is not wrapped in a list
        inputs = sample[0].to(device)
        targets = [t.to(device) for t in sample[1:]]
        if len(targets) == 1:
            return inputs, targets[0]
        return inputs, targets

    # Make sure that the output directory exists
    if not os.path.exists(options.output_directory):
        os.makedirs(options.output_directory)

    # The experiment directory is named after the experiment tag, which is
    # generated when it is not given
    experiment_tag = (id_generator(9) if options.experiment_tag is None else
                      options.experiment_tag)
    experiment_directory = os.path.join(options.output_directory,
                                        experiment_tag)
    if not os.path.exists(experiment_directory):
        os.makedirs(experiment_directory)

    # Keep the parameters of the experiment next to its results
    save_experiment_params(options, experiment_tag, experiment_directory)
    print("Save experiment statistics in %s" % (experiment_tag, ))

    # Seed numpy and derive the torch seeds from it
    np.random.seed(options.seed)
    max_seed = np.iinfo(np.int32).max
    torch.manual_seed(np.random.randint(max_seed))
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(np.random.randint(max_seed))

    config = load_config(options.config_file)
    data_config = config["data"]
    loss_config = config["loss"]

    # Build the network, put it in training mode and create its optimizer
    network = build_network(options.config_file,
                            options.weight_file,
                            device=device)
    network.train()
    loss_options = get_loss_options(config)
    optimizer = optimizer_factory(config, network)

    # The sampler that provides the points on the surface of the primitives
    sampler = PrimitiveSampler(data_config.get("n_points_from_sq_mesh", 200))

    # The dataloader that generates the training samples
    dataloader = build_dataloader(
        config,
        options.model_tags,
        options.category_tags,
        data_config.get("train_split", ["train", "val"]),
        loss_config.get("batch_size", 32),
        options.n_processes,
        cache_size=options.cache_size,
    )

    # The dataset and the dataloader that generate the validation samples
    val_dataset = build_dataset(config, [], [],
                                data_config.get("test_split", ["test"]),
                                random_subset=options.val_random_subset,
                                cache_size=options.cache_size)
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=data_config.get("validation_batch_size", 8),
        num_workers=options.n_processes,
        shuffle=True)

    epochs = loss_config.get("epochs", 150)
    steps_per_epoch = loss_config.get("steps_per_epoch", 500)

    # One logger for the training and one for the validation statistics
    logger = get_logger(epochs, steps_per_epoch,
                        os.path.join(experiment_directory, "train.txt"))
    val_every = data_config.get("validation_every", 1000)
    val_logger = get_logger(epochs // val_every,
                            len(val_dataset),
                            os.path.join(experiment_directory, "val.txt"),
                            prefix="Validation Epoch")
    # Number of validation rounds performed so far
    val_epochs = 0

    save_every = data_config.get("save_every", 5)

    for epoch in range(epochs):
        logger.new_epoch(epoch)
        for step, sample in zip(range(steps_per_epoch),
                                yield_infinite(dataloader)):
            X, y_target = split_sample(sample)

            # The regularizers of the epoch are merged in the loss options
            reg_terms, reg_options = get_regularizer_options(config, epoch)
            loss_options.update(reg_options)

            # Train on batch
            scheduled_optimizer = lr_schedule(optimizer, epoch, config)
            loss_fn = get_loss(config["loss_type"], reg_terms, sampler,
                               loss_options)
            batch_loss, preds = train_on_batch(network, scheduled_optimizer,
                                               loss_fn, X, y_target, epoch)

            logger.new_batch(step, batch_loss)

        # Checkpoint the network every save_every epochs
        if epoch % save_every == 0:
            checkpoint_name = "model_%d" % (
                epoch + options.continue_from_epoch, )
            torch.save(network.state_dict(),
                       os.path.join(experiment_directory, checkpoint_name))

        # Validation takes place every val_every epochs, never in the first
        if epoch % val_every != 0 or epoch <= 0:
            continue

        val_logger.new_epoch(val_epochs)
        n_validated = 0
        for sample in val_dataloader:
            X, y_target = split_sample(sample)
            # The regularizer terms of the last training batch are reused
            val_loss_fn = get_loss(config["loss_type"], reg_terms, sampler,
                                   loss_options)
            val_l, preds = validate_on_batch(network, val_loss_fn, X,
                                             y_target)
            n_validated += X.shape[0]
            val_logger.new_batch(n_validated, val_l)
        val_epochs += 1

    print("Saved statistics in %s" % (experiment_tag, ))