def main():
    """Sample synthetic data from a trained ARAE using command line arguments.

    Steps, in order: parse the command line, build an ``AutoEncoder`` and a
    ``Generator`` with the requested sizes, load their saved states with
    ``load_without_cuda``, call ``sample`` and write the result to the
    ``data`` path with ``np.save``.

    The variable sizes are read from the metadata file and the temperature is
    forwarded only when both ``--metadata`` and ``--temperature`` are given;
    in every other case both are ``None`` and ``sample`` is called with
    ``round_features=True``.
    """
    parser = argparse.ArgumentParser(description="Sample data with ARAE.")

    # Required positional arguments, declared in command line order.
    positional_arguments = (
        ("autoencoder", str, "Autoencoder input file."),
        ("generator", str, "Generator input file."),
        ("num_samples", int, "Number of output samples."),
        ("num_features", int, "Number of output features."),
        ("data", str, "Output data."),
    )
    for name, value_type, description in positional_arguments:
        parser.add_argument(name, type=value_type, help=description)

    parser.add_argument(
        "--metadata", type=str,
        help="Information about the categorical variables in json format.")
    parser.add_argument(
        "--code_size", type=int, default=128,
        help="Dimension of the autoencoder latent space.")
    parser.add_argument(
        "--noise_size", type=int, default=128,
        help="Dimension of the generator input noise.")
    parser.add_argument(
        "--encoder_hidden_sizes", type=str, default="",
        help="Size of each hidden layer in the encoder separated by commas (no spaces).")
    parser.add_argument(
        "--decoder_hidden_sizes", type=str, default="",
        help="Size of each hidden layer in the decoder separated by commas (no spaces).")
    parser.add_argument(
        "--batch_size", type=int, default=100,
        help="Amount of samples per batch.")
    parser.add_argument(
        "--generator_hidden_sizes", type=str, default="256,128",
        help="Size of each hidden layer in the generator separated by commas (no spaces).")
    parser.add_argument(
        "--generator_bn_decay", type=float, default=0.01,
        help="Generator batch normalization decay.")
    parser.add_argument(
        "--temperature", type=float, default=None,
        help="Gumbel-Softmax temperature.")

    args = parser.parse_args()

    # Metadata and temperature are only honoured as a pair.
    multi_categorical = not (args.metadata is None or args.temperature is None)
    variable_sizes = load_variable_sizes_from_metadata(args.metadata) if multi_categorical else None
    temperature = args.temperature if multi_categorical else None

    # Rebuild the autoencoder with the given architecture before loading its state.
    encoder_sizes = parse_int_list(args.encoder_hidden_sizes)
    decoder_sizes = parse_int_list(args.decoder_hidden_sizes)
    autoencoder = AutoEncoder(
        args.num_features,
        code_size=args.code_size,
        encoder_hidden_sizes=encoder_sizes,
        decoder_hidden_sizes=decoder_sizes,
        variable_sizes=variable_sizes,
    )
    load_without_cuda(autoencoder, args.autoencoder)

    # Same for the generator, which maps noise_size inputs to code_size outputs.
    generator_sizes = parse_int_list(args.generator_hidden_sizes)
    generator = Generator(
        args.noise_size,
        args.code_size,
        hidden_sizes=generator_sizes,
        bn_decay=args.generator_bn_decay,
    )
    load_without_cuda(generator, args.generator)

    samples = sample(
        autoencoder,
        generator,
        args.num_samples,
        args.num_features,
        batch_size=args.batch_size,
        noise_size=args.noise_size,
        temperature=temperature,
        round_features=(temperature is None),
    )

    np.save(args.data, samples)
Esempio n. 2
0
def main():
    """Train MedGAN or MC-MedGAN from command line arguments.

    Steps, in order: parse the command line, seed the random number
    generators, load the features with the loader selected by
    ``--data_format``, split them into training and validation sets, build
    the autoencoder (loading its saved state), the generator and the
    discriminator (each loaded or initialized), and finally call ``train``.

    The variable sizes are read from the metadata file and the temperature is
    forwarded only when both ``--metadata`` and ``--temperature`` are given;
    otherwise both are ``None``.
    """
    parser = argparse.ArgumentParser(
        description="Train MedGAN or MC-MedGAN. Define 'metadata' and 'temperature' to use MC-MedGAN.")

    # Required positional arguments (all strings), declared in command line order.
    positional_arguments = (
        ("data", "Training data. See 'data_format' parameter."),
        ("input_autoencoder", "Autoencoder input file."),
        ("output_autoencoder", "Autoencoder output file."),
        ("output_generator", "Generator output file."),
        ("output_discriminator", "Discriminator output file."),
        ("output_loss", "Loss output file."),
    )
    for name, description in positional_arguments:
        parser.add_argument(name, type=str, help=description)

    # Optional checkpoints to resume from.
    parser.add_argument("--input_generator", type=str, default=None, help="Generator input file.")
    parser.add_argument("--input_discriminator", type=str, default=None, help="Discriminator input file.")

    parser.add_argument(
        "--metadata", type=str,
        help="Information about the categorical variables in json format. Only used if temperature is also provided.")
    parser.add_argument(
        "--validation_proportion", type=float, default=.1,
        help="Ratio of data for validation.")
    parser.add_argument(
        "--data_format", type=str, default="sparse", choices=data_formats,
        help="Either a dense numpy array, a sparse csr matrix or any of those formats in split into several files.")
    parser.add_argument(
        "--code_size", type=int, default=128,
        help="Dimension of the autoencoder latent space.")
    parser.add_argument(
        "--encoder_hidden_sizes", type=str, default="",
        help="Size of each hidden layer in the encoder separated by commas (no spaces).")
    parser.add_argument(
        "--decoder_hidden_sizes", type=str, default="",
        help="Size of each hidden layer in the decoder separated by commas (no spaces).")
    parser.add_argument(
        "--batch_size", type=int, default=1000,
        help="Amount of samples per batch.")
    parser.add_argument(
        "--start_epoch", type=int, default=0,
        help="Starting epoch.")
    parser.add_argument(
        "--num_epochs", type=int, default=1000,
        help="Number of epochs.")
    parser.add_argument(
        "--l2_regularization", type=float, default=0.001,
        help="L2 regularization weight for every parameter.")
    parser.add_argument(
        "--learning_rate", type=float, default=0.001,
        help="Adam learning rate.")
    parser.add_argument(
        "--generator_hidden_layers", type=int, default=2,
        help="Number of hidden layers in the generator.")
    parser.add_argument(
        "--generator_bn_decay", type=float, default=0.99,
        help="Generator batch normalization decay.")
    parser.add_argument(
        "--discriminator_hidden_sizes", type=str, default="256,128",
        help="Size of each hidden layer in the discriminator separated by commas (no spaces).")
    parser.add_argument(
        "--num_discriminator_steps", type=int, default=2,
        help="Number of successive training steps for the discriminator.")
    parser.add_argument(
        "--num_generator_steps", type=int, default=1,
        help="Number of successive training steps for the generator.")
    parser.add_argument(
        "--temperature", type=float, default=None,
        help="Gumbel-Softmax temperature. Only used if metadata is also provided.")
    parser.add_argument(
        "--seed", type=int, default=42,
        help="Random number generator seed.")

    args = parser.parse_args()

    # Seed numpy and torch (plus every CUDA device when available).
    seed = args.seed
    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)

    # Load the features and hold out the validation proportion.
    features = loaders[args.data_format](args.data)
    train_fraction = 1.0 - args.validation_proportion
    train_data, val_data = Dataset(features).split(train_fraction)

    # Metadata and temperature are only honoured as a pair.
    multi_categorical = not (args.metadata is None or args.temperature is None)
    variable_sizes = load_variable_sizes_from_metadata(args.metadata) if multi_categorical else None
    temperature = args.temperature if multi_categorical else None

    # The autoencoder state is always loaded from the given input file.
    autoencoder = AutoEncoder(
        features.shape[1],
        code_size=args.code_size,
        encoder_hidden_sizes=parse_int_list(args.encoder_hidden_sizes),
        decoder_hidden_sizes=parse_int_list(args.decoder_hidden_sizes),
        variable_sizes=variable_sizes,
    )
    load_without_cuda(autoencoder, args.input_autoencoder)

    generator = Generator(
        code_size=args.code_size,
        num_hidden_layers=args.generator_hidden_layers,
        bn_decay=args.generator_bn_decay,
    )
    load_or_initialize(generator, args.input_generator)

    discriminator_sizes = parse_int_list(args.discriminator_hidden_sizes)
    discriminator = Discriminator(features.shape[1], hidden_sizes=discriminator_sizes)
    load_or_initialize(discriminator, args.input_discriminator)

    train(
        autoencoder,
        generator,
        discriminator,
        train_data,
        val_data,
        args.output_autoencoder,
        args.output_generator,
        args.output_discriminator,
        args.output_loss,
        batch_size=args.batch_size,
        start_epoch=args.start_epoch,
        num_epochs=args.num_epochs,
        num_disc_steps=args.num_discriminator_steps,
        num_gen_steps=args.num_generator_steps,
        code_size=args.code_size,
        l2_regularization=args.l2_regularization,
        learning_rate=args.learning_rate,
        temperature=temperature,
    )
Esempio n. 3
0
def main():
    """Sample synthetic data from a trained generator using command line arguments.

    Steps, in order: parse the command line, build a ``Generator`` from the
    noise size and the variable sizes read from the metadata file, load its
    saved state with ``load_without_cuda``, call ``sample`` with the
    Gumbel-Softmax temperature and write the result to the ``data`` path with
    ``np.save``.
    """
    # NOTE(review): the description mentions MedGAN, but this entry point only
    # builds a Generator from the metadata variable sizes and forwards a
    # Gumbel-Softmax temperature to sample; confirm the wording is intended.
    parser = argparse.ArgumentParser(description="Sample data with MedGAN.")

    # Required positional arguments, declared in command line order.
    positional_arguments = (
        ("generator", str, "Generator input file."),
        ("metadata", str, "Information about the categorical variables in json format."),
        ("num_samples", int, "Number of output samples."),
        ("num_features", int, "Number of output features."),
        ("data", str, "Output data."),
    )
    for name, value_type, description in positional_arguments:
        parser.add_argument(name, type=value_type, help=description)

    parser.add_argument(
        "--noise_size", type=int, default=128,
        help="Dimension of the generator input noise.")
    parser.add_argument(
        "--batch_size", type=int, default=100,
        help="Amount of samples per batch.")
    parser.add_argument(
        "--generator_hidden_sizes", type=str, default="256,128",
        help="Size of each hidden layer in the generator separated by commas (no spaces).")
    parser.add_argument(
        "--generator_bn_decay", type=float, default=0.01,
        help="Generator batch normalization decay.")
    parser.add_argument(
        "--temperature", type=float, default=0.666,
        help="Gumbel-Softmax temperature.")

    args = parser.parse_args()

    # Rebuild the generator with the given architecture before loading its state.
    variable_sizes = load_variable_sizes_from_metadata(args.metadata)
    hidden_sizes = parse_int_list(args.generator_hidden_sizes)
    generator = Generator(
        args.noise_size,
        variable_sizes,
        hidden_sizes=hidden_sizes,
        bn_decay=args.generator_bn_decay,
    )
    load_without_cuda(generator, args.generator)

    samples = sample(
        generator,
        args.temperature,
        args.num_samples,
        args.num_features,
        batch_size=args.batch_size,
        noise_size=args.noise_size,
    )

    np.save(args.data, samples)
def main():
    """Train MC-Gumbel from command line arguments.

    Steps, in order: parse the command line, seed the random number
    generators, load the features with the loader selected by
    ``--data_format``, split them into training and validation sets, read
    the variable sizes from the metadata file, build the generator and the
    discriminator (each loaded from its input file or initialized), and
    finally call ``train``.
    """
    options_parser = argparse.ArgumentParser(description="Train MC-Gumbel.")

    options_parser.add_argument("data", type=str, help="Training data. See 'data_format' parameter.")

    options_parser.add_argument("metadata", type=str,
                                help="Information about the categorical variables in json format.")

    options_parser.add_argument("output_generator", type=str, help="Generator output file.")
    options_parser.add_argument("output_discriminator", type=str, help="Discriminator output file.")
    options_parser.add_argument("output_loss", type=str, help="Loss output file.")

    options_parser.add_argument("--input_generator", type=str, help="Generator input file.", default=None)
    options_parser.add_argument("--input_discriminator", type=str, help="Discriminator input file.", default=None)

    options_parser.add_argument(
        "--validation_proportion", type=float,
        default=.1,
        help="Ratio of data for validation."
    )

    options_parser.add_argument(
        "--data_format",
        type=str,
        default="sparse",
        choices=data_formats,
        help="Either a dense numpy array or a sparse csr matrix."
    )

    # The help text used to be empty, which left this option undocumented in
    # the --help output; the value is the first Generator argument below.
    options_parser.add_argument(
        "--noise_size",
        type=int,
        default=128,
        help="Dimension of the generator input noise."
    )

    options_parser.add_argument(
        "--batch_size",
        type=int,
        default=1000,
        help="Amount of samples per batch."
    )

    options_parser.add_argument(
        "--start_epoch",
        type=int,
        default=0,
        help="Starting epoch."
    )

    options_parser.add_argument(
        "--num_epochs",
        type=int,
        default=1000,
        help="Number of epochs."
    )

    options_parser.add_argument(
        "--l2_regularization",
        type=float,
        default=0.001,
        help="L2 regularization weight for every parameter."
    )

    options_parser.add_argument(
        "--learning_rate",
        type=float,
        default=0.001,
        help="Adam learning rate."
    )

    options_parser.add_argument(
        "--generator_hidden_sizes",
        type=str,
        default="256,128",
        help="Size of each hidden layer in the generator separated by commas (no spaces)."
    )

    options_parser.add_argument(
        "--bn_decay",
        type=float,
        default=0.9,
        help="Batch normalization decay for the generator and discriminator."
    )

    options_parser.add_argument(
        "--discriminator_hidden_sizes",
        type=str,
        default="256,128",
        help="Size of each hidden layer in the discriminator separated by commas (no spaces)."
    )

    options_parser.add_argument(
        "--num_discriminator_steps",
        type=int,
        default=2,
        help="Number of successive training steps for the discriminator."
    )

    options_parser.add_argument(
        "--num_generator_steps",
        type=int,
        default=1,
        help="Number of successive training steps for the generator."
    )

    options_parser.add_argument(
        "--temperature",
        type=float,
        default=0.666,
        help="Gumbel-Softmax temperature."
    )

    options_parser.add_argument("--seed", type=int, help="Random number generator seed.", default=42)

    options = options_parser.parse_args()

    # Seed numpy and torch (plus every CUDA device when available).
    if options.seed is not None:
        np.random.seed(options.seed)
        torch.manual_seed(options.seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(options.seed)

    # Load the features and hold out the validation proportion.
    features = loaders[options.data_format](options.data)
    data = Dataset(features)
    train_data, val_data = data.split(1.0 - options.validation_proportion)

    variable_sizes = load_variable_sizes_from_metadata(options.metadata)

    generator = Generator(
        options.noise_size,
        variable_sizes,
        hidden_sizes=parse_int_list(options.generator_hidden_sizes),
        bn_decay=options.bn_decay
    )

    load_or_initialize(generator, options.input_generator)

    # The discriminator input size is the number of feature columns.
    discriminator = Discriminator(
        features.shape[1],
        hidden_sizes=parse_int_list(options.discriminator_hidden_sizes),
        bn_decay=options.bn_decay,
        critic=False
    )

    load_or_initialize(discriminator, options.input_discriminator)

    train(
        generator,
        discriminator,
        train_data,
        val_data,
        options.output_generator,
        options.output_discriminator,
        options.output_loss,
        batch_size=options.batch_size,
        start_epoch=options.start_epoch,
        num_epochs=options.num_epochs,
        num_disc_steps=options.num_discriminator_steps,
        num_gen_steps=options.num_generator_steps,
        noise_size=options.noise_size,
        l2_regularization=options.l2_regularization,
        learning_rate=options.learning_rate,
        temperature=options.temperature
    )
Esempio n. 5
0
def main():
    """Train ARAE or MC-ARAE from command line arguments.

    Steps, in order: parse the command line, seed the random number
    generators, load the features with the loader selected by
    ``--data_format``, split them into training and validation sets, build
    the autoencoder, the generator and the discriminator (each loaded from
    its input file or initialized), and finally call ``train``.

    The variable sizes are read from the metadata file and the temperature is
    forwarded only when both ``--metadata`` and ``--temperature`` are given;
    otherwise both are ``None``.
    """
    parser = argparse.ArgumentParser(
        description="Train ARAE or MC-ARAE. Define 'metadata' and 'temperature' to use MC-ARAE.")

    # Required positional arguments (all strings), declared in command line order.
    positional_arguments = (
        ("data", "Training data. See 'data_format' parameter."),
        ("output_autoencoder", "Autoencoder output file."),
        ("output_generator", "Generator output file."),
        ("output_discriminator", "Discriminator output file."),
        ("output_loss", "Loss output file."),
    )
    for name, description in positional_arguments:
        parser.add_argument(name, type=str, help=description)

    # Optional checkpoints to resume from.
    checkpoint_arguments = (
        ("--input_autoencoder", "Autoencoder input file."),
        ("--input_generator", "Generator input file."),
        ("--input_discriminator", "Discriminator input file."),
    )
    for flag, description in checkpoint_arguments:
        parser.add_argument(flag, type=str, help=description, default=None)

    parser.add_argument(
        "--metadata", type=str,
        help="Information about the categorical variables in json format.")
    parser.add_argument(
        "--validation_proportion", type=float, default=.1,
        help="Ratio of data for validation.")
    parser.add_argument(
        "--data_format", type=str, default="sparse", choices=data_formats,
        help="Either a dense numpy array or a sparse csr matrix.")
    parser.add_argument(
        "--code_size", type=int, default=128,
        help="Dimension of the autoencoder latent space.")
    parser.add_argument(
        "--noise_size", type=int, default=128,
        help="Dimension of the generator input noise.")
    parser.add_argument(
        "--encoder_hidden_sizes", type=str, default="",
        help="Size of each hidden layer in the encoder separated by commas (no spaces).")
    parser.add_argument(
        "--decoder_hidden_sizes", type=str, default="",
        help="Size of each hidden layer in the decoder separated by commas (no spaces).")
    parser.add_argument(
        "--batch_size", type=int, default=100,
        help="Amount of samples per batch.")
    parser.add_argument(
        "--start_epoch", type=int, default=0,
        help="Starting epoch.")
    parser.add_argument(
        "--num_epochs", type=int, default=5000,
        help="Number of epochs.")
    parser.add_argument(
        "--l2_regularization", type=float, default=0,
        help="L2 regularization weight for every parameter.")
    parser.add_argument(
        "--learning_rate", type=float, default=1e-5,
        help="Adam learning rate.")
    parser.add_argument(
        "--generator_hidden_sizes", type=str, default="100,100,100",
        help="Size of each hidden layer in the generator separated by commas (no spaces).")
    parser.add_argument(
        "--bn_decay", type=float, default=0.9,
        help="Batch normalization decay for the generator and discriminator.")
    parser.add_argument(
        "--discriminator_hidden_sizes", type=str, default="100",
        help="Size of each hidden layer in the discriminator separated by commas (no spaces).")
    parser.add_argument(
        "--num_autoencoder_steps", type=int, default=1,
        help="Number of successive training steps for the autoencoder.")
    parser.add_argument(
        "--num_discriminator_steps", type=int, default=1,
        help="Number of successive training steps for the discriminator.")
    parser.add_argument(
        "--num_generator_steps", type=int, default=1,
        help="Number of successive training steps for the generator.")
    parser.add_argument(
        "--autoencoder_noise_radius", type=float, default=0,
        help="Gaussian noise standard deviation for the latent code (autoencoder regularization).")
    parser.add_argument(
        "--autoencoder_noise_anneal", type=float, default=0.995,
        help="Anneal the noise radius by this value after every epoch.")
    parser.add_argument(
        "--temperature", type=float, default=None,
        help="Gumbel-Softmax temperature.")
    parser.add_argument(
        "--penalty", type=float, default=0.1,
        help="WGAN-GP gradient penalty lambda.")
    parser.add_argument(
        "--seed", type=int, default=42,
        help="Random number generator seed.")

    args = parser.parse_args()

    # Seed numpy and torch (plus every CUDA device when available).
    seed = args.seed
    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)

    # Load the features and hold out the validation proportion.
    features = loaders[args.data_format](args.data)
    train_fraction = 1.0 - args.validation_proportion
    train_data, val_data = Dataset(features).split(train_fraction)

    # Metadata and temperature are only honoured as a pair.
    multi_categorical = not (args.metadata is None or args.temperature is None)
    variable_sizes = load_variable_sizes_from_metadata(args.metadata) if multi_categorical else None
    temperature = args.temperature if multi_categorical else None

    # The autoencoder input size is the number of feature columns.
    autoencoder = AutoEncoder(
        features.shape[1],
        code_size=args.code_size,
        encoder_hidden_sizes=parse_int_list(args.encoder_hidden_sizes),
        decoder_hidden_sizes=parse_int_list(args.decoder_hidden_sizes),
        variable_sizes=variable_sizes,
    )
    load_or_initialize(autoencoder, args.input_autoencoder)

    generator = Generator(
        args.noise_size,
        args.code_size,
        hidden_sizes=parse_int_list(args.generator_hidden_sizes),
        bn_decay=args.bn_decay,
    )
    load_or_initialize(generator, args.input_generator)

    # The discriminator receives code_size inputs and is built as a critic.
    discriminator = Discriminator(
        args.code_size,
        hidden_sizes=parse_int_list(args.discriminator_hidden_sizes),
        bn_decay=0,  # no batch normalization for the critic
        critic=True,
    )
    load_or_initialize(discriminator, args.input_discriminator)

    train(
        autoencoder,
        generator,
        discriminator,
        train_data,
        val_data,
        args.output_autoencoder,
        args.output_generator,
        args.output_discriminator,
        args.output_loss,
        batch_size=args.batch_size,
        start_epoch=args.start_epoch,
        num_epochs=args.num_epochs,
        num_ae_steps=args.num_autoencoder_steps,
        num_disc_steps=args.num_discriminator_steps,
        num_gen_steps=args.num_generator_steps,
        noise_size=args.noise_size,
        l2_regularization=args.l2_regularization,
        learning_rate=args.learning_rate,
        ae_noise_radius=args.autoencoder_noise_radius,
        ae_noise_anneal=args.autoencoder_noise_anneal,
        variable_sizes=variable_sizes,
        temperature=temperature,
        penalty=args.penalty,
    )