Example #1
def main():
    # define the command line arguments
    g_help = "teacher + student activation function: 'erf' or 'relu'"
    M_help = "number of teacher hidden nodes"
    K_help = "number of student hidden nodes"
    device_help = "which device to run on: 'cuda' or 'cpu'"
    generator_help = "Generator of the inputs: dcgan_rand, dcgan_cifar10, dcgan_cifar100_grey, nvp_cifar10."
    transform_help = "Transform: identity, scattering, ..."
    steps_help = "training steps as multiples of N"
    seed_help = "random number generator seed."
    parser = argparse.ArgumentParser()
    parser.add_argument("-g", "--g", default="erf", help=g_help)
    parser.add_argument("-M", "--M", type=int, default=2, help=M_help)
    parser.add_argument("-K", "--K", type=int, default=2, help=K_help)
    parser.add_argument("--generator", help=generator_help, default="rand")
    parser.add_argument("--transform", help=transform_help)
    parser.add_argument("--device", "-d", help=device_help)
    parser.add_argument("--lr", type=float, default=0.2, help="learning rate")
    parser.add_argument("--bs", type=int, default=1, help="mini-batch size")
    parser.add_argument("--steps", type=int, default=10000, help=steps_help)
    parser.add_argument("-q", "--quiet", help="be quiet", action="store_true")
    parser.add_argument("-s", "--seed", type=int, default=0, help=seed_help)
    parser.add_argument("--store",
                        action="store_true",
                        help="store initial conditions")
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    if args.device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(args.device)

    (M, K, lr) = (args.M, args.K, args.lr)

    # Find the right generator for the given scenario
    generator = utils.get_generator(args.generator, device)
    # transformation of the inputs
    transformation = utils.get_transformation(args.transform, generator,
                                              device)

    model_desc = generator.name()
    if transformation is not None:
        model_desc += "_" + transformation.name()

    # Define the dimensions of the problem
    D = generator.N_in
    N = generator.N_out if transformation is None else transformation.N_out
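    # (D is the dimension of the generator's latent input c; N is the dimension
    #  of the inputs x that the student actually sees, i.e. after the optional
    #  transformation)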

    # get the moments of the generator to center its outputs
    try:
        generator_mean_vec = torch.load("moments/%s_mean_x.pt" %
                                        generator.name(),
                                        map_location=device)
        generator_cov = torch.load("moments/%s_omega.pt" % generator.name(),
                                   map_location=device)
    except FileNotFoundError:
        print("Could not find moments of generator %s. Will exit now!" %
              generator.name())
        exit()
    # define the scalar moments of the generator's output distribution
    generator_mean, generator_std = utils.get_scalar_mean_std(
        generator_mean_vec, generator_cov)

    # Now get the moments of the inputs that come out of the transformation
    transformation_mean = None
    transformation_std = None
    # Either load pre-computed Omega and Phi, or generate from the test set
    Omega = None  # the student input - input covariance
    Phi = None  # the generator input - student input covariance
    try:
        mean_x = torch.load(
            "moments/%s_mean_x.pt" % model_desc,
            map_location=device,
        )
        Omega = torch.load(
            "moments/%s_Omega.pt" % model_desc,
            map_location=device,
        )
        Phi = torch.load(
            "moments/%s_phi.pt" % model_desc,
            map_location=device,
        )

        transformation_mean, transformation_std = utils.get_scalar_mean_std(
            mean_x, Omega)
    except FileNotFoundError:
        pass

    # networks and loss
    g = erfscaled if args.g == "erf" else F.relu
    gs = (g, identity)
    student = TwoLayer(gs, N, args.K, 1, normalise1=True, std0=1e-2)
    student.to(device)

    teacher = TwoLayer(gs, D, args.M, 1, normalise1=True, std0=1)
    nn.init.constant_(teacher.fc2.weight, 1)
    teacher.freeze()
    teacher.to(device)
    B = teacher.fc1.weight.data
    A = teacher.fc2.weight.data

    # collect the parameters that are going to be optimised by SGD
    params = []
    params += [{"params": student.fc1.parameters()}]
    # If we train the last layer, ensure its learning rate scales correctly
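    # (its step size is divided by N, matching the dt = 1/N time scale used for
    #  logging below)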
    params += [{"params": student.fc2.parameters(), "lr": lr / N}]
    optimizer = optim.SGD(params, lr=lr)
    criterion = HalfMSELoss()

    # when to print?
    end = torch.log10(torch.tensor([1.0 * args.steps])).item()
    times_to_print = list(torch.logspace(-1, end, steps=200))

    # generate the test set
    test_cs, test_xs, test_ys = utils.get_samples(
        device,
        NUM_TESTSAMPLES,
        generator,
        generator_mean,
        teacher,
        transformation,
        transformation_mean,
    )

    # If we didn't find pre-computed Omega and Phi (which we need to store the
    # initial conditions), compute them from the test set
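    # (the fallback below uses plain empirical second moments over the test set:
    #  Omega ~ E[x x^T] is the input-input covariance and Phi ~ E[x c^T] the
    #  input-latent covariance)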
    if Omega is None:
        Omega = 1 / NUM_TESTSAMPLES * test_xs.T @ test_xs
        Phi = 1 / NUM_TESTSAMPLES * test_xs.T @ test_cs

    nus = B.mm(test_cs.T) / math.sqrt(D)

    # output file + welcome message
    log_fname = "transform_online_%s_D%d_N%d_%s_M%d_K%d_lr%g_i2_s%d.dat" % (
        model_desc,
        D,
        N,
        args.g,
        M,
        K,
        lr,
        args.seed,
    )
    logfile = open(log_fname, "w", buffering=1)
    welcome = "# Two-layer nets on inputs from generator %s" % generator.name()
    if transformation is None:
        welcome += "\n"
    else:
        welcome += " with transformation %s\n" % transformation.name()
    welcome += "# M=%d, K=%d, lr=%g, batch size=%d, seed=%d\n" % (
        M,
        K,
        lr,
        args.bs,
        args.seed,
    )
    welcome += "# Using device:" + str(device)
    log(welcome, logfile)

    print("# Generator, Teacher and Student: ")
    for net in [generator, teacher, student]:
        msg = "# " + str(net).replace("\n", "\n# ")
        log(msg, logfile)

    msg = "# test xs: mean=%g, std=%g; test ys: std=%g" % (
        torch.mean(test_xs),
        torch.std(test_xs),
        torch.std(test_ys),
    )
    log(msg, logfile)

    T = 1.0 / B.shape[1] * B @ B.T
    rotation = Phi.T @ Phi
    tildeT = 1 / N * B @ rotation @ B.T
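    # T = B B^T / D is the teacher-teacher overlap; tildeT = B (Phi^T Phi) B^T / N
    # propagates that overlap through the input-latent covariance Phi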
    if args.store:
        with torch.no_grad():
            # compute the exact densities of r and q
            exq = torch.zeros((K, K, N), device=device)
            exr = torch.zeros((K, M, N), device=device)
            extildet = torch.zeros((M, M, N), device=device)
            sqrtN = math.sqrt(N)
            w = student.fc1.weight.data
            v = student.fc2.weight.data

            rhos, psis = torch.symeig(Omega, eigenvectors=True)
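            # (symeig returns the eigenvalues of Omega in ascending order and the
            #  eigenvectors as the columns of psis; newer PyTorch versions provide
            #  torch.linalg.eigh instead)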
            rhos = rhos.to(device)
            psis = psis.to(device)
            # make sure to normalise and orient the eigenvectors according to the note
            psis = sqrtN * psis.T

            GammaB = 1.0 / sqrtN * B @ Phi.T @ psis.T
            GammaW = 1.0 / sqrtN * w @ psis.T
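            # GammaW[k] / GammaB[n] hold the projections of the k-th student and
            # n-th teacher weight vector onto the eigenvectors of Omega; the
            # element-wise products below are the mode-by-mode densities of the
            # order parameters q, r and tildeT in that eigenbasis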

            for k in range(K):
                for l in range(K):
                    exq[k, l] = GammaW[k, :] * GammaW[l, :]
                for n in range(M):
                    exr[k, n] = GammaW[k, :] * GammaB[n, :]
            for n in range(M):
                for m in range(M):
                    extildet[n, m] = GammaB[n, :] * GammaB[m, :]

            root_name = log_fname[:-4]
            np.savetxt(root_name + "_T.dat", T.cpu().numpy(), delimiter=",")
            np.savetxt(root_name + "_rhos.dat",
                       rhos.cpu().numpy(),
                       delimiter=",")
            np.savetxt(root_name + "_T.dat", T.cpu().numpy(), delimiter=",")
            np.savetxt(root_name + "_A.dat", A[0].cpu().numpy(), delimiter=",")
            np.savetxt(root_name + "_v0.dat",
                       v[0].cpu().numpy(),
                       delimiter=",")

            write_density(root_name + "_q0.dat", exq)
            write_density(root_name + "_r0.dat", exr)
            write_density(root_name + "_tildet.dat", extildet)

    time = 0
    dt = 1 / N
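    # one SGD step advances time by dt = 1/N, so "time" counts training steps in
    # units of N (cf. the --steps help text)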

    msg = eval_student(time, student, test_xs, test_ys, nus, T, tildeT, A,
                       criterion)
    log(msg, logfile)
    while len(times_to_print) > 0:
        # get the inputs
        cs, inputs, targets = utils.get_samples(
            device,
            args.bs,
            generator,
            generator_mean,
            teacher,
            transformation,
            transformation_mean,
        )

        for i in range(args.bs):
            student.train()
            preds = student(inputs[i])
            loss = criterion(preds, targets[i])

            # TRAINING
            student.zero_grad()
            loss.backward()
            optimizer.step()

            time += dt

            if time >= times_to_print[0].item() or time == 0:
                msg = eval_student(time, student, test_xs, test_ys, nus, T,
                                   tildeT, A, criterion)
                log(msg, logfile)
                times_to_print.pop(0)

    print("Bye-bye")
Example #2
from visual_model_selector import ModelFactory
from configs import argHandler  # Import the default arguments
from utils import set_gpu_usage, get_multilabel_evaluation_metrics, get_generator, get_evaluation_metrics
from tensorflow.keras.models import load_model
from tensorflow.keras import metrics
import os

FLAGS = argHandler()
FLAGS.setDefaults()

set_gpu_usage(FLAGS.gpu_percentage)

model_factory = ModelFactory()

train_generator = get_generator(FLAGS.train_csv, FLAGS)
test_generator = get_generator(FLAGS.test_csv, FLAGS)

if FLAGS.load_model_path != '' and FLAGS.load_model_path is not None:
    visual_model = load_model(FLAGS.load_model_path)
    if FLAGS.show_model_summary:
        visual_model.summary()
else:
    visual_model = model_factory.get_model(FLAGS)


def get_metrics_from_generator(generator,
                               threshold_range=(0.01, 0.99),
                               verbose=1):
    y_hat = visual_model.predict_generator(generator, verbose=verbose)
Example #3
try:
    os.makedirs(write_path)
except:
    print("path already exists")

set_gpu_usage(FLAGS.gpu_percentage)

model_factory = ModelFactory()

if FLAGS.load_model_path != '' and FLAGS.load_model_path is not None:
    visual_model = load_model(FLAGS.load_model_path)
    if FLAGS.show_model_summary:
        visual_model.summary()
else:
    visual_model = model_factory.get_model(FLAGS)
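# a batch size of 1 makes every generator batch a single image, so the batch
# index below doubles as the index into images_names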
FLAGS.batch_size = 1
test_generator = get_generator(FLAGS.test_csv, FLAGS)

images_names = test_generator.x_path

for batch_i in tqdm(range(test_generator.steps)):
    batch, _ = test_generator.__getitem__(batch_i)
    image_path = os.path.join(FLAGS.image_directory, images_names[batch_i])
    original = cv2.imread(image_path)
    if original is None:
        print(f"There was an error loading {image_path} using opencv")
        continue
    preds = visual_model.predict(batch)
    predicted_class = np.argmax(preds[0])
    label = FLAGS.classes[predicted_class]
    cam = GradCAM(visual_model, predicted_class)
    heatmap = cam.compute_heatmap(batch)
Example #4
import loss

if __name__ == '__main__':
    # Make directory to save plots
    path = os.path.join(
        os.getcwd(), 'plots', args.loss + ("_top_k" if args.topk else "") +
        ("_sn" if args.spectral_norm else "") +
        ("_clip" if args.clip_weights else ""))
    os.makedirs(path, exist_ok=True)
    # Init hyperparameters
    fixed_generator_noise: torch.Tensor = torch.randn(
        [args.samples // 10, args.latent_size], device=args.device)
    # Get data
    data: torch.Tensor = utils.get_data(samples=args.samples).to(args.device)
    # Get generator
    generator: nn.Module = utils.get_generator(latent_size=args.latent_size)
    # Get discriminator
    discriminator: nn.Module = utils.get_discriminator(
        use_spectral_norm=args.spectral_norm)
    # Init Loss function
    if args.loss == 'standard':
        loss_generator: nn.Module = loss.GANLossGenerator()
        loss_discriminator: nn.Module = loss.GANLossDiscriminator()
    elif args.loss == 'non-saturating':
        loss_generator: nn.Module = loss.NSGANLossGenerator()
        loss_discriminator: nn.Module = loss.NSGANLossDiscriminator()
    elif args.loss == 'hinge':
        loss_generator: nn.Module = loss.HingeGANLossGenerator()
        loss_discriminator: nn.Module = loss.HingeGANLossDiscriminator()
    elif args.loss == 'wasserstein':
        loss_generator: nn.Module = loss.WassersteinGANLossGenerator()
Example #5
from tensorflow.keras import metrics
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, TensorBoard, CSVLogger
import os
from tensorflow.keras.models import load_model
from augmenter import augmenter
from auroc import MultipleClassAUROC
import json

FLAGS = argHandler()
FLAGS.setDefaults()

model_factory = ModelFactory()

# load training and test set file names

train_generator = get_generator(FLAGS.train_csv, FLAGS, augmenter)
test_generator = get_generator(FLAGS.test_csv, FLAGS)

class_weights = None
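# optional class balancing: positive_weights_multiply up-weights the positive
# examples, with separate helpers for the multi-label and single-label cases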
if FLAGS.use_class_balancing:
    if FLAGS.multi_label_classification:
        class_weights = get_multilabel_class_weights(
            train_generator.y, FLAGS.positive_weights_multiply)
    else:
        class_weights = get_class_weights(train_generator.get_class_counts(),
                                          FLAGS.positive_weights_multiply)

# load classifier from saved weights or get a new one
training_stats = {}
learning_rate = FLAGS.learning_rate
Example #6
train_X, train_Y, train_F, train_S, test_X, test_Y, test_F = load_DRIVE(
    PATCH_SIZE)

# loading weights
if USE_PRETRAINED:
    path = ('../weights/' + MODEL.lower() + '_' + LOSS_TYPE.lower() +
            '_weights.pth')
    model.load_state_dict(torch.load(path))

# training procedure
else:
    optimizer = optim.Adam(
        [p for p in model.parameters() if p.requires_grad], 1e-3)

    # create a train generator
    train_gen = get_generator(train_X, train_Y, train_S, PATCH_SIZE,
                              BATCH_SIZE, SEED)

    # training routine
    model.train()
    bce_loss = nn.BCELoss(reduction='none')

    for e in range(EPOCHS):

        train_loss = 0
        t1_loss = 0
        t2_loss = 0

        alpha = get_mix_coef(MIX_COEFF_INIT, MIX_COEFF_DECAY, START_DECAY,
                             MIX_COEFF_MIN, e)
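        # mixing coefficient between the two loss terms (t1/t2), presumably decayed
        # by get_mix_coef from MIX_COEFF_INIT towards MIX_COEFF_MIN starting at
        # epoch START_DECAY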
        print(alpha)
Example #7
def main():
    parser = argparse.ArgumentParser()
    device_help = "which device to run on: 'cuda:x' or 'cpu'"
    generator_help = "Generator of the inputs: dcgan_rand, dcgan_cifar10, dcgan_cifar100_grey, nvp_cifar10."
    transform_help = "Transform: identity, ..."
    checkpoint_help = "checkpoint every ... steps"
    seed_help = "random number generator seed."
    parser.add_argument("--generator", help=generator_help, default="rand")
    parser.add_argument("--transform", help=transform_help)
    parser.add_argument("--device", "-d", help=device_help)
    parser.add_argument("--bs", type=int, default=4096, help="batch size.")
    parser.add_argument("--steps",
                        type=int,
                        default=int(1e9),
                        help="number of steps")
    parser.add_argument("--checkpoint",
                        type=int,
                        default=1000,
                        help=checkpoint_help)
    parser.add_argument("-q", "--quiet", help="be quiet", action="store_true")
    parser.add_argument("-s", "--seed", type=int, default=0, help=seed_help)
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    if args.device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(args.device)

    # Will use chunks of data of size (batch_size, N) or (batch_size, D) etc.
    batch_size = args.bs

    # Find the right generator...
    generator = utils.get_generator(args.generator, device)
    # ... and transformation of the inputs
    transformation = utils.get_transformation(args.transform, generator,
                                              device)

    # Define the dimensions of the problem
    D = generator.N_in
    N = generator.N_out if transformation is None else transformation.N_out
    # and its moments
    generator_mean = None
    generator_std = None

    # If we want to estimate the moments of generator + transform, load moments
    # of the generator first
    if transformation is not None:
        try:
            generator_mean_vec = torch.load("moments/%s_mean_x.pt" %
                                            generator.name(),
                                            map_location=device)
            generator_cov = torch.load("moments/%s_omega.pt" %
                                       generator.name(),
                                       map_location=device)
            generator_mean, generator_std = utils.get_scalar_mean_std(
                generator_mean_vec, generator_cov)
        except FileNotFoundError:
            print(
                "Could not find moments of generator. Can therefore not estimate "
                "moments of generator + transformation. Will exit now!")
            exit()

    max_P = args.steps * args.bs
    transform_name = "" if transformation is None else transformation.name(
    ) + "_"
    log_fname = "covariance_%s_%sP%g_s%d.dat" % (
        generator.name(),
        transform_name,
        max_P,
        args.seed,
    )
    logfile = open(log_fname, "w", buffering=1)
    welcome = "# Computing the covariance for %s" % generator.name()
    if transformation is None:
        welcome += "\n"
    else:
        welcome += " with transformation %s\n" % transformation.name()
    welcome += "# batch size=%d, seed=%d\n" % (batch_size, args.seed)
    welcome += "# Using device: %s\n" % str(device)
    welcome += "# samples, diff E c, diff E x, diff Omega, diff Phi"
    log(welcome, logfile)

    # Hold the Monte Carlo estimators computed here
    variables = ["mean_c", "mean_x", "omega", "phi"]
    mc = {
        "mean_c": torch.zeros(D).to(device),  # estimate of mean of c
        "mean_x": torch.zeros(N).to(device),  # estimate of mean of x
        "omega": torch.zeros(N, N).to(device),  # input-input covariance
        "phi": torch.zeros(N, D).to(device),  # input-latent covariance
    }
    M2_omega = torch.zeros(N, N).to(device)  # running estimate of residuals
    M2_phi = torch.zeros(N, D).to(device)  # running estimate of residuals

    # store the values of the covariance matrices at the last checkpoint
    mc_last = dict()
    for name in variables:
        mc_last[name] = torch.zeros(mc[name].shape).to(device)

    step = -1
    with torch.no_grad():
        while step < args.steps:
            for _ in tqdm(range(args.checkpoint)):
                # slightly unusual place for the step increment; this preserves
                # the usual notation when computing the current estimate of the
                # covariance outside this loop
                step += 1

                # Generate a new batch of data
                cs, xs, _ = utils.get_samples(
                    device,
                    batch_size,
                    generator,
                    generator_mean,
                    teacher=None,
                    transformation=transformation,
                )

                # Update the estimators.
                ########################
                # keep a copy of the old mean (the += updates below are in place)
                mc_mean_x_old = mc["mean_x"].clone()
                # Start with the means
                dmean_c = torch.mean(cs, axis=0) - mc["mean_c"]
                mc["mean_c"] += dmean_c / (step + 1)
                dmean_x = torch.mean(xs, axis=0) - mc["mean_x"]
                mc["mean_x"] += dmean_x / (step + 1)
                # now the residuals
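                # Welford-style online update: M2_* accumulates batch-averaged
                # products of samples centred with the old mean on one side and
                # the updated mean on the other; dividing by (step + 1) below
                # turns them into covariance estimates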
                M2_omega += (xs - mc_mean_x_old).T @ (
                    xs - mc["mean_x"]) / batch_size
                M2_phi += (xs - mc_mean_x_old).T @ (cs -
                                                    mc["mean_c"]) / batch_size

            mc["omega"] = M2_omega / (step + 1)
            mc["phi"] = M2_phi / (step + 1)

            # Build status message
            status = "%g" % (step * args.bs)
            for name in variables:
                diff = torch.sqrt(torch.mean((mc[name] - mc_last[name])**2))
                status += ", %g" % diff

            log(status, logfile)

            # Write the estimates to files
            for name in variables:
                fname = log_fname[:-4] + ("_%s_%g.pt" %
                                          (name, step * batch_size))
                torch.save(mc[name], fname)

            for name in variables:
                mc_last[name] = mc[name].clone().detach()

        # Write the estimates to files
        for name in variables:
            fname = log_fname[:-4] + ("_%s_%g.pt" % (name, step * batch_size))
            torch.save(mc[name], fname)
Example #8
def main(argv):
    del argv

    utils.make_output_dir(FLAGS.output_dir)
    data_processor = utils.DataProcessor()
    images = utils.get_train_dataset(data_processor, FLAGS.dataset,
                                     FLAGS.batch_size)

    logging.info('Learning rate: %g', FLAGS.learning_rate)

    # Construct optimizers.
    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)

    # Create the networks and models.
    generator = utils.get_generator(FLAGS.dataset)
    metric_net = utils.get_metric_net(FLAGS.dataset, FLAGS.num_measurements)

    model = cs.CS(metric_net, generator, FLAGS.num_z_iters, FLAGS.z_step_size,
                  FLAGS.z_project_method)
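    # cs.CS optimises the latent input for num_z_iters gradient steps of size
    # z_step_size (z_project_method controls how z is projected back);
    # optimise_and_sample below runs this optimisation to produce reconstructions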
    prior = utils.make_prior(FLAGS.num_latents)
    generator_inputs = prior.sample(FLAGS.batch_size)

    model_output = model.connect(images, generator_inputs)
    optimization_components = model_output.optimization_components
    debug_ops = model_output.debug_ops
    reconstructions, _ = utils.optimise_and_sample(generator_inputs,
                                                   model,
                                                   images,
                                                   is_training=False)

    global_step = tf.train.get_or_create_global_step()
    update_op = optimizer.minimize(optimization_components.loss,
                                   var_list=optimization_components.vars,
                                   global_step=global_step)

    sample_exporter = file_utils.FileExporter(
        os.path.join(FLAGS.output_dir, 'reconstructions'))

    # Hooks.
    debug_ops['it'] = global_step
    # Abort training on Nans.
    nan_hook = tf.train.NanTensorHook(optimization_components.loss)
    # Step counter.
    step_counter_hook = tf.train.StepCounterHook()

    checkpoint_saver_hook = tf.train.CheckpointSaverHook(
        checkpoint_dir=utils.get_ckpt_dir(FLAGS.output_dir), save_secs=10 * 60)

    loss_summary_saver_hook = tf.train.SummarySaverHook(
        save_steps=FLAGS.summary_every_step,
        output_dir=os.path.join(FLAGS.output_dir, 'summaries'),
        summary_op=utils.get_summaries(debug_ops))

    hooks = [
        checkpoint_saver_hook, nan_hook, step_counter_hook,
        loss_summary_saver_hook
    ]

    if FLAGS.phase == 'train':
        # Start training.
        with tf.train.MonitoredSession(hooks=hooks) as sess:
            logging.info('starting training')

            for i in range(FLAGS.num_training_iterations):
                sess.run(update_op)

                if i % FLAGS.export_every == 0:
                    reconstructions_np, data_np = sess.run(
                        [reconstructions, images])
                    # Post-process the data and the reconstructions before exporting them.
                    data_np = data_processor.postprocess(data_np)
                    reconstructions_np = data_processor.postprocess(
                        reconstructions_np)
                    sample_exporter.save(reconstructions_np, 'reconstructions')
                    sample_exporter.save(data_np, 'data')
    else:
        saver = tf.train.Saver()
        # Start testing
        with tf.Session() as sess:

            init_op = tf.global_variables_initializer()
            sess.run(init_op)

            print(" [*] Reading checkpoint...")
            checkpoint_dir = utils.get_ckpt_dir(FLAGS.output_dir)

            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
                saver.restore(sess, os.path.join(checkpoint_dir, ckpt_name))

            reconstructions_np, data_np = sess.run([reconstructions, images])
            # Post-process the data and the reconstructions before exporting them.
            data_np = data_processor.postprocess(data_np)
            reconstructions_np = data_processor.postprocess(reconstructions_np)
            sample_exporter.save(reconstructions_np, 'reconstructions')
            sample_exporter.save(data_np, 'data')