Esempio n. 1
0
def main(cfg, num_workers):
    """Evaluate the ``model_best.pt`` checkpoint on the test split.

    Builds the test dataset/loader, the model and a trainer from ``cfg``,
    tries to restore ``model_best.pt`` from the output directory, runs
    ``trainer.evaluate`` on the test loader and writes the resulting
    evaluation dictionary to ``eval_dict.yml`` inside the output directory.

    Args:
        cfg: Nested configuration mapping. This function reads the
            ``cfg['training']`` keys ``out_dir``, ``batch_size``,
            ``model_selection_metric`` and ``model_selection_mode``.
        num_workers: Number of workers handed to the ``DataLoader``.
    """
    # Shortened
    out_dir = cfg['training']['out_dir']
    batch_size = cfg['training']['batch_size']

    model_selection_metric = cfg['training']['model_selection_metric']
    model_selection_sign = 1 if cfg['training'][
        'model_selection_mode'] == 'maximize' else -1

    # Output directory: set it up before the config copy is written into it
    # (assumes cond_mkdir creates the directory when it is missing).
    utils.cond_mkdir(out_dir)
    utils.save_config(os.path.join(out_dir, 'config.yml'), cfg)

    # Dataset
    test_dataset = config.get_dataset('test', cfg)

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              num_workers=num_workers,
                                              shuffle=False)

    # Model (no optimizer is needed for evaluation, hence the None arguments)
    model = config.get_model(cfg)
    trainer = config.get_trainer(model, None, cfg)

    # Print model
    print(model)
    logger = logging.getLogger(__name__)
    logger.info(
        f'Total number of parameters: {sum(p.numel() for p in model.parameters())}'
    )

    ckp = checkpoints.CheckpointIO(out_dir, model, None, cfg)
    try:
        load_dict = ckp.load('model_best.pt')
        logger.info('Model loaded')
    except FileExistsError:
        # NOTE(review): presumably CheckpointIO.load signals a missing file
        # with FileExistsError - confirm against the checkpoints module.
        logger.info('Model NOT loaded')
        load_dict = dict()

    # Without a stored value, start from the worst possible metric for the
    # configured selection mode.
    metric_val_best = load_dict.get('loss_val_best',
                                    -model_selection_sign * np.inf)

    logger.info(
        f'Current best validation metric ({model_selection_metric}): {metric_val_best:.6f}'
    )

    eval_dict = trainer.evaluate(test_loader)
    metric_val = eval_dict[model_selection_metric]
    logger.info(
        f'Validation metric ({model_selection_metric}): {metric_val:.8f}')

    # Persist the evaluation results. The original dumped the ``config``
    # module here, so eval_dict.yml never contained the metrics.
    eval_dict_path = os.path.join(out_dir, 'eval_dict.yml')
    with open(eval_dict_path, 'w') as f:
        yaml.dump(eval_dict, f)

    print(f'Results saved in {eval_dict_path}')
        coord.join(threads)
        summary_writer.close()
        pbar.close()


if __name__ == "__main__":
    # Parse the command line and normalise casing: the model name is
    # upper-cased, the dataset name lower-cased.
    parser = build_parser()
    FLAGS = parser.parse_args()
    FLAGS.model = FLAGS.model.upper()
    FLAGS.dataset = FLAGS.dataset.lower()
    # Without an explicit run name, fall back to the lower-cased model name.
    FLAGS.name = FLAGS.model.lower() if FLAGS.name is None else FLAGS.name
    config.pprint_args(FLAGS)

    # Look up the dataset information for the requested dataset.
    dataset_pattern, n_examples = config.get_dataset(FLAGS.dataset)

    # Build the input pipeline and the model, then hand both to train().
    X = input_pipeline(
        dataset_pattern,
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_threads,
        num_epochs=FLAGS.num_epochs,
    )
    model = config.get_model(FLAGS.model, FLAGS.name, training=True)
    train(
        model=model,
        input_op=X,
        num_epochs=FLAGS.num_epochs,
        batch_size=FLAGS.batch_size,
        n_examples=n_examples,
        renew=FLAGS.renew,
    )
Esempio n. 3
0
import tensorflow as tf
import os
from config import get_dataset, get_models
import numpy as np
from model import *
# Make only CUDA device 0 visible to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Module-level settings (presumably hyper-parameters used further down the
# file - TODO confirm where they are consumed).
max_epsilon, num_iter, momentum = 25.0, 32, 1
configs = dict(batch_size=64, epoch=5, kernel_size=7)

# Load the data and convert both label arrays with to_categorical.
(X_train, y_train), (X_test, y_test) = get_dataset()
to_categorical = tf.keras.utils.to_categorical
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Shuffle the training samples and labels with one shared permutation.
ind = np.random.permutation(X_train.shape[0])
X_train, y_train = X_train[ind], y_train[ind]

# Hold out the trailing VALIDATION_SPLIT fraction as the validation set.
VALIDATION_SPLIT = 0.1
n = int(X_train.shape[0] * (1 - VALIDATION_SPLIT))
X_train, X_valid = X_train[:n], X_train[n:]
y_train, y_valid = y_train[:n], y_train[n:]

# model_index = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
# models = get_models(model_index)


class Dummy:
    """Empty class that defines no attributes or methods of its own."""
Esempio n. 4
0
    merged_samples = utils.merge(fake_samples, size=shape)
    fn = "{:0>6d}.png".format(global_step)
    scipy.misc.imsave(os.path.join(dir_name, fn), merged_samples)


if __name__ == "__main__":
    parser = build_parser()
    FLAGS = parser.parse_args()
    FLAGS.model = FLAGS.model.upper()
    FLAGS.dataset = FLAGS.dataset.lower()
    if FLAGS.name is None:
        FLAGS.name = FLAGS.model.lower()
    config.pprint_args(FLAGS)

    # get information for dataset
    dataset_pattern, n_examples, n_channels = config.get_dataset(FLAGS.dataset)

    # input pipeline
    X = input_pipeline(dataset_pattern,
                       batch_size=FLAGS.batch_size,
                       num_threads=FLAGS.num_threads,
                       num_epochs=FLAGS.num_epochs,
                       image_size=FLAGS.image_size,
                       dataset=FLAGS.dataset)

    # Arbitrarily sized crops will be resized to 64x64x3. Model will be constructed accordingly

    image_shape = [FLAGS.image_size, FLAGS.image_size, n_channels]
    batch_norm = True

    if FLAGS.name == 'dragan' or FLAGS.name == 'dcgan-cons':
Esempio n. 5
0
def main(outdir):
    """Train a conditional generator/discriminator pair from scratch.

    Creates the ``all``, ``snapshots`` and ``clusters`` sub-directories of
    ``outdir``, selects the data sampler and the quality metric matching
    ``data_type`` (``'grid'`` or ``'ring'``), prepares test data, a training
    loader, the models, their optimizers, a ``Trainer`` and a clusterer, and
    finally runs the training loop.

    Besides ``outdir`` the function relies on names defined outside of it
    (``data_type``, ``z_dim``, ``device``, ``args``, ``nepochs`` and others).

    Args:
        outdir: Directory used for the log file and the model snapshots; it
            is also handed to the visualisation helpers.

    Raises:
        NotImplementedError: If ``data_type`` is neither ``'grid'`` nor
            ``'ring'``.
    """
    # Make sure the expected sub-directories exist.
    for name in ('all', 'snapshots', 'clusters'):
        target = os.path.join(outdir, name)
        if not os.path.exists(target):
            os.makedirs(target, exist_ok=True)

    # Pick the sampler / metric pair that belongs to the data layout.
    if data_type == 'grid':
        get_data, percent_good = (inputs.get_data_grid,
                                  evaluation.percent_good_grid)
    elif data_type == 'ring':
        get_data, percent_good = (inputs.get_data_ring,
                                  evaluation.percent_good_ring)
    else:
        raise NotImplementedError()

    # Standard normal latent distribution and a fixed batch of test latents.
    latent_mean = torch.zeros(z_dim, device=device)
    latent_std = torch.ones(z_dim, device=device)
    zdist = distributions.Normal(latent_mean, latent_std)
    z_test = zdist.sample((test_batch_size, ))

    # Test data for evaluation plus a separate batch of 10000 samples that
    # is handed to the clusterer.
    shared_test_args = dict(get_data=get_data,
                            variance=variance,
                            k_value=k_value,
                            device=device)
    x_test, y_test = get_test(batch_size=test_batch_size, **shared_test_args)
    x_cluster, _ = get_test(batch_size=10000, **shared_test_args)

    train_loader = get_dataset(get_data=get_data,
                               batch_size=train_batch_size,
                               npts=npts,
                               variance=variance,
                               k_value=k_value)

    def train(gan_trainer, g, d, label_source, exp_dir):
        """Run the training loop and log, visualise and snapshot every 1000 iterations."""
        log_path = os.path.join(exp_dir, 'log.txt')
        # Start from an empty log file.
        if os.path.exists(log_path):
            os.remove(log_path)

        it = 0
        for epoch in range(nepochs):
            for x_real, y in train_loader:
                z = zdist.sample((train_batch_size, ))
                x_real, y = x_real.to(device), y.to(device)
                # Replace the loader labels with the clusterer's labels.
                y = label_source.get_labels(x_real, y)

                dloss, _ = gan_trainer.discriminator_trainstep(x_real, y, z)
                gloss = gan_trainer.generator_trainstep(y, z)

                # Recluster on schedule, except for the 'supervised'
                # clusterer and while a 'burnin' clusterer is still burning in.
                if (it % args.recluster_every == 0
                        and args.clusterer != 'supervised'
                        and (args.clusterer != 'burnin'
                             or it >= args.burnin_time)):
                    label_source.recluster(discriminator, x_batch=x_real)

                if it % 1000 == 0:
                    # Visualise generated samples and the current clusters.
                    test_labels = label_source.get_labels(x_test, y_test)
                    x_fake = g(z_test, test_labels).detach().cpu().numpy()

                    visualize_generated(x_fake,
                                        x_test.detach().cpu().numpy(), y, it,
                                        exp_dir)

                    visualize_clusters(x_test.detach().cpu().numpy(),
                                       label_source.get_labels(x_test, y_test),
                                       it, exp_dir)

                    # Snapshot models and optimizers.
                    snapshot = {
                        'generator': g.state_dict(),
                        'discriminator': d.state_dict(),
                        'g_optimizer': g_optimizer.state_dict(),
                        'd_optimizer': d_optimizer.state_dict()
                    }
                    torch.save(
                        snapshot,
                        os.path.join(exp_dir, 'snapshots', 'model_%d.pt' % it))

                    # Evaluate with both networks switched to eval mode.
                    g.eval()
                    d.eval()

                    eval_labels = label_source.get_labels(x_test, y_test)
                    x_fake = g(z_test, eval_labels).detach().cpu().numpy()
                    percent, modes, kl = percent_good(x_fake, var=variance)
                    log_message = f'[epoch {epoch} it {it}] dloss = {dloss}, gloss = {gloss}, prop_real = {percent}, modes = {modes}, kl = {kl}'
                    with open(log_path, 'a+') as f:
                        f.write(log_message + '\n')
                    print(log_message)

                it += 1

    # train a G/D from scratch
    generator, discriminator = get_models(args.model_type, 'conditional',
                                          num_clusters, args.d_act_dim, device)
    g_optimizer, d_optimizer = get_optimizers(generator, discriminator)
    trainer = Trainer(generator,
                      discriminator,
                      g_optimizer,
                      d_optimizer,
                      gan_type='standard',
                      reg_type='none',
                      reg_param=0)
    clusterer_cls = clusterer_dict[args.clusterer]
    clusterer = clusterer_cls(discriminator=discriminator,
                              k_value=num_clusters,
                              x_cluster=x_cluster)
    clusterer.recluster(discriminator=discriminator)
    train(trainer, generator, discriminator, clusterer, os.path.join(outdir))
def main(cnf=None):
    """Process the latest incoming emails and act on each one.

    For every message returned by ``get_latest_messages``:

    * senders rejected by ``email_allowed`` are skipped;
    * mail whose ``Delivered-to`` address contains no ``+`` gets a schedule
      generated for the sender;
    * otherwise the token taken from the address is validated, and the first
      URL found in the first 1024 characters of the text/plain part
      (ignoring URLs that contain the CKAN host) is registered as a new CSV
      resource on the dataset tied to the token. A success mail is then
      sent and the token is invalidated.

    A message that cannot be handled is reported on stdout and skipped, so
    the remaining messages are still processed.

    Args:
        cnf: Configuration source passed unchanged to ``config.load_config``.
    """
    config.load_config(cnf)

    # print() with a single argument behaves the same on Python 2 and 3.
    print("Responding to emails")

    # Connect and grab the latest emails (how many is decided by
    # get_latest_messages).
    messages = get_latest_messages()

    # Iterate and process
    for message in messages:
        print("----")

        to = message['Delivered-to']
        frm = message['From']
        email = raw_email(frm)

        if not email_allowed(email):
            print("{} is not an allowed address".format(email))
            continue

        if '+' not in to:
            print("Generating schedule for {}".format(email))
            generate_schedule(email)
            continue

        # We need to extract the token from the email address (first+TOKEN@....)
        # and ensure it matches the email address we received.
        token = token_from_email(to)

        print("Looking for token - {}".format(token))
        success, record = validate_token(token, frm)
        if not success:
            # Notify user of failure?
            print("FAILED to find a record for the token")
            continue

        dataset = record['dataset']

        print("Looking for URL to add to {}".format(dataset))

        # get_payload() returns a list of sub-messages only for multipart
        # mail; for anything else it returns the body itself, which has no
        # parts to inspect.
        payloads = message.get_payload()
        if not isinstance(payloads, list):
            payloads = []
        text_parts = [
            p for p in payloads if p.get_content_type() == "text/plain"
        ]
        if not text_parts:
            # Previously this case raised and aborted the whole batch.
            print("Could not find a text/plain part")
            continue
        payload = text_parts[0]

        # Only the first 1024 characters of the part are searched, and links
        # that point back at the CKAN host are ignored.
        ckan_host = config.ckan_host()
        m = [
            r for r in re.findall(URL_REGEX,
                                  payload.as_string()[0:1024])
            if ckan_host not in r
        ]
        if not m:
            print("Could not find any URLs")
            continue

        first_url = m[0]
        print("Processing first URL: {} and adding to {}".format(
            first_url, dataset))

        res = config.ckan().action.resource_create(
            **{
                'package_id': dataset,
                'url': first_url,
                'description': 'CSV',
                'format': 'CSV',
                'date': datetime.now().strftime("%d/%m/%Y")
            })

        print(res)

        print("Sending success message")
        send_success(email, config.this_email(), config.get_dataset(dataset))
        invalidate_token(token)
Esempio n. 7
0
def _str2bool(value):
    """Convert a command-line string such as "true" or "false" to a bool.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value - including "False" - ends up as True. This
    converter interprets the usual spellings instead.

    Raises:
        argparse.ArgumentTypeError: If the value is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got %r' % (value,))


def main():
    """Parse the command line and train/evaluate a network for the given epochs.

    The run directory (``--save`` or ``work/NETWORK/DATASET``) is wiped and
    recreated. Each epoch runs ``adjust_opt``, ``train`` and ``test``, and
    then stores the whole network as ``EPOCH.pth`` in the run directory.
    ``train.csv`` and ``test.csv`` in that directory are passed to
    ``train`` and ``test`` as open file handles.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--network-name', type=str, required=True)
    parser.add_argument('-d', '--dataset-name', type=str, required=True)
    parser.add_argument('-c', '--num-classes', type=int, required=True)
    # type=bool would turn "False" into True, hence the explicit converter.
    parser.add_argument('-m', '--multilabel', type=_str2bool, default=False)
    parser.add_argument('-p', '--pretrained', type=_str2bool, default=False)
    parser.add_argument('-l', '--load')
    parser.add_argument('--batchSz', type=int, default=64)
    parser.add_argument('--nEpochs', type=int, default=300)
    parser.add_argument('--sEpoch', type=int, default=1)
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--save')
    parser.add_argument('--seed', type=int, default=50)
    parser.add_argument('--opt', type=str, default='sgd', choices=('sgd', 'adam', 'rmsprop'))
    args = parser.parse_args()

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    args.save = args.save or 'work/%s/%s' % (args.network_name, args.dataset_name)
    setproctitle.setproctitle(args.save)

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Start every run from an empty output directory.
    if os.path.exists(args.save):
        shutil.rmtree(args.save)
    os.makedirs(args.save, exist_ok=True)

    normMean = [0.49139968, 0.48215827, 0.44653124]
    normStd = [0.24703233, 0.24348505, 0.26158768]
    normTransform = transforms.Normalize(normMean, normStd)

    # transforms.Resize replaces the deprecated transforms.Scale alias.
    trainTransform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        RandomBrightness(-0.25, 0.25),
        normTransform
    ])
    testTransform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normTransform
    ])

    kwargs = {'num_workers': 6, 'pin_memory': True} if args.cuda else {}
    trainLoader = DataLoader(
        config.get_dataset(args.dataset_name, 'train', trainTransform),
        batch_size=args.batchSz, shuffle=True, **kwargs)
    testLoader = DataLoader(
        config.get_dataset(args.dataset_name, 'test', testTransform),
        batch_size=args.batchSz, shuffle=False, **kwargs)

    if args.load:
        print("Loading network: {}".format(args.load))
        # NOTE(review): torch.load unpickles the file - only load
        # checkpoints that come from a trusted source.
        net = torch.load(args.load)
    else:
        net = config.get_network(args.network_name, args.num_classes, args.pretrained)

    if True:  # make this an optional
        net = torch.nn.DataParallel(net)

    print('  + Number of params: {}'.format(sum(p.data.nelement() for p in net.parameters())))
    if args.cuda:
        net = net.cuda().half()

    # argparse restricts --opt to these three choices, so one branch matches.
    if args.opt == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=1e-1, momentum=0.9, weight_decay=1e-4)
    elif args.opt == 'adam':
        optimizer = optim.Adam(net.parameters(), weight_decay=1e-4)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(net.parameters(), weight_decay=1e-4)

    # The context manager closes both log files even if training raises.
    with open(os.path.join(args.save, 'train.csv'), 'a') as trainF, \
            open(os.path.join(args.save, 'test.csv'), 'a') as testF:
        for epoch in range(args.sEpoch, args.nEpochs + args.sEpoch):
            adjust_opt(args.opt, optimizer, epoch)
            train(args, epoch, net, trainLoader, optimizer, trainF)
            test(args, epoch, net, testLoader, optimizer, testF)
            torch.save(net, os.path.join(args.save, '%d.pth' % epoch))
Esempio n. 8
0
def main(cfg, num_workers):
    """Train a model, validating and checkpointing periodically.

    Builds the train/validation loaders, the model, an Adam optimizer and a
    trainer from ``cfg``, resumes from ``model_best.pt`` when it can be
    loaded, and then loops over the training data until the configured
    iteration or epoch limit is reached. At that point a final checkpoint
    is written and the process exits with status 3.

    Args:
        cfg: Nested configuration mapping. This function reads the
            ``cfg['training']`` keys ``out_dir``, ``batch_size``,
            ``backup_every``, ``model_selection_metric``,
            ``model_selection_mode``, ``print_every``, ``validate_every``,
            ``max_iterations`` and ``max_epochs``.
        num_workers: Number of workers handed to both ``DataLoader`` objects.

    Raises:
        SystemExit: With code 3 once the iteration/epoch limit is reached.
    """
    import sys  # local import: only needed for the final sys.exit

    # Shortened
    out_dir = cfg['training']['out_dir']
    batch_size = cfg['training']['batch_size']
    backup_every = cfg['training']['backup_every']

    model_selection_metric = cfg['training']['model_selection_metric']
    model_selection_sign = 1 if cfg['training'][
        'model_selection_mode'] == 'maximize' else -1

    # Output directory: set it up before the config copy is written into it
    # (assumes cond_mkdir creates the directory when it is missing).
    utils.cond_mkdir(out_dir)
    utils.save_config(os.path.join(out_dir, 'config.yml'), cfg)

    # Dataset
    train_dataset = config.get_dataset('train', cfg)
    val_dataset = config.get_dataset('val', cfg)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             num_workers=num_workers,
                                             shuffle=False)

    # Model
    model = config.get_model(cfg)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    trainer = config.get_trainer(model, optimizer, cfg)

    # Print model
    print(model)
    logger = logging.getLogger(__name__)
    logger.info(
        f'Total number of parameters: {sum(p.numel() for p in model.parameters())}'
    )

    # load pretrained model
    tb_logger = tensorboardX.SummaryWriter(os.path.join(out_dir, 'logs'))
    ckp = checkpoints.CheckpointIO(out_dir, model, optimizer, cfg)
    try:
        load_dict = ckp.load('model_best.pt')
        logger.info('Model loaded')
    except FileExistsError:
        # NOTE(review): presumably CheckpointIO.load signals a missing file
        # with FileExistsError - confirm against the checkpoints module.
        logger.info('Model NOT loaded')
        load_dict = dict()

    # Counters start at -1 so the first epoch/iteration is numbered 0.
    epoch_it = load_dict.get('epoch_it', -1)
    it = load_dict.get('it', -1)
    # Without a stored value, start from the worst possible metric for the
    # configured selection mode.
    metric_val_best = load_dict.get('loss_val_best',
                                    -model_selection_sign * np.inf)

    logger.info(
        f'Current best validation metric ({model_selection_metric}): {metric_val_best:.6f}'
    )

    # Shortened
    print_every = cfg['training']['print_every']
    validate_every = cfg['training']['validate_every']
    max_iterations = cfg['training']['max_iterations']
    max_epochs = cfg['training']['max_epochs']

    while True:
        epoch_it += 1

        for batch in train_loader:
            it += 1
            loss_dict = trainer.train_step(batch)
            loss = loss_dict['total_loss']
            for k, v in loss_dict.items():
                tb_logger.add_scalar(f'train/{k}', v, it)

            # Print output
            if print_every > 0 and (it % print_every) == 0:
                logger.info(
                    f'[Epoch {epoch_it:02d}] it={it:03d}, loss={loss:.8f}')

            # Backup if necessary
            if backup_every > 0 and (it % backup_every) == 0:
                logger.info('Backup checkpoint')
                ckp.save(f'model_{it:d}.pt',
                         epoch_it=epoch_it,
                         it=it,
                         loss_val_best=metric_val_best)

            # Run validation
            if validate_every > 0 and (it % validate_every) == 0:
                eval_dict = trainer.evaluate(val_loader)
                print('eval_dict=\n', eval_dict)
                metric_val = eval_dict[model_selection_metric]
                logger.info(
                    f'Validation metric ({model_selection_metric}): {metric_val:.8f}'
                )

                for k, v in eval_dict.items():
                    tb_logger.add_scalar(f'val/{k}', v, it)

                # The sign makes "better" mean larger for 'maximize' and
                # smaller otherwise.
                if model_selection_sign * (metric_val - metric_val_best) > 0:
                    metric_val_best = metric_val
                    logger.info(f'New best model (loss {metric_val_best:.8f})')
                    ckp.save('model_best.pt',
                             epoch_it=epoch_it,
                             it=it,
                             loss_val_best=metric_val_best)

            # A limit of 0 (or less) disables the respective stop criterion.
            if (0 < max_iterations <= it) or (0 < max_epochs <= epoch_it):
                logger.info(
                    f'Maximum epochs/iterations ({epoch_it}/{it}) reached. Exiting.'
                )
                ckp.save(f'model_{it:d}.pt',
                         epoch_it=epoch_it,
                         it=it,
                         loss_val_best=metric_val_best)
                # Flush pending TensorBoard events before leaving.
                tb_logger.close()
                sys.exit(3)