Example #1
    def eval_on_dataset(self, data_loader):
        def reset_confidence_interval_95():
            try:
                del self.eval_acc
            except AttributeError:
                pass
            self.eval_acc = {"Top1Acc": [], "Top5Acc": []}

        reset_confidence_interval_95()
        self._model.eval()
        set_random_seeds(0)  # always evaluate on the same dataset.
        eval_stats = DAverageMeter()
        n = len(data_loader)
        with torch.no_grad():
            for i, batch in enumerate(
                    tqdm(data_loader) if self.is_tqdm else data_loader):
                eval_stats_this = self.eval_on_batch(batch)
                eval_stats.update(eval_stats_this)
                self.eval_acc['Top1Acc'].append(eval_stats_this['Top1Acc'])
                self.eval_acc['Top5Acc'].append(eval_stats_this['Top5Acc'])
                if (i + 1) == n:
                    for k, v in self.eval_acc.items():
                        stds = torch.std(torch.tensor(v).float())
                        ci95 = 1.96 * stds / torch.sqrt(
                            torch.tensor(n).float())
                        eval_stats.update({
                            k + '_std': stds.item(),
                            k + '_cnf95': ci95.item()
                        })
        return eval_stats.average()
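Note on Example #1: the tail of the loop computes a 95% confidence interval over the per-batch accuracies using the normal z-score, ci95 = 1.96 * std / sqrt(n). All of the examples on this page also call a project-specific set_random_seeds helper whose definition is not shown. A minimal sketch of what such a helper typically looks like for the PyTorch examples, assuming it only needs to seed Python's random module, NumPy, and torch (the name and signature come from the calls above; the body is an assumption):

import random

import numpy as np
import torch


def set_random_seeds(seed=0):
    # Seed every RNG the training code might touch so runs are repeatable.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)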
Example #2
def main(unused_argv):
  set_random_seeds()

  get_datapath()  # resolve the dataset path
  get_steps()  # set the number of steps according to data_size

  tf.logging.set_verbosity(tf.logging.INFO)
  print('The mode of this model is {}!'.format(FLAGS.mode))

  # If log_dir does not exist, create it.
  if not os.path.exists(FLAGS.log_dir): os.makedirs(FLAGS.log_dir)

  if FLAGS.mode == 'decode':
    FLAGS.branch_batch_size = FLAGS.beam_size  # for beam search
    FLAGS.TS_mode = False

  hps = make_hps() # make a hps namedtuple

  # Vocabulary
  vocab = Vocab(hps.vocab_path, hps.vocab_size)
  # Train or Inference
  if hps.mode == 'train':
    batcher = Batcher(hps.data_path, vocab, hps)
    eval_hps = hps._replace(mode='eval')
    eval_batcher = Batcher(hps.eval_data_path, vocab, eval_hps)

    model = GSNModel(hps, vocab)
    train(model, batcher, eval_batcher, vocab, hps)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps._replace(max_dec_steps=1)
    batcher = Batcher(hps.test_data_path, vocab, decode_mdl_hps)  # for test

    model = GSNModel(decode_mdl_hps, vocab)
    decoder = BeamSearchDecoder(model, batcher, vocab)
    decoder._decode()
Example #3
def test_epoch(epoch, experiment):
    testloaders, testsets = experiment.create_test_dataloaders()
    use_cuda = experiment.use_cuda
    net = experiment.net
    summaries = experiment.summaries
    criterion = experiment.criterion

    net.eval()
    utils.set_random_seeds(1234)

    with torch.no_grad():
        for i, (testloader, testname) in enumerate(testloaders):
            stats = get_stats()
            print("Testing on {}".format(testname))
            for batch_idx, input_set in enumerate(testloader):
                experiment.step = epoch * len(experiment.trainloader) + int(
                    batch_idx / len(testloader) * len(experiment.trainloader))
                experiment.iter = batch_idx
                torch.cuda.empty_cache()
                inputs, targets = input_set
                if use_cuda:
                    inputs = inputs.cuda()
                    targets = targets.cuda()
                # inputs, targets = experiment.data_preprocessing(inputs)
                # inputs, targets = Variable(inputs, requires_grad=False), Variable(targets, requires_grad=False)
                pred = torch.clamp(net(inputs), 0.0, 1.0)
                batch_loss = criterion(pred, targets)
                loss = batch_loss.mean()
                stats["loss"].update(loss.data)
                psnr_iter = metrics.psnr(pred, targets, maxval=1).mean().data
                ssim_iter = metrics.ssim(pred, targets)

                stats["psnr"].update(psnr_iter, pred.size(0))
                stats["ssim"].update(ssim_iter.data, pred.size(0))

                progress_bar(
                    batch_idx, len(testloader),
                    'Loss: %.5f | PSNR: %.2f | SSIM: %.3f' %
                    (stats["loss"].avg, stats["psnr"].avg, stats["ssim"].avg))

                # save predicted image
                learned_img = Image.fromarray(
                    (255 * pred[0, 0].cpu().data.numpy()).astype(np.uint8))
                filename = os.path.join(
                    './n3net-results', testsets[0][i].at(batch_idx).split(
                        '/home/pacole2/Projects/datasets/DeepLesionTestPreprocessed/miniStudies/'
                    )[1])
                directory = os.path.dirname(filename)
                if not os.path.exists(directory):
                    os.makedirs(directory)
                learned_img.save(filename)

                del pred, inputs, targets

            add_summary(experiment, summaries, testname + "/epoch", epoch)
            for k, stat in stats.items():
                add_summary(experiment, summaries, testname + "/" + k,
                            stat.avg)
Example #4
def run_one_config(opt, model_type, case_study=False):
    set_random_seeds()
    dataset = DataSet(opt, model_type)
    model_manager = ModelManager(opt)
    model, train_time = model_manager.build_model(model_type, dataset)
    evaluator = Evaluator(opt)
    metrics = evaluator.eval(model, model_type, dataset.test_loader)
    evaluator.write_performance(model_type, metrics, train_time)
    run_case_study(model, dataset, opt, case_study)
Example #5
    def train_on_dataset(self, data_loader):
        self._model.train()
        set_random_seeds(
            self.curr_epoch
        )  # train on a different subset each time; epochs come in a fixed order.
        train_stats = DAverageMeter()
        for i, batch in enumerate(
                tqdm(data_loader) if self.is_tqdm else data_loader):
            train_stats_this = self.train_on_batch(batch)
            train_stats.update(train_stats_this)
        return train_stats.average()
Example #6
def main():

    random_seed = 0
    num_classes = 10
    l1_regularization_strength = 0
    l2_regularization_strength = 1e-4
    learning_rate = 1e-1
    num_epochs = 200
    cuda_device = torch.device("cuda:0")
    cpu_device = torch.device("cpu:0")

    model_dir = "saved_models"
    model_filename = "resnet18_cifar10.pt"
    model_filepath = os.path.join(model_dir, model_filename)

    set_random_seeds(random_seed=random_seed)

    # Create an untrained model.
    model = create_model(num_classes=num_classes)

    train_loader, test_loader, classes = prepare_dataloader(
        num_workers=8, train_batch_size=128, eval_batch_size=256)

    # Train model.
    print("Training Model...")
    model = train_model(model=model,
                        train_loader=train_loader,
                        test_loader=test_loader,
                        device=cuda_device,
                        l1_regularization_strength=l1_regularization_strength,
                        l2_regularization_strength=l2_regularization_strength,
                        learning_rate=learning_rate,
                        num_epochs=num_epochs)

    # Save model.
    save_model(model=model, model_dir=model_dir, model_filename=model_filename)
    # Load a pretrained model.
    model = load_model(model=model,
                       model_filepath=model_filepath,
                       device=cuda_device)

    _, eval_accuracy = evaluate_model(model=model,
                                      test_loader=test_loader,
                                      device=cuda_device,
                                      criterion=None)

    classification_report = create_classification_report(
        model=model, test_loader=test_loader, device=cuda_device)

    print("Test Accuracy: {:.3f}".format(eval_accuracy))
    print("Classification Report:")
    print(classification_report)
Example #7
def run_weight_test(reset_rmsprop):
    tf.reset_default_graph()
    utils.set_random_seeds(0)
    sess = tf.Session()
    env = generic_preprocess(gym.make('Pong-v0'), max_n_noops=0)
    env.seed(0)

    with tf.variable_scope('global'):
        make_inference_network(n_actions=env.action_space.n,
                               weight_inits='glorot')
    shared_variables = tf.global_variables()

    optimizer = tf.train.RMSPropOptimizer(learning_rate=5e-4,
                                          decay=0.99,
                                          epsilon=1e-5)

    network1 = Network(scope="worker_1",
                       n_actions=env.action_space.n,
                       entropy_bonus=0.01,
                       value_loss_coef=0.5,
                       weight_inits='glorot',
                       max_grad_norm=0.5,
                       optimizer=optimizer,
                       summaries=False,
                       debug=False)
    w1 = Worker(sess=sess, env=env, network=network1, log_dir='/tmp')

    network2 = Network(scope="worker_2",
                       n_actions=env.action_space.n,
                       entropy_bonus=0.01,
                       value_loss_coef=0.5,
                       weight_inits='glorot',
                       max_grad_norm=0.5,
                       optimizer=optimizer,
                       summaries=False,
                       debug=False)
    w2 = Worker(sess=sess, env=env, network=network2, log_dir='/tmp')

    rmsprop_init_ops = [v.initializer for v in optimizer.variables()]

    sess.run(tf.global_variables_initializer())

    vars_sum_init = sess.run(get_var_sum(shared_variables))
    w1.run_update(n_steps=1)
    vars_sum_post_w1_update = sess.run(get_var_sum(shared_variables))
    if reset_rmsprop:
        sess.run(rmsprop_init_ops)
    w2.run_update(n_steps=1)
    vars_sum_post_w2_update = sess.run(get_var_sum(shared_variables))

    return vars_sum_init, vars_sum_post_w1_update, vars_sum_post_w2_update
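Example #7 snapshots the shared variables through a get_var_sum helper that is not shown. A plausible sketch, assuming it reduces the variables to a single scalar so two snapshots can be compared (the helper body is hypothetical):

import tensorflow as tf


def get_var_sum(variables):
    # Reduce each shared variable to a scalar and sum them, so that the
    # result changes whenever any weight changes.
    return tf.add_n([tf.reduce_sum(v) for v in variables])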
Example #8
def test_epoch(epoch, experiment):
    testloaders = experiment.create_test_dataloaders()
    use_cuda = experiment.use_cuda
    net = experiment.net
    summaries = experiment.summaries
    criterion = experiment.criterion

    net.eval()
    utils.set_random_seeds(1234)

    with torch.no_grad():
        for testloader, testname in testloaders:
            stats = get_stats()
            print("Testing on {}".format(testname))
            for batch_idx, inputs in enumerate(testloader):
                experiment.step = epoch * len(experiment.trainloader) + int(
                    batch_idx / len(testloader) * len(experiment.trainloader))
                experiment.iter = batch_idx
                torch.cuda.empty_cache()
                if use_cuda:
                    inputs = inputs.cuda()
                inputs, targets = experiment.data_preprocessing(inputs)

                # CLAMP values to [0,1] after adding noise
                inputs = torch.clamp(inputs, min=0, max=1)

                inputs, targets = Variable(
                    inputs, requires_grad=False), Variable(targets,
                                                           requires_grad=False)
                pred = net(inputs)
                batch_loss = criterion(pred, targets)
                loss = batch_loss.mean()
                stats["loss"].update(loss.data)
                psnr_iter = metrics.psnr(pred, targets, maxval=1).mean().data
                ssim_iter = metrics.ssim(pred, targets)

                stats["psnr"].update(psnr_iter, pred.size(0))
                stats["ssim"].update(ssim_iter.data, pred.size(0))

                progress_bar(
                    batch_idx, len(testloader),
                    'Loss: %.5f | PSNR: %.2f | SSIM: %.3f' %
                    (stats["loss"].avg, stats["psnr"].avg, stats["ssim"].avg))

                del pred, inputs, targets

            add_summary(experiment, summaries, testname + "/epoch", epoch)
            for k, stat in stats.items():
                add_summary(experiment, summaries, testname + "/" + k,
                            stat.avg)
Example #9
def main() -> None:
    """
    Program entry point. Parses command line arguments to decide which dataset and model to use.
    Originally written as a group effort for the common pipeline; later amended by Adam Jaamour.
    :return: None.
    """
    set_random_seeds()
    parse_command_line_arguments()
    print_num_gpus_available()

    # Create label encoder.
    l_e = create_label_encoder()

    # Run in training mode.
    if config.run_mode == "train":

        print("-- Training model --\n")

        # Start recording time.
        start_time = time.time()

        # Multi-class classification (mini-MIAS dataset)
        if config.dataset == "mini-MIAS":
            # Import entire dataset.
            images, labels = import_minimias_dataset(data_dir="../data/{}/images_processed".format(config.dataset),
                                                     label_encoder=l_e)

            # Split dataset into training/test sets (80%/20% split).
            X_train, X_test, y_train, y_test = dataset_stratified_split(split=0.20, dataset=images, labels=labels)

            # Create CNN model and split the training set into training/validation sets (75%/25% split).
            model = CnnModel(config.model, l_e.classes_.size)
            X_train, X_val, y_train, y_val = dataset_stratified_split(split=0.25,
                                                                      dataset=X_train,
                                                                      labels=y_train)

            # Calculate class weights.
            class_weights = calculate_class_weights(y_train, l_e)

            # Data augmentation.
            y_train_before_data_aug = y_train
            X_train, y_train = generate_image_transforms(X_train, y_train)
            y_train_after_data_aug = y_train
            np.random.shuffle(y_train)

            if config.verbose_mode:
                print("Before data augmentation:")
                print(Counter(list(map(str, y_train_before_data_aug))))
                print("After data augmentation:")
                print(Counter(list(map(str, y_train_after_data_aug))))

            # Fit model.
            if config.verbose_mode:
                print("Training set size: {}".format(X_train.shape[0]))
                print("Validation set size: {}".format(X_val.shape[0]))
                print("Test set size: {}".format(X_test.shape[0]))
            model.train_model(X_train, X_val, y_train, y_val, class_weights)

        # Binary classification (binarised mini-MIAS dataset)
        elif config.dataset == "mini-MIAS-binary":
            # Import entire dataset.
            images, labels = import_minimias_dataset(data_dir="../data/{}/images_processed".format(config.dataset),
                                                     label_encoder=l_e)

            # Split dataset into training/validation sets (80%/20% split).
            X_train, X_val, y_train, y_val = dataset_stratified_split(split=0.20, dataset=images, labels=labels)

            # Create CNN model.
            model = CnnModel(config.model, l_e.classes_.size)
            # model.load_minimias_weights()
            # model.load_minimias_fc_weights()

            # Fit model.
            if config.verbose_mode:
                print("Training set size: {}".format(X_train.shape[0]))
                print("Validation set size: {}".format(X_val.shape[0]))
            model.train_model(X_train, X_val, y_train, y_val, None)

        # Binary classification (CBIS-DDSM dataset).
        elif config.dataset == "CBIS-DDSM":
            images, labels = import_cbisddsm_training_dataset(l_e)

            # Split training dataset into training/validation sets (75%/25% split).
            X_train, X_val, y_train, y_val = dataset_stratified_split(split=0.25, dataset=images, labels=labels)
            train_dataset = create_dataset(X_train, y_train)
            validation_dataset = create_dataset(X_val, y_val)

            # Calculate class weights.
            class_weights = calculate_class_weights(y_train, l_e)

            # Create and train CNN model.
            model = CnnModel(config.model, l_e.classes_.size)
            # model.load_minimias_fc_weights()
            # model.load_minimias_weights()

            # Fit model.
            if config.verbose_mode:
                print("Training set size: {}".format(X_train.shape[0]))
                print("Validation set size: {}".format(X_val.shape[0]))
            model.train_model(train_dataset, validation_dataset, None, None, class_weights)

        # Save training runtime.
        runtime = round(time.time() - start_time, 2)

        # Save the model and its weights/biases.
        model.save_model()
        model.save_weights()

        # Evaluate training results.
        print_cli_arguments()
        if config.dataset == "mini-MIAS":
            model.make_prediction(X_val)
            model.evaluate_model(y_val, l_e, 'N-B-M', runtime)
        elif config.dataset == "mini-MIAS-binary":
            model.make_prediction(X_val)
            model.evaluate_model(y_val, l_e, 'B-M', runtime)
        elif config.dataset == "CBIS-DDSM":
            model.make_prediction(validation_dataset)
            model.evaluate_model(y_val, l_e, 'B-M', runtime)
        print_runtime("Training", runtime)

    # Run in testing mode.
    elif config.run_mode == "test":

        print("-- Testing model --\n")

        # Start recording time.
        start_time = time.time()

        # Test multi-class classification (mini-MIAS dataset).
        if config.dataset == "mini-MIAS":
            images, labels = import_minimias_dataset(data_dir="../data/{}/images_processed".format(config.dataset),
                                                     label_encoder=l_e)
            _, X_test, _, y_test = dataset_stratified_split(split=0.20, dataset=images, labels=labels)
            model = load_trained_model()
            predictions = model.predict(x=X_test)
            runtime = round(time.time() - start_time, 2)
            test_model_evaluation(y_test, predictions, l_e, 'N-B-M', runtime)

        # Test binary classification (binarised mini-MIAS dataset).
        elif config.dataset == "mini-MIAS-binary":
            pass

        # Test binary classification (CBIS-DDSM dataset).
        elif config.dataset == "CBIS-DDSM":
            images, labels = import_cbisddsm_testing_dataset(l_e)
            test_dataset = create_dataset(images, labels)
            model = load_trained_model()
            predictions = model.predict(x=test_dataset)
            runtime = round(time.time() - start_time, 2)
            test_model_evaluation(labels, predictions, l_e, 'B-M', runtime)

        print_runtime("Testing", runtime)
Example #10
#   Required Imports
import os, sys
import numpy as np
import pandas as pd
import argparse
from datetime import datetime
import multiprocessing as mp
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from utils import debug, get_parser, timeit, parse_arg, get_seed, set_random_seeds
from data_utils import RandomForestFeatureSelector, get_bins

#   Keras imports happen in main() so that we can select the CPU or a specific GPU

#   Set random seeds
set_random_seeds()

#   Set matplotlib params
font = {
    'family': 'sans-serif',
    'style': 'normal',
    'weight': 'bold',
    'size': 22
}
plt.rc('font', **font)


#   Debug (print) arguments
###########################################
def debug_args(args):
    """Print out all of the arguments if debugging."""
Example #11
def main(argv):
    args = parser.parse_args(argv)

    # Load configuration
    conf = Configuration.from_json(args.config)
    conf.args = args
    if args.conf:
        new_conf_entries = {}
        for arg in args.conf:
            key, value = arg.split('=')
            new_conf_entries[key] = value
        conf.update(new_conf_entries)

    # Setup log directory
    if args.run_dir:
        conf.run_dir = args.run_dir
    elif args.resume:
        if os.path.exists(args.resume):
            conf.run_dir = os.path.dirname(args.resume)
    if not conf.has_attr('run_dir'):
        run_name = conf.get_attr('run_name', default='unnamed_run')
        conf.run_dir = get_run_dir(args.log_dir, run_name)
    if not args.dry:
        if not os.path.isdir(conf.run_dir):
            os.mkdir(conf.run_dir)

    setup_logging(conf.run_dir, 'train', args.verbose, args.dry)

    logging.info('Commandline arguments: {}'.format(' '.join(argv)))

    if not args.dry:
        logging.info('This run is saved to: {}'.format(conf.run_dir))
        config_path = get_config_path(conf.run_dir)
        conf.serialize(config_path)

    if args.cuda != '':
        try:
            args.cuda = utils.set_cuda_env(args.cuda)
        except Exception:
            logging.critical('No free GPU on this machine. Aborting run.')
            return
        logging.info('Running on GPU {}'.format(args.cuda))

    if args.verbose:
        logging.debug(str(conf))

    utils.set_random_seeds(conf.seed)

    # Setup model
    logging.info('Setting up training runner {}'.format(conf.runner_type))
    runner = build_runner(conf, conf.runner_type, args.cuda, mode='train')

    if args.print_model:
        print(str(runner))

    if args.print_parameters:
        print_model_parameters(runner)

    # Handle resuming from checkpoint
    restore_state = None
    if args.resume:
        if os.path.exists(args.resume):
            restore_state = restore_checkpoint(args.resume, runner)
            logging.info('Restored checkpoint from {}'.format(args.resume))
        else:
            logging.critical(('Checkpoint {} to restore '
                              'from not found').format(args.resume))
            return

    use_tensorboard = conf.get_attr('use_tensorboard',
                                    default=DEFAULT_USE_TENSORBOARD)
    if use_tensorboard and not args.dry:
        from tensorboardX import SummaryWriter
        summary_writer = SummaryWriter(conf.run_dir)
        logging.debug('Using tensorboardX summary writer')
    else:
        summary_writer = None

    # Load datasets
    num_workers = conf.get_attr('num_data_workers',
                                default=DEFAULT_NUM_WORKERS)
    num_train_samples = conf.get_attr('num_train_subset_samples', default=None)
    num_val_samples = conf.get_attr('num_validation_subset_samples',
                                    default=None)

    train_dataset_name = conf.get_attr('train_dataset', alternative='dataset')
    logging.info('Loading training dataset {}'.format(train_dataset_name))
    train_dataset = load_dataset(conf, args.data_dir, train_dataset_name,
                                 'train')
    train_sampler = maybe_get_subset_sampler(num_train_samples, train_dataset)
    train_loader = DataLoader(dataset=train_dataset,
                              num_workers=num_workers,
                              batch_size=conf.batch_size,
                              sampler=train_sampler,
                              shuffle=train_sampler is None,
                              worker_init_fn=utils.set_worker_seeds)

    val_dataset_name = conf.get_attr('validation_dataset',
                                     alternative='dataset')
    logging.info('Loading validation dataset {}'.format(val_dataset_name))
    val_dataset = load_dataset(conf, args.data_dir, val_dataset_name, 'val')
    val_sampler = maybe_get_subset_sampler(num_val_samples, val_dataset)
    val_loader = DataLoader(dataset=val_dataset,
                            num_workers=num_workers,
                            batch_size=conf.get_attr('validation_batch_size',
                                                     default=conf.batch_size),
                            sampler=val_sampler,
                            shuffle=False,
                            worker_init_fn=utils.set_worker_seeds)

    # Setup validation checkpoints
    chkpt_metrics = conf.get_attr('validation_checkpoint_metrics', default=[])
    chkpt_metric_dirs = {
        metric: os.path.join(conf.run_dir, 'best_' + metric)
        for metric in chkpt_metrics
    }
    for metric_dir in chkpt_metric_dirs.values():
        if not args.dry and not os.path.isdir(metric_dir):
            os.mkdir(metric_dir)

    # Setup early stopping
    if conf.has_attr('early_stopping'):
        from training.early_stopping import EarlyStopper
        early_stoppers = [
            EarlyStopper(conf.early_stopping['metric_name'],
                         conf.early_stopping['patience'],
                         conf.early_stopping.get('min_value', None),
                         conf.early_stopping.get('max_difference', None))
        ]
    elif conf.has_attr('early_stoppers'):
        from training.early_stopping import EarlyStopper
        early_stoppers = []
        for early_stopping_conf in conf.early_stoppers:
            min_value = early_stopping_conf.get('min_value', None)
            max_diff = early_stopping_conf.get('max_difference', None)
            early_stoppers.append(
                EarlyStopper(early_stopping_conf['metric_name'],
                             early_stopping_conf['patience'], min_value,
                             max_diff))
    else:
        early_stoppers = []

    logging.info('Starting training run of {} epochs'.format(conf.num_epochs))

    # Train
    try:
        train_net(conf, runner, train_loader, val_loader, args.cuda,
                  chkpt_metric_dirs, restore_state, summary_writer,
                  early_stoppers)
    except KeyboardInterrupt:
        if summary_writer is not None:
            summary_writer.close()
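The data loaders above pass utils.set_worker_seeds as worker_init_fn. Its definition is not shown; a minimal sketch of the usual pattern, assuming it derives a per-worker seed from PyTorch's worker seed (the body is an assumption):

import random

import numpy as np
import torch


def set_worker_seeds(worker_id):
    # PyTorch already hands each DataLoader worker a distinct torch seed;
    # fold it into the other RNGs so NumPy/random-based augmentation is
    # deterministic per worker as well.
    seed = torch.initial_seed() % 2**32
    random.seed(seed)
    np.random.seed(seed)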
Example #12
parser.add_argument('--use-linear-lr-decay', action='store_true')
parser.add_argument('--use-clipped-value-loss', action='store_true')
parser.add_argument('--use-tensorboard', action='store_true')
parser.add_argument('--debug', action='store_true')
parser.add_argument('--no-render', action='store_true', default=False)

if __name__ == '__main__':
    # parse arguments
    args = parser.parse_args()
    args.cuda = False
    args.render = not args.no_render

    # set device and random seeds
    device = torch.device("cpu")
    torch.set_num_threads(1)
    utils.set_random_seeds(args.seed, args.cuda, args.debug)

    # setup environment
    envs = make_vec_envs(env_id=args.env_id,
                         seed=args.seed,
                         num_processes=args.num_processes,
                         gamma=None,
                         log_dir=None,
                         device=device,
                         obs_keys=['observation', 'desired_goal'] if not args.env_id.startswith(
                             'metaworld') else None,
                         allow_early_resets=True,
                         max_steps=args.num_steps,
                         evaluating=True)

    # create agent
Example #13
def main():
    args, lr_args, log_dir, preprocess_wrapper, ckpt_timer = parse_args()
    easy_tf_log.set_dir(log_dir)

    utils.set_random_seeds(args.seed)
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir)

    step_counter = utils.GraphCounter(sess)
    update_counter = utils.GraphCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    networks = make_networks(n_workers=args.n_workers,
                             n_actions=envs[0].action_space.n,
                             weight_inits=args.weight_inits,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             debug=args.debug)

    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file written uses relative paths,
    # which seems to be needed in order to avoid confusing saver.restore()
    # when restoring from FloydHub runs.
    global_vars = tf.trainable_variables('global')
    saver = tf.train.Saver(global_vars,
                           max_to_keep=1,
                           save_relative_paths=True)
    checkpoint_dir = osp.join(log_dir, 'checkpoints')
    os.makedirs(checkpoint_dir)
    checkpoint_file = osp.join(checkpoint_dir, 'network.ckpt')

    if args.load_ckpt:
        print("Restoring from checkpoint '%s'..." % args.load_ckpt,
              end='',
              flush=True)
        saver.restore(sess, args.load_ckpt)
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    workers = make_workers(sess=sess,
                           envs=envs,
                           networks=networks,
                           n_workers=args.n_workers,
                           log_dir=log_dir)

    worker_threads = start_workers(n_steps=args.n_steps,
                                   steps_per_update=args.steps_per_update,
                                   step_counter=step_counter,
                                   update_counter=update_counter,
                                   workers=workers)
    ckpt_timer.reset()
    step_rate = utils.RateMeasure()
    step_rate.reset(int(step_counter))
    while True:
        time.sleep(args.wake_interval_seconds)

        steps_per_second = step_rate.measure(int(step_counter))
        easy_tf_log.tflog('misc/steps_per_second', steps_per_second)
        easy_tf_log.tflog('misc/steps', int(step_counter))
        easy_tf_log.tflog('misc/updates', int(update_counter))
        easy_tf_log.tflog('misc/lr', sess.run(lr))

        alive = [t.is_alive() for t in worker_threads]

        if ckpt_timer.done() or not any(alive):
            saver.save(sess, checkpoint_file, int(step_counter))
            print("Checkpoint saved to '{}'".format(checkpoint_file))
            ckpt_timer.reset()

        if not any(alive):
            break

    for env in envs:
        env.close()
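Example #13 measures throughput with a utils.RateMeasure object whose definition is not shown. A minimal sketch matching the reset/measure calls above (the body is an assumption):

import time


class RateMeasure:
    def reset(self, value):
        self.value = value
        self.time = time.time()

    def measure(self, value):
        # Rate of change of the counter since the last reset, e.g. steps
        # per second for the step counter used above.
        now = time.time()
        rate = (value - self.value) / (now - self.time)
        self.reset(value)
        return rate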
Example #14
def run_worker(env_id, preprocess_wrapper, seed, worker_n, n_steps_to_run,
               ckpt_timer, load_ckpt_file, render, log_dir, max_n_noops, debug,
               steps_per_update):
    utils.set_random_seeds(seed)

    mem_log = osp.join(log_dir, "worker_{}_memory.log".format(worker_n))
    memory_profiler = MemoryProfiler(pid=-1, log_path=mem_log)
    memory_profiler.start()

    worker_log_dir = osp.join(log_dir, "worker_{}".format(worker_n))
    easy_tf_log_dir = osp.join(worker_log_dir, 'easy_tf_log')
    os.makedirs(easy_tf_log_dir)
    easy_tf_log.set_dir(easy_tf_log_dir)

    server = tf.train.Server(cluster, job_name="worker", task_index=worker_n)
    sess = tf.Session(server.target)

    with tf.device("/job:worker/task:0"):
        create_network('global')
    with tf.device("/job:worker/task:%d" % worker_n):
        w = Worker(sess=sess,
                   env_id=env_id,
                   preprocess_wrapper=preprocess_wrapper,
                   worker_n=worker_n,
                   seed=seed,
                   log_dir=worker_log_dir,
                   max_n_noops=max_n_noops,
                   debug=debug)
        init_op = tf.global_variables_initializer()
        if render:
            w.render = True

    # Worker 0 initialises the global network as well as the per-worker networks
    # Other workers only initialise their own per-worker networks
    sess.run(init_op)

    if worker_n == 0:
        saver = tf.train.Saver()
        checkpoint_dir = osp.join(log_dir, 'checkpoints')
        os.makedirs(checkpoint_dir)
        checkpoint_file = osp.join(checkpoint_dir, 'network.ckpt')

    if load_ckpt_file is not None:
        print("Restoring from checkpoint '%s'..." % load_ckpt_file,
              end='',
              flush=True)
        saver.restore(sess, load_ckpt_file)
        print("done!")

    updates = 0
    steps = 0
    ckpt_timer.reset()
    while steps < n_steps_to_run:
        start_time = time.time()

        steps_ran = w.run_update(steps_per_update)
        steps += steps_ran
        updates += 1

        end_time = time.time()
        steps_per_second = steps_ran / (end_time - start_time)

        easy_tf_log.tflog('misc/steps_per_second', steps_per_second)
        easy_tf_log.tflog('misc/steps', steps)
        easy_tf_log.tflog('misc/updates', updates)

        if worker_n == 0 and ckpt_timer.done():
            saver.save(sess, checkpoint_file)
            print("Checkpoint saved to '{}'".format(checkpoint_file))
            ckpt_timer.reset()

    memory_profiler.stop()
Example #15
        if early_stopper.early_stop_check(early_stopper_metric):
            logger.info('No improvement over {} epochs, stop training'.format(early_stopper.max_round))
            logger.info(f'Loading the best model at epoch {early_stopper.best_epoch}')
            encoder.load_state_dict(torch.load(MODEL_SAVE_PATH+get_model_name('Encoder')))
            decoder.load_state_dict(torch.load(MODEL_SAVE_PATH+get_model_name('Decoder')))

            test_result = [early_stopper.best_ap, early_stopper.best_auc, early_stopper.best_acc, early_stopper.best_loss]
            break

        test_ap, test_auc, test_acc, test_loss = eval_epoch(args, logger, g, test_loader, encoder, decoder, msg2mail, loss_fcn, device, num_test_samples)
        logger.info('Test {} Task | ap: {:.4f} | auc: {:.4f} | acc: {:.4f} | Loss: {:.4f}'.format(args.tasks, test_ap, test_auc, test_acc, test_loss))
        test_result = [test_ap, test_auc, test_acc, test_loss]

        if early_stopper.best_epoch == epoch: 
            early_stopper.best_ap = test_ap
            early_stopper.best_auc = test_auc
            early_stopper.best_acc = test_acc
            early_stopper.best_loss = test_loss
            logger.info(f'Saving the best model at epoch {early_stopper.best_epoch}')
            torch.save(encoder.state_dict(), MODEL_SAVE_PATH+get_model_name('Encoder'))
            torch.save(decoder.state_dict(), MODEL_SAVE_PATH+get_model_name('Decoder'))

    
if __name__ == '__main__':
    args = get_args()
    logger = set_logger()
    logger.info(args)
    set_random_seeds(args.seed)

    train(args, logger)
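Example #15 relies on an early_stopper with an early_stop_check method, a max_round attribute, and best_* fields updated by the loop. A minimal sketch of that interface, assuming it tracks a single monitored metric (the class body is an assumption):

class EarlyStopper:
    def __init__(self, max_round=10, higher_better=True):
        self.max_round = max_round
        self.higher_better = higher_better
        self.num_round = 0
        self.epoch = -1
        self.best_epoch = 0
        self.best_metric = None
        # Best test-time metrics, filled in by the training loop above.
        self.best_ap = self.best_auc = self.best_acc = self.best_loss = 0.0

    def early_stop_check(self, metric):
        # Returns True once the metric has not improved for max_round epochs.
        self.epoch += 1
        value = metric if self.higher_better else -metric
        if self.best_metric is None or value > self.best_metric:
            self.best_metric = value
            self.best_epoch = self.epoch
            self.num_round = 0
        else:
            self.num_round += 1
        return self.num_round >= self.max_round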
Example #16
    def test_random_seed(self):
        # Note: TensorFlow random seeding doesn't work completely as expected.
        # tf.set_random_seed sets the graph-level seed in the current graph.
        # But operations also have their own operation-level seed, which is
        # chosen deterministically based on the graph-level seed, but also
        # based on other things.
        #
        # So if you create multiple operations in the same graph,
        # each one will be given a different operation-level seed.
        # The graph-level seed just determines what the sequence of
        # operation-level seeds will be.
        #
        # To get a bunch of operations with the same sequence of
        # operation-level seeds, we need to reset the graph before creation
        # of each bunch of operations.

        # Generate some random numbers from a specific seed
        tf.reset_default_graph()
        sess = tf.Session()
        set_random_seeds(0)
        tf_rand_var = tf.random_normal([10])
        numpy_rand_1 = np.random.rand(10)
        numpy_rand_2 = np.random.rand(10)
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                                 numpy_rand_1, numpy_rand_2)
        tensorflow_rand_1 = sess.run(tf_rand_var)
        tensorflow_rand_2 = sess.run(tf_rand_var)
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                                 tensorflow_rand_1, tensorflow_rand_2)
        python_rand_1 = [random.random() for _ in range(10)]
        python_rand_2 = [random.random() for _ in range(10)]
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                                 python_rand_1, python_rand_2)

        # Put the seed back and check we get the same numbers
        tf.reset_default_graph()
        sess = tf.Session()
        set_random_seeds(0)
        tf_rand_var = tf.random_normal([10])
        numpy_rand_3 = np.random.rand(10)
        numpy_rand_4 = np.random.rand(10)
        np.testing.assert_equal(numpy_rand_1, numpy_rand_3)
        np.testing.assert_equal(numpy_rand_2, numpy_rand_4)
        tensorflow_rand_3 = sess.run(tf_rand_var)
        tensorflow_rand_4 = sess.run(tf_rand_var)
        np.testing.assert_equal(tensorflow_rand_1, tensorflow_rand_3)
        np.testing.assert_equal(tensorflow_rand_2, tensorflow_rand_4)
        python_rand_3 = [random.random() for _ in range(10)]
        python_rand_4 = [random.random() for _ in range(10)]
        np.testing.assert_equal(python_rand_1, python_rand_3)
        np.testing.assert_equal(python_rand_2, python_rand_4)

        # Set a different seed and make sure we get different numbers
        set_random_seeds(1)
        numpy_rand_5 = np.random.rand(10)
        numpy_rand_6 = np.random.rand(10)
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                                 numpy_rand_5, numpy_rand_1)
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                                 numpy_rand_6, numpy_rand_2)
        tensorflow_rand_5 = sess.run(tf_rand_var)
        tensorflow_rand_6 = sess.run(tf_rand_var)
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                                 tensorflow_rand_5, tensorflow_rand_1)
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                                 tensorflow_rand_6, tensorflow_rand_2)
        python_rand_5 = [random.random() for _ in range(10)]
        python_rand_6 = [random.random() for _ in range(10)]
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                                 python_rand_5, python_rand_1)
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                                 python_rand_6, python_rand_2)
Example #17
from utils import get_configuration
from utils import set_random_seeds
from utils import set_configuration
from TSC_Env import TSC_Env

if __name__ == '__main__':
    args = set_configuration()
    para_config = get_configuration(args.para_dir)
    env_name = args.env
    port = args.port
    gui = args.gui
    print(para_config)
    print(env_name)
    total_episodes = para_config['total_episodes']
    sim_seed = para_config['sim_seed']
    set_random_seeds(sim_seed)
    env = TSC_Env(env_name, para_config, gui=gui, port=args.port)
    if args.load_model:
        env.agents.load_model(args.load_model_dir)
    env.run()
    if args.save_model:
        env.agents.save_model(args.save_model_dir)
    env.output_data()
    env.close()
Example #18
def main(argv):
  args = parser.parse_args(argv)

  setup_logging(os.path.dirname(args.checkpoint), 'eval',
                args.verbose, args.dry)

  logging.info('Commandline arguments: {}'.format(' '.join(argv)))

  if args.cuda != '':
    try:
      args.cuda = utils.set_cuda_env(args.cuda)
    except Exception:
      logging.critical('No free GPU on this machine. Aborting run.')
      return
    logging.info('Running on GPU {}'.format(args.cuda))

  # Load configuration
  conf = Configuration.from_json(args.config)
  conf.args = args
  if args.conf:
    new_conf_entries = {}
    for arg in args.conf:
      key, value = arg.split('=')
      new_conf_entries[key] = value
    conf.update(new_conf_entries)

  if args.verbose:
    logging.debug(conf)

  utils.set_random_seeds(conf.seed)

  if args.raw:
    # This is a hack to suppress the output transform when we request raw data
    conf.application = 'none'
    if conf.has_attr('tasks'):
      for name, task in conf.tasks.items():
        if 'application' in task:
          logging.debug(('Changing output transform in task {} '
                         'from {} to none').format(name,
                                                   task['application']))
          task['application'] = 'none'

  # Setup model
  runner = build_runner(conf, conf.runner_type, args.cuda, mode='test')

  # Handle resuming from checkpoint
  if args.checkpoint != 'NONE':
    if os.path.exists(args.checkpoint):
      _ = restore_checkpoint(args.checkpoint, runner, cuda=args.cuda)
      logging.info('Restored checkpoint from {}'.format(args.checkpoint))
    else:
      logging.critical(('Checkpoint {} to restore '
                       'from not found').format(args.checkpoint))
      return

  # Load datasets
  mode = 'dataset'
  if len(args.files_or_dirs) == 0:
    datasets = [load_dataset(conf, args.data_dir,
                             conf.validation_dataset, args.fold)]
  else:
    datasets = []
    for f in args.files_or_dirs:
      if is_dataset(f):
        dataset = load_dataset(conf, args.data_dir, f, args.fold)
        datasets.append(dataset)

  if args.raw:
    mode = 'raw'

  num_samples = conf.get_attr('num_validation_subset_samples',
                              default=None)

  # Evaluate all datasets
  for dataset in datasets:
    logging.info('Evaluating dataset {}'.format(dataset.name))

    sampler = maybe_get_subset_sampler(num_samples, dataset)
    loader = DataLoader(dataset=dataset,
                        num_workers=DEFAULT_NUM_WORKERS,
                        batch_size=1,
                        sampler=sampler,
                        shuffle=False)

    if mode == 'dataset':
      data, _, val_metrics = runner.validate(loader, len(loader))

      res_str = 'Average metrics for {}\n'.format(dataset.name)
      for metric_name, metric in val_metrics.items():
        res_str += '     {}: {}\n'.format(metric_name, metric)
      logging.info(res_str)
    else:
      data = runner.infer(loader)

    if not args.dry and (args.infer or args.dump):
      if mode == 'dataset' or mode == 'raw':
        conf_name = os.path.splitext(os.path.basename(conf.file))[0]
        output_dir = get_run_dir(args.out_dir, '{}_{}'.format(dataset.name,
                                                              conf_name))
        if not os.path.isdir(output_dir):
          os.mkdir(output_dir)

      logging.info('Writing images to {}'.format(output_dir))

      file_idx = 0
      for batch in data:
        if mode == 'image':
          output_dir = os.path.dirname(dataset.images[file_idx])

        named_batch = runner.get_named_outputs(batch)
        inp = named_batch['input']

        if 'prediction' in named_batch:
          batch_size = named_batch['prediction'].shape[0]
          filenames = [dataset.get_filename(idx)
                       for idx in range(file_idx, file_idx + batch_size)]
          save_output_images(dataset, inp, named_batch['prediction'],
                             named_batch['target'], output_dir,
                             filenames, 'default', args.dump, args.raw)

        file_idx += len(filenames)

      logging.info(('Finished writing images for '
                   'dataset {}').format(dataset.name))
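Examples #11, #18, and #21 restrict validation to a subset through maybe_get_subset_sampler. A plausible sketch, assuming it returns a sampler over a random subset when a sample budget is configured and None otherwise (the body is an assumption):

import torch
from torch.utils.data import SubsetRandomSampler


def maybe_get_subset_sampler(num_samples, dataset):
    if num_samples is None:
        return None
    # Sample a random subset of indices; the DataLoader then iterates over
    # exactly num_samples examples.
    indices = torch.randperm(len(dataset))[:num_samples]
    return SubsetRandomSampler(indices.tolist())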
Example #19
def train(params, _run=None):
    params = Params(params)

    set_random_seeds(params.seed)

    time_now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    params.save_root = params.save_root + f'/{params.project_name}_{time_now}_{params.version}'
    os.makedirs(params.save_root, exist_ok=True)

    logging.basicConfig(filename=f'{params.save_root}/{params.project_name}_{time_now}_{params.version}.log',
                        filemode='a', format='%(asctime)s - %(levelname)s: %(message)s')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    logging.info(f'Available GPUs: {torch.cuda.device_count()}')

    train2007, train_label_2007, train_bb_2007 = load_annotation(os.path.join(params.data_root, 'VOC2007'), 'trainval')
    test2007, test_label_2007, test_bb_2007 = load_annotation(os.path.join(params.data_root, 'VOC2007'), 'test')
    train2012, train_label_2012, train_bb_2012 = load_annotation(os.path.join(params.data_root, 'VOC2012'), 'trainval')
    test2012, test_label_2012, test_bb_2012 = load_annotation(os.path.join(params.data_root, 'VOC2012'), 'test')
    train_data = train2007+test2007+train2012
    train_label = train_label_2007+test_label_2007+train_label_2012
    train_bb = train_bb_2007 + test_bb_2007 + train_bb_2012
    test_data = test2012
    test_label = test_label_2012
    test_bb = test_bb_2012

    train_dataset = YoloDataset(train_data, train_bb, train_label, params, train=True)
    eval_dataset = YoloDataset(test_data, test_bb, test_label, params, train=False)
    train_loader = DataLoader(dataset=train_dataset, num_workers=params.num_gpus*8, batch_size=params.batch_size,
                              shuffle=True, drop_last=True, pin_memory=True)
    eval_loader = DataLoader(dataset=eval_dataset, num_workers=1, batch_size=1,
                             shuffle=False, pin_memory=True)

    model = Backbone()
    last_step = 0
    last_epoch = 0

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = nn.DataParallel(model)

    if params.optim == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=params.learning_rate, momentum=0.9, nesterov=True, weight_decay=0.0005)

    criterion = SumSquareError()
    schedule = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, factor=0.5, verbose=True, patience=10)

    epoch = 0
    begin_epoch = max(0, last_epoch)
    step = max(0, last_step)
    best_loss = 1e6
    logging.info('Begin to train...')
    model.train()
    try:
        for epoch in range(begin_epoch, params.epoch):
            for iter, (img, annotation) in enumerate(train_loader):
                output = model(img.cuda())
                loss = criterion(output, annotation.cuda())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                if iter % params.save_interval == 0:
                    logging.info(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} '
                                 f'Train Epoch: {epoch} iter: {iter} loss: {loss.item()}')
                step += 1
            if epoch % params.eval_interval == 0:
                model.eval()
                epoch_loss = 0
                with torch.no_grad():
                    for iter, (img, annotation) in enumerate(eval_loader):
                        output = model(img.cuda())
                        loss = criterion(output, annotation.cuda()).item()
                        epoch_loss += loss * len(img)
                    loss = epoch_loss / len(eval_dataset)
                    logging.info(f'{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} '
                                 f'Eval Epoch: {epoch} loss: {loss}')
                    schedule.step(loss)
                    if loss < best_loss:
                        best_loss = loss
                        save_checkpoint(model, f'{params.save_root}/{epoch}_{step}.pth')
                model.train()

    except KeyboardInterrupt:
        save_checkpoint(model, f'{params.save_root}/Interrupt_{epoch}_{step}.pth')
Example #20
import torch
from torch import nn
from torch import optim
import torchvision
import torchvision.transforms as transforms
import config as con
from models import Net
from time import time
from utils import accuracy, get_cifar10_data, set_random_seeds, get_model_layers, get_prms_rqr_grd
try:
    from apex import amp
except ImportError:
    print('the apex module does not exist')

set_random_seeds(con.random_seed)
trainset, trainloader, testset, testloader = next(get_cifar10_data())

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
           'ship', 'truck')

if con.use_cuda:
    net = Net().cuda()
else:
    net = Net()
optimizer = optim.Adam(net.parameters())
if con.use_apex:
    net, optimizer = amp.initialize(net,
                                    optimizer,
                                    opt_level=con.apex_opt_level)
criterion = nn.CrossEntropyLoss()
print('num of net parameters:', sum(p.numel() for p in net.parameters()))
Example #21
def main(argv):
    args = parser.parse_args(argv)

    if args.cuda != '':
        try:
            args.cuda = utils.set_cuda_env(args.cuda)
        except Exception:
            print('No free GPU on this machine. Aborting run.')
            return
        print('Running on GPU {}'.format(args.cuda))

    # Load configuration
    conf = Configuration.from_json(args.config)
    conf.args = args
    if args.conf:
        new_conf_entries = {}
        for arg in args.conf:
            key, value = arg.split('=')
            new_conf_entries[key] = value
        conf.update(new_conf_entries)
    if args.verbose:
        print(conf)

    utils.set_random_seeds(conf.seed)

    # Setup model
    runner = build_runner(conf,
                          conf.runner_type,
                          args.cuda,
                          mode='train',
                          resume=args.resume is not None)

    if args.print_model:
        print(str(runner))

    # Handle resuming from checkpoint
    restore_state = None
    if args.resume:
        if os.path.exists(args.resume):
            restore_state = restore_checkpoint(args.resume, runner)
            conf.run_dir = os.path.dirname(args.resume)
            print('Restored checkpoint from {}'.format(args.resume))
        else:
            print('Checkpoint {} to restore from not found'.format(
                args.resume))
            return

    # Setup log directory
    if args.run_dir:
        conf.run_dir = args.run_dir
    if not conf.has_attr('run_dir'):
        run_name = conf.get_attr('run_name', default='unnamed_run')
        conf.run_dir = get_run_dir(args.log_dir, run_name)
    if not args.dry:
        if not os.path.isdir(conf.run_dir):
            os.mkdir(conf.run_dir)
        print('This run is saved to: {}'.format(conf.run_dir))
        config_path = get_config_path(conf.run_dir)
        conf.serialize(config_path)

    use_tensorboard = conf.get_attr('use_tensorboard',
                                    default=DEFAULT_USE_TENSORBOARD)
    if use_tensorboard and not args.dry:
        from tensorboardX import SummaryWriter
        summary_writer = SummaryWriter(conf.run_dir)
    else:
        summary_writer = None

    # Load datasets
    num_workers = conf.get_attr('num_data_workers',
                                default=DEFAULT_NUM_WORKERS)
    num_train_samples = conf.get_attr('num_train_subset_samples', default=None)
    num_val_samples = conf.get_attr('num_validation_subset_samples',
                                    default=None)

    train_dataset_name = conf.get_attr('train_dataset', alternative='dataset')
    train_dataset = load_dataset(conf, args.data_dir, train_dataset_name,
                                 'train')
    train_sampler = maybe_get_subset_sampler(num_train_samples, train_dataset)
    train_loader = DataLoader(dataset=train_dataset,
                              num_workers=num_workers,
                              batch_size=conf.batch_size,
                              sampler=train_sampler,
                              shuffle=train_sampler is None)

    val_dataset_name = conf.get_attr('validation_dataset',
                                     alternative='dataset')
    val_dataset = load_dataset(conf, args.data_dir, val_dataset_name, 'val')
    val_sampler = maybe_get_subset_sampler(num_val_samples, val_dataset)
    val_loader = DataLoader(dataset=val_dataset,
                            num_workers=num_workers,
                            batch_size=conf.get_attr('validation_batch_size',
                                                     default=conf.batch_size),
                            sampler=val_sampler,
                            shuffle=False)

    chkpt_metrics = conf.get_attr('validation_checkpoint_metrics', default=[])
    chkpt_metric_dirs = {
        metric: os.path.join(conf.run_dir, 'best_' + metric)
        for metric in chkpt_metrics
    }
    for metric_dir in chkpt_metric_dirs.values():
        if not args.dry and not os.path.isdir(metric_dir):
            os.mkdir(metric_dir)

    # Train
    try:
        train_net(conf, runner, train_loader, val_loader, args.cuda,
                  chkpt_metric_dirs, restore_state, summary_writer)
    except KeyboardInterrupt:
        if summary_writer is not None:
            summary_writer.close()
Example #22
def main():

    num_classes = 10
    random_seed = 1
    l1_regularization_strength = 0
    l2_regularization_strength = 1e-4
    learning_rate = 1e-3
    learning_rate_decay = 1

    cuda_device = torch.device("cuda:0")
    cpu_device = torch.device("cpu:0")

    model_dir = "saved_models"
    model_filename = "resnet18_cifar10.pt"
    model_filename_prefix = "pruned_model"
    pruned_model_filename = "resnet18_pruned_cifar10.pt"
    model_filepath = os.path.join(model_dir, model_filename)
    pruned_model_filepath = os.path.join(model_dir, pruned_model_filename)

    set_random_seeds(random_seed=random_seed)

    # Create an untrained model.
    model = create_model(num_classes=num_classes)

    # Load a pretrained model.
    model = load_model(model=model,
                       model_filepath=model_filepath,
                       device=cuda_device)

    train_loader, test_loader, classes = prepare_dataloader(
        num_workers=8, train_batch_size=128, eval_batch_size=256)

    _, eval_accuracy = evaluate_model(model=model,
                                      test_loader=test_loader,
                                      device=cuda_device,
                                      criterion=None)

    classification_report = create_classification_report(
        model=model, test_loader=test_loader, device=cuda_device)

    num_zeros, num_elements, sparsity = measure_global_sparsity(model)

    print("Test Accuracy: {:.3f}".format(eval_accuracy))
    print("Classification Report:")
    print(classification_report)
    print("Global Sparsity:")
    print("{:.2f}".format(sparsity))

    print("Iterative Pruning + Fine-Tuning...")

    pruned_model = copy.deepcopy(model)

    # iterative_pruning_finetuning(
    #     model=pruned_model,
    #     train_loader=train_loader,
    #     test_loader=test_loader,
    #     device=cuda_device,
    #     learning_rate=learning_rate,
    #     learning_rate_decay=learning_rate_decay,
    #     l1_regularization_strength=l1_regularization_strength,
    #     l2_regularization_strength=l2_regularization_strength,
    #     conv2d_prune_amount=0.3,
    #     linear_prune_amount=0,
    #     num_iterations=8,
    #     num_epochs_per_iteration=50,
    #     model_filename_prefix=model_filename_prefix,
    #     model_dir=model_dir,
    #     grouped_pruning=True)

    iterative_pruning_finetuning(
        model=pruned_model,
        train_loader=train_loader,
        test_loader=test_loader,
        device=cuda_device,
        learning_rate=learning_rate,
        learning_rate_decay=learning_rate_decay,
        l1_regularization_strength=l1_regularization_strength,
        l2_regularization_strength=l2_regularization_strength,
        conv2d_prune_amount=0.98,
        linear_prune_amount=0,
        num_iterations=1,
        num_epochs_per_iteration=200,
        model_filename_prefix=model_filename_prefix,
        model_dir=model_dir,
        grouped_pruning=True)

    # Apply mask to the parameters and remove the mask.
    remove_parameters(model=pruned_model)

    _, eval_accuracy = evaluate_model(model=pruned_model,
                                      test_loader=test_loader,
                                      device=cuda_device,
                                      criterion=None)

    classification_report = create_classification_report(
        model=pruned_model, test_loader=test_loader, device=cuda_device)

    num_zeros, num_elements, sparsity = measure_global_sparsity(pruned_model)

    print("Test Accuracy: {:.3f}".format(eval_accuracy))
    print("Classification Report:")
    print(classification_report)
    print("Global Sparsity:")
    print("{:.2f}".format(sparsity))

    # Save the pruned model under its own filename.
    save_model(model=pruned_model, model_dir=model_dir, model_filename=pruned_model_filename)
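Example #22 makes the pruning permanent with remove_parameters before re-evaluating. A sketch of what that helper typically does, assuming the model was pruned with torch.nn.utils.prune (the helper body is an assumption):

import torch.nn as nn
import torch.nn.utils.prune as prune


def remove_parameters(model):
    # Strip the weight_orig/weight_mask re-parametrization that
    # torch.nn.utils.prune adds, baking the zeros into the tensors.
    for module in model.modules():
        if isinstance(module, (nn.Conv2d, nn.Linear)):
            try:
                prune.remove(module, "weight")
            except ValueError:
                pass  # this module/parameter was never pruned
            try:
                prune.remove(module, "bias")
            except ValueError:
                pass
    return model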
Example #23
def main(argv):
  args = parser.parse_args(argv)

  if args.cuda != '':
    try:
      args.cuda = utils.set_cuda_env(args.cuda)
    except Exception:
      print('No free GPU on this machine. Aborting run.')
      return
    print('Running on GPU {}'.format(args.cuda))

  # Load configuration
  conf = Configuration.from_json(args.config)
  conf.args = args
  if args.conf:
    new_conf_entries = {}
    for arg in args.conf:
      key, value = arg.split('=')
      new_conf_entries[key] = value
    conf.update(new_conf_entries)
  if args.verbose:
    print(conf)

  utils.set_random_seeds(conf.seed)

  # Setup model
  runner = build_runner(conf, conf.runner_type, args.cuda, mode='test')

  # Handle resuming from checkpoint
  if args.checkpoint != 'NONE':
    if os.path.exists(args.checkpoint):
      _ = restore_checkpoint(args.checkpoint, runner, cuda=args.cuda)
      print('Restored checkpoint from {}'.format(args.checkpoint))
    else:
      print('Checkpoint {} to restore from not found'.format(args.checkpoint))
      return

  # Evaluate on full image, not crops
  conf.full_image = True

  # Load datasets
  mode = 'dataset'
  if len(args.files_or_dirs) == 0:
    datasets = [load_dataset(conf, args.data_dir, conf.validation_dataset, args.fold)]
  else:
    datasets = []
    for f in args.files_or_dirs:
      if is_dataset(f):
        dataset = load_dataset(conf, args.data_dir, f, args.fold)
        datasets.append(dataset)
      else:
        mode = 'image'
        transform = get_sr_transform(conf, 'test', downscale=False)
        datasets = [make_sr_dataset_from_folder(conf, f, transform,
                                                inference=True)
                    for f in args.files_or_dirs]

  num_workers = conf.get_attr('num_data_workers', default=DEFAULT_NUM_WORKERS)

  # Evaluate all datasets
  for dataset in datasets:
    loader = DataLoader(dataset=dataset,
                        num_workers=num_workers,
                        batch_size=1,
                        shuffle=False)

    if mode == 'dataset':
      data, _, val_metrics = runner.validate(loader, len(loader))

      print('Average metrics for {}'.format(dataset.name))
      for metric_name, metric in val_metrics.items():
        print('     {}: {}'.format(metric_name, metric))
    else:
      data = runner.infer(loader)

    if args.infer or args.dump:
      if mode == 'dataset':
        output_dir = get_run_dir(args.out_dir, dataset.name)
        if not os.path.isdir(output_dir):
          os.mkdir(output_dir)

      file_idx = 0
      for batch in data:
        if mode == 'image':
          output_dir = os.path.dirname(dataset.images[file_idx])

        named_batch = runner.get_named_outputs(batch)
        inputs = named_batch['input']
        predictions = named_batch['prediction']
        targets = named_batch['target']
        for (inp, target, prediction) in zip(inputs, targets, predictions):
          image_file = os.path.basename(dataset.images[file_idx])
          name, _ = os.path.splitext(image_file)
          file_idx += 1

          if args.dump:
            input_file = os.path.join(output_dir,
                                      '{}_input.png'.format(name))
            save_image(inp.data, input_file)
            target_file = os.path.join(output_dir,
                                       '{}_target.png'.format(name))
            save_image(target.data, target_file)
          pred_file = os.path.join(output_dir,
                                   '{}_pred.png'.format(name))
          save_image(prediction.data, pred_file)