# Example 1
 def clean(self):
     """Reset per-patch training state so the next patch starts fresh.

     The network itself is deliberately left untouched: whether it is
     reused or rebuilt is governed by the transfer-learning options.
     """
     msg = 'Finished patch %s' % self.image_name
     print(colored(msg, 'yellow'))
     torch.cuda.empty_cache()
     # Restart iteration counting and loss/history tracking from scratch.
     self.iiter = 0
     self.loss_min = None
     self.history = u.History(self.args.epochs)
# Example 2
    def __init__(self, args, outpath, dtype=torch.cuda.FloatTensor):
        """Store the run configuration and initialize empty trainer state.

        Args:
            args: parsed option namespace (loss, epochs, save_every,
                imgchannel, datadim, ...).
            outpath: directory where results are written.
            dtype: tensor type used for the loss module (CUDA float by default).
        """
        self.args = args
        self.dtype = dtype
        self.outpath = outpath

        # Pixel-wise reconstruction loss: MSE when requested, L1 otherwise.
        loss_cls = torch.nn.MSELoss if args.loss == 'mse' else torch.nn.L1Loss
        self.loss_fn = loss_cls().type(self.dtype)

        self.elapsed = None
        self.iiter = 0
        # Epochs at which intermediate outputs will be stored.
        if self.args.save_every is not None:
            self.iter_to_be_saved = list(
                range(0, self.args.epochs, int(self.args.save_every)))
        else:
            self.iter_to_be_saved = [0]
        self.loss_min = None
        self.outchannel = args.imgchannel
        self.history = u.History(self.args.epochs)

        # Per-patch image/mask buffers (filled once a patch is loaded).
        self.imgpath = None
        self.image_name = None
        self.img = None
        self.img_ = None
        self.mask = None
        self.mask_ = None
        self.out_best = None
        self.out_old = None
        self.zfill = u.ten_digit(self.args.epochs)

        # Input tensors: 3-D noise for volumetric data, 2-D noise otherwise.
        self.input_type = 'noise3d' if args.datadim == '3d' else 'noise'
        self.input_ = None
        self.input_old = None
        self.add_noise_ = None
        self.add_data_ = None
        self.add_data_weight = None
        self.input_list = []

        # Network and optimization state (built later).
        self.net = None
        self.parameters = None
        self.num_params = None
        self.optimizer = None
def train_model(dataset, paths, device):
    """The main function for executing network training. It loads the specified
       dataset iterator, saliency model, and helper classes. Training is then
       performed in a new session by iterating over all batches for a number of
       epochs. After validation on an independent set, the model is saved and
       the training history is updated.

    Args:
        dataset (str): Denotes the dataset to be used during training.
        paths (dict, str): A dictionary with all path elements.
        device (str): Represents either "cpu" or "gpu".
    """

    # Reinitializable TF1 iterator: the same next_element tensors serve both
    # splits, switched via the two init ops below.
    iterator = data.get_dataset_iterator("train", dataset, paths["data"])

    next_element, train_init_op, valid_init_op = iterator

    input_images, ground_truths = next_element[:2]

    # Defaults to the pipeline tensor, but also allows feeding arbitrary
    # RGB batches through the graph under the name "input".
    input_plhd = tf.placeholder_with_default(input_images,
                                             (None, None, None, 3),
                                             name="input")
    
    #training = tf.placeholder(tf.bool, name="training")  ## For BN
    
    msi_net = model_bn.MSINET(is_train=True)

    predicted_maps = msi_net.forward(input_plhd)

    optimizer, loss = msi_net.train(ground_truths, predicted_maps,
                                    config.PARAMS["learning_rate"])

    # Split sizes are class attributes of the dataset class in `data`
    # (looked up by upper-cased dataset name).
    n_train_data = getattr(data, dataset.upper()).n_train
    n_valid_data = getattr(data, dataset.upper()).n_valid

    n_train_batches = int(np.ceil(n_train_data / config.PARAMS["batch_size"]))
    n_valid_batches = int(np.ceil(n_valid_data / config.PARAMS["batch_size"]))

    history = utils.History(n_train_batches,
                            n_valid_batches,
                            dataset,
                            paths["history"],
                            device)

    progbar = utils.Progbar(n_train_data,
                            n_train_batches,
                            config.PARAMS["batch_size"],
                            config.PARAMS["n_epochs"],
                            history.prior_epochs)

    #training = tf.placeholder(tf.bool, name="training")   ## For BN
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Possibly restores earlier weights; returns the saver reused below.
        saver = msi_net.restore(sess, dataset, paths, device)

        print(">> Start training on %s..." % dataset.upper())

        for epoch in range(config.PARAMS["n_epochs"]):
            sess.run(train_init_op)

            for batch in range(n_train_batches):
                #_, error = sess.run([optimizer, loss], feed_dict={training: True})
                _, error = sess.run([optimizer, loss])

                history.update_train_step(error)
                progbar.update_train_step(batch)

            # Switch the shared iterator to the validation split.
            sess.run(valid_init_op)

            for batch in range(n_valid_batches):
                #error = sess.run(loss, feed_dict={training: False})
                error = sess.run(loss)

                history.update_valid_step(error)
                progbar.update_valid_step()

            # Checkpoint the latest weights once per epoch, unconditionally.
            msi_net.save(saver, sess, dataset, paths["latest"], device)

            history.save_history()

            progbar.write_summary(history.get_mean_train_error(),
                                  history.get_mean_valid_error())

            # "Best" = lowest mean validation error observed so far.
            if history.valid_history[-1] == min(history.valid_history):
                msi_net.save(saver, sess, dataset, paths["best"], device)
                msi_net.optimize(sess, dataset, paths["best"], device)

                print("\tBest model!", flush=True)
# Example 4
#     log_dir = 'logs/' + str(np.max(existing_logs)+1)
#     print('Logging output via tensorboard to', log_dir)
# else:
#     log_dir = None
#     print('Not logging output')

# Set up dataset splits and ranking evaluation.
#train_triples, val_triples, test_triples = utils.train_val_test_split(all_triples, val_size=5000, test_size=5000, random_state=0)


# With filtered=False the rankers get triples_to_filter=None, i.e. the raw
# (unfiltered) ranking protocol. The train ranker only sees the first 5000
# triples — presumably to keep evaluation cheap; confirm with the authors.
filtered = False
train_ranker = RankingEvaluation(train_triples[:5000], num_nodes, triples_to_filter=all_triples if filtered else None, device=device, show_progress=True)
val_ranker = RankingEvaluation(val_triples, num_nodes, triples_to_filter=all_triples if filtered else None, device=device, show_progress=True)
#test_ranker = RankingEvaluation(test_triples, num_nodes, filter_triples=all_triples if filtered else None, show_progress=True)

history = utils.History()

#node_features = load_image_features(num_nodes, entity_map)
node_features = None

utils.seed_all(0)
# TODO: Make device parameter obsolete by moving everything to the device once .to(device) is called.
# net = UnsupervisedRGCN(num_nodes, num_relations, train_triples, embedding_size=200, dropout=0,  # embedding_size=500, dropout=0.5
#                        num_sample_train=10, num_sample_eval=10, activation=F.elu,
#                        node_features=node_features, device=device)
# Positional args look like (embedding_size=500, ..., dropout=0) judging by
# the commented R-GCN call above — verify against DistMult's signature.
net = DistMult(500, num_nodes, num_relations, 0)
net.to(device)
# Optimize only trainable parameters.
optimizer = torch.optim.Adam(filter(lambda parameter: parameter.requires_grad, net.parameters()), lr=0.001)

# NOTE(review): this call is truncated in this excerpt (no closing
# parenthesis) — the remaining arguments are outside the visible chunk.
train_via_classification(net, train_triples, val_triples, optimizer, num_nodes, train_ranker, val_ranker,
                  num_epochs=35, batch_size=64, batch_size_eval=512, device=device,
# Example 5
def logp(z):
    """Unnormalized log-posterior of latents `z` under the generator.

    Sums a standard-normal log-prior on z and a Gaussian log-likelihood of
    the observed values `vals` at pixel indices `ij` (noise level
    `args.alpha`), both up to additive constants.
    """
    generated = netG(z)
    # Standard-normal prior: -0.5 * ||z||^2, one value per sample.
    log_prior = -0.5 * (z ** 2).view(z.shape[0], -1).sum(-1)
    # Gaussian likelihood of the observed pixels, scaled by the noise level.
    residual = generated[..., ij[:, 0], ij[:, 1]] - vals
    log_likelihood = -0.5 * (residual ** 2).view(
        generated.shape[0], -1).sum(-1) / args.alpha
    return log_prior + log_likelihood


# Adam with AMSGrad; beta1 = 0.5 is the usual choice for GAN-style nets.
optimizer = optim.Adam(netI.parameters(),
                       lr=args.lr,
                       amsgrad=True,
                       betas=(0.5, 0.9))
# Reusable buffer of base noise fed to the inference network netI.
w = torch.FloatTensor(args.batch_size, args.nw).to(device)

history = utils.History(args.outdir)
plotter = utils.Plotter(args.outdir, netG, netI, args.condfile,
                        torch.randn(64, args.nw).to(device))

# Minimize kl = E_q[-log p(z)] - H(q), a variational KL objective
# (up to constants), by stochastic gradient descent.
# Fix: `xrange` is Python 2 only (NameError on Python 3); `range` works on
# both and iterates identically here.
for i in range(args.niter):

    optimizer.zero_grad()
    w.normal_(0, 1)  # fresh standard-normal base noise in place
    z = netI(w)      # map noise through the inference network
    z = z.view(z.shape[0], z.shape[1], 1, 1)
    err = -logp(z).mean()          # Monte Carlo estimate of E_q[-log p(z)]
    ent = utils.sample_entropy(z)  # entropy estimate of q
    kl = err - ent
    kl.backward()
    optimizer.step()
# Example 6
def train_via_ranking(net,
                      train_triples,
                      val_triples,
                      optimizer,
                      num_nodes,
                      train_ranker,
                      val_ranker,
                      num_epochs,
                      batch_size,
                      batch_size_eval,
                      device,
                      margin=1,
                      history=None,
                      save_best_to=None,
                      dry_run=False,
                      ranking_eval=True):
    """Train `net` on knowledge-graph triples with a margin ranking loss.

    Each epoch runs: a training pass over `train_triples` (each positive
    triple scored against a corrupted negative from TriplesDatasetRanking),
    a no-grad validation pass over `val_triples`, an optional ranking
    evaluation via `train_ranker` / `val_ranker`, and an optional
    best-model checkpoint keyed on the validation mean reciprocal rank.

    Args:
        net: scoring model; called as ``net(triples)``, one score per triple.
        train_triples / val_triples: triple collections consumed by
            TriplesDatasetRanking.
        optimizer: torch optimizer over net's parameters.
        num_nodes: entity count, forwarded to TriplesDatasetRanking
            (presumably for negative sampling — confirm in that class).
        train_ranker / val_ranker: callables returning
            (mean_rank, mean_rec_rank, hits@1, hits@3, hits@10).
        num_epochs / batch_size / batch_size_eval: loop sizing.
        device: device the index batches are moved to.
        margin: margin of SimplifiedMarginRankingLoss.
        history: optional utils.History to append to; created when None.
        save_best_to: checkpoint path for the best model; disabled when None.
        dry_run: restrict both splits to a single batch (smoke test).
        ranking_eval: toggle the (expensive) ranking evaluation.

    Returns:
        The utils.History instance with per-epoch metrics.
    """

    #writer = SummaryWriter()

    if history is None:
        history = utils.History()
    loss_function = SimplifiedMarginRankingLoss(margin)

    if dry_run:  # use first batch only
        train_triples = train_triples[:batch_size]
        val_triples = val_triples[:batch_size_eval]

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))

        # -------------------- Training --------------------
        net.train()
        # Rebuilt every epoch so negatives are re-sampled.
        train_dataset = TriplesDatasetRanking(train_triples, num_nodes)
        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
        train_loader_tqdm = tqdm(train_loader)
        batches_history = utils.History()

        #running_metrics = collections.defaultdict(lambda: 0)

        for batch, (batch_triples,
                    batch_negative_triples) in enumerate(train_loader_tqdm):

            batch_triples = batch_triples.to(device)
            batch_negative_triples = batch_negative_triples.to(device)

            # Sanity check: Train on 0 inputs.
            #print('WARNING: Sanity check enabled')
            #batch_triples = torch.zeros_like(batch_triples)
            #batch_negative_triples = torch.zeros_like(batch_negative_triples)

            optimizer.zero_grad()
            output = net(batch_triples)
            output_negative = net(batch_negative_triples)
            # Pushes positive scores above negative scores by `margin`.
            loss = loss_function(output, output_negative)
            loss.backward()
            optimizer.step()

            batches_history.log_metric('loss', loss.item())
            # "acc" = fraction of pairs where the positive outranks the negative.
            batches_history.log_metric(
                'acc', (output > output_negative).float().mean().item())
            batches_history.log_metric(
                'mean_diff', (output - output_negative).mean().item())
            batches_history.log_metric(
                'median_diff', (output - output_negative).median().item())

            # Refresh the progress-bar postfix only every 10 batches.
            if batch % 10 == 0:
                train_loader_tqdm.set_postfix(batches_history.latest())

        #for key in running_metrics:
        #    running_metrics[key] /= len(batches)

        # Drop references to the last batch before emptying the CUDA cache.
        del batch_triples, batch_negative_triples, output, output_negative, loss
        torch.cuda.empty_cache()

        # -------------------- Testing --------------------
        net.eval()
        with torch.no_grad():
            val_dataset = TriplesDatasetRanking(val_triples, num_nodes)
            val_loader = DataLoader(val_dataset,
                                    batch_size=batch_size_eval,
                                    shuffle=False)
            val_batches_history = utils.History()

            for batch, (batch_triples,
                        batch_negative_triples) in enumerate(val_loader):

                # TODO: Does it actually make sense to move these to CUDA? They are just used as indices.
                batch_triples = batch_triples.to(device)
                batch_negative_triples = batch_negative_triples.to(device)
                output = net(batch_triples)
                output_negative = net(batch_negative_triples)
                loss = loss_function(output, output_negative)

                # TODO: Especially getting the loss takes quite some time (as much as a single prediction for dist mult), maybe replace it by a running metric directly in torch.
                val_batches_history.log_metric('loss', loss.item())
                val_batches_history.log_metric(
                    'acc', (output > output_negative).float().mean().item())
                val_batches_history.log_metric(
                    'mean_diff', (output - output_negative).mean().item())
                val_batches_history.log_metric(
                    'median_diff', (output - output_negative).median().item())

            del batch_triples, batch_negative_triples, output, output_negative, loss
            torch.cuda.empty_cache()

        #for key in running_metrics:
        #    running_metrics[key] /= len(batches)

        # TODO: Maybe implement these metrics in a batched fashion.
        # Fold the per-batch averages into the per-epoch history.
        history.log_metric('loss',
                           batches_history.mean('loss'),
                           val_batches_history.mean('loss'),
                           'Loss',
                           print_=True)
        #writer.add_scalar('test/loss', batches_history.mean('loss'), epoch)
        #writer.add_scalar('test/val_loss', val_batches_history.mean('loss'), epoch)
        history.log_metric('acc',
                           batches_history.mean('acc'),
                           val_batches_history.mean('acc'),
                           'Accuracy',
                           print_=True)
        history.log_metric('mean_diff',
                           batches_history.mean('mean_diff'),
                           val_batches_history.mean('mean_diff'),
                           'Mean Difference',
                           print_=True)
        history.log_metric('median_diff',
                           batches_history.mean('median_diff'),
                           val_batches_history.mean('median_diff'),
                           'Median Difference',
                           print_=True)

        # -------------------- Ranking --------------------
        if ranking_eval:
            mean_rank, mean_rec_rank, hits_1, hits_3, hits_10 = train_ranker(
                net, batch_size=batch_size_eval)
            val_mean_rank, val_mean_rec_rank, val_hits_1, val_hits_3, val_hits_10 = val_ranker(
                net, batch_size=batch_size_eval)

            history.log_metric('mean_rank',
                               mean_rank,
                               val_mean_rank,
                               'Mean Rank',
                               print_=True)
            history.log_metric('mean_rec_rank',
                               mean_rec_rank,
                               val_mean_rec_rank,
                               'Mean Rec Rank',
                               print_=True)
            history.log_metric('hits_1',
                               hits_1,
                               val_hits_1,
                               'Hits@1',
                               print_=True)
            history.log_metric('hits_3',
                               hits_3,
                               val_hits_3,
                               'Hits@3',
                               print_=True)
            history.log_metric('hits_10',
                               hits_10,
                               val_hits_10,
                               'Hits@10',
                               print_=True)

        # -------------------- Saving --------------------
        # Save whenever the validation MRR ties or beats all previous epochs.
        # NOTE(review): 'val_mean_rec_rank' is only logged when ranking_eval
        # is True — with ranking_eval=False and save_best_to set this lookup
        # should fail for epoch > 0. Confirm intended usage.
        if save_best_to is not None and (
                epoch == 0 or history['val_mean_rec_rank'][-1] >= np.max(
                    history['val_mean_rec_rank'][:-1])):
            # TODO: Using save on the model here directly gives an error.
            torch.save(net.state_dict(), save_best_to)
            print()
            print('Saving model after epoch {} to {}'.format(
                epoch + 1, save_best_to))

        print('-' * 80)
        print()

    return history
# Example 7
def train_via_classification(net,
                             train_triples,
                             val_triples,
                             optimizer,
                             num_nodes,
                             train_ranker,
                             val_ranker,
                             num_epochs,
                             batch_size,
                             batch_size_eval,
                             device,
                             history=None,
                             save_best_to=None,
                             dry_run=False,
                             ranking_eval=True):
    """Train `net` on knowledge-graph triples as binary classification.

    Each epoch runs: a training pass over `train_triples` with
    BCEWithLogitsLoss against the 0/1 labels supplied by
    TriplesDatasetClassification, a no-grad validation pass over
    `val_triples`, an optional ranking evaluation via `train_ranker` /
    `val_ranker`, and an optional best-model checkpoint keyed on the
    validation mean reciprocal rank.

    Args:
        net: scoring model; called as ``net(triples)``, one logit per triple.
        train_triples / val_triples: triple collections consumed by
            TriplesDatasetClassification.
        optimizer: torch optimizer over net's parameters.
        num_nodes: entity count, forwarded to TriplesDatasetClassification.
        train_ranker / val_ranker: callables returning
            (mean_rank, mean_rec_rank, hits@1, hits@3, hits@10).
        num_epochs / batch_size / batch_size_eval: loop sizing.
        device: device the batches are moved to.
        history: optional utils.History to append to; created when None.
        save_best_to: checkpoint path (may contain '{epoch}'); disabled
            when None.
        dry_run: restrict both splits to a single batch and disable saving.
        ranking_eval: toggle the (expensive) ranking evaluation.

    Returns:
        The utils.History instance with per-epoch metrics.
    """

    #if log_dir is not None:
    #    writer = SummaryWriter(log_dir=log_dir)

    if history is None:
        history = utils.History()

    loss_function = nn.BCEWithLogitsLoss()

    if dry_run:
        train_triples = train_triples[:batch_size]
        val_triples = val_triples[:batch_size_eval]

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))

        # -------------------- Training --------------------
        net.train()
        # Rebuilt every epoch so labeled examples are re-generated.
        train_dataset = TriplesDatasetClassification(train_triples, num_nodes)
        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
        train_loader_tqdm = tqdm(train_loader)
        batches_history = utils.History()

        # running_metrics = collections.defaultdict(lambda: 0)

        for batch, (batch_triples,
                    batch_labels) in enumerate(train_loader_tqdm):

            batch_triples = batch_triples.to(device)
            batch_labels = batch_labels.to(device)

            # Sanity check 1: Train on 0 inputs.
            #train_loader_tqdm.set_description('WARNING: Sanity check enabled')
            #batch_triples = torch.zeros_like(batch_triples)

            # Sanity check 2: Train on 0 targets.
            #train_loader_tqdm.set_description('WARNING: Sanity check enabled')
            #batch_labels = torch.zeros_like(batch_labels)

            # Sanity check 3: Overfit on a single batch.
            #train_loader_tqdm.set_description('WARNING: Sanity check enabled')
            #if epoch == 0 and batch == 0:
            #    fixed_batch_values = batch_triples, batch_labels
            #else:
            #    batch_triples, batch_labels = fixed_batch_values

            # Sanity check 4: Overfit on a few batches.
            # train_loader_tqdm.set_description('WARNING: Sanity check enabled')
            # if epoch == 0:
            #     if batch == 0:
            #         fixed_batch_values = []
            #     if batch < 10:
            #         fixed_batch_values.append((batch_triples, batch_labels))
            #     else:
            #         break
            # else:
            #     if batch < len(fixed_batch_values):
            #         batch_triples, batch_labels = fixed_batch_values[batch]
            #     else:
            #         break

            optimizer.zero_grad()
            output = net(batch_triples)
            #print(output)
            loss = loss_function(output, batch_labels)
            loss.backward()
            optimizer.step()

            batches_history.log('loss', loss.item())
            # Accuracy via thresholding the sigmoid at 0.5.
            batches_history.log('acc',
                                (torch.sigmoid(output).round() == batch_labels
                                 ).float().mean().item())

            # Refresh the progress-bar postfix only every 10 batches.
            if batch % 10 == 0:
                train_loader_tqdm.set_postfix(batches_history.last())

        # for key in running_metrics:
        #    running_metrics[key] /= len(batches)

        # Drop references to the last batch before emptying the CUDA cache.
        del batch_triples, batch_labels, output, loss
        torch.cuda.empty_cache()

        # -------------------- Testing --------------------
        net.eval()
        with torch.no_grad():
            val_dataset = TriplesDatasetClassification(val_triples, num_nodes)
            val_loader = DataLoader(val_dataset,
                                    batch_size=batch_size_eval,
                                    shuffle=False)
            val_batches_history = utils.History()

            for batch, (batch_triples, batch_labels) in enumerate(val_loader):
                # TODO: Does it actually make sense to move these to CUDA? They are just used as indices.
                batch_triples = batch_triples.to(device)
                batch_labels = batch_labels.to(device)
                output = net(batch_triples)
                loss = loss_function(output, batch_labels)

                val_batches_history.log('loss', loss.item())
                val_batches_history.log(
                    'acc', (torch.sigmoid(output).round() == batch_labels
                            ).float().mean().item())

            del batch_triples, batch_labels, output, loss
            torch.cuda.empty_cache()

        # for key in running_metrics:
        #    running_metrics[key] /= len(batches)

        # Fold the per-batch averages into the per-epoch history.
        history.log('loss',
                    batches_history.mean('loss'),
                    val_batches_history.mean('loss'),
                    print_=True)
        history.log('acc',
                    batches_history.mean('acc'),
                    val_batches_history.mean('acc'),
                    print_=True)
        # if log_dir is not None:
        #     writer.add_scalar('loss', batches_history.mean('loss'), epoch)
        #     writer.add_scalar('val_loss', val_batches_history.mean('loss'), epoch)
        #     writer.add_scalar('acc', batches_history.mean('acc'), epoch)
        #     writer.add_scalar('val_acc', val_batches_history.mean('val_acc'), epoch)

        # -------------------- Ranking --------------------
        if ranking_eval:
            mean_rank, mean_rec_rank, hits_1, hits_3, hits_10 = train_ranker(
                net, batch_size=batch_size_eval)
            val_mean_rank, val_mean_rec_rank, val_hits_1, val_hits_3, val_hits_10 = val_ranker(
                net, batch_size=batch_size_eval)

            history.log('mean_rank', mean_rank, val_mean_rank, print_=True)
            history.log('mean_rec_rank',
                        mean_rec_rank,
                        val_mean_rec_rank,
                        print_=True)
            history.log('hits_1', hits_1, val_hits_1, print_=True)
            history.log('hits_3', hits_3, val_hits_3, print_=True)
            history.log('hits_10', hits_10, val_hits_10, print_=True)
            # if log_dir is not None:
            #     writer.add_scalar('mean_rank', mean_rank, epoch)
            #     writer.add_scalar('val_mean_rank', mean_rank, epoch)
            #     writer.add_scalar('mean_rec_rank', mean_rec_rank, epoch)
            #     writer.add_scalar('val_mean_rec_rank', val_mean_rec_rank, epoch)
            #     writer.add_scalar('hits_1', hits_1, epoch)
            #     writer.add_scalar('val_hits1', val_hits_1, epoch)
            #     writer.add_scalar('hits_3', hits_3, epoch)
            #     writer.add_scalar('val_hits3', val_hits_3, epoch)
            #     writer.add_scalar('hits_10', hits_10, epoch)
            #     writer.add_scalar('val_hits_10', val_hits_10, epoch)

        # -------------------- Saving --------------------
        # TODO: Pass val_mean_rec_rank as parameter here.
        # NOTE(review): 'val_mean_rec_rank' is only logged when ranking_eval
        # is True — with ranking_eval=False and save_best_to set this lookup
        # should fail for epoch > 0. Also, reassigning save_best_to below
        # consumes any '{epoch}' placeholder on the first save, so later
        # saves reuse the first best epoch's filename. Confirm both.
        if not dry_run and save_best_to is not None and (
                epoch == 0 or history.values['val_mean_rec_rank'][-1] >=
                np.max(history.values['val_mean_rec_rank'][:-1])):
            save_best_to = save_best_to.format(
                epoch=epoch
            )  # if there is no substring {epoch}, this doesn't have an effect
            # TODO: Using save on the model here directly gives an error.
            torch.save(net.state_dict(), save_best_to)
            print()
            print('Saving model after epoch {} to {}'.format(
                epoch + 1, save_best_to))

        print('-' * 80)
        print()

    return history
# Example 8
def _common(args,
            mode,
            validation=False,
            val_ratio=0,
            aloi_file=None,
            **kwargs):
    """Shared train/eval driver over all VDAO feature layers.

    Loads the VDAO database for the requested mode, then, for each layer in
    VDAO.LAYER_NAME, iterates over the cross-validation groups produced by
    the database generator: a model is loaded (or built fresh), trained via
    _train or evaluated via _eval, and results are collected per layer.

    Args:
        args: parsed CLI namespace (dataset_dir, file, save_dir, arch,
            arch_params, cv_params, load_model, weight_decay, ...).
        mode: 'train' selects the training branch; anything else evaluates
            ('test' additionally selects the test data split).
        validation: whether to hold out a validation set (also enables ROC
            collection in train mode).
        val_ratio: validation fraction forwarded to vdao.VDAO.
        aloi_file: optional auxiliary data file forwarded to vdao.VDAO.
        **kwargs: extra options; 'thresholds' (default 0.5) is popped and
            used in eval mode — either a scalar or a dict keyed by
            [layer][group_idx].

    Returns:
        dict mapping layer name to {'history': ..., 'roc': ...} in train
        mode, or to {'output': [...]} in eval mode.
    """
    data_mode = 'test' if mode == 'test' else 'train'
    database = vdao.VDAO(args.dataset_dir,
                         args.file,
                         mode=data_mode,
                         val_set=validation,
                         val_ratio=val_ratio,
                         aloi_file=aloi_file)

    # Set tensorflow session configurations
    config = tf.ConfigProto()
    # Empty list hides all GPUs from TF, forcing a CPU-only session.
    config.gpu_options.visible_device_list = ''
    K.set_session(tf.Session(config=config))
    print('save results: {}'.format(args.save_dir))

    # Useful metrics to record
    metrics_list = [
        metrics.fnr, metrics.fpr, metrics.distance, metrics.f1, metrics.tp,
        metrics.tn, metrics.fp, metrics.fn
    ]
    # Map each metric function's name to the function itself.
    meters = {func.__name__: func for func in metrics_list}

    thresholds = kwargs.pop('thresholds', 0.5)
    arch = archs.__dict__[args.arch.lower()]
    arch_params = utils.parse_kwparams(args.arch_params)

    logger = {}
    # Apply func to data comming from all specified layers
    for layer in VDAO.LAYER_NAME:
        print('layer: {}'.format(layer))
        database.set_layer(layer)
        cross_history = utils.History()
        outputs = []

        roc = metrics.ROC() if validation is True else None

        # Apply func to each partition of the data
        for group_idx, (samples, set_size) in enumerate(
                database.load_generator(
                    **utils.parse_kwparams(args.cv_params))):

            # Load old model or create a new one
            if args.load_model is not None:
                try:
                    model = arch(load_path=args.load_model,
                                 save_path=args.save_dir,
                                 layer=layer,
                                 group_idx=group_idx)
                except FileNotFoundError:
                    # Missing checkpoint for this layer: skip the group.
                    print('file not found for layer {}'.format(layer))
                    continue
            else:
                model = arch(
                    load_path=args.load_model,
                    save_path=args.save_dir,
                    layer=layer,
                    group_idx=group_idx,
                    #  input_shape=samples[0][0].shape[1:],
                    # Infer the input shape from the first sample array.
                    input_shape=next(iter(samples.values()))[0].shape[1:],
                    weight_decay=args.weight_decay,
                    **arch_params)

            if mode == 'train':
                output = _train(args,
                                model,
                                samples,
                                set_size,
                                meters,
                                cross_history,
                                roc=roc)
                print('\nFinished training {}'.format(group_idx + 1))
            else:
                # Per-group thresholds when a dict is supplied, otherwise
                # the same scalar threshold for all groups.
                if type(thresholds) is dict:
                    group_thresholds = thresholds[layer][group_idx]
                else:
                    group_thresholds = thresholds

                output = _eval(args,
                               model,
                               samples,
                               set_size[data_mode],
                               meters,
                               threshold=group_thresholds)

            outputs += [output]

        if mode == 'train':
            logger[layer] = {'history': cross_history}
            if roc is not None:
                logger[layer].update({'roc': roc})
        else:
            logger[layer] = {'output': outputs}

        print('\n' + '* ' * 80 + '\n\n')

    return logger