Example 1
def main(dataset_name, n_epochs):
    dataset = dataset_prep(load_dataset(join(dataset_loc, 'Flukes/patches', dataset_name)))
    img_dir = join(dataset_loc,'Flukes/CRC_combined constrained')
    batch_size = 32
    losses = {}
    epoch_losses = []
    batch_losses = []
    img_shape = (87,240)
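    # Build the siamese network layers and the training/validation iteration functions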
    patch_layer, descriptor_layer, match_layer = build_siamese_whole(img_shape)
    iter_funcs = similarity_iter(patch_layer, match_layer, {'learning_rate':1e-2}, match_layer_w=0.5)
    batch_loader = partial(img_batch_loader, img_dir, img_shape)
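    # Training loop: record per-epoch and per-batch losses and reshuffle the training set after each epoch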
    for epoch in range(n_epochs):
        tic = time.time()
        print("Epoch %d" % epoch)
        loss = train_epoch(iter_funcs, dataset, batch_size=batch_size, batch_loader=batch_loader)
        epoch_losses.append(loss['train_loss'])
        batch_losses.append(loss['all_train_loss'])
        # shuffle training set
        dataset['train'] = shuffle_dataset(dataset['train'])
        toc = time.time() - tic
        print("Train loss (reg): %0.3f\nTrain loss: %0.3f\nValid loss: %0.3f" %
                (loss['train_reg_loss'],loss['train_loss'],loss['valid_loss']))
        print("Took %0.2f seconds" % toc)
    batch_losses = list(chain(*batch_losses))
    losses['batch'] = batch_losses
    losses['epoch'] = epoch_losses
    parameter_analysis(match_layer)
    desc = desc_func(descriptor_layer)
Example 2
def main(dataset_name, n_epochs):
    with open('../dataset_loc','r') as f:
        dataset_loc = f.read().rstrip()
    all_datasets = dataset_prep(load_dataset(join(dataset_loc, 'Flukes/patches', dataset_name)))
    batch_size = 32
    desc_funcs = {}
    losses = {}
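    # Train a separate similarity network for each patch type in the dataset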
    for patch_type in all_datasets:
        epoch_losses = []
        batch_losses = []
        patch_layer, descriptor_layer, match_layer = build_siamese_separate_similarity()
        iter_funcs = similarity_iter(patch_layer, match_layer, {'learning_rate':1e-2}, match_layer_w=0.)
        for epoch in range(n_epochs):
            tic = time.time()
            print("%s: Epoch %d" % (patch_type, epoch))
            loss = train_epoch(iter_funcs, all_datasets[patch_type], batch_size=batch_size)
            epoch_losses.append(loss['train_loss'])
            batch_losses.append(loss['all_train_loss'])
            # shuffle training set
            all_datasets[patch_type]['train'] = shuffle_dataset(all_datasets[patch_type]['train'])
            toc = time.time() - tic
            print("Train loss (reg): %0.3f\nTrain loss: %0.3f\nValid loss: %0.3f" %
                    (loss['train_reg_loss'],loss['train_loss'],loss['valid_loss']))
            print("Took %0.2f seconds" % toc)
        batch_losses = list(chain(*batch_losses))
        losses[patch_type] = {}
        losses[patch_type]['batch'] = batch_losses
        losses[patch_type]['epoch'] = epoch_losses
        print(patch_type)
        parameter_analysis(match_layer)
        desc_funcs[patch_type] = desc_func(descriptor_layer)
    display_losses(losses, n_epochs, batch_size, all_datasets['notch']['train']['X1'].shape[0])

    # Evaluate train rank accuracy and val rank accuracy
    identifier_eval_dataset = load_identifier_eval(join(dataset_loc, 'Flukes/patches', dataset_name))
    norm_idmap = normalize_image_idmap(identifier_eval_dataset['idmap'])
    #check_for_dupes(identifier_eval_dataset['idmap'])
    print("Identification performance train:")
    print_cdfs(identifier_eval(desc_funcs, identifier_eval_dataset['train'], norm_idmap))
    print("Identification performance valid:")
    print_cdfs(identifier_eval(desc_funcs, identifier_eval_dataset['val'], norm_idmap))
Example 3
                                           num_workers=args.n_threads,
                                           sampler=LimitedRandomSampler(
                                               train_ds, N_batches, args.bs))
        else:
            train_loader = data.DataLoader(train_ds,
                                           batch_size=args.bs,
                                           num_workers=args.n_threads,
                                           shuffle=True)

        print(colored('==> ', 'blue') + 'Epoch:', epoch + 1, cur_snapshot)
        # Adjusting learning rate using the scheduler
        optimizer, cur_lr = adjust_learning_rate(optimizer, epoch + 1, args)
        print(colored('==> ', 'red') + 'LR:', cur_lr)
        # Training one epoch and measure the time
        start = time.time()
        train_loss = train_epoch(epoch, net, optimizer, train_loader,
                                 criterion, args.n_epoch)
        epoch_time = np.round(time.time() - start, 4)
        print(
            colored('==> ', 'green') +
            'Epoch training time: {} s.'.format(epoch_time))
        # If it is time to start the validation, we will do it
        # args.start_val can be used to avoid time-consuming validation
        # in the beginning of the training
        if epoch >= args.start_val:
            start = time.time()
            val_loss, probs, truth, _ = validate_epoch(net, val_loader,
                                                       criterion)

            preds = probs.argmax(1)
            # Validation metrics
            cm = confusion_matrix(truth, preds)
Example 4
    best_dice = 0
    prev_model = None

    train_started = time.time()
    batch_size = 64
    n_epochs = 3
    max_ep = n_epochs
    start_val = 0
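    # Rebuild the augmented training dataset and loader each epoch, train, then validate once epoch >= start_val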
    for epoch in range(n_epochs):
        train_ds = KneeGradingDataset(dataset, transform=augment_transforms)
        train_loader = data.DataLoader(train_ds,
                                       batch_size=batch_size,
                                       shuffle=True)

        start = time.time()
        train_loss = train_epoch(epoch, net, optimizer, train_loader,
                                 criterion, n_epochs, USE_CUDA)
        epoch_time = np.round(time.time() - start, 4)
        print(
            colored('==> ', 'green') +
            'Epoch training time: {} s.'.format(epoch_time))
        if epoch >= start_val:
            start = time.time()
            val_loss, probs, truth, _ = validate_epoch(net, val_loader,
                                                       criterion, USE_CUDA)

            preds = probs.argmax(1)
            # Validation metrics
            cm = confusion_matrix(truth, preds)
            kappa = np.round(
                cohen_kappa_score(truth, preds, weights="quadratic"), 4)
            acc = np.round(
Example 5
    test_loader = DataLoader(ds_test, batch_size=2048, shuffle=False)

    #########################
    # Model, Optimizer, Loss#
    #########################

    model = Model(hidden_size=hidden_size,
                  input_size=input_size,
                  dropout_rate=dropout_rate).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_func = nn.MSELoss()

    n_epochs = 100  # Number of training epochs

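    # Train for n_epochs, evaluating the model on the validation set after every epoch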
    for i in range(n_epochs):
        train_epoch(model, optimizer, tr_loader, loss_func, i + 1, DEVICE)
        obs, preds = eval_model(model, val_loader, DEVICE)
        preds = ds_val.local_rescale(preds.cpu().numpy(), variable="output")
        nse = calc_nse(obs.cpu().numpy(), preds)
        tqdm.write(f"Validation NSE: {nse:.2f}")

    # Evaluate on test set
    obs, preds = eval_model(model, test_loader, DEVICE)
    preds = ds_val.local_rescale(preds.cpu().numpy(), variable="output")
    obs = obs.cpu().numpy()
    nse = calc_nse(obs, preds)
    if nse > max_nse:
        max_nse = nse
        best_features = features
    # Plot results
    start_date = ds_test.dates[0]
Example 6
                #batch_maker = {k:partial(make_batch, dataset=dset[k], actives=active_triplets[k]) for k in ['train','valid']}
            else:
                print("Number of pairs: %r" % ({k:len(dset[k]['y']) for k in dset}))
                train_pairs_shuffled = {k:shuffle_dataset({part:dset[k][part] for part in ['pairs','y']})
                                        for k in ['train','valid']}
                # sanity check: each label still matches whether the two ids in its pair are the same
                assert all(y == int(dset['train']['ids'][pair[0]] == dset['train']['ids'][pair[1]])
                           for y, pair in zip(train_pairs_shuffled['train']['y'],
                                              train_pairs_shuffled['train']['pairs']))
                dataset = train_pairs_shuffled


            batch_maker = {k:partial(FUNCTIONS[options.loss_type]['bm'], dataset=dset[k]) for k in ['train','valid']}
            batch_loader = partial(FUNCTIONS[options.loss_type]['nonaug_bl'], batch_maker=batch_maker)

            # so we're going to just give dset as
            loss = train_epoch(iter_funcs, dataset, batch_size, batch_loader, layer_names=layer_names)
            epoch_losses.append(loss['train_loss'])
            batch_losses.append(loss['all_train_loss'])
            toc = time.time() - tic
            print("Learning rate: %0.5f" % momentum_params['l_r'].get_value())
            print("Train loss (reg): %0.3f\nTrain loss: %0.3f\nValid loss: %0.3f" %
                    (loss['train_reg_loss'],loss['train_loss'],loss['valid_loss']))
            print("Train %s failed: %s\nValid %s failed: %s" % (options.loss_type, loss['train_acc'],
                                                                options.loss_type, loss['valid_acc'],))
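            # Keep the parameters with the lowest validation loss seen so far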
            if loss['valid_loss'] < best_val_loss:
                best_params = ll.get_all_param_values(embedder)
                best_val_loss = loss['valid_loss']
                print("New best validation loss!")
            print("Took %0.2f seconds" % toc)
    except KeyboardInterrupt:
        print("Training interrupted, save model? y/n")
Example 7
 if options.resume and exists(model_path):
     params = ut.load_cPkl(model_path)
     ll.set_all_param_values(segmenter, params)
 #iter_funcs = loss_iter(segmenter, update_params={'learning_rate':.01})
 lr = theano.shared(np.array(0.010, dtype=np.float32))
 momentum_params = {'l_r':lr, 'momentum':0.9}
 iter_funcs = loss_iter(segmenter, update_params=momentum_params)
 best_params = ll.get_all_param_values(segmenter)
 best_val_loss = np.inf
 layer_names = [p.name for p in ll.get_all_params(segmenter, trainable=True)]
 save_model = True
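 # Run the epoch loop inside a try block so training can be interrupted with Ctrl-C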
 try:
     for epoch in range(n_epochs):
         tic = time.time()
         print("Epoch %d" % (epoch))
         loss = train_epoch(iter_funcs, dset, batch_size, augmenting_batch_loader, layer_names=layer_names)
         epoch_losses.append(loss['train_loss'])
         batch_losses.append(loss['all_train_loss'])
         # shuffle training set
         dset['train'] = shuffle_dataset(dset['train'])
         toc = time.time() - tic
         print("Learning rate: %0.5f" % momentum_params['l_r'].get_value())
         print("Train loss (reg): %0.3f\nTrain loss: %0.3f\nValid loss: %0.3f" %
                 (loss['train_reg_loss'],loss['train_loss'],loss['valid_loss']))
         print("Train Pixel Precision @0.5: %s\nValid Pixel Precision @0.5: %s" % (loss['train_acc'], loss['valid_acc']))
         if loss['valid_loss'] < best_val_loss:
             best_params = ll.get_all_param_values(segmenter)
             best_val_loss = loss['valid_loss']
             print("New best validation loss!")
         print("Took %0.2f seconds" % toc)
 except KeyboardInterrupt:
Example 8
 epoch_losses = []
 batch_losses = []
 segmenter = build_segmenter_vgg()
 model_path = join(dataset_loc, "Flukes/patches/%s/model.pkl" % dset_name)
 if options.resume and exists(model_path):
     with open(model_path, 'rb') as f:
         params = pickle.load(f)
     ll.set_all_param_values(segmenter, params)
 #iter_funcs = loss_iter(segmenter, update_params={'learning_rate':.01})
 iter_funcs = loss_iter(segmenter, update_params={})
 best_params = ll.get_all_param_values(segmenter)
 best_val_loss = np.inf
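 # Epoch loop: shuffle the training set each epoch and keep the parameters with the best validation loss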
 for epoch in range(n_epochs):
     tic = time.time()
     print("Epoch %d" % (epoch))
     loss = train_epoch(iter_funcs, dset, batch_size=batch_size)
     epoch_losses.append(loss['train_loss'])
     batch_losses.append(loss['all_train_loss'])
     # shuffle training set
     dset['train'] = shuffle_dataset(dset['train'])
     toc = time.time() - tic
     print("Train loss (reg): %0.3f\nTrain loss: %0.3f\nValid loss: %0.3f" %
             (loss['train_reg_loss'],loss['train_loss'],loss['valid_loss']))
     print("Train acc: %0.3f\nValid acc: %0.3f" % (loss['train_acc'], loss['valid_acc']))
     if loss['valid_loss'] < best_val_loss:
         best_params = ll.get_all_param_values(segmenter)
         best_val_loss = loss['valid_loss']
         print("New best validation loss!")
     print("Took %0.2f seconds" % toc)
 batch_losses = list(chain(*batch_losses))
 losses = {}