Example 1
def train_model(model, dataloaders, criterion, optimizer, args, start_epoch=1, num_epochs=25):
    """
    Trains the 3D CNN Model
    :param model: Model object that we will train
    :param base_model_name: The base name of the model
    :param dataloaders: A dictionary of train and validation dataloader
    :param criterion: Pytorch Criterion Instance
    :param optimizer: Pytorch Optimizer Instance
    :param num_epochs: Number of epochs during training
    :return: model, train_loss_history, val_loss_history, train_acc_history, val_acc_history, train_f1_score, val_f1_score, plot_epoch
    """

    # Initializes Session History in the history file
    init_session_history(args)
    since = time.time()

    train_acc_history = []
    val_acc_history = []
    train_loss_history = []
    val_loss_history = []
    train_f1_score = []
    val_f1_score = []
    plot_epoch = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(start_epoch, num_epochs):

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                train_pred_classes = []
                train_ground_truths = []
            else:
                model.eval()  # Set model to evaluate mode
                val_pred_classes = []
                val_ground_truths = []

            running_loss = 0.0
            running_corrects = 0
            train_n_total = 1

            pbar = tqdm(dataloaders[phase])
            # Iterate over data.
            for sample in pbar:
                inputs = sample["video"]
                labels = sample["action"]
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):

                    outputs = model(inputs)
                    loss = criterion(outputs, torch.max(labels, 1)[1])

                    _, preds = torch.max(outputs, 1)
                    #print(preds)
                    #print(torch.max(labels, 1)[1])

                    if phase == 'train':
                        train_pred_classes.extend(preds.detach().cpu().numpy())
                        train_ground_truths.extend(torch.max(labels, 1)[1].detach().cpu().numpy())
                    else:
                        val_pred_classes.extend(preds.detach().cpu().numpy())
                        val_ground_truths.extend(torch.max(labels, 1)[1].detach().cpu().numpy())

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == torch.max(labels, 1)[1])

                pbar.set_description('Phase: {} || Epoch: {} || Loss {:.5f} '.format(phase, epoch, running_loss / train_n_total))
                train_n_total += 1

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            # Calculate elapsed time
            time_elapsed = time.time() - since
            print('{} phase complete in {:.0f}m {:.0f}s'.format(phase, time_elapsed // 60, time_elapsed % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # For Checkpointing and Confusion Matrix
            if phase == 'val':
                val_acc_history.append(epoch_acc)
                val_loss_history.append(epoch_loss)
                val_pred_classes = np.asarray(val_pred_classes)
                val_ground_truths = np.asarray(val_ground_truths)
                val_accuracy, val_f1, val_precision, val_recall = get_acc_f1_precision_recall(
                    val_pred_classes, val_ground_truths
                )
                val_f1_score.append(val_f1)
                val_confusion_matrix = np.array_str(confusion_matrix(val_ground_truths, val_pred_classes, labels=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
                print('Epoch: {} || Val_Acc: {} || Val_Loss: {}'.format(
                    epoch, val_accuracy, epoch_loss
                ))
                print(f'val: \n{val_confusion_matrix}')

                # Deep Copy Model if best accuracy
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

                # set current loss to val loss for write history
                val_loss = epoch_loss

            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_loss_history.append(epoch_loss)
                train_pred_classes = np.asarray(train_pred_classes)
                train_ground_truths = np.asarray(train_ground_truths)
                train_accuracy, train_f1, train_precision, train_recall = get_acc_f1_precision_recall(
                    train_pred_classes, train_ground_truths
                )
                train_f1_score.append(train_f1)
                train_confusion_matrix = np.array_str(confusion_matrix(train_ground_truths, train_pred_classes, labels=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
                print('Epoch: {} || Train_Acc: {} || Train_Loss: {}'.format(
                    epoch, train_accuracy, epoch_loss
                ))
                print(f'train: \n{train_confusion_matrix}')
                plot_epoch.append(epoch)

                # set current loss to train loss for write history
                train_loss = epoch_loss

        # Save Weights
        model_name = save_weights(model, args, epoch, optimizer)

        # Write History after train and validation phase
        write_history(
            args.history_path,
            model_name,
            train_loss,
            val_loss,
            train_accuracy,
            val_accuracy,
            train_f1,
            val_f1,
            train_precision,
            val_precision,
            train_recall,
            val_recall,
            train_confusion_matrix,
            val_confusion_matrix
        )

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, train_loss_history, val_loss_history, train_acc_history, val_acc_history, train_f1_score, val_f1_score, plot_epoch
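A minimal usage sketch for train_model above, under stated assumptions: Some3DCNN, train_loader, val_loader and args are hypothetical placeholders, and the repository's helpers (init_session_history, save_weights, write_history, get_acc_f1_precision_recall) plus a module-level device are assumed to be available. Each batch is expected to be a dict with "video" inputs and one-hot "action" labels.

# Hedged usage sketch; Some3DCNN, train_loader, val_loader and args are placeholders, not part of the original code.
import torch.nn as nn
import torch.optim as optim

model = Some3DCNN(num_classes=10).to(device)   # placeholder 3D CNN with 10 action classes
criterion = nn.CrossEntropyLoss()              # expects class indices, hence torch.max(labels, 1)[1] above
optimizer = optim.Adam(model.parameters(), lr=1e-4)
dataloaders = {'train': train_loader, 'val': val_loader}

model, train_loss, val_loss, train_acc, val_acc, train_f1, val_f1, epochs = train_model(
    model, dataloaders, criterion, optimizer, args, start_epoch=1, num_epochs=25)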
Example 2
    for i in xrange(len(gl)):
        gl[i] /= num_batches_u
    for i in xrange(len(cl)):
        cl[i] /= num_batches_u

    if (epoch >= anneal_lr_epoch) and (epoch % anneal_lr_every_epoch == 0):
        lr = lr*anneal_lr_factor
        cla_lr *= anneal_lr_factor_cla

    t = time.time() - start

    line = "*Epoch=%d Time=%.2f LR=%.5f\n" %(epoch, t, lr) + "DisLosses: " + str(dl)+"\nGenLosses: "+str(gl)+"\nInfLosses: "+str(il)+"\nClaLosses: "+str(cl)
    print line
    with open(logfile,'a') as f:
        f.write(line + "\n")

    # random generation for visualization
    if epoch % vis_epoch == 0:
        import utils.paramgraphics as paramgraphics
        tail = '-'+str(epoch)+'.png'
        ran_y = np.int32(np.repeat(np.arange(num_classes), num_classes))
        x_gen = generate(ran_y)
        x_gen = x_gen.reshape((z_generated*num_classes,-1))
        image = paramgraphics.mat_to_img(x_gen.T, dim_input, colorImg=colorImg, scale=generation_scale, save_path=os.path.join(sample_path, 'sample'+tail))

    if epoch % 200 == 0:
        from utils.checkpoints import save_weights
        params = ll.get_all_params(dis_layers+[classifier,]+gen_layers+disxz_layers+inf_layers)
        save_weights(os.path.join(outfolder, 'model_epoch' + str(epoch) + '.npy'), params, None)
        save_weights(os.path.join(outfolder, 'average'+ str(epoch) +'.npy'), cla_param_avg, None)
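A small standalone sketch of the annealing rule used above: once epoch reaches anneal_lr_epoch, the learning rate is multiplied by anneal_lr_factor every anneal_lr_every_epoch epochs. All numbers below are illustrative, not the values used in the script.

# Illustrative values only; the real hyperparameters are defined elsewhere in the script.
lr = 3e-4
anneal_lr_epoch = 200        # start annealing at this epoch
anneal_lr_every_epoch = 1    # anneal once per epoch after that
anneal_lr_factor = 0.995     # multiplicative decay per annealing step

for epoch in range(1, 301):
    if (epoch >= anneal_lr_epoch) and (epoch % anneal_lr_every_epoch == 0):
        lr = lr * anneal_lr_factor

print(lr)   # 3e-4 * 0.995 ** 101, since epochs 200..300 each anneal once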
Example 3
def train(config):

    # Set random seed to ensure identical network initializations.
    # Note that cuDNN's convolutions are nondeterministic, so this
    # does not guarantee that two networks will behave identically.
    lasagne.random.set_rng(np.random.RandomState(1234))

    # Load config file
    config_module = imp.load_source('config', config.model_definition)
    cfg = config_module.cfg
    # Get model
    model = config_module.get_model()
    # Compile functions
    log(config.log_file, 'Compiling theano functions...')
    test_function, test_vars, model = make_test_function(cfg, model, config)
    tfuncs, tvars, model = make_training_functions(cfg, model, config)
    tfuncs.update(test_function)
    tvars.update(test_vars)

    weights = config.weights
    if weights == -1:
        start_epoch = 0
    else:
        ld = config.log_dir
        WEIGHTS = config.weights
        ckptfile = os.path.join(ld,
                                config.snapshot_prefix + str(WEIGHTS) + '.npz')
        log(config.log_file, 'Loading weights from ' + ckptfile)
        start_epoch = WEIGHTS + 1
        ACC_LOGGER.load(
            (os.path.join(ld, "{}_acc_train_accuracy.csv".format(config.name)),
             os.path.join(ld, "{}_acc_eval_accuracy.csv".format(config.name))),
            epoch=WEIGHTS)
        LOSS_LOGGER.load(
            (os.path.join(ld, "{}_loss_train_loss.csv".format(config.name)),
             os.path.join(ld, '{}_loss_eval_loss.csv'.format(config.name))),
            epoch=WEIGHTS)
        metadata = checkpoints.load_weights(ckptfile, model['l_out'])

    itr = 0

    # Load data and shuffle training examples.
    # Note that this loads the entire dataset into RAM! If you don't
    # have a lot of RAM, consider only loading chunks of this at a time.
    log(config.log_file, 'Loading Data')
    x_test = np.load(os.path.join(config.data, 'test.npz'))['features']
    y_test = np.load(os.path.join(config.data, 'test.npz'))['targets']
    x = np.load(os.path.join(config.data, 'train.npz'))['features']
    # Seed the shuffle
    np.random.seed(42)
    # Define shuffle indices
    index = np.random.permutation(len(x))
    # Shuffle inputs
    x = x[index]
    # Shuffle targets to match inputs
    y = np.load(os.path.join(config.data, 'train.npz'))['targets'][index]
    # Define size of chunk to be loaded into GPU memory
    chunk_size = cfg['batch_size'] * cfg['batches_per_chunk']
    # Determine number of chunks
    num_chunks = int(math.ceil(len(y) / float(chunk_size)))
    # Get current learning rate
    new_lr = np.float32(tvars['learning_rate'].get_value())
    # Loop across training epochs!

    begin = start_epoch
    end = cfg['max_epochs'] + start_epoch
    log(config.log_file, 'Starting Training')
    for epoch in xrange(begin, end + 1):
        # Evaluate on the test set at the start of each epoch
        evaluate(x_test, y_test, cfg, tfuncs, tvars, config, epoch=epoch)
        ACC_LOGGER.save(config.log_dir)
        LOSS_LOGGER.save(config.log_dir)
        ACC_LOGGER.plot(dest=config.log_dir)
        LOSS_LOGGER.plot(dest=config.log_dir)

        # Update Learning Rate
        if isinstance(cfg['learning_rate'], dict) and epoch > 0:
            if any(x == epoch for x in cfg['learning_rate'].keys()):
                lr = np.float32(tvars['learning_rate'].get_value())
                new_lr = cfg['learning_rate'][epoch]
                log(config.log_file,
                    'Changing learning rate from {} to {}'.format(lr, new_lr))
                tvars['learning_rate'].set_value(np.float32(new_lr))
        if cfg['decay_rate'] and epoch > 0:
            lr = np.float32(tvars['learning_rate'].get_value())
            new_lr = lr * (1 - cfg['decay_rate'])
            log(config.log_file,
                'Changing learning rate from {} to {}'.format(lr, new_lr))
            tvars['learning_rate'].set_value(np.float32(new_lr))

        # Loop across chunks!
        #for chunk_index in xrange(1):
        for chunk_index in xrange(num_chunks):
            # Define upper index of chunk to load
            # If you start doing complicated things with data loading, consider
            # wrapping all of this into its own little function.
            upper_range = min(len(y), (chunk_index + 1) * chunk_size)
            # Get current chunk
            x_shared = np.asarray(x[chunk_index *
                                    chunk_size:upper_range, :, :, :, :],
                                  dtype=np.float32)
            y_shared = np.asarray(y[chunk_index * chunk_size:upper_range],
                                  dtype=np.float32)
            # Get repeatable seed to shuffle jittered and unjittered instances within chunk.
            # Note that this seed varies between chunks, but will be constant across epochs.
            np.random.seed(chunk_index)
            # Get shuffled chunk indices for a second round of shuffling
            indices = np.random.permutation(2 * len(x_shared))
            # Get number of batches in this chunk
            num_batches = 2 * len(x_shared) // cfg['batch_size']

            # Combine data with jittered data, then shuffle and change binary range from {0,1} to {-1,3}, then load into GPU memory.
            tvars['X_shared'].set_value(4.0 * np.append(
                x_shared, jitter_chunk(x_shared, cfg,
                                       chunk_index), axis=0)[indices] - 1.0,
                                        borrow=True)
            tvars['y_shared'].set_value(np.append(y_shared, y_shared,
                                                  axis=0)[indices],
                                        borrow=True)

            lvs, accs = [], []
            # Loop across batches!
            for bi in xrange(num_batches):

                [classifier_loss, class_acc] = tfuncs['update_iter'](bi)

                # Record batch loss and accuracy
                lvs.append(classifier_loss)
                accs.append(class_acc)

                # Update iteration counter
                itr += 1
                if itr % max(config.train_log_frq / config.batch_size, 1) == 0:
                    [closs, c_acc
                     ] = [float(np.mean(lvs)), 1.0 - float(np.mean(accs))]
                    ACC_LOGGER.log(c_acc, epoch, "train_accuracy")
                    LOSS_LOGGER.log(closs, epoch, "train_loss")
                    lvs, accs = [], []
                    log(
                        config.log_file,
                        'TRAINING: epoch: {0:^3d}, itr: {1:d}, c_loss: {2:.6f}, class_acc: {3:.5f}'
                        .format(epoch, itr, closs, c_acc))

        if not (epoch % cfg['checkpoint_every_nth']) or epoch == end:
            weights_fname = os.path.join(config.log_dir,
                                         config.snapshot_prefix + str(epoch))
            checkpoints.save_weights(weights_fname, model['l_out'], {
                'itr': itr,
                'ts': time.time(),
                'learning_rate': new_lr
            })

    log(config.log_file, 'Training done')
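The chunking arithmetic above (chunk_size = batch_size * batches_per_chunk, ceiling division for num_chunks, and a clamped upper index for the last partial chunk) can be checked in isolation; the sizes below are made up.

# Toy sizes, for illustration only.
import math

batch_size, batches_per_chunk = 32, 16
chunk_size = batch_size * batches_per_chunk             # 512 examples per GPU chunk
n = 1200                                                # hypothetical dataset size
num_chunks = int(math.ceil(n / float(chunk_size)))      # ceil(1200 / 512) = 3

for chunk_index in range(num_chunks):
    upper_range = min(n, (chunk_index + 1) * chunk_size)   # clamp the final, partial chunk
    lower_range = chunk_index * chunk_size
    print(chunk_index, upper_range - lower_range)           # 512, 512, 176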
Example 4
def main(args):

    # Load config file
    config_module = imp.load_source('config', args.config_path)
    cfg = config_module.cfg

    # Define weights file name
    weights_fname = str(args.config_path)[:-3] + '.npz'

    # Define training metrics filename
    metrics_fname = weights_fname[:-4] + 'METRICS.jsonl'

    # Prepare Logs
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s| %(message)s')
    logging.info('Metrics will be saved to {}'.format(metrics_fname))
    mlog = metrics_logging.MetricsLogger(metrics_fname, reinitialize=True)

    # Get model and compile theano functions
    model = config_module.get_model()
    logging.info('Compiling theano functions...')
    tfuncs, tvars = make_training_functions(cfg, model)

    logging.info('Training...')

    # Iteration Counter. One iteration corresponds to one minibatch.
    itr = 0

    # Best true-positive rate
    best_tp = 0

    for epoch in xrange(cfg['max_epochs']):
        # Prepare data loader
        loader = (data_loader(cfg, args.train_file))

        # Update Learning Rate. Note that this version of the function does not support a decay rate;
        # See other training files in the discriminative section for this.
        if isinstance(cfg['learning_rate'], dict) and epoch > 0:
            if any(x == epoch for x in cfg['learning_rate'].keys()):
                lr = np.float32(tvars['learning_rate'].get_value())
                new_lr = cfg['learning_rate'][epoch]
                logging.info('Changing learning rate from {} to {}'.format(
                    lr, new_lr))
                tvars['learning_rate'].set_value(np.float32(new_lr))

        # Initialize epoch-wise chunk counter
        iter_counter = 0

        # Initialize Epoch-wise metrics
        vloss_e, floss_e, closs_e, d_kl_e, c_acc_e, acc_e = 0, 0, 0, 0, 0, 0

        # Train!
        for x_shared, y_shared in loader:  # Loop across chunks

            # Increment chunk counter
            iter_counter += 1

            # Determine number of batches in this chunk; this should only vary from
            # cfg['batches_per_chunk'] if we're at the end of the dataset.
            num_batches = len(x_shared) // cfg['batch_size']

            # Load chunk into memory
            tvars['X_shared'].set_value(x_shared, borrow=True)
            tvars['y_shared'].set_value(y_shared, borrow=True)

            # Initialize Chunk-wise metrics
            voxel_lvs,feature_lvs,class_lvs,kl_divs,class_accs,accs = [],[],[],[],[],[]

            for bi in xrange(num_batches):  # Loop across batches within chunk
                # Update!
                results = tfuncs['update_iter'](bi)

                # Assign results
                # This could definitely be done more cleanly with a list comprehension.
                voxel_loss = results[0]
                feature_loss = results[1] if cfg['introspect'] else 0
                classifier_loss = results[
                    1 + cfg['introspect']] if cfg['discriminative'] else 0
                kl_div = results[1 + cfg['introspect'] + cfg['discriminative']]
                class_acc = results[
                    2 + cfg['introspect'] +
                    cfg['discriminative']] if cfg['discriminative'] else 0
                acc = results[2 + cfg['introspect'] +
                              2 * cfg['discriminative']]

                # Append results to chunk-wise result list; these will be averaged later.
                voxel_lvs.append(voxel_loss)
                feature_lvs.append(feature_loss)
                class_lvs.append(classifier_loss)
                kl_divs.append(kl_div)
                class_accs.append(class_acc)
                accs.append(acc)

                # Increment batch counter
                itr += 1

            # Average metrics across chunk
            [vloss, floss, closs, d_kl, c_acc, acc] = [
                float(np.mean(voxel_lvs)),
                float(np.mean(feature_lvs)),
                float(np.mean(class_lvs)),
                float(np.mean(kl_divs)), 1.0 - float(np.mean(class_accs)),
                1.0 - float(np.mean(accs))
            ]

            # Update epoch-wise metrics
            vloss_e, floss_e, closs_e, d_kl_e, c_acc_e, acc_e = [
                vloss_e + vloss, floss_e + floss, closs_e + closs,
                d_kl_e + d_kl, c_acc_e + c_acc, acc_e + acc
            ]

            # Report and Log chunk-wise metrics
            logging.info(
                'epoch: {}, itr: {}, v_loss: {}, f_loss: {}, c_loss: {}, D_kl: {}, class_acc: {}, acc: {}'
                .format(epoch, itr, vloss, floss, closs, d_kl, c_acc, acc))
            mlog.log(epoch=epoch,
                     itr=itr,
                     vloss=vloss,
                     floss=floss,
                     acc=acc,
                     d_kl=d_kl,
                     c_acc=c_acc)

        # Average  metrics across epoch
        vloss_e, floss_e, closs_e, d_kl_e, c_acc_e, acc_e = [
            vloss_e / iter_counter, floss_e / iter_counter,
            closs_e / iter_counter, d_kl_e / iter_counter,
            c_acc_e / iter_counter, acc_e / iter_counter
        ]
        #  Report and log epoch-wise metrics
        logging.info(
            'Training metrics, Epoch {}, v_loss: {}, f_loss: {}, c_loss: {}, D_kl: {}, class_acc: {}, acc: {}'
            .format(epoch, vloss_e, floss_e, closs_e, d_kl_e, c_acc_e, acc_e))
        mlog.log(epoch=epoch,
                 vloss_e=vloss_e,
                 floss_e=floss_e,
                 closs_e=closs_e,
                 d_kl_e=d_kl_e,
                 c_acc_e=c_acc_e,
                 acc_e=acc_e)

        # Every Nth epoch, save weights
        if not (epoch % cfg['checkpoint_every_nth']):
            checkpoints.save_weights(weights_fname, model['l_out'], {
                'itr': itr,
                'ts': time.time()
            })

            # Also check test performance at each checkpoint epoch
            test_loader = test_data_loader(cfg, 'shapenet10_test_nr.tar')
            logging.info('Examining performance on test set')

            # Initialize test metrics
            test_error,test_class_error,latent_values,tp,tn = [],[],[],[],[]

            # Initialize true class array for 2D manifold plots
            true_class = np.array([], dtype=np.int)

            for x_shared, y_shared in test_loader:  # Loop across test chunks

                # Calculate number of batches
                num_batches = len(x_shared) // cfg['batch_size']

                # Load test chunk into memory
                tvars['X_shared'].set_value(x_shared, borrow=True)
                tvars['y_shared'].set_value(y_shared, borrow=True)

                # Update true class array for 2D Manifold Plots
                true_class = np.append(true_class, np.argmax(y_shared, axis=1))

                for bi in xrange(num_batches):  # Loop across minibatches

                    # Get test results
                    test_results = tfuncs['test_function'](bi)

                    # Assign test results
                    # This could be done more cleanly with a list comprehension
                    batch_test_error = test_results[0]
                    batch_test_class_error = test_results[1] if cfg[
                        'discriminative'] else 0
                    latents = test_results[1 + cfg['discriminative']]
                    batch_tp = test_results[2 + cfg['discriminative']]
                    batch_tn = test_results[3 + cfg['discriminative']]
                    test_error.append(batch_test_error)
                    test_class_error.append(batch_test_class_error)
                    latent_values.append(latents)
                    tp.append(batch_tp)
                    tn.append(batch_tn)

            # Average results
            t_error = 1 - float(np.mean(test_error))
            true_positives = float(np.mean(tp))
            true_negatives = float(np.mean(tn))
            t_class_error = 1 - float(np.mean(test_class_error))
            Zs = np.asarray(latent_values, np.float32)

            # Report and log results
            logging.info(
                'Test Accuracy: {}, Classification Test Accuracy: {}, True Positives: {}, True Negatives: {}'
                .format(t_error, t_class_error, true_positives,
                        true_negatives))
            mlog.log(test_error=t_error,
                     t_class_error=t_class_error,
                     true_positives=true_positives,
                     true_negatives=true_negatives)

            # Optionally plot and save 2D manifold if using only 2 latent variables.
            if np.shape(Zs)[2] == 2:
                Zs = np.reshape(Zs, (np.shape(Zs)[0] * np.shape(Zs)[1], 1, 2))
                ygnd = np.asarray(true_class, np.int)
                plt.scatter(Zs[:, 0, 0], Zs[:, 0, 1], s=30, c=ygnd, alpha=0.5)
                plt.savefig('figs/' + weights_fname[:-4] + str(epoch) + '.png')
                plt.clf()

    logging.info('training done')
    checkpoints.save_weights(weights_fname, model['l_out'], {
        'itr': itr,
        'ts': time.time()
    })
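The positional indexing into results above leans on cfg['introspect'] and cfg['discriminative'] acting as 0/1 offsets. A small sketch with dummy values, only to make the offsets explicit; the layout assumed here (voxel loss, feature loss, classifier loss, KL divergence, then the two accuracy terms) is taken from the indexing with both flags enabled.

# Dummy values; the real results come from tfuncs['update_iter'].
cfg = {'introspect': True, 'discriminative': True}
results = [0.10, 0.20, 0.30, 0.40, 0.05, 0.02]

voxel_loss = results[0]
feature_loss = results[1] if cfg['introspect'] else 0
classifier_loss = results[1 + cfg['introspect']] if cfg['discriminative'] else 0
kl_div = results[1 + cfg['introspect'] + cfg['discriminative']]
class_acc = results[2 + cfg['introspect'] + cfg['discriminative']] if cfg['discriminative'] else 0
acc = results[2 + cfg['introspect'] + 2 * cfg['discriminative']]

# With both flags on, the six names pick out the six entries in order.
assert [voxel_loss, feature_loss, classifier_loss, kl_div, class_acc, acc] == results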
Example 5
def main(args):

    # Load config file
    config_module = imp.load_source('config', args.config_path)
    cfg = config_module.cfg
   
    # Define weights file name
    weights_fname = str(args.config_path)[:-3]+'.npz'
    
    # Define training metrics filename
    metrics_fname = weights_fname[:-4]+'METRICS.jsonl'
    
    # Prepare Logs
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s| %(message)s')
    logging.info('Metrics will be saved to {}'.format(metrics_fname))
    mlog = metrics_logging.MetricsLogger(metrics_fname, reinitialize=True)
    
    # Get model and compile theano functions
    model = config_module.get_model()
    logging.info('Compiling theano functions...')
    tfuncs, tvars = make_training_functions(cfg,model)

    logging.info('Training...')
    
    # Iteration Counter. One iteration corresponds to one minibatch.
    itr = 0
    
    # Best true-positive rate
    best_tp = 0
    

    for epoch in xrange(cfg['max_epochs']):
        # Prepare data loader
        loader = (data_loader(cfg,args.train_file))
        
        # Update Learning Rate. Note that this version of the function does not support a decay rate;
        # See other training files in the discriminative section for this.
        if isinstance(cfg['learning_rate'], dict) and epoch > 0:
            if any(x==epoch for x in cfg['learning_rate'].keys()):
                lr = np.float32(tvars['learning_rate'].get_value())
                new_lr = cfg['learning_rate'][epoch]
                logging.info('Changing learning rate from {} to {}'.format(lr, new_lr))
                tvars['learning_rate'].set_value(np.float32(new_lr))
        
        # Initialize epoch-wise chunk counter
        iter_counter = 0
        
        # Initialize Epoch-wise metrics
        vloss_e, floss_e, closs_e, d_kl_e, c_acc_e, acc_e = 0, 0, 0, 0, 0, 0 

        # Train!
        for x_shared, y_shared in loader: # Loop across chunks
            
            # Increment chunk counter
            iter_counter+=1
            
            # Determine number of batches in this chunk; this should only vary from
            # cfg['batches_per_chunk'] if we're at the end of the dataset.
            num_batches = len(x_shared)//cfg['batch_size']
            
            # Load chunk into memory
            tvars['X_shared'].set_value(x_shared, borrow=True)
            tvars['y_shared'].set_value(y_shared, borrow=True)
            
            # Initialize Chunk-wise metrics
            voxel_lvs,feature_lvs,class_lvs,kl_divs,class_accs,accs = [],[],[],[],[],[]            
            
            for bi in xrange(num_batches): # Loop across batches within chunk
                # Update!
                results = tfuncs['update_iter'](bi)
                
                # Assign results
                # This could definitely be done more cleanly with a list comprehension.
                voxel_loss = results[0]
                feature_loss = results[1] if cfg['introspect'] else 0 
                classifier_loss = results[1+cfg['introspect']] if cfg['discriminative'] else 0
                kl_div = results[1+cfg['introspect']+cfg['discriminative']]
                class_acc = results[2+cfg['introspect']+cfg['discriminative']] if cfg['discriminative'] else 0
                acc = results[2+cfg['introspect']+2*cfg['discriminative']]
               
                # Append results to chunk-wise result list; these will be averaged later.
                voxel_lvs.append(voxel_loss)
                feature_lvs.append(feature_loss)
                class_lvs.append(classifier_loss)
                kl_divs.append(kl_div)
                class_accs.append(class_acc)
                accs.append(acc)

                # Increment batch counter
                itr += 1
                
            # Average metrics across chunk
            [vloss, floss,closs, d_kl,c_acc,acc] = [float(np.mean(voxel_lvs)), float(np.mean(feature_lvs)),
                                                    float(np.mean(class_lvs)), float(np.mean(kl_divs)),
                                                    1.0-float(np.mean(class_accs)), 1.0-float(np.mean(accs))]
            
            # Update epoch-wise metrics                                                 
            vloss_e, floss_e, closs_e, d_kl_e, c_acc_e, acc_e = [vloss_e+vloss, floss_e+floss, closs_e+closs, d_kl_e+d_kl, c_acc_e+c_acc, acc_e+acc] 
            
            # Report and Log chunk-wise metrics  
            logging.info('epoch: {}, itr: {}, v_loss: {}, f_loss: {}, c_loss: {}, D_kl: {}, class_acc: {}, acc: {}'.format(epoch, itr, vloss, floss,
                                                                                                                           closs, d_kl, c_acc, acc))
            mlog.log(epoch=epoch, itr=itr, vloss=vloss,floss=floss, acc=acc,d_kl=d_kl,c_acc=c_acc)
        
        # Average  metrics across epoch
        vloss_e, floss_e, closs_e, d_kl_e, c_acc_e, acc_e = [vloss_e/iter_counter, floss_e/iter_counter, 
                                                             closs_e/iter_counter, d_kl_e/iter_counter,
                                                             c_acc_e/iter_counter, acc_e/iter_counter]
        #  Report and log epoch-wise metrics                                                    
        logging.info('Training metrics, Epoch {}, v_loss: {}, f_loss: {}, c_loss: {}, D_kl: {}, class_acc: {}, acc: {}'.format(epoch, vloss_e, floss_e,closs_e,d_kl_e,c_acc_e,acc_e))
        mlog.log(epoch=epoch, vloss_e=vloss_e, floss_e=floss_e, closs_e=closs_e, d_kl_e=d_kl_e, c_acc_e=c_acc_e, acc_e=acc_e)
        
        # Every Nth epoch, save weights
        if not (epoch%cfg['checkpoint_every_nth']):
            checkpoints.save_weights(weights_fname, model['l_out'],
                                            {'itr': itr, 'ts': time.time()})

    
            # Also check test performance at each checkpoint epoch
            test_loader = test_data_loader(cfg,'shapenet10_test_nr.tar')
            logging.info('Examining performance on test set')
            
            # Initialize test metrics
            test_error,test_class_error,latent_values,tp,tn = [],[],[],[],[]
             
            # Initialize true class array for 2D manifold plots
            true_class = np.array([],dtype=np.int)
            
            for x_shared,y_shared in test_loader: # Loop across test chunks
                
                # Calculate number of batches
                num_batches = len(x_shared)//cfg['batch_size']
                
                # Load test chunk into memory
                tvars['X_shared'].set_value(x_shared, borrow=True)
                tvars['y_shared'].set_value(y_shared, borrow=True)
                
                # Update true class array for 2D Manifold Plots
                true_class = np.append(true_class,np.argmax(y_shared,axis=1))
                
                for bi in xrange(num_batches): # Loop across minibatches
                
                    # Get test results
                    test_results = tfuncs['test_function'](bi)
                    
                    # Assign test results
                    # This could be done more cleanly with a list comprehension
                    batch_test_error=test_results[0]
                    batch_test_class_error = test_results[1] if cfg['discriminative'] else 0
                    latents = test_results[1+cfg['discriminative']]
                    batch_tp = test_results[2+cfg['discriminative']]
                    batch_tn = test_results[3+cfg['discriminative']]
                    test_error.append(batch_test_error)
                    test_class_error.append(batch_test_class_error)
                    latent_values.append(latents)
                    tp.append(batch_tp)
                    tn.append(batch_tn)
                    
            # Average results        
            t_error = 1-float(np.mean(test_error))
            true_positives = float(np.mean(tp))
            true_negatives = float(np.mean(tn))
            t_class_error = 1-float(np.mean(test_class_error))
            Zs = np.asarray(latent_values,np.float32)        
            
            # Report and log results
            logging.info('Test Accuracy: {}, Classification Test Accuracy: {}, True Positives: {}, True Negatives: {}'.format(t_error,t_class_error,true_positives,true_negatives))
            mlog.log(test_error=t_error,t_class_error = t_class_error,true_positives=true_positives,true_negatives=true_negatives)

            # Optionally plot and save 2D manifold if using only 2 latent variables.
            if np.shape(Zs)[2]==2:
                Zs = np.reshape(Zs,(np.shape(Zs)[0]*np.shape(Zs)[1],1,2))
                ygnd = np.asarray(true_class,np.int)
                plt.scatter(Zs[:,0,0],Zs[:,0,1],s = 30, c=ygnd,alpha = 0.5)
                plt.savefig('figs/'+weights_fname[:-4]+str(epoch)+'.png')
                plt.clf()

    
    logging.info('training done')
    checkpoints.save_weights(weights_fname, model['l_out'],
                                    {'itr': itr, 'ts': time.time()})
Example 6
def main(args):

    # Set random seed to ensure identical network initializations.
    # Note that cuDNN's convolutions are nondeterministic, so this
    # does not guarantee that two networks will behave identically.
    lasagne.random.set_rng(np.random.RandomState(1234))
    
    # Load config file
    config_module = imp.load_source('config', args.config_path)
    cfg = config_module.cfg
   
    # Get weights and metrics filename
    weights_fname = str(args.config_path)[:-3]+'.npz'
    
    metrics_fname = weights_fname[:-4]+'METRICS.jsonl'
    
    # Prepare logs
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s| %(message)s')
    logging.info('Metrics will be saved to {}'.format(metrics_fname))
    mlog = metrics_logging.MetricsLogger(metrics_fname, reinitialize=(not args.resume))
    
    # Get model
    model = config_module.get_model()
    
    # Compile functions
    logging.info('Compiling theano functions...')
    tfuncs, tvars,model = make_training_functions(cfg,model)
    
    # Resume training if file exists and you turn on the resume tag
    if os.path.isfile(weights_fname) and args.resume:
        print('loading weights')
        metadata = checkpoints.load_weights(weights_fname, model['l_out'])
   
    # GPU Memory Info; currently not implemented, but you can potentially
    # use this information to monitor GPU memory usage.
    baseGPUmem = sbcuda.cuda_ndarray.cuda_ndarray.mem_info()[0]/1024./1024/1024  
    
    # Training loop
    logging.info('Training...')
    itr = 0
    
    # Load data and shuffle training examples. 
    # Note that this loads the entire dataset into RAM! If you don't
    # have a lot of RAM, consider only loading chunks of this at a time.
    
    x = np.load(args.data_path)['features']
    
    # Seed the shuffle
    np.random.seed(42)
    
    # Define shuffle indices
    index = np.random.permutation(len(x))
    
    # Shuffle inputs
    x = x[index]
    
    # Shuffle targets to match inputs
    y = np.load(args.data_path)['targets'][index]

    # Define size of chunk to be loaded into GPU memory
    chunk_size = cfg['batch_size']*cfg['batches_per_chunk']
    
    # Determine number of chunks
    num_chunks = int(math.ceil(len(y)/float(chunk_size)))
    
    # Get current learning rate
    new_lr = np.float32(tvars['learning_rate'].get_value())
    
    # Loop across training epochs!
    for epoch in xrange(cfg['max_epochs']):
        
        # Tic
        epoch_start_time = time.time()
       
       # Update Learning Rate
        if isinstance(cfg['learning_rate'], dict) and epoch > 0:
            if any(x==epoch for x in cfg['learning_rate'].keys()):
                lr = np.float32(tvars['learning_rate'].get_value())
                new_lr = cfg['learning_rate'][epoch]
                logging.info('Changing learning rate from {} to {}'.format(lr, new_lr))
                tvars['learning_rate'].set_value(np.float32(new_lr))
        if cfg['decay_rate'] and epoch > 0:
            lr = np.float32(tvars['learning_rate'].get_value())
            new_lr = lr*(1-cfg['decay_rate'])
            logging.info('Changing learning rate from {} to {}'.format(lr, new_lr))
            tvars['learning_rate'].set_value(np.float32(new_lr))         
        
        # Loop across chunks!
        for chunk_index in xrange(num_chunks):
        
            # Define upper index of chunk to load
            # If you start doing complicated things with data loading, consider 
            # wrapping all of this into its own little function.
            upper_range = min(len(y),(chunk_index+1)*chunk_size)

            # Get current chunk
            x_shared = np.asarray(x[chunk_index*chunk_size:upper_range,:,:,:,:],dtype=np.float32)
            y_shared = np.asarray(y[chunk_index*chunk_size:upper_range],dtype=np.float32)
            
            # Get repeatable seed to shuffle jittered and unjittered instances within chunk.
            # Note that this seed varies between chunks, but will be constant across epochs.
            np.random.seed(chunk_index)
            
            # Get shuffled chunk indices for a second round of shuffling
            indices = np.random.permutation(2*len(x_shared))
            
            # Get number of batches in this chunk
            num_batches = 2*len(x_shared)//cfg['batch_size']
            
            # Combine data with jittered data, then shuffle and change binary range from {0,1} to {-1,3}, then load into GPU memory.
            tvars['X_shared'].set_value(4.0 * np.append(x_shared,jitter_chunk(x_shared, cfg,chunk_index),axis=0)[indices]-1.0, borrow=True)
            tvars['y_shared'].set_value(np.append(y_shared,y_shared,axis=0)[indices], borrow=True)
            
            # Prepare loss values
            lvs, accs = [],[]
            
            # Loop across batches!
            for bi in xrange(num_batches):
                
                # Train!
                [classifier_loss,class_acc] = tfuncs['update_iter'](bi)
                
                # Record batch loss and accuracy
                lvs.append(classifier_loss)
                accs.append(class_acc)
                
                # Update iteration counter
                itr += 1
            
            # Average losses and accuracies across chunk
            [closs,c_acc] = [float(np.mean(lvs)),1.0-float(np.mean(accs))]

            # Report and log losses and accuracies
            logging.info('epoch: {0:^3d}, itr: {1:d}, c_loss: {2:.6f}, class_acc: {3:.5f}'.format(epoch, itr, closs, c_acc))
            mlog.log(epoch=epoch, itr=itr, closs=closs,c_acc=c_acc)

        # Every Nth epoch, save weights
        if not (epoch%cfg['checkpoint_every_nth']):
            checkpoints.save_weights(weights_fname, model['l_out'],
                                                {'itr': itr, 'ts': time.time(),
                                                'learning_rate': new_lr}) 
        


    logging.info('training done')
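A quick standalone check of the voxel rescaling used when each chunk is loaded above: 4 * x - 1 maps binary occupancy values {0, 1} to {-1, 3}, after the real chunk and its jittered copy have been concatenated and shuffled together. jitter_chunk belongs to the repository and is stubbed out with a plain copy here.

# Toy occupancy values; jittered stands in for the repo's jitter_chunk(...) output.
import numpy as np

x_shared = np.array([0., 1., 1., 0.], dtype=np.float32)
jittered = x_shared.copy()

combined = np.append(x_shared, jittered, axis=0)        # real + jittered copies
indices = np.random.permutation(2 * len(x_shared))      # shuffle both together
rescaled = 4.0 * combined[indices] - 1.0                # {0,1} -> {-1,3}

print(sorted(set(rescaled.tolist())))                   # [-1.0, 3.0]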
Example 7
def main(args):

    # Set random seed to ensure identical network initializations.
    # Note that cuDNN's convolutions are nondeterministic, so this
    # does not guarantee that two networks will behave identically.
    lasagne.random.set_rng(np.random.RandomState(1234))

    # Load config file
    config_module = imp.load_source('config', args.config_path)
    cfg = config_module.cfg

    # Get weights and metrics filename
    weights_fname = str(args.config_path)[:-3] + '.npz'

    metrics_fname = weights_fname[:-4] + 'METRICS.jsonl'

    # Prepare logs
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s| %(message)s')
    logging.info('Metrics will be saved to {}'.format(metrics_fname))
    mlog = metrics_logging.MetricsLogger(metrics_fname,
                                         reinitialize=(not args.resume))

    # Get model
    model = config_module.get_model()

    # Compile functions
    logging.info('Compiling theano functions...')
    tfuncs, tvars, model = make_training_functions(cfg, model)

    # Resume training if file exists and you turn on the resume tag
    if os.path.isfile(weights_fname) and args.resume:
        print('loading weights')
        metadata = checkpoints.load_weights(weights_fname, model['l_out'])

    # GPU Memory Info; currently not implemented, but you can potentially
    # use this information to monitor GPU memory usage.
    baseGPUmem = sbcuda.cuda_ndarray.cuda_ndarray.mem_info(
    )[0] / 1024. / 1024 / 1024

    # Training loop
    logging.info('Training...')
    itr = 0

    # Load data and shuffle training examples.
    # Note that this loads the entire dataset into RAM! If you don't
    # have a lot of RAM, consider only loading chunks of this at a time.

    x = np.load(args.data_path)['features']

    # Seed the shuffle
    np.random.seed(42)

    # Define shuffle indices
    index = np.random.permutation(len(x))

    # Shuffle inputs
    x = x[index]

    # Shuffle targets to match inputs
    y = np.load(args.data_path)['targets'][index]

    # Define size of chunk to be loaded into GPU memory
    chunk_size = cfg['batch_size'] * cfg['batches_per_chunk']

    # Determine number of chunks
    num_chunks = int(math.ceil(len(y) / float(chunk_size)))

    # Get current learning rate
    new_lr = np.float32(tvars['learning_rate'].get_value())

    # Loop across training epochs!
    for epoch in xrange(cfg['max_epochs']):

        # Tic
        epoch_start_time = time.time()

        # Update Learning Rate
        if isinstance(cfg['learning_rate'], dict) and epoch > 0:
            if any(x == epoch for x in cfg['learning_rate'].keys()):
                lr = np.float32(tvars['learning_rate'].get_value())
                new_lr = cfg['learning_rate'][epoch]
                logging.info('Changing learning rate from {} to {}'.format(
                    lr, new_lr))
                tvars['learning_rate'].set_value(np.float32(new_lr))
        if cfg['decay_rate'] and epoch > 0:
            lr = np.float32(tvars['learning_rate'].get_value())
            new_lr = lr * (1 - cfg['decay_rate'])
            logging.info('Changing learning rate from {} to {}'.format(
                lr, new_lr))
            tvars['learning_rate'].set_value(np.float32(new_lr))

        # Loop across chunks!
        for chunk_index in xrange(num_chunks):

            # Define upper index of chunk to load
            # If you start doing complicated things with data loading, consider
            # wrapping all of this into its own little function.
            upper_range = min(len(y), (chunk_index + 1) * chunk_size)

            # Get current chunk
            x_shared = np.asarray(x[chunk_index *
                                    chunk_size:upper_range, :, :, :, :],
                                  dtype=np.float32)
            y_shared = np.asarray(y[chunk_index * chunk_size:upper_range],
                                  dtype=np.float32)

            # Get repeatable seed to shuffle jittered and unjittered instances within chunk.
            # Note that this seed varies between chunks, but will be constant across epochs.
            np.random.seed(chunk_index)

            # Get shuffled chunk indices for a second round of shuffling
            indices = np.random.permutation(2 * len(x_shared))

            # Get number of batches in this chunk
            num_batches = 2 * len(x_shared) // cfg['batch_size']

            # Combine data with jittered data, then shuffle and change binary range from {0,1} to {-1,3}, then load into GPU memory.
            tvars['X_shared'].set_value(4.0 * np.append(
                x_shared, jitter_chunk(x_shared, cfg,
                                       chunk_index), axis=0)[indices] - 1.0,
                                        borrow=True)
            tvars['y_shared'].set_value(np.append(y_shared, y_shared,
                                                  axis=0)[indices],
                                        borrow=True)

            # Prepare loss values
            lvs, accs = [], []

            # Loop across batches!
            for bi in xrange(num_batches):

                # Train!
                [classifier_loss, class_acc] = tfuncs['update_iter'](bi)

                # Record batch loss and accuracy
                lvs.append(classifier_loss)
                accs.append(class_acc)

                # Update iteration counter
                itr += 1

            # Average losses and accuracies across chunk
            [closs, c_acc] = [float(np.mean(lvs)), 1.0 - float(np.mean(accs))]

            # Report and log losses and accuracies
            logging.info(
                'epoch: {0:^3d}, itr: {1:d}, c_loss: {2:.6f}, class_acc: {3:.5f}'
                .format(epoch, itr, closs, c_acc))
            mlog.log(epoch=epoch, itr=itr, closs=closs, c_acc=c_acc)

        # Every Nth epoch, save weights
        if not (epoch % cfg['checkpoint_every_nth']):
            checkpoints.save_weights(weights_fname, model['l_out'], {
                'itr': itr,
                'ts': time.time(),
                'learning_rate': new_lr
            })

    logging.info('training done')
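One detail worth noting in the checkpointing condition above: if not (epoch % cfg['checkpoint_every_nth']) also fires on epoch 0, since 0 % n == 0. A tiny sketch of which epochs trigger a save under that rule, with a made-up interval:

# Illustrative interval; cfg['checkpoint_every_nth'] is set in the config module.
checkpoint_every_nth = 5
saved_epochs = [epoch for epoch in range(12) if not (epoch % checkpoint_every_nth)]
print(saved_epochs)   # [0, 5, 10]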