Example #1
def main(params, network_params):
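    """Frame-level k-fold cross-validation.

    Splits the data into folds, trains one model per fold, predicts on the
    held-out fold and stores per-fold accuracy, sensitivity and specificity.
    """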
    print('\n\n----------------------------------------------------------------------------------------------------')
    print('----------------------------------- Splitting data into {:0>2d} folds -----------------------------------'.format(params['nb_folds']))
    print('----------------------------------------------------------------------------------------------------')
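    # Split the dataset into params['nb_folds'] cross-validation folds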
    data_splitter_cls = data_splitter(
        data_dir=params['data_dir'],
        win_len=params['win_len'], hop_len=params['hop_len'],
        seq_len=params['seq_len'], seq_hop_len=params['seq_hop_len'],
        nb_channels=params['nb_channels'], nb_classes=params['nb_classes'],
        validate=False, nb_folds=params['nb_folds'], silent=True)
    data_splitter_cls.perform()
    Path(params['log_dir']).mkdir(parents=True, exist_ok=True)
    Path(params['results_dir']).mkdir(parents=True, exist_ok=True)
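    # Per-fold quality measures: accuracy, sensitivity, specificity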
    acc = np.zeros((params['nb_folds'],1))
    sens = np.zeros((params['nb_folds'],1))
    spec = np.zeros((params['nb_folds'],1))
    for fold_cnt in range(params['nb_folds']):
        print('\n\n----------------------------------------------------------------------------------------------------')
        print('------------------------------------------    fold: {:0>2d}    ------------------------------------------'.format(fold_cnt+1))
        print('----------------------------------------------------------------------------------------------------')
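        # Per-fold checkpoint path and training callbacks (early stopping is defined but not passed to fit() below)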
        model_name = os.path.join(params['log_dir'],'fold_{}_best_model.h5'.format(fold_cnt+1))
        csv_logger = CSVLogger(filename=os.path.join(params['log_dir'],'fold_{}_log.csv'.format(fold_cnt+1)))
        early_stopper = EarlyStopping(monitor='loss', min_delta=0, mode='min')
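        # Materialise the train/validation/test splits for this fold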
        data_splitter_cls.noseq_prepare(fold_n=fold_cnt, run_type='train')
        data_splitter_cls.noseq_prepare(fold_n=fold_cnt, run_type='val')
        data_splitter_cls.noseq_prepare(fold_n=fold_cnt, run_type='test')
        print('Loading training dataset:')
        train_data_gen = DataGenerator_noseq(
            data_dir=params['data_dir'], shuffle=False,
            win_len=params['win_len'], hop_len=params['hop_len'],
            nb_channels=params['nb_channels'], nb_classes=params['nb_classes'],
            n_fold=fold_cnt, batch_size=params['batch_size'], run_type='train')
        val_data_gen = DataGenerator_noseq(
            data_dir=params['data_dir'], shuffle=False,
            win_len=params['win_len'], hop_len=params['hop_len'],
            nb_channels=params['nb_channels'], nb_classes=params['nb_classes'],
            n_fold=fold_cnt, batch_size=params['batch_size'], run_type='val')
        test_data_gen = DataGenerator_noseq(
            data_dir=params['data_dir'], shuffle=False,
            win_len=params['win_len'], hop_len=params['hop_len'],
            nb_channels=params['nb_channels'], nb_classes=params['nb_classes'],
            n_fold=fold_cnt, batch_size=params['batch_size'], run_type='test')
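        # Build the model for this fold and train it on the training generator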
        model = get_fcResVGG16model(params, network_params)
        hist = model.fit(x=train_data_gen, validation_data=val_data_gen, epochs=params['nb_epochs'], workers=10, use_multiprocessing=True, callbacks=[csv_logger]) #early_stopper,csv_logger])
        model.save(model_name)

        print('\nLoading the best model and predicting results on the testing data')
        model = load_model(model_name)
        pred_test = model.predict(x=test_data_gen, workers=2, use_multiprocessing=False)
        pred_test_logits = pred_test   # keep the raw (pre-threshold) network outputs
        pred_test = pred_test > 0.5    # threshold at 0.5 to obtain binary frame decisions
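        # Frame-level reference labels for this fold's test split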
        test_h5 = h5py.File(os.path.join(test_data_gen._data_dir, 'folds_h5', '{}_metadata_{}.hdf5'.format('test', fold_cnt+1)), 'r')
        ref_test = np.asarray(test_h5['frame_labels'])
        test_h5.close()
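        # Save per-fold predictions, raw scores and reference labels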
        Path(os.path.join(params['results_dir'], 'test_val')).mkdir(parents=True, exist_ok=True)
        Path(os.path.join(params['results_dir'], 'test_val_logits')).mkdir(parents=True, exist_ok=True)
        Path(os.path.join(params['results_dir'], 'ref_val')).mkdir(parents=True, exist_ok=True)
        np.save(os.path.join(params['results_dir'], 'test_val', 'pred_fold_{:0>2d}.npy'.format(fold_cnt+1)), pred_test)
        np.save(os.path.join(params['results_dir'], 'test_val_logits', 'logits_fold_{:0>2d}.npy'.format(fold_cnt+1)), pred_test_logits)
        np.save(os.path.join(params['results_dir'], 'ref_val', 'ref_fold_{:0>2d}.npy'.format(fold_cnt+1)), ref_test)
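        # Confusion matrix (rows = reference, columns = prediction); the reference
        # is truncated to the number of frames that were actually predicted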
        cmatrix = confusion_matrix(ref_test[:pred_test.shape[0]], pred_test, labels=[0,1])
        acc[fold_cnt] = (cmatrix[0,0]+cmatrix[1,1])/np.sum(cmatrix)
        if cmatrix[0,0]==0 or cmatrix[1,1]==0:
            # degenerate fold: one of the classes has no correctly classified frames
            sens[fold_cnt] = 1
            spec[fold_cnt] = 1
        else:
            sens[fold_cnt] = cmatrix[0,0]/(cmatrix[0,0]+cmatrix[0,1])  # recall of class 0
            spec[fold_cnt] = cmatrix[1,1]/(cmatrix[1,0]+cmatrix[1,1])  # recall of class 1
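    # Aggregate quality measures over all folds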
    np.save(os.path.join(params['results_dir'], 'qmeasures_acc.npy'), acc)
    np.save(os.path.join(params['results_dir'], 'qmeasures_sens.npy'), sens)
    np.save(os.path.join(params['results_dir'], 'qmeasures_spec.npy'), spec)
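Both examples read the same configuration dictionaries. As a rough sketch, the keys below are the ones main() accesses; every value is an illustrative placeholder rather than a value from the original project, and network_params is only passed through to the model builders, so it is shown empty here.

# Hypothetical configuration sketch -- all values are placeholders
params = {
    'data_dir': './data',                    # dataset root (placeholder path)
    'log_dir': './logs',                     # checkpoints and CSV training logs (placeholder path)
    'results_dir': './results',              # predictions and metrics (placeholder path)
    'nb_folds': 5,                           # number of cross-validation folds
    'nb_epochs': 50,
    'batch_size': 32,
    'win_len': 1024, 'hop_len': 512,         # framing parameters (placeholders)
    'seq_len': 64, 'seq_hop_len': 32,        # sequence parameters, used by Example #2 (placeholders)
    'nb_channels': 1, 'nb_classes': 1,       # single-channel input, binary output (placeholders)
}
network_params = {}                          # architecture settings consumed by the model builder
# main(params, network_params)               # requires the project's data_splitter, generators and model builder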
Example #2
def main(params, network_params):
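    """Sequence-level k-fold cross-validation.

    Splits the data into folds, trains one model per fold, re-assembles the
    sequence predictions per test recording and evaluates accuracy,
    sensitivity and specificity for every recording in the fold.
    """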
    print('\n\n----------------------------------------------------------------------------------------------------')
    print('----------------------------------- Splitting data into {:0>2d} folds -----------------------------------'.format(params['nb_folds']))
    print('----------------------------------------------------------------------------------------------------')
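    # Evaluation/combination helper and k-fold data splitter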
    evaluation_cls = evaluation_metrics(
        results_dir=params['results_dir'],
        win_len=params['win_len'], hop_len=params['hop_len'],
        seq_len=params['seq_len'], seq_hop_len=params['seq_hop_len'],
        nb_channels=params['nb_channels'], nb_classes=params['nb_classes'], silent=True)
    data_splitter_cls = data_splitter(
        data_dir=params['data_dir'],
        win_len=params['win_len'], hop_len=params['hop_len'],
        seq_len=params['seq_len'], seq_hop_len=params['seq_hop_len'],
        nb_channels=params['nb_channels'], nb_classes=params['nb_classes'],
        validate=False, nb_folds=params['nb_folds'], silent=True)
    data_splitter_cls.perform()
    Path(params['log_dir']).mkdir(parents=True, exist_ok=True)
    Path(params['results_dir']).mkdir(parents=True, exist_ok=True)
    for fold_cnt in range(params['nb_folds']):
        print('\n\n----------------------------------------------------------------------------------------------------')
        print('------------------------------------------    fold: {:0>2d}    ------------------------------------------'.format(fold_cnt+1))
        print('----------------------------------------------------------------------------------------------------')
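        # Per-fold checkpoint path and training callbacks (early stopping is defined but not passed to fit() below)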
        model_name = os.path.join(params['log_dir'],'fold_{}_best_model.h5'.format(fold_cnt+1))
        csv_logger = CSVLogger(filename=os.path.join(params['log_dir'],'fold_{}_log.csv'.format(fold_cnt+1)))
        early_stopper = EarlyStopping(monitor='loss', min_delta=0, mode='min')
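        # Materialise the sequence-level train/validation/test splits for this fold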
        data_splitter_cls.prepare(fold_n=fold_cnt, run_type='train')
        data_splitter_cls.prepare(fold_n=fold_cnt, run_type='val')
        data_splitter_cls.prepare(fold_n=fold_cnt, run_type='test')
        print('Loading training dataset:')
        train_data_gen = DataGenerator(
            data_dir=params['data_dir'], shuffle=False,
            win_len=params['win_len'], hop_len=params['hop_len'],
            seq_len=params['seq_len'], seq_hop_len=params['seq_hop_len'],
            nb_channels=params['nb_channels'], nb_classes=params['nb_classes'],
            n_fold=fold_cnt, batch_size=params['batch_size'], run_type='train')
        val_data_gen = DataGenerator(
            data_dir=params['data_dir'], shuffle=False,
            win_len=params['win_len'], hop_len=params['hop_len'],
            seq_len=params['seq_len'], seq_hop_len=params['seq_hop_len'],
            nb_channels=params['nb_channels'], nb_classes=params['nb_classes'],
            n_fold=fold_cnt, batch_size=params['batch_size'], run_type='val')
        test_data_gen = DataGenerator(
            data_dir=params['data_dir'], shuffle=False,
            win_len=params['win_len'], hop_len=params['hop_len'],
            seq_len=params['seq_len'], seq_hop_len=params['seq_hop_len'],
            nb_channels=params['nb_channels'], nb_classes=params['nb_classes'],
            n_fold=fold_cnt, batch_size=params['batch_size'], run_type='test')
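        # Build the model for this fold and train it on the training generator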
        model = get_ssVGG16model(params, network_params)
        #for epoch_cnt in range(params['nb_epochs']):
            #print('Epoch No. {}'.format(epoch_cnt+1))
        hist = model.fit(x=train_data_gen, validation_data=val_data_gen, epochs=params['nb_epochs'], workers=10, use_multiprocessing=True, callbacks=[csv_logger]) #early_stopper,csv_logger])
        model.save(model_name)

        print('\nLoading the best model and predicting results on the testing data')
        model = load_model(model_name)
        pred_test = model.predict(x=test_data_gen, workers=2, use_multiprocessing=False)
        pred_test_logits = pred_test   # keep the raw (pre-threshold) sequence outputs
        pred_test = pred_test > 0.5    # threshold at 0.5 to obtain binary decisions
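        # Per-sequence metadata and sequence-level reference labels for this fold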
        test_data_records = pd.read_csv(os.path.join(test_data_gen._data_dir, 'folds_metadata', '{}_metadata_{}.csv'.format(test_data_gen._run_type, fold_cnt+1)))
        test_h5 = h5py.File(os.path.join(test_data_gen._data_dir, 'folds_h5', '{}_metadata_{}.hdf5'.format('test', fold_cnt+1)), 'r')
        ref_test = np.asarray(test_h5['seq_labels'])
        test_h5.close()
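        # Output folders for this fold's predictions, raw scores and references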
        Path(os.path.join(params['results_dir'], 'test_val_fold{:0>2d}'.format(fold_cnt+1))).mkdir(parents=True, exist_ok=True)
        Path(os.path.join(params['results_dir'], 'test_val_logits_fold{:0>2d}'.format(fold_cnt+1))).mkdir(parents=True, exist_ok=True)
        Path(os.path.join(params['results_dir'], 'ref_val_fold{:0>2d}'.format(fold_cnt+1))).mkdir(parents=True, exist_ok=True)
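        # Re-assemble sequence-level outputs and references into one file per test recording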
        evaluation_cls.combine_seqlogits(pred_test,test_data_records,run_type='test',fold_n=fold_cnt)
        evaluation_cls.combine_seqlogits(ref_test,test_data_records,run_type='ref',fold_n=fold_cnt)
        evaluation_cls.combine_seqlogitspro(pred_test_logits,test_data_records,fold_n=fold_cnt)
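        # Evaluate every re-assembled test file and collect fold-level logits / ground truth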
        test_files = os.listdir(os.path.join(params['results_dir'], 'test_val_fold{:0>2d}'.format(fold_cnt+1)))
        acc = np.zeros((len(test_files),1))
        sens = np.zeros((len(test_files),1))
        spec = np.zeros((len(test_files),1))
        fold_clogits = []
        fold_cgtruth = []
        print('Model evaluation for fold#{}:'.format(fold_cnt+1))
        for testfile_cnt, testfile in enumerate(test_files):
            print('Evaluating segmentation of file# {}: {}'.format(testfile_cnt+1, testfile))
            # load the re-assembled reference and prediction for this recording
            ref_file = np.load(os.path.join(params['results_dir'], 'ref_val_fold{:0>2d}'.format(fold_cnt+1), testfile))
            pred_file = np.load(os.path.join(params['results_dir'], 'test_val_fold{:0>2d}'.format(fold_cnt+1), testfile))
            acc[testfile_cnt], sens[testfile_cnt], spec[testfile_cnt] = evaluation_cls.evaluate_sequences(ref_file, pred_file)
            fold_clogits = np.concatenate((fold_clogits, np.load(os.path.join(params['results_dir'], 'test_val_logits_fold{:0>2d}'.format(fold_cnt+1), testfile))))
            fold_cgtruth = np.concatenate((fold_cgtruth, ref_file))
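        # Save fold-level aggregates and per-file quality measures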
        np.save(os.path.join(params['results_dir'], 'fold_{:0>2d}_aggregate_logits.npy'.format(fold_cnt+1)), fold_clogits)
        np.save(os.path.join(params['results_dir'], 'fold_{:0>2d}_aggregate_gtruth.npy'.format(fold_cnt+1)), fold_cgtruth)
        np.save(os.path.join(params['results_dir'], 'fold_{:0>2d}_qmeasures_acc.npy'.format(fold_cnt+1)), acc)
        np.save(os.path.join(params['results_dir'], 'fold_{:0>2d}_qmeasures_sens.npy'.format(fold_cnt+1)), sens)
        np.save(os.path.join(params['results_dir'], 'fold_{:0>2d}_qmeasures_spec.npy'.format(fold_cnt+1)), spec)