Example 1 — tail of a feature-prediction script: denormalize the predicted features, compute per-unit prediction accuracy, and save the results.
    print('Total elapsed time (prediction): %f' % (time() - start_time))

    # Postprocessing
    # --------------
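    # Undo the feature normalization; y_mean and y_norm are assumed to be the
    # training-set mean and scale used to z-score y before model fitting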
    y_pred = y_pred * y_norm + y_mean

    # Calculate prediction accuracy
    # -----------------------------
    print('Prediction accuracy')

    start_time = time()

    y_pred_2d = y_pred.reshape([y_pred.shape[0], -1])
    y_true_2d = y.reshape([y.shape[0], -1])

    y_true_2d = get_refdata(y_true_2d, np.array(y_labels), x_labels_unique)

    n_units = y_true_2d.shape[1]

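    # Accuracy per unit: Pearson correlation between the predicted and true
    # values of that unit across test samples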
    accuracy = np.array([
        np.corrcoef(y_pred_2d[:, i].flatten(), y_true_2d[:, i].flatten())[0, 1]
        for i in range(n_units)
    ])
    accuracy = accuracy.reshape((1, ) + y_pred.shape[1:])

    print('Total elapsed time (prediction accuracy): %f' %
          (time() - start_time))

    # Save results
    # ------------
    print('Saving results')
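The list comprehension above calls np.corrcoef once per feature unit. For wide
feature matrices, an equivalent column-wise Pearson correlation can be computed
in one vectorized pass; a minimal sketch (function name hypothetical):

def unitwise_corr(a, b):
    # Pearson correlation between matching columns of a and b (samples x units)
    az = (a - a.mean(axis=0)) / (a.std(axis=0) + 1e-12)
    bz = (b - b.mean(axis=0)) / (b.std(axis=0) + 1e-12)
    return (az * bz).mean(axis=0)

# accuracy = unitwise_corr(y_pred_2d, y_true_2d).reshape((1, ) + y_pred.shape[1:])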
Example 2 — full feature-prediction pipeline: load fMRI data, predict image features for every subject/ROI/feature-layer combination, and pickle the results.
# Imports assumed by this example (the scraped snippet omits them); the
# feature_prediction() and get_averaged_feature() helpers and the `config`
# settings module are defined elsewhere in the original script.
import os
import pickle
from itertools import product
from time import time

import numpy as np
import pandas as pd

import bdpy
from bdpy.bdata import concat_dataset
from bdpy.distcomp import DistComp
from bdpy.util import makedir_ifnot, get_refdata


def main():
    # Settings ---------------------------------------------------------

    # Data settings
    subjects = config.subjects
    rois = config.rois
    num_voxel = config.num_voxel

    image_feature = config.image_feature_file
    features = config.features

    n_iter = 200

    results_dir = config.results_dir

    # Misc settings
    analysis_basename = os.path.basename(__file__)

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_all = {}
    for sbj in subjects:
        if len(subjects[sbj]) == 1:
            data_all[sbj] = bdpy.BData(subjects[sbj][0])
        else:
            # Concatenate data
            suc_cols = ['Run', 'Block']
            data_all[sbj] = concat_dataset(
                [bdpy.BData(f) for f in subjects[sbj]], successive=suc_cols)

    data_feature = bdpy.BData(image_feature)

    # Add any additional processing to data here

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj, roi, feat in product(subjects, rois, features):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('ROI:        %s' % roi)
        print('Num voxels: %d' % num_voxel[roi])
        print('Feature:    %s' % feat)

        # Distributed computation
        analysis_id = analysis_basename + '-' + sbj + '-' + roi + '-' + feat
        results_file = os.path.join(results_dir, analysis_id + '.pkl')

        if os.path.exists(results_file):
            print('%s is already done. Skipped.' % analysis_id)
            continue

        dist = DistComp(lockdir='tmp', comp_id=analysis_id)
        if dist.islocked():
            print('%s is already running. Skipped.' % analysis_id)
            continue

        dist.lock()

        # Prepare data
        print('Preparing data')
        dat = data_all[sbj]

        x = dat.select(rois[roi])  # Brain data
        datatype = dat.select('DataType')  # Data type
        labels = dat.select('stimulus_id')  # Image labels in brain data

        y = data_feature.select(feat)  # Image features
        y_label = data_feature.select('ImageID')  # Image labels

        # For quick demo, reduce the number of units from 1000 to 100
        y = y[:, :100]

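        # get_refdata looks up each brain-data label in y_label and returns the
        # matching rows of y, i.e. one image-feature vector per brain sample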
        y_sorted = get_refdata(
            y, y_label, labels)  # Image features corresponding to brain data

        # Get training and test dataset
        i_train = (datatype == 1).flatten()  # Index for training
        i_test_pt = (datatype == 2).flatten()  # Index for perception test
        i_test_im = (datatype == 3).flatten()  # Index for imagery test
        i_test = i_test_pt | i_test_im  # Union of perception and imagery test samples

        x_train = x[i_train, :]
        x_test = x[i_test, :]

        y_train = y_sorted[i_train, :]
        y_test = y_sorted[i_test, :]

        # Feature prediction
        pred_y, true_y = feature_prediction(x_train,
                                            y_train,
                                            x_test,
                                            y_test,
                                            n_voxel=num_voxel[roi],
                                            n_iter=n_iter)

        # Separate results for perception and imagery tests
        i_pt = i_test_pt[i_test]  # Index for perception test within test
        i_im = i_test_im[i_test]  # Index for imagery test within test

        pred_y_pt = pred_y[i_pt, :]
        pred_y_im = pred_y[i_im, :]

        true_y_pt = true_y[i_pt, :]
        true_y_im = true_y[i_im, :]

        # Get averaged predicted feature
        test_label_pt = labels[i_test_pt, :].flatten()
        test_label_im = labels[i_test_im, :].flatten()

        pred_y_pt_av, true_y_pt_av, test_label_set_pt \
            = get_averaged_feature(pred_y_pt, true_y_pt, test_label_pt)
        pred_y_im_av, true_y_im_av, test_label_set_im \
            = get_averaged_feature(pred_y_im, true_y_im, test_label_im)

        # Get category averaged features
        catlabels_pt = np.vstack([int(n) for n in test_label_pt])  # Category labels (perception test)
        catlabels_im = np.vstack([int(n) for n in test_label_im])  # Category labels (imagery test)
        catlabels_set_pt = np.unique(catlabels_pt)  # Category label set (perception test)
        catlabels_set_im = np.unique(catlabels_im)  # Category label set (imagery test)

        y_catlabels = data_feature.select('CatID')  # Category labels in image features
        ind_catave = (data_feature.select('FeatureType') == 3).flatten()  # Rows holding category-averaged features

        y_catave_pt = get_refdata(y[ind_catave, :], y_catlabels[ind_catave, :],
                                  catlabels_set_pt)
        y_catave_im = get_refdata(y[ind_catave, :], y_catlabels[ind_catave, :],
                                  catlabels_set_im)

        # Prepare result dataframe
        results = pd.DataFrame({
            'subject': [sbj, sbj],
            'roi': [roi, roi],
            'feature': [feat, feat],
            'test_type': ['perception', 'imagery'],
            'true_feature': [true_y_pt, true_y_im],
            'predicted_feature': [pred_y_pt, pred_y_im],
            'test_label': [test_label_pt, test_label_im],
            'test_label_set': [test_label_set_pt, test_label_set_im],
            'true_feature_averaged': [true_y_pt_av, true_y_im_av],
            'predicted_feature_averaged': [pred_y_pt_av, pred_y_im_av],
            'category_label_set': [catlabels_set_pt, catlabels_set_im],
            'category_feature_averaged': [y_catave_pt, y_catave_im]
        })

        # Save results
        makedir_ifnot(os.path.dirname(results_file))
        with open(results_file, 'wb') as f:
            pickle.dump(results, f)

        print('Saved %s' % results_file)

        dist.unlock()
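feature_prediction() is a helper defined elsewhere in the original script. As a
rough, runnable stand-in: rank voxels by correlation with each target unit and
fit one plain ridge model per unit. This replaces whatever regressor the
original helper uses, and n_iter is accepted only for signature compatibility.
A sketch under those assumptions:

import numpy as np
from sklearn.linear_model import Ridge

def feature_prediction_sketch(x_train, y_train, x_test, y_test,
                              n_voxel=500, n_iter=None):
    # Correlation of every voxel with every feature unit in one pass
    xz = (x_train - x_train.mean(0)) / (x_train.std(0) + 1e-12)
    yz = (y_train - y_train.mean(0)) / (y_train.std(0) + 1e-12)
    r = xz.T.dot(yz) / x_train.shape[0]  # (n_voxels, n_units)

    y_pred = np.empty_like(y_test, dtype=float)
    for i in range(y_train.shape[1]):
        top = np.argsort(np.abs(r[:, i]))[::-1][:n_voxel]  # most correlated voxels
        model = Ridge(alpha=1.0).fit(x_train[:, top], y_train[:, i])
        y_pred[:, i] = model.predict(x_test[:, top])
    return y_pred, y_test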
Example 3 — neural-code-converter pipeline: map one subject's averaged brain activity into a target subject's space, decode CNN features with a pretrained decoder, and save per-image predictions and accuracies as .mat files.
# hdf5storage and project-specific helpers (Features, test_ncconverter,
# test_fastl2lir_div) are assumed to be imported/defined elsewhere.
def main():
    # Read settings ----------------------------------------------------

    # Brain data
    brain_dir = '/home/share/data/fmri_shared/datasets/Deeprecon/fmriprep'
    subjects_list = {'TH': 'TH_ImageNetTest_volume_native.h5'}

    rois_list = {
        'VC': 'ROI_VC = 1',
    }

    # Image features
    features_dir = '/home/ho/Documents/brain-decoding-examples/python/feature-prediction/data/features/ImageNetTest'
    network = 'caffe/VGG_ILSVRC_19_layers'
    features_list = [
        'conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2',
        'conv3_3', 'conv3_4', 'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4',
        'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4', 'fc6', 'fc7', 'fc8'
    ][::-1]
    features_list = ['fc6', 'fc7', 'fc8'][::-1]  # Quick run: overrides the full list above with the FC layers only
    target_subject = 'AM'

    Lambda = 0.1
    data_rep = 5

    # Model parameters
    gpu_device = 1

    # Results directory
    results_dir_root = './NCconverter_results'

    # Converter models
    nc_models_dir_root = os.path.join(results_dir_root,
                                      'pytorch_converter_training', 'model')
    selected_converter_type = 'conv5'

    # Misc settings
    analysis_basename = os.path.splitext(os.path.basename(__file__))[0]

    # Pretrained model metadata
    pre_results_dir_root = '/home/share/data/contents_shared/ImageNetTraining/derivatives/feature_decoders'
    pre_analysis_basename = 'deeprecon_fmriprep_rep5_500voxel_allunits_fastl2lir_alpha100'
    pre_models_dir_root = os.path.join(pre_results_dir_root,
                                       pre_analysis_basename)

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_brain = {
        sbj: bdpy.BData(os.path.join(brain_dir, dat_file))
        for sbj, dat_file in subjects_list.items()
    }
    data_features = Features(os.path.join(features_dir, network))

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir_root)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj, roi, feat in product(subjects_list, rois_list, features_list):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('ROI:        %s' % roi)

        # Distributed computation setup
        # -----------------------------
        subject_name = '%s2%s_%dp_lambda%s' % (sbj, target_subject,
                                               data_rep * 20, Lambda)
        analysis_id = analysis_basename + '-' + subject_name + '-' + roi + '-' + feat
        results_dir_prediction = os.path.join(results_dir_root,
                                              analysis_basename,
                                              'decoded_features', network,
                                              feat, subject_name, roi)
        results_dir_accuracy = os.path.join(results_dir_root,
                                            analysis_basename,
                                            'prediction_accuracy', network,
                                            feat, subject_name, roi)

        if os.path.exists(results_dir_prediction):
            print('%s is already done. Skipped.' % analysis_id)
            continue

        dist = DistComp(lockdir='tmp', comp_id=analysis_id)
        if dist.islocked_lock():
            print('%s is already running. Skipped.' % analysis_id)
            continue
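        # Unlike Example 2 there is no separate dist.lock() call below;
        # islocked_lock() presumably checks and acquires the lock in one step.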

        # Preparing data
        # --------------
        print('Preparing data')

        start_time = time()

        # Brain data
        x = data_brain[sbj].select(rois_list[roi])  # Brain data
        x_labels = data_brain[sbj].select(
            'image_index')  # Image labels in the brain data

        # Target features and image labels (file names)
        y = data_features.get_features(feat)
        y_labels = data_features.index
        image_names = data_features.labels

        # Get test data
        x_test = x
        x_test_labels = x_labels

        y_test = y
        y_test_labels = y_labels

        # Averaging brain data
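        # One row per unique stimulus: repeated presentations of the same image
        # are averaged to reduce noise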
        x_test_labels_unique = np.unique(x_test_labels)
        x_test_averaged = np.vstack([
            np.mean(x_test[(x_test_labels == lb).flatten(), :], axis=0)
            for lb in x_test_labels_unique
        ])

        print('Total elapsed time (data preparation): %f' %
              (time() - start_time))

        # Convert x_test_averaged
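        # (the converter is assumed to map this subject's activity into the target
        # subject's voxel space, so the target's pretrained decoder can be reused)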
        nc_models_dir = os.path.join(nc_models_dir_root, subject_name, roi,
                                     'model')
        x_test_averaged = test_ncconverter(nc_models_dir, x_test_averaged,
                                           gpu_device)

        # Prediction
        # ----------
        print('Prediction')

        start_time = time()
        y_pred = test_fastl2lir_div(
            os.path.join(pre_models_dir_root, network, feat, target_subject,
                         roi, 'model'), x_test_averaged)
        print('Total elapsed time (prediction): %f' % (time() - start_time))

        # Calculate prediction accuracy
        # -----------------------------
        print('Prediction accuracy')

        start_time = time()

        y_pred_2d = y_pred.reshape([y_pred.shape[0], -1])
        y_true_2d = y.reshape([y.shape[0], -1])

        y_true_2d = get_refdata(y_true_2d, y_labels, x_test_labels_unique)

        n_units = y_true_2d.shape[1]

        accuracy = np.array([
            np.corrcoef(y_pred_2d[:, i].flatten(),
                        y_true_2d[:, i].flatten())[0, 1]
            for i in range(n_units)
        ])
        accuracy = accuracy.reshape((1, ) + y_pred.shape[1:])

        print('Mean prediction accuracy: {}'.format(np.mean(accuracy)))

        print('Total elapsed time (prediction accuracy): %f' %
              (time() - start_time))

        # Save results
        # ------------
        print('Saving results')

        makedir_ifnot(results_dir_prediction)
        makedir_ifnot(results_dir_accuracy)

        start_time = time()

        # Predicted features
        for i, lb in enumerate(x_test_labels_unique):
            # Predicted feature for this image; the leading axis makes the shape
            # 1 x M x N x ... (renamed from `feat`, which shadowed the loop variable)
            feat_pred = np.array([y_pred[i]])

            image_filename = image_names[int(lb) - 1]  # Image labels are one-based image indexes

            # Save file name
            save_file = os.path.join(results_dir_prediction,
                                     '%s.mat' % image_filename)

            # Save
            hdf5storage.savemat(save_file, {u'feat': feat_pred},
                                format='7.3',
                                oned_as='column',
                                store_python_metadata=True)

        print('Saved %s' % results_dir_prediction)

        # Prediction accuracy
        save_file = os.path.join(results_dir_accuracy, 'accuracy.mat')
        hdf5storage.savemat(save_file, {u'accuracy': accuracy},
                            format='7.3',
                            oned_as='column',
                            store_python_metadata=True)
        print('Saved %s' % save_file)

        print('Elapsed time (saving results): %f' % (time() - start_time))

        dist.unlock()

    print('%s finished.' % analysis_basename)
Example 4 — excerpt from a feature-decoding loop: optionally compress the image features with IncrementalPCA before prediction (variables such as image_features, labels, and the train/test indexes are defined outside this excerpt).
    for feat in features:
        # f=open(dir_path+'/results/feature-decoding/texts/'+subject+'_'+roi+'_'+feat+'_'+'feature-decoding'+'.txt','w')
        y = image_features.select(feat)  # Image features
        if pca:
            from sklearn.decomposition import IncrementalPCA
            print('Shape of y before PCA:', y.shape)
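            # IncrementalPCA fits the projection in mini-batches (batch_size=20)
            # instead of one full SVD; 20 components keep this demo fast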
            ipca = IncrementalPCA(n_components=20, batch_size=20)
            ipca.fit(y)
            y = ipca.transform(y)
            print('Shape of y after PCA:', y.shape)
        else:
            y = y[:, :100]  # Take only the first 100 features to keep runtime down

        y_label = image_features.select('ImageID')  # Image labels

        y_sorted = get_refdata(
            y, y_label, labels)  # Image features corresponding to brain data

        y_train = y_sorted[i_train, :]
        y_test = y_sorted[i_test, :]

        for roi in ['VC']:

            # Feature prediction
            pred_y, true_y = feature_prediction(subject, roi, y_train, y_test)

            i_pt = i_test_pt[i_test]  # Index for perception test within test
            i_im = i_test_im[i_test]  # Index for imagery test within test

            print(pred_y.shape)
            print(i_pt.shape)
Example 5 — variant of Example 2 that can load CNN features either from the standard BData file or from a pickled DataFrame (CAFFEflag / cboflag), with a selectable regression model (MODELOPTION).
# The same imports as Example 2 are assumed; CAFFEflag, cboflag, and MODELOPTION
# are module-level flags set outside this function.
def main():
    # Settings ---------------------------------------------------------

    # Data settings
    subjects = config.subjects
    rois = config.rois
    num_voxel = config.num_voxel

    if CAFFEflag:
        if cboflag:
            image_feature1 = '/home/akpapadim/Desktop/RemoteThesis/cbof-kamitani/data/ImageFeatures_caffe_cbof.pkl'
        else:
            image_feature1 = '/home/akpapadim/Desktop/RemoteThesis/cbof-kamitani/data/ImageFeatures_caffe.pkl'

    image_feature = config.image_feature_file
    features = config.features
    results_dir = config.results_dir

    # Misc settings
    analysis_basename = os.path.basename(__file__)

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_all = {}
    for sbj in subjects:
        if len(subjects[sbj]) == 1:
            data_all[sbj] = bdpy.BData(subjects[sbj][0])
        else:
            # Concatenate data
            suc_cols = ['Run', 'Block']
            data_all[sbj] = concat_dataset(
                [bdpy.BData(f) for f in subjects[sbj]], successive=suc_cols)

    data_feature = bdpy.BData(image_feature)

    # check which features file to open
    if CAFFEflag:
        data_feature1 = pd.read_pickle(image_feature1)
        print('From file ', image_feature1)
    elif not cboflag:
        print('From file ', image_feature)

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj, roi, feat in product(subjects, rois, features):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('ROI:        %s' % roi)
        print('Num voxels: %d' % num_voxel[roi])
        print('Feature:    %s' % feat)

        # Distributed computation
        analysis_id = analysis_basename + '-' + sbj + '-' + roi + '-' + feat
        results_file = os.path.join(results_dir, analysis_id + '.pkl')

        if os.path.exists(results_file):
            print('%s is already done. Skipped.' % analysis_id)
            continue

        dist = DistComp(lockdir='tmp', comp_id=analysis_id)
        if dist.islocked():
            print('%s is already running. Skipped.' % analysis_id)
            continue

        dist.lock()

        # Prepare data
        print('Preparing data')
        dat = data_all[sbj]

        x = dat.select(rois[roi])  # Brain data
        datatype = dat.select('DataType')  # Data type
        labels = dat.select('Label')  # Image labels in brain data

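        # CAFFEflag switches the feature source: a pickled DataFrame
        # (data_feature1) instead of the BData file used in the else branch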
        if CAFFEflag:
            yold = data_feature.select(feat)  # Image features

            y = data_feature1[feat]
            y_label = data_feature1['ImageID']

            if not cboflag:
                # Stack the per-image arrays into an (n_images, n_units) matrix (1250 x 1000 here)
                y = np.concatenate(y).reshape(y.shape[0], y[0].shape[0])
                y_label = np.asarray(y_label).reshape(-1, 1)  # Series.reshape no longer exists in pandas

        else:
            y = data_feature.select(feat)  # Image features
            y_label = data_feature.select('ImageID')  # Image labels

        y_sorted = get_refdata(
            y, y_label, labels)  # Image features corresponding to brain data

        # Alternative sorting method, equivalent to get_refdata: build a
        # label-to-feature map and look up each brain-data label.
        """
        object_map = {}
        for i in range(len(y)):
            key = y_label[i][0]
            object_map[key] = y[i]

        y_sorted2 = np.vstack([object_map[lb[0]] for lb in labels])
        """

        # Get training and test dataset
        i_train = (datatype == 1).flatten()  # Index for training
        i_test_pt = (datatype == 2).flatten()  # Index for perception test
        i_test_im = (datatype == 3).flatten()  # Index for imagery test
        i_test = i_test_pt | i_test_im  # Union of perception and imagery test samples

        x_train = x[i_train, :]
        x_test = x[i_test, :]

        y_train = y_sorted[i_train, :]
        y_test = y_sorted[i_test, :]

        # Feature prediction
        pred_y, true_y = feature_prediction(x_train,
                                            y_train,
                                            x_test,
                                            y_test,
                                            modeloption=MODELOPTION)
        print('Model: ', MODELOPTION)

        # pred_y = true_y  # Uncomment to simulate an ideal (error-free) regression

        # Separate results for perception and imagery tests
        i_pt = i_test_pt[i_test]  # Index for perception test within test
        i_im = i_test_im[i_test]  # Index for imagery test within test

        pred_y_pt = pred_y[i_pt, :]
        pred_y_im = pred_y[i_im, :]

        true_y_pt = true_y[i_pt, :]
        true_y_im = true_y[i_im, :]

        # Get averaged predicted feature
        test_label_pt = labels[i_test_pt, :].flatten()
        test_label_im = labels[i_test_im, :].flatten()

        pred_y_pt_av, true_y_pt_av, test_label_set_pt \
            = get_averaged_feature(pred_y_pt, true_y_pt, test_label_pt)
        pred_y_im_av, true_y_im_av, test_label_set_im \
            = get_averaged_feature(pred_y_im, true_y_im, test_label_im)

        # Get category averaged features
        catlabels_pt = np.vstack([int(n) for n in test_label_pt])  # Category labels (perception test)
        catlabels_im = np.vstack([int(n) for n in test_label_im])  # Category labels (imagery test)
        catlabels_set_pt = np.unique(catlabels_pt)  # Category label set (perception test)
        catlabels_set_im = np.unique(catlabels_im)  # Category label set (imagery test)

        if CAFFEflag:

            yold_catlabels = data_feature.select('CatID')  # Category labels in image features
            ind_catave = (data_feature.select('FeatureType') == 3).flatten()  # Boolean mask for category-averaged rows

            y_catave_pt = get_refdata(yold[ind_catave, :],
                                      yold_catlabels[ind_catave, :],
                                      catlabels_set_pt)
            y_catave_im = get_refdata(yold[ind_catave, :],
                                      yold_catlabels[ind_catave, :],
                                      catlabels_set_im)

        else:
            y_catlabels = data_feature.select('CatID')  # Category labels in image features
            ind_catave = (data_feature.select('FeatureType') == 3).flatten()  # Boolean mask for category-averaged rows

            y_catave_pt = get_refdata(y[ind_catave, :],
                                      y_catlabels[ind_catave, :],
                                      catlabels_set_pt)
            y_catave_im = get_refdata(y[ind_catave, :],
                                      y_catlabels[ind_catave, :],
                                      catlabels_set_im)

        # Prepare result dataframe
        results = pd.DataFrame({
            'subject': [sbj, sbj],
            'roi': [roi, roi],
            'feature': [feat, feat],
            'test_type': ['perception', 'imagery'],
            'true_feature': [true_y_pt, true_y_im],
            'predicted_feature': [pred_y_pt, pred_y_im],
            'test_label': [test_label_pt, test_label_im],
            'test_label_set': [test_label_set_pt, test_label_set_im],
            'true_feature_averaged': [true_y_pt_av, true_y_im_av],
            'predicted_feature_averaged': [pred_y_pt_av, pred_y_im_av],
            'category_label_set': [catlabels_set_pt, catlabels_set_im],
            'category_feature_averaged': [y_catave_pt, y_catave_im]
        })

        # Save results
        makedir_ifnot(os.path.dirname(results_file))
        with open(results_file, 'wb') as f:
            pickle.dump(results, f)

        print('Saved %s' % results_file)

        dist.unlock()
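To inspect one of the pickled result files afterwards, something like the
following works (the file name here is hypothetical):

import pickle
import numpy as np

with open('results/analysis-Subject1-VC-fc8.pkl', 'rb') as f:
    results = pickle.load(f)  # pandas DataFrame, one row per test type

for _, row in results.iterrows():
    # Per-unit correlation between predicted and true features
    acc = np.array([np.corrcoef(p, t)[0, 1]
                    for p, t in zip(row['predicted_feature'].T,
                                    row['true_feature'].T)])
    print(row['test_type'], np.nanmean(acc))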