Example no. 1
import os
import pickle

import matplotlib.pyplot as plt

# `config`, `makedir_ifnot`, and `plotresults` are project-level helpers
# defined elsewhere in the original repository.


def main():

    analysis_name = 'GenericObjectDecoding'
    resnet_reindex = config.resnet_reindex
    resnet_true_layers = config.resnet_true_layers

    alexnet_file = os.path.join('results-alexnet', analysis_name + '.pkl')
    resnet_file = os.path.join('results-resnet', analysis_name + '.pkl')
    output_file_featpred = os.path.join('results', analysis_name + '_featureprediction.pdf')

    # Load results -----------------------------------------------------
    with open(alexnet_file, 'rb') as f:
        print('Loading %s' % alexnet_file)
        alexnet_results = pickle.load(f)

    with open(resnet_file, 'rb') as f:
        print('Loading %s' % resnet_file)
        resnet_results = pickle.load(f)

    # Figure settings
    plt.rcParams['font.size'] = 7

    # Plot (feature prediction) ----------------------------------------
    fig, axes = plt.subplots(4, 2, figsize=(8, 9))
    num_plots = range(8)

    # Image
    plotresults(fig, axes, alexnet_results, resnet_results, num_plots)

    # Save the figure
    makedir_ifnot('results')
    plt.savefig(output_file_featpred, dpi=300)
    print('Saved %s' % output_file_featpred)

    plt.show()
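
All of these snippets call a small `makedir_ifnot` helper before writing results. A minimal sketch of such a helper, assuming it simply wraps `os.makedirs` with an existence check (the project's own version may differ):

import os

def makedir_ifnot(dir_path):
    # Create the directory (including parents) only if it does not exist yet.
    if not os.path.isdir(dir_path):
        os.makedirs(dir_path)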
Example no. 2
    def __save_model(self, output_files):
        if self.save_format == 'pickle':
            if len(output_files) != 1:
                raise RuntimeError('Invalid output file(s)')
            save_file = output_files[0]['file_path']

            makedir_ifnot(os.path.dirname(save_file))
            with open(save_file, 'wb') as f:
                pickle.dump(self.model, f, protocol=2)
            if self.verbose >= 1: print('Saved %s' % save_file)
        elif self.save_format == 'bdmodel':
            if not self.model.__class__.__name__ == 'FastL2LiR':
                raise NotImplementedError(
                    'BD model currently supports only FastL2LiR models.')

            for s in output_files:
                makedir_ifnot(os.path.dirname(s['file_path']))
                save_array(s['file_path'],
                           getattr(self.model, s['src']),
                           key=s['dst'],
                           dtype=self.dtype,
                           sparse=s['sparse'])
                if self.verbose >= 1: print('Saved %s' % s['file_path'])
        else:
            raise ValueError('Unsupported output format: %s' %
                             self.save_format)
        return None
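
The branches above imply the expected shape of `output_files`: a list of dicts with a 'file_path' key, plus 'src', 'dst', and 'sparse' entries for the 'bdmodel' branch. A hypothetical spec (paths and attribute names are illustrative only):

# Single pickle file for save_format == 'pickle':
# output_files = [{'file_path': 'model/model.pkl'}]

# Per-array files for save_format == 'bdmodel' (e.g. FastL2LiR weights and bias):
output_files = [
    {'file_path': 'model/W.mat', 'src': 'W', 'dst': 'W', 'sparse': False},
    {'file_path': 'model/b.mat', 'src': 'b', 'dst': 'b', 'sparse': False},
]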
Example no. 3
# Main #######################################################################

analysis_basename = os.path.splitext(os.path.basename(__file__))[0]

# Load data --------------------------------------------------------
print('----------------------------------------')
print('Loading data')

data_brain = {
    sbj: bdpy.BData(os.path.join(brain_dir, dat_file))
    for sbj, dat_file in subjects_list.items()
}
data_features = Features(os.path.join(features_dir, network))

# Initialize directories -------------------------------------------
makedir_ifnot(results_dir_root)
makedir_ifnot(os.path.join(results_dir_root, 'decoded_features', network))
makedir_ifnot(os.path.join(results_dir_root, 'prediction_accuracy', network))
makedir_ifnot('tmp')

# Save runtime information -----------------------------------------
runtime_params = {
    'fMRI data': [
        os.path.abspath(os.path.join(brain_dir, v))
        for v in subjects_list.values()
    ],
    'ROIs':
    rois_list.keys(),
    'feature_decoders':
    os.path.abspath(models_dir_root),
    'target DNN':
Example no. 4
import os
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.metrics import accuracy_score, mean_squared_error
# from sklearn import svm

import bdpy
# `makedir_ifnot` and the variables used further below (network,
# features_list, brain_dir, ...) come from the original project.

data_dir = '/home/share/data/fmri_shared/eyetracker/YS210108/bdata'
os.listdir(data_dir)
results_dir = '/home/yqsong/Documents/eye_movement/GOD_eyetracking/results'

# Setups

analysis_basename = 'eye_position_Linreg_GOD.py'
makedir_ifnot(os.path.join(results_dir, analysis_basename))

print('----------------------------------------')
print('Loading data')

sbj = 'YS210108'
method = "MRI-based"

cond_type = 5
dir_type = 0

dat_name = 'YS210108_GODeyetracking_eyetracking_volume_native_prep.h5'
# data_all = {}
# for sbj in subjects:
#     data_all[sbj] = bdpy.BData(os.path.join(data_dir, subjects[sbj][0]))
data_train = bdpy.BData(os.path.join(data_dir, dat_name))
print('Target features: %s' % network)
print('Layers:          %s' % features_list)
print('')

# Load data ------------------------------------------------------------------
print('----------------------------------------')
print('Loading data')

data_brain = {
    sbj: bdpy.BData(os.path.join(brain_dir, dat_file))
    for sbj, dat_file in subjects_list.items()
}
data_features = Features(os.path.join(features_dir, network))

# Initialize directories -----------------------------------------------------
makedir_ifnot(results_dir_root)
makedir_ifnot(os.path.join(results_dir_root, network))
makedir_ifnot('tmp')

# Save runtime information ---------------------------------------------------
info_dir = os.path.join(results_dir_root, network)
runtime_params = {
    'learning method':
    'PyFastL2LiR',
    'regularization parameter':
    alpha,
    'fMRI data': [
        os.path.abspath(os.path.join(brain_dir, v))
        for v in subjects_list.values()
    ],
    'ROIs':
# Main #######################################################################

analysis_basename = os.path.splitext(os.path.basename(__file__))[0]

# Load data --------------------------------------------------------
print('----------------------------------------')
print('Loading data')

data_brain = {
    sbj: bdpy.BData(os.path.join(brain_dir, dat_file))
    for sbj, dat_file in subjects_list.items()
}
data_features = Features(os.path.join(features_dir, network))

# Initialize directories -------------------------------------------
makedir_ifnot(os.path.join(results_dir_decoded_features_root, network))
makedir_ifnot(os.path.join(results_dir_decoding_accuracy_root, network))
makedir_ifnot('tmp')

# Save runtime information -----------------------------------------
runtime_params = {
    'fMRI data': [
        os.path.abspath(os.path.join(brain_dir, v))
        for v in subjects_list.values()
    ],
    'ROIs':
    rois_list.keys(),
    'feature_decoders':
    os.path.abspath(models_dir_root),
    'target DNN':
    network,
def main():
    # Data settings ----------------------------------------------------

    # Brain data
    brain_dir = '/home/share/data/fmri_shared/datasets/Deeprecon/fmriprep'
    subjects_list = {'TH': 'TH_ImageNetTraining_volume_native.h5'}
    target_data = {'AM': 'AM_ImageNetTraining_volume_native.h5'}

    rois_list = {
        'LH': 'VertexLeft',
    }

    # Assume the pretrained model is trained with TH data.
    # We want to train the NCconverter using AM data.
    target_subject = 'AM'

    # Data repetition parameter
    data_rep = 5

    # Model parameters
    lr_rate = 0.01
    epoch = 200
    batch = 1
    gpu_device = 0

    # Results directory
    results_dir_root = './NCconverter_results'

    # geometry dir
    geometry_dir = './surf'

    analysis_basename = os.path.splitext(os.path.basename(__file__))[0]

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_brain = {
        sbj: bdpy.BData(os.path.join(brain_dir, dat_file))
        for sbj, dat_file in subjects_list.items()
    }

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir_root)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj, roi in product(subjects_list, rois_list):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('Target subject: %s' % target_subject)
        print('ROI:        %s' % roi)

        # Setup
        # -----
        subject_name = sbj + '2' + target_subject + '_' + str(
            data_rep * 20) + 'p'
        analysis_id = analysis_basename + '-' + subject_name + '-' + roi
        results_dir = os.path.join(results_dir_root, analysis_basename,
                                   'model', subject_name, roi, 'model')
        makedir_ifnot(results_dir)

        # Check whether the analysis has been done or not.
        check_file = os.path.join(results_dir, analysis_id + '.done')
        if os.path.exists(check_file):
            print('%s is already done and skipped' % analysis_id)
            continue

        # Preparing data
        # --------------
        print('Preparing data')

        start_time = time()

        # geometry data
        geofiles = (os.path.join(geometry_dir, '{}.white'.format(roi.lower())),
                    os.path.join(geometry_dir, '{}.pial'.format(roi.lower())))
        mesh = MeshData(geofiles)
        edges, pseudo = mesh.edge_pseudo()

        # Brain data
        x = data_brain[sbj].select(rois_list[roi])  # Brain data
        x_labels = data_brain[sbj].select(
            'image_index')  # Image labels in the brain data

        target_brain_data = bdpy.BData(
            os.path.join(brain_dir, target_data[target_subject]))
        y = target_brain_data.select(rois_list[roi])
        y_labels = target_brain_data.select('image_index')

        # Get training data
        x_train = x
        x_train_labels = x_labels

        y_train = y
        y_train_labels = y_labels
        del x, y, x_labels, y_labels

        print('Total elapsed time (data preparation): %f' %
              (time() - start_time))

        # Model training
        # --------------
        print('Model training')
        start_time = time()
        train_NCconverter(x_train,
                          y_train,
                          x_train_labels,
                          y_train_labels,
                          edges,
                          pseudo,
                          lr_rate=lr_rate,
                          batch=batch,
                          output=results_dir,
                          save_chunk=True,
                          axis_chunk=1,
                          tmp_dir='tmp',
                          comp_id=analysis_id,
                          gpu_device=gpu_device,
                          epoch=epoch)
        print('Total elapsed time (model training): %f' %
              (time() - start_time))

    print('%s finished.' % analysis_basename)
def train_NCconverter(x,
                      y,
                      x_labels,
                      y_labels,
                      edges,
                      pseudo,
                      lr_rate=0.01,
                      batch=64,
                      output='./NCconverter_results.mat',
                      save_chunk=False,
                      axis_chunk=1,
                      tmp_dir='./tmp',
                      comp_id=None,
                      gpu_device=0,
                      epoch=500):

    makedir_ifnot(output)
    makedir_ifnot(tmp_dir)

    if y.ndim == 4:
        # The Y input to the NCconverter must be 2-D: (n samples) x (n features)
        y = y.reshape((y.shape[0], -1))
    elif y.ndim == 2:
        pass
    else:
        raise ValueError('Unsupported feature array shape')

    # Preprocessing ----------------------------------------------------------
    print('Preprocessing')
    start_time = time()

    # Normalize X (fMRI data)
    x_mean = np.mean(
        x,
        axis=0)[np.newaxis, :]  # np.newaxis was added to match Matlab outputs
    x_norm = np.std(x, axis=0, ddof=1)[np.newaxis, :]
    x_normalized = (x - x_mean) / x_norm

    # Normalize Y (DNN features)
    y_mean = np.mean(y, axis=0)[np.newaxis, :]
    y_norm = np.std(y, axis=0, ddof=1)[np.newaxis, :]
    y_normalized = (y - y_mean) / y_norm

    print('Elapsed time: %f' % (time() - start_time))

    # Model training loop ----------------------------------------------------

    comp_id_t = comp_id + 'NCconverter'
    results_dir = os.path.join(output)
    result_model = os.path.join(results_dir, 'NCconverter.pt')

    makedir_ifnot(results_dir)

    if os.path.exists(result_model):
        print('%s already exists and skipped' % result_model)
        return

    dist = DistComp(lockdir=tmp_dir, comp_id=comp_id_t)
    if dist.islocked():
        print('%s is already running. Skipped.' % comp_id_t)
        return

    dist.lock()

    start_time = time()

    print('Training')

    # add bias term in X
    #x_normalized = np.concatenate([x_normalized, np.ones((x_normalized.shape[0],1))], axis=1)

    # Align Y to X labels
    x_index = np.argsort(x_labels.flatten())
    x_labels_aligned = x_labels[x_index]

    y_index = np.argsort(y_labels.flatten())
    y_labels_aligned = y_labels[y_index]

    #y_index = np.array([np.where(y_labels == xl)[0] for xl in x_labels]).flatten()
    #y_aligned = y_normalized[y_index, :]
    #y_labels_aligned = y_labels[y_index]
    x_aligned = x_normalized[x_index, :]
    y_aligned = y_normalized[y_index, :]
    print(x_labels_aligned[:20])
    print(y_labels_aligned[:20])
    # np.random.seed(88)
    # x_aligned = np.random.permutation(x_aligned)
    # np.random.seed(88)
    # y_aligned = np.random.permutation(y_aligned)

    # Data
    graph = GraphData(x_aligned, y_aligned, edges, pseudo)
    graph_dat_list = graph.data

    # Model training
    #torch.cuda.set_device(gpu_device)
    #model = NCconverter_torch(x_aligned.shape[1], y_aligned.shape[1])
    model = train(graph_dat_list, lr_rate=lr_rate, epoch=epoch, batch=batch)

    # Save chunk results
    torch.save(model, result_model)
    print('Saved %s' % result_model)

    del y_aligned
    etime = time() - start_time
    print('Elapsed time: %f' % etime)
    dist.unlock()

    del x_normalized

    # Save results -----------------------------------------------------------

    print('Saving normalization parameters.')
    norm_param = {
        'x_mean': x_mean,
        'y_mean': y_mean,
        'x_norm': x_norm,
        'y_norm': y_norm
    }
    save_targets = [u'x_mean', u'y_mean', u'x_norm', u'y_norm']
    for sv in save_targets:
        save_file = os.path.join(results_dir, sv + '.mat')
        if not os.path.exists(save_file):
            hdf5storage.savemat(save_file, {sv: norm_param[sv]},
                                format='7.3',
                                oned_as='column',
                                store_python_metadata=True)
            print('Saved %s' % save_file)

    if not save_chunk:
        # Merge results into 'model.mat'
        raise NotImplementedError('Result merging is not implemented yet.')

    return None
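
The preprocessing in train_NCconverter z-scores X and Y with the sample standard deviation (ddof=1) and then aligns the rows of both arrays by sorting on their labels. A self-contained numpy sketch of those two steps on toy data:

import numpy as np

x = np.random.rand(6, 4)                 # toy "brain" data (samples x voxels)
x_labels = np.array([3, 1, 2, 6, 5, 4])  # toy image indices, one per sample

# Z-score each column, keeping the parameters for later un-normalization.
x_mean = np.mean(x, axis=0)[np.newaxis, :]
x_norm = np.std(x, axis=0, ddof=1)[np.newaxis, :]
x_normalized = (x - x_mean) / x_norm

# Sort rows by ascending label so X and Y (sorted the same way) correspond.
x_index = np.argsort(x_labels.flatten())
x_aligned = x_normalized[x_index, :]
print(x_labels[x_index])                 # -> [1 2 3 4 5 6]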
Example no. 9
def main():
    # Settings ---------------------------------------------------------

    # Data settings
    subjects = config.subjects
    rois = config.rois
    num_voxel = config.num_voxel

    image_feature = config.image_feature_file
    features = config.features

    n_iter = 200

    results_dir = config.results_dir

    # Misc settings
    analysis_basename = os.path.basename(__file__)

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_all = {}
    for sbj in subjects:
        if len(subjects[sbj]) == 1:
            data_all[sbj] = bdpy.BData(subjects[sbj][0])
        else:
            # Concatenate data
            suc_cols = ['Run', 'Block']
            data_all[sbj] = concat_dataset(
                [bdpy.BData(f) for f in subjects[sbj]], successive=suc_cols)

    data_feature = bdpy.BData(image_feature)

    # Add any additional processing to data here

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj, roi, feat in product(subjects, rois, features):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('ROI:        %s' % roi)
        print('Num voxels: %d' % num_voxel[roi])
        print('Feature:    %s' % feat)

        # Distributed computation
        analysis_id = analysis_basename + '-' + sbj + '-' + roi + '-' + feat
        results_file = os.path.join(results_dir, analysis_id + '.pkl')

        if os.path.exists(results_file):
            print('%s is already done. Skipped.' % analysis_id)
            continue

        dist = DistComp(lockdir='tmp', comp_id=analysis_id)
        if dist.islocked():
            print('%s is already running. Skipped.' % analysis_id)
            continue

        dist.lock()

        # Prepare data
        print('Preparing data')
        dat = data_all[sbj]

        x = dat.select(rois[roi])  # Brain data
        datatype = dat.select('DataType')  # Data type
        labels = dat.select('stimulus_id')  # Image labels in brain data

        y = data_feature.select(feat)  # Image features
        y_label = data_feature.select('ImageID')  # Image labels

        # For quick demo, reduce the number of units from 1000 to 100
        y = y[:, :100]

        y_sorted = get_refdata(
            y, y_label, labels)  # Image features corresponding to brain data

        # Get training and test dataset
        i_train = (datatype == 1).flatten()  # Index for training
        i_test_pt = (datatype == 2).flatten()  # Index for perception test
        i_test_im = (datatype == 3).flatten()  # Index for imagery test
        i_test = i_test_pt + i_test_im

        x_train = x[i_train, :]
        x_test = x[i_test, :]

        y_train = y_sorted[i_train, :]
        y_test = y_sorted[i_test, :]

        # Feature prediction
        pred_y, true_y = feature_prediction(x_train,
                                            y_train,
                                            x_test,
                                            y_test,
                                            n_voxel=num_voxel[roi],
                                            n_iter=n_iter)

        # Separate results for perception and imagery tests
        i_pt = i_test_pt[i_test]  # Index for perception test within test
        i_im = i_test_im[i_test]  # Index for imagery test within test

        pred_y_pt = pred_y[i_pt, :]
        pred_y_im = pred_y[i_im, :]

        true_y_pt = true_y[i_pt, :]
        true_y_im = true_y[i_im, :]

        # Get averaged predicted feature
        test_label_pt = labels[i_test_pt, :].flatten()
        test_label_im = labels[i_test_im, :].flatten()

        pred_y_pt_av, true_y_pt_av, test_label_set_pt \
            = get_averaged_feature(pred_y_pt, true_y_pt, test_label_pt)
        pred_y_im_av, true_y_im_av, test_label_set_im \
            = get_averaged_feature(pred_y_im, true_y_im, test_label_im)

        # Get category averaged features
        catlabels_pt = np.vstack([int(n) for n in test_label_pt
                                  ])  # Category labels (perception test)
        catlabels_im = np.vstack([int(n) for n in test_label_im
                                  ])  # Category labels (imagery test)
        catlabels_set_pt = np.unique(
            catlabels_pt)  # Category label set (perception test)
        catlabels_set_im = np.unique(
            catlabels_im)  # Category label set (imagery test)

        y_catlabels = data_feature.select(
            'CatID')  # Category labels in image features
        ind_catave = (data_feature.select('FeatureType') == 3).flatten()

        y_catave_pt = get_refdata(y[ind_catave, :], y_catlabels[ind_catave, :],
                                  catlabels_set_pt)
        y_catave_im = get_refdata(y[ind_catave, :], y_catlabels[ind_catave, :],
                                  catlabels_set_im)

        # Prepare result dataframe
        results = pd.DataFrame({
            'subject': [sbj, sbj],
            'roi': [roi, roi],
            'feature': [feat, feat],
            'test_type': ['perception', 'imagery'],
            'true_feature': [true_y_pt, true_y_im],
            'predicted_feature': [pred_y_pt, pred_y_im],
            'test_label': [test_label_pt, test_label_im],
            'test_label_set': [test_label_set_pt, test_label_set_im],
            'true_feature_averaged': [true_y_pt_av, true_y_im_av],
            'predicted_feature_averaged': [pred_y_pt_av, pred_y_im_av],
            'category_label_set': [catlabels_set_pt, catlabels_set_im],
            'category_feature_averaged': [y_catave_pt, y_catave_im]
        })

        # Save results
        makedir_ifnot(os.path.dirname(results_file))
        with open(results_file, 'wb') as f:
            pickle.dump(results, f)

        print('Saved %s' % results_file)

        dist.unlock()

            # Prepare result dataframe
            results = pd.DataFrame({
                'feature': [feat, feat],
                'test_type': ['perception', 'imagery'],
                'true_feature': [true_y_pt, true_y_im],
                'predicted_feature': [pred_y_pt, pred_y_im],
                'test_label': [test_label_pt, test_label_im],
                'test_label_set': [test_label_set_pt, test_label_set_im],
                'true_feature_averaged': [true_y_pt_av, true_y_im_av],
                'predicted_feature_averaged': [pred_y_pt_av, pred_y_im_av],
                'category_label_set': [catlabels_set_pt, catlabels_set_im],
                'category_feature_averaged': [y_catave_pt, y_catave_im]
            })

            # print('catlabels_set_pt size',catlabels_set_pt.shape)
            # print('catlabels_set_im size',catlabels_set_im.shape)
            # print('true_y_pt_av size',true_y_pt_av.shape)
            # print('true_y_im_av size',true_y_im_av.shape)
            # print('pred_y_pt_av size',pred_y_pt_av.shape)
            # print('pred_y_im_av size',pred_y_im_av.shape)
            # print('y_catave_pt size',y_catave_pt.shape)
            # print('y_catave_im size',y_catave_im.shape)
            if pca:
                res = dir_path + '/results/feature-decoding-pca/' + subject + '_' + roi + '_' + feat + '_' + 'decode_results.pkl'
            else:
                res = dir_path + '/results/feature-decoding/' + subject + '_' + roi + '_' + feat + '_' + 'decode_results.pkl'
            makedir_ifnot(os.path.dirname(res))

            with open(res, 'wb') as f:
                pickle.dump(results, f)

            print('Saved %s' % res)
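
The `i_pt = i_test_pt[i_test]` indexing used above (and again in the last example) separates perception-test and imagery-test results by indexing one boolean mask with another. A toy demonstration of why that yields masks relative to the test subset:

import numpy as np

datatype = np.array([1, 1, 2, 3, 2, 3])  # toy DataType column
i_test_pt = (datatype == 2)              # perception-test samples
i_test_im = (datatype == 3)              # imagery-test samples
i_test = i_test_pt + i_test_im           # union of both test sets

# Indexing one mask with the other keeps only the test positions,
# giving masks defined *within* the test subset.
i_pt = i_test_pt[i_test]                 # -> [ True False  True False]
i_im = i_test_im[i_test]                 # -> [False  True False  True]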
Example no. 11
    def run(self):
        '''Run training.'''

        if self.dtype is not None:
            self.X = self.X.astype(self.dtype)
            self.Y = self.Y.astype(self.dtype)

        # Chunking
        if self.chunk_axis is None:
            self.__chunking = False
        elif self.Y.ndim == 2:
            self.__chunking = False
        else:
            self.__chunking = True

        if self.__chunking:
            chunk_index = range(self.Y.shape[self.chunk_axis])
        else:
            chunk_index = [None]

        # Distributed computation setup
        if self.distcomp is None:
            dist_db_path = os.path.join(os.path.dirname(self.save_path),
                                        self.id + '.db')
            makedir_ifnot(os.path.dirname(dist_db_path))
            distcomp = DistComp(backend='sqlite3', db_path=dist_db_path)
        else:
            distcomp = self.distcomp

        # X normalization
        if self.X_normalize is not None:
            print('Normalizing X')
            self.X = (self.X -
                      self.X_normalize['mean']) / self.X_normalize['std']
            self.X[np.isinf(self.X)] = 0

        # Model training loop
        time_elapsed = []
        output_files_all = []

        for i, i_chunk in enumerate(chunk_index):
            loop_start_time = time()

            if self.id is None:
                training_id_chunk = 'chunk%08d' % i
            else:
                training_id_chunk = '%s-chunk%08d' % (self.id, i)

            # Output file setting
            output_files = self.__output_file(chunk=i)
            output_files_all.extend(output_files)

            # Check chunk results
            if self.__is_done(output_files):
                if self.verbose >= 1:
                    print('%s is already done. Skipped.' % training_id_chunk)
                continue

            # Parallel computation setup
            # DistComp.lock() returns True if the computation is not locked and successfully locked.
            if not distcomp.lock(training_id_chunk):
                if self.verbose >= 1:
                    print('%s is already running. Skipped.' %
                          training_id_chunk)
                continue

            if self.__chunking:
                Y = np.take(self.Y, [i_chunk], axis=self.chunk_axis)
            else:
                Y = self.Y

            # Y preprocessing
            if self.Y_normalize is not None:
                print('Normalizing Y')
                if self.__chunking:
                    y_mean = np.take(self.Y_normalize['mean'], [i_chunk],
                                     axis=self.chunk_axis)
                    y_norm = np.take(self.Y_normalize['std'], [i_chunk],
                                     axis=self.chunk_axis)
                else:
                    y_mean = self.Y_normalize['mean']
                    y_norm = self.Y_normalize['std']
                Y = (Y - y_mean) / y_norm
                Y[np.isinf(Y)] = 0

            if self.Y_sort is not None:
                print('Sorting Y')
                Y = Y[self.Y_sort['index'], :]

            # Training
            if self.verbose >= 1: print('Training: %s' % training_id_chunk)
            self.model.fit(self.X, Y, **self.model_parameters)

            # Save models
            self.__save_model(output_files)

            etime = time() - loop_start_time
            time_elapsed.append(etime)
            if self.verbose >= 1: print('Elapsed time: %f' % etime)

            distcomp.unlock(training_id_chunk)

            if len(chunk_index) > 1:
                etime_ave = np.mean(time_elapsed)
                est_time_left = etime_ave * (len(chunk_index) - (i + 1))
                est_time_end = time() + est_time_left
                print('')
                print('Average computation time/chunk: %f s' % etime_ave)
                print('Estimated remaining time:       %f s' % est_time_left)
                print('Estimated computation end time: %s' %
                      datetime.fromtimestamp(est_time_end).strftime(
                          '%Y-%m-%d %H:%M:%S'))
                print('')

        # Check outputs and add information
        if self.__is_done(output_files_all):
            if os.path.isdir(self.save_path):
                info_file = os.path.join(self.save_path, 'info.yaml')

                if os.path.exists(info_file):
                    while True:
                        with open(info_file, 'r') as f:
                            info = yaml.safe_load(f)
                        if info is None:
                            print('Failed to load info from %s. Retrying ...' %
                                  info_file)
                            sleep(1)
                        else:
                            print('Loaded info from %s' % info_file)
                            break
                else:
                    info = {}

                if '_status' not in info:
                    info.update({'_status': {}})

                info['_status'].update({
                    'computation_id': self.id,
                    'computation_status': 'done'
                })

                with open(info_file, 'w') as f:
                    f.write(yaml.dump(info, default_flow_style=False))

        return self.model
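
The chunking in run() pulls one index at a time out of Y along chunk_axis with np.take, keeping the chunk axis as a length-1 dimension. A toy illustration, assuming chunk_axis=1:

import numpy as np

Y = np.arange(24).reshape(4, 3, 2)  # toy features: samples x units x maps
chunk_axis = 1

for i_chunk in range(Y.shape[chunk_axis]):
    # Passing [i_chunk] (a list) keeps the axis, so each chunk is 4 x 1 x 2.
    Y_chunk = np.take(Y, [i_chunk], axis=chunk_axis)
    print(i_chunk, Y_chunk.shape)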
def main():
    # Read settings ----------------------------------------------------

    # Brain data
    brain_dir = '/home/share/data/fmri_shared/datasets/Deeprecon/fmriprep'
    subjects_list = {'TH': 'TH_ImageNetTest_volume_native.h5'}

    rois_list = {
        'VC': 'ROI_VC = 1',
    }

    # Image features
    features_dir = '/home/ho/Documents/brain-decoding-examples/python/feature-prediction/data/features/ImageNetTest'
    network = 'caffe/VGG_ILSVRC_19_layers'
    features_list = [
        'conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2',
        'conv3_3', 'conv3_4', 'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4',
        'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4', 'fc6', 'fc7', 'fc8'
    ][::-1]
    features_list = ['fc6', 'fc7', 'fc8'][::-1]
    target_subject = 'AM'

    Lambda = 0.1
    data_rep = 5

    # Model parameters
    gpu_device = 1

    # Results directory
    results_dir_root = './NCconverter_results'

    # Converter models
    nc_models_dir_root = os.path.join(results_dir_root,
                                      'pytorch_converter_training', 'model')
    selected_converter_type = 'conv5'

    # Misc settings
    analysis_basename = os.path.splitext(os.path.basename(__file__))[0]

    # Pretrained model metadata
    pre_results_dir_root = '/home/share/data/contents_shared/ImageNetTraining/derivatives/feature_decoders'
    pre_analysis_basename = 'deeprecon_fmriprep_rep5_500voxel_allunits_fastl2lir_alpha100'
    pre_models_dir_root = os.path.join(pre_results_dir_root,
                                       pre_analysis_basename)

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_brain = {
        sbj: bdpy.BData(os.path.join(brain_dir, dat_file))
        for sbj, dat_file in subjects_list.items()
    }
    data_features = Features(os.path.join(features_dir, network))

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir_root)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj, roi, feat in product(subjects_list, rois_list, features_list):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('ROI:        %s' % roi)

        # Distributed computation setup
        # -----------------------------
        subject_name = sbj + '2' + target_subject + '_' + str(
            data_rep * 20) + 'p' + '_lambda' + str(Lambda)
        analysis_id = analysis_basename + '-' + subject_name + '-' + roi + '-' + feat
        results_dir_prediction = os.path.join(results_dir_root,
                                              analysis_basename,
                                              'decoded_features', network,
                                              feat, subject_name, roi)
        results_dir_accuracy = os.path.join(results_dir_root,
                                            analysis_basename,
                                            'prediction_accuracy', network,
                                            feat, subject_name, roi)

        if os.path.exists(results_dir_prediction):
            print('%s is already done. Skipped.' % analysis_id)
            continue

        dist = DistComp(lockdir='tmp', comp_id=analysis_id)
        if dist.islocked_lock():
            print('%s is already running. Skipped.' % analysis_id)
            continue

        # Preparing data
        # --------------
        print('Preparing data')

        start_time = time()

        # Brain data
        x = data_brain[sbj].select(rois_list[roi])  # Brain data
        x_labels = data_brain[sbj].select(
            'image_index')  # Image labels in the brain data

        # Target features and image labels (file names)
        y = data_features.get_features(feat)
        y_labels = data_features.index
        image_names = data_features.labels

        # Get test data
        x_test = x
        x_test_labels = x_labels

        y_test = y
        y_test_labels = y_labels

        # Averaging brain data
        x_test_labels_unique = np.unique(x_test_labels)
        x_test_averaged = np.vstack([
            np.mean(x_test[(x_test_labels == lb).flatten(), :], axis=0)
            for lb in x_test_labels_unique
        ])

        print('Total elapsed time (data preparation): %f' %
              (time() - start_time))

        # Convert x_test_averaged
        nc_models_dir = os.path.join(nc_models_dir_root, subject_name, roi,
                                     'model')
        x_test_averaged = test_ncconverter(nc_models_dir, x_test_averaged,
                                           gpu_device)

        # Prediction
        # ----------
        print('Prediction')

        start_time = time()
        y_pred = test_fastl2lir_div(
            os.path.join(pre_models_dir_root, network, feat, target_subject,
                         roi, 'model'), x_test_averaged)
        print('Total elapsed time (prediction): %f' % (time() - start_time))

        # Calculate prediction accuracy
        # -----------------------------
        print('Prediction accuracy')

        start_time = time()

        y_pred_2d = y_pred.reshape([y_pred.shape[0], -1])
        y_true_2d = y.reshape([y.shape[0], -1])

        y_true_2d = get_refdata(y_true_2d, y_labels, x_test_labels_unique)

        n_units = y_true_2d.shape[1]

        accuracy = np.array([
            np.corrcoef(y_pred_2d[:, i].flatten(),
                        y_true_2d[:, i].flatten())[0, 1]
            for i in range(n_units)
        ])
        accuracy = accuracy.reshape((1, ) + y_pred.shape[1:])

        print('Mean prediction accuracy: {}'.format(np.mean(accuracy)))

        print('Total elapsed time (prediction accuracy): %f' %
              (time() - start_time))

        # Save results
        # ------------
        print('Saving results')

        makedir_ifnot(results_dir_prediction)
        makedir_ifnot(results_dir_accuracy)

        start_time = time()

        # Predicted features
        for i, lb in enumerate(x_test_labels_unique):
            # Predicted feature for this sample, shaped 1 x M x N x ...
            # (a dedicated name keeps the layer name `feat` intact)
            feat_pred = np.array([y_pred[i, ]])

            image_filename = image_names[
                int(lb) - 1]  # Image labels are one-based image indexes

            # Save file name
            save_file = os.path.join(results_dir_prediction,
                                     '%s.mat' % image_filename)

            # Save
            hdf5storage.savemat(save_file, {u'feat': feat_pred},
                                format='7.3',
                                oned_as='column',
                                store_python_metadata=True)

        print('Saved %s' % results_dir_prediction)

        # Prediction accuracy
        save_file = os.path.join(results_dir_accuracy, 'accuracy.mat')
        hdf5storage.savemat(save_file, {u'accuracy': accuracy},
                            format='7.3',
                            oned_as='column',
                            store_python_metadata=True)
        print('Saved %s' % save_file)

        print('Elapsed time (saving results): %f' % (time() - start_time))

        dist.unlock()

    print('%s finished.' % analysis_basename)
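
The prediction accuracy above is the Pearson correlation between predicted and true values, computed independently for every unit (column). The same computation on toy arrays:

import numpy as np

y_pred_2d = np.random.rand(50, 8)  # toy predictions: 50 samples x 8 units
y_true_2d = np.random.rand(50, 8)  # toy ground truth

# One correlation coefficient per unit (column).
accuracy = np.array([
    np.corrcoef(y_pred_2d[:, i], y_true_2d[:, i])[0, 1]
    for i in range(y_true_2d.shape[1])
])
print(accuracy.shape, np.mean(accuracy))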
Example no. 13
def main():
    # Settings ---------------------------------------------------------

    # Data settings
    subjects = config.subjects
    rois = config.rois
    num_voxel = config.num_voxel

    if CAFFEflag:
        if cboflag:
            image_feature1 = '/home/akpapadim/Desktop/RemoteThesis/cbof-kamitani/data/ImageFeatures_caffe_cbof.pkl'
        else:
            image_feature1 = '/home/akpapadim/Desktop/RemoteThesis/cbof-kamitani/data/ImageFeatures_caffe.pkl'

    image_feature = config.image_feature_file
    features = config.features
    results_dir = config.results_dir

    # Misc settings
    analysis_basename = os.path.basename(__file__)

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_all = {}
    for sbj in subjects:
        if len(subjects[sbj]) == 1:
            data_all[sbj] = bdpy.BData(subjects[sbj][0])
        else:
            # Concatenate data
            suc_cols = ['Run', 'Block']
            data_all[sbj] = concat_dataset(
                [bdpy.BData(f) for f in subjects[sbj]], successive=suc_cols)

    data_feature = bdpy.BData(image_feature)

    # check which features file to open
    if CAFFEflag:
        data_feature1 = pd.read_pickle(image_feature1)
        print('From file ', image_feature1)
    elif not cboflag:
        print('From file ', image_feature)

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj, roi, feat in product(subjects, rois, features):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('ROI:        %s' % roi)
        print('Num voxels: %d' % num_voxel[roi])
        print('Feature:    %s' % feat)

        # Distributed computation
        analysis_id = analysis_basename + '-' + sbj + '-' + roi + '-' + feat
        results_file = os.path.join(results_dir, analysis_id + '.pkl')

        if os.path.exists(results_file):
            print('%s is already done. Skipped.' % analysis_id)
            continue

        dist = DistComp(lockdir='tmp', comp_id=analysis_id)
        if dist.islocked():
            print('%s is already running. Skipped.' % analysis_id)
            continue

        dist.lock()

        # Prepare data
        print('Preparing data')
        dat = data_all[sbj]

        x = dat.select(rois[roi])  # Brain data
        datatype = dat.select('DataType')  # Data type
        labels = dat.select('Label')  # Image labels in brain data

        if CAFFEflag:
            yold = data_feature.select(feat)  # Image features

            y = data_feature1[feat]
            y_label = data_feature1['ImageID']

            if not cboflag:
                y = np.concatenate(y).reshape(
                    y.shape[0], y[0].shape[0])  # reshape to 1250, 1000
                y_label = y_label.reshape(y.shape[0], 1)

        else:
            y = data_feature.select(feat)  # Image features
            y_label = data_feature.select('ImageID')  # Image labels

        y_sorted = get_refdata(
            y, y_label, labels)  # Image features corresponding to brain data

        # alternative sorting method is the same as get_refdata
        """
        object_map = {}
        for i in range(len(y)):
            key = y_label[i][0]
            object_map[key]= y[i]

        y_sorted2 = [object_map[id[0]] for id in labels]
        """

        # Get training and test dataset
        i_train = (datatype == 1).flatten()  # Index for training
        i_test_pt = (datatype == 2).flatten()  # Index for perception test
        i_test_im = (datatype == 3).flatten()  # Index for imagery test
        i_test = i_test_pt + i_test_im

        x_train = x[i_train, :]
        x_test = x[i_test, :]

        y_train = y_sorted[i_train, :]
        y_test = y_sorted[i_test, :]

        # Feature prediction
        pred_y, true_y = feature_prediction(x_train,
                                            y_train,
                                            x_test,
                                            y_test,
                                            modeloption=MODELOPTION)
        print('Model: ', MODELOPTION)

        #pred_y = true_y # suppose ideal regression

        # Separate results for perception and imagery tests
        i_pt = i_test_pt[i_test]  # Index for perception test within test
        i_im = i_test_im[i_test]  # Index for imagery test within test

        pred_y_pt = pred_y[i_pt, :]
        pred_y_im = pred_y[i_im, :]

        true_y_pt = true_y[i_pt, :]
        true_y_im = true_y[i_im, :]

        # Get averaged predicted feature
        test_label_pt = labels[i_test_pt, :].flatten()
        test_label_im = labels[i_test_im, :].flatten()

        pred_y_pt_av, true_y_pt_av, test_label_set_pt \
            = get_averaged_feature(pred_y_pt, true_y_pt, test_label_pt)
        pred_y_im_av, true_y_im_av, test_label_set_im \
            = get_averaged_feature(pred_y_im, true_y_im, test_label_im)

        # Get category averaged features
        catlabels_pt = np.vstack([int(n) for n in test_label_pt
                                  ])  # Category labels (perception test)
        catlabels_im = np.vstack([int(n) for n in test_label_im
                                  ])  # Category labels (imagery test)
        catlabels_set_pt = np.unique(
            catlabels_pt)  # Category label set (perception test)
        catlabels_set_im = np.unique(
            catlabels_im)  # Category label set (imagery test)

        if CAFFEflag:

            yold_catlabels = data_feature.select(
                'CatID')  # Category labels in image features
            ind_catave = (data_feature.select('FeatureType') == 3
                          ).flatten()  # boolean mask of featuretype

            y_catave_pt = get_refdata(yold[ind_catave, :],
                                      yold_catlabels[ind_catave, :],
                                      catlabels_set_pt)
            y_catave_im = get_refdata(yold[ind_catave, :],
                                      yold_catlabels[ind_catave, :],
                                      catlabels_set_im)

        else:
            y_catlabels = data_feature.select(
                'CatID')  # Category labels in image features
            ind_catave = (data_feature.select('FeatureType') == 3
                          ).flatten()  #boolean mask of featuretype

            y_catave_pt = get_refdata(y[ind_catave, :],
                                      y_catlabels[ind_catave, :],
                                      catlabels_set_pt)
            y_catave_im = get_refdata(y[ind_catave, :],
                                      y_catlabels[ind_catave, :],
                                      catlabels_set_im)

        # Prepare result dataframe
        results = pd.DataFrame({
            'subject': [sbj, sbj],
            'roi': [roi, roi],
            'feature': [feat, feat],
            'test_type': ['perception', 'imagery'],
            'true_feature': [true_y_pt, true_y_im],
            'predicted_feature': [pred_y_pt, pred_y_im],
            'test_label': [test_label_pt, test_label_im],
            'test_label_set': [test_label_set_pt, test_label_set_im],
            'true_feature_averaged': [true_y_pt_av, true_y_im_av],
            'predicted_feature_averaged': [pred_y_pt_av, pred_y_im_av],
            'category_label_set': [catlabels_set_pt, catlabels_set_im],
            'category_feature_averaged': [y_catave_pt, y_catave_im]
        })

        # Save results
        makedir_ifnot(os.path.dirname(results_file))
        with open(results_file, 'wb') as f:
            pickle.dump(results, f)

        print('Saved %s' % results_file)

        dist.unlock()
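
The commented-out "alternative sorting method" above spells out what the get_refdata calls are used for here: re-ordering the feature rows so they match the label of each brain sample. A runnable toy version of that dictionary-based alignment:

import numpy as np

y = np.array([[10., 11.], [20., 21.], [30., 31.]])  # toy features, one row per image
y_label = np.array([[1.], [2.], [3.]])              # image IDs of the feature rows
labels = np.array([[2.], [2.], [1.], [3.]])         # image ID of each brain sample

object_map = {y_label[i][0]: y[i] for i in range(len(y))}
y_sorted = np.vstack([object_map[lb[0]] for lb in labels])
print(y_sorted)  # feature rows re-ordered to match the brain-data labels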