def main():
    '''Plot feature-prediction results for AlexNet and ResNet and save a PDF.

    Loads the two pickled result files, draws them on a 4 x 2 grid of axes
    via ``plotresults``, writes the figure into ``results/`` and shows it.
    '''
    analysis_name = 'GenericObjectDecoding'

    # Read from config; not used further inside this function.
    resnet_reindex = config.resnet_reindex
    resnet_true_layers = config.resnet_true_layers

    pickle_name = analysis_name + '.pkl'
    result_files = [
        os.path.join('results-alexnet', pickle_name),
        os.path.join('results-resnet', pickle_name),
    ]
    # The output name is built from config.analysis_name, not from the local
    # analysis_name used for the input pickles.
    output_file_featpred = os.path.join(
        'results', config.analysis_name + '_featureprediction.pdf')

    # Load results -----------------------------------------------------
    loaded = []
    for path in result_files:
        with open(path, 'rb') as fh:
            print('Loading %s' % path)
            loaded.append(pickle.load(fh))
    alexnet_results, resnet_results = loaded

    # Figure settings
    plt.rcParams['font.size'] = 7

    # Plot (feature prediction) ----------------------------------------
    n_rows, n_cols = 4, 2
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(8, 9))
    num_plots = range(n_rows * n_cols)
    plotresults(fig, axes, alexnet_results, resnet_results, num_plots)

    # Save the figure
    makedir_ifnot('results')
    plt.savefig(output_file_featpred, dpi=300)
    print('Saved %s' % output_file_featpred)

    plt.show()
def __save_model(self, output_files):
    '''Write the trained model to disk in the format named by ``self.save_format``.

    Parameters
    ----------
    output_files : list of dict
        Output descriptions. Every entry provides ``'file_path'``. For the
        ``'bdmodel'`` format each entry also provides ``'src'`` (attribute
        name read from ``self.model``), ``'dst'`` (key passed to
        ``save_array``) and ``'sparse'``.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If the format is ``'pickle'`` and ``output_files`` does not contain
        exactly one entry.
    NotImplementedError
        If the format is ``'bdmodel'`` and the model class is not named
        ``FastL2LiR``.
    ValueError
        If ``self.save_format`` is neither ``'pickle'`` nor ``'bdmodel'``.
    '''
    if self.save_format == 'pickle':
        # A pickled model is a single file, so exactly one target is allowed.
        if len(output_files) != 1:
            raise RuntimeError('Invalid output file(s)')
        save_file = output_files[0]['file_path']

        makedir_ifnot(os.path.dirname(save_file))
        with open(save_file, 'wb') as f:
            pickle.dump(self.model, f, protocol=2)
        if self.verbose >= 1:
            print('Saved %s' % save_file)

    elif self.save_format == 'bdmodel':
        if self.model.__class__.__name__ != 'FastL2LiR':
            raise NotImplementedError(
                'BD model current supports only FastL2LiR models.')

        # One array file per entry: the model attribute named by 'src' is
        # stored under the key 'dst'.
        for s in output_files:
            makedir_ifnot(os.path.dirname(s['file_path']))
            save_array(s['file_path'],
                       getattr(self.model, s['src']),
                       key=s['dst'],
                       dtype=self.dtype,
                       sparse=s['sparse'])
            if self.verbose >= 1:
                print('Saved %s' % s['file_path'])

    else:
        raise ValueError('Unsupported output format: %s' % self.save_format)

    return None
# Main ####################################################################### analysis_basename = os.path.splitext(os.path.basename(__file__))[0] # Load data -------------------------------------------------------- print('----------------------------------------') print('Loading data') data_brain = { sbj: bdpy.BData(os.path.join(brain_dir, dat_file)) for sbj, dat_file in subjects_list.items() } data_features = Features(os.path.join(features_dir, network)) # Initialize directories ------------------------------------------- makedir_ifnot(results_dir_root) makedir_ifnot(os.path.join(results_dir_root, 'decoded_features', network)) makedir_ifnot(os.path.join(results_dir_root, 'prediction_accuracy', network)) makedir_ifnot('tmp') # Save runtime information ----------------------------------------- runtime_params = { 'fMRI data': [ os.path.abspath(os.path.join(brain_dir, v)) for v in subjects_list.values() ], 'ROIs': rois_list.keys(), 'feature_decoders': os.path.abspath(models_dir_root), 'target DNN':
from sklearn.metrics import accuracy_score, mean_squared_error # from sklearn import svm import matplotlib.pyplot as plt import numpy as np import pandas as pd from scipy import stats from itertools import product data_dir = '/home/share/data/fmri_shared/eyetracker/YS210108/bdata' os.listdir(data_dir) results_dir = '/home/yqsong/Documents/eye_movement/GOD_eyetracking/results' # Setups analysis_basename = 'eye_position_Linreg_GOD.py' makedir_ifnot(os.path.join(results_dir, analysis_basename)) print('----------------------------------------') print('Loading data') sbj = 'YS210108' method = "MRI-based" cond_type = 5 dir_type = 0 dat_name = 'YS210108_GODeyetracking_eyetracking_volume_native_prep.h5' # data_all = {} # for sbj in subjects: # data_all[sbj] = bdpy.BData(os.path.join(data_dir, subjects[sbj][0])) data_train = bdpy.BData(os.path.join(data_dir, dat_name))
print('Target features: %s' % network) print('Layers: %s' % features_list) print('') # Load data ------------------------------------------------------------------ print('----------------------------------------') print('Loading data') data_brain = { sbj: bdpy.BData(os.path.join(brain_dir, dat_file)) for sbj, dat_file in subjects_list.items() } data_features = Features(os.path.join(features_dir, network)) # Initialize directories ----------------------------------------------------- makedir_ifnot(results_dir_root) makedir_ifnot(os.path.join(results_dir_root, network)) makedir_ifnot('tmp') # Save runtime information --------------------------------------------------- info_dir = os.path.join(results_dir_root, network) runtime_params = { 'learning method': 'PyFastL2LiR', 'regularization parameter': alpha, 'fMRI data': [ os.path.abspath(os.path.join(brain_dir, v)) for v in subjects_list.values() ], 'ROIs':
# Main ####################################################################### analysis_basename = os.path.splitext(os.path.basename(__file__))[0] # Load data -------------------------------------------------------- print('----------------------------------------') print('Loading data') data_brain = { sbj: bdpy.BData(os.path.join(brain_dir, dat_file)) for sbj, dat_file in subjects_list.items() } data_features = Features(os.path.join(features_dir, network)) # Initialize directories ------------------------------------------- makedir_ifnot(os.path.join(results_dir_decoded_features_root, network)) makedir_ifnot(os.path.join(results_dir_decoding_accuracy_root, network)) makedir_ifnot('tmp') # Save runtime information ----------------------------------------- runtime_params = { 'fMRI data': [ os.path.abspath(os.path.join(brain_dir, v)) for v in subjects_list.values() ], 'ROIs': rois_list.keys(), 'feature_decoders': os.path.abspath(models_dir_root), 'target DNN': network,
def main():
    '''Train a neural-code converter for every configured (subject, ROI) pair.

    For each pair, the subject's ROI data (x) and the target subject's data
    for the same ROI (y) are passed to ``train_NCconverter`` together with
    the edge information derived from the surface geometry files.
    '''
    # Data settings ----------------------------------------------------

    # Brain data
    brain_dir = '/home/share/data/fmri_shared/datasets/Deeprecon/fmriprep'
    subjects_list = {'TH': 'TH_ImageNetTraining_volume_native.h5'}
    target_data = {'AM': 'AM_ImageNetTraining_volume_native.h5'}
    rois_list = {'LH': 'VertexLeft'}

    # Key into target_data: the subject whose data is used as y.
    target_subject = 'AM'

    # Only used to build the output directory name (data_rep * 20 + 'p').
    data_rep = 5

    # Model parameters
    lr_rate = 0.01
    epoch = 200
    batch = 1
    gpu_device = 0

    # Results directory
    results_dir_root = './NCconverter_results'

    # Geometry directory
    geometry_dir = './surf'

    script_path = os.path.basename(__file__)
    analysis_basename = os.path.splitext(script_path)[0]

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_brain = {}
    for sbj, dat_file in subjects_list.items():
        data_brain[sbj] = bdpy.BData(os.path.join(brain_dir, dat_file))

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir_root)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj in subjects_list:
        for roi in rois_list:
            print('--------------------')
            print('Subject: %s' % sbj)
            print('Target subject: %s' % target_subject)
            print('ROI: %s' % roi)

            # Setup
            # -----
            subject_name = '%s2%s_%sp' % (sbj, target_subject,
                                          str(data_rep * 20))
            analysis_id = '-'.join([analysis_basename, subject_name, roi])
            results_dir = os.path.join(results_dir_root, analysis_basename,
                                       'model', subject_name, roi, 'model')
            makedir_ifnot(results_dir)

            # Skip pairs that already have a '.done' marker file.
            check_file = os.path.join(results_dir, analysis_id + '.done')
            if os.path.exists(check_file):
                print('%s is already done and skipped' % analysis_id)
                continue

            # Preparing data
            # --------------
            print('Preparing data')
            tic = time()

            # Geometry data
            hemi = roi.lower()
            geofiles = (os.path.join(geometry_dir, '{}.white'.format(hemi)),
                        os.path.join(geometry_dir, '{}.pial'.format(hemi)))
            edges, pseudo = MeshData(geofiles).edge_pseudo()

            # Brain data and image labels of the source subject
            source = data_brain[sbj]
            x_train = source.select(rois_list[roi])
            x_train_labels = source.select('image_index')

            # Brain data and image labels of the target subject
            target_brain_data = bdpy.BData(
                os.path.join(brain_dir, target_data[target_subject]))
            y_train = target_brain_data.select(rois_list[roi])
            y_train_labels = target_brain_data.select('image_index')

            print('Total elapsed time (data preparation): %f' %
                  (time() - tic))

            # Model training
            # --------------
            print('Model training')
            tic = time()

            train_NCconverter(x_train,
                              y_train,
                              x_train_labels,
                              y_train_labels,
                              edges,
                              pseudo,
                              lr_rate=lr_rate,
                              batch=batch,
                              output=results_dir,
                              save_chunk=True,
                              axis_chunk=1,
                              tmp_dir='tmp',
                              comp_id=analysis_id,
                              gpu_device=gpu_device,
                              epoch=epoch)

            print('Total elapsed time (model training): %f' % (time() - tic))

    print('%s finished.' % analysis_basename)
def train_NCconverter(x,
                      y,
                      x_labels,
                      y_labels,
                      edges,
                      pseudo,
                      lr_rate=0.01,
                      batch=64,
                      output='./NCconverter_results.mat',
                      save_chunk=False,
                      axis_chunk=1,
                      tmp_dir='./tmp',
                      comp_id=None,
                      gpu_device=0,
                      epoch=500):
    '''Train a converter from ``x`` to ``y`` and save it under ``output``.

    Both arrays are z-scored along axis 0, sorted by their labels so that
    rows correspond, wrapped in ``GraphData`` and passed to ``train``. The
    trained model is written to ``<output>/NCconverter.pt`` and the
    normalization parameters to ``x_mean.mat``, ``y_mean.mat``,
    ``x_norm.mat`` and ``y_norm.mat`` in the same directory.

    Parameters
    ----------
    x : array
        Input data, indexed as ``x[sample, feature]``.
    y : array
        Target data, 2-D or 4-D; a 4-D array is flattened to 2-D.
    x_labels, y_labels : array
        One label per row of ``x`` / ``y``. The two label sets are sorted
        independently, so they are assumed to contain the same labels --
        TODO confirm with callers.
    edges, pseudo
        Passed through to ``GraphData``.
    lr_rate, batch, epoch
        Passed through to ``train``.
    output : str
        Directory in which all results are written.
    save_chunk : bool
        Must be true; merging of results is not implemented.
    axis_chunk, gpu_device
        Accepted for interface compatibility; not used in this function.
    tmp_dir : str
        Directory used by ``DistComp`` for lock files.
    comp_id : str or None
        Prefix of the lock identifier; ``None`` is treated as an empty prefix.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``y`` is neither 2-D nor 4-D.
    NotImplementedError
        If ``save_chunk`` is false (raised after the results were saved).
    '''
    makedir_ifnot(output)
    makedir_ifnot(tmp_dir)

    if y.ndim == 4:
        # Y has to be strictly (number of samples) x (number of features).
        y = y.reshape((y.shape[0], -1))
    elif y.ndim != 2:
        raise ValueError('Unsupported feature array shape')

    # Preprocessing ----------------------------------------------------------
    print('Preprocessing')
    start_time = time()

    # Normalize X; np.newaxis keeps the statistics as row vectors
    # (added to match Matlab outputs).
    x_mean = np.mean(x, axis=0)[np.newaxis, :]
    x_norm = np.std(x, axis=0, ddof=1)[np.newaxis, :]
    x_normalized = (x - x_mean) / x_norm

    # Normalize Y
    y_mean = np.mean(y, axis=0)[np.newaxis, :]
    y_norm = np.std(y, axis=0, ddof=1)[np.newaxis, :]
    y_normalized = (y - y_mean) / y_norm

    print('Elapsed time: %f' % (time() - start_time))

    # Model training loop ----------------------------------------------------
    # comp_id defaults to None; treat that as "no prefix" instead of failing
    # on None + str.
    comp_id_t = ('' if comp_id is None else comp_id) + 'NCconverter'
    results_dir = output
    result_model = os.path.join(results_dir, 'NCconverter.pt')

    if os.path.exists(result_model):
        print('%s already exists and skipped' % result_model)
        return

    dist = DistComp(lockdir=tmp_dir, comp_id=comp_id_t)
    if dist.islocked():
        print('%s is already running. Skipped.' % comp_id_t)
        return

    dist.lock()
    try:
        start_time = time()
        print('Training')

        # Sort both data sets by label so that row i of X and row i of Y
        # belong to the same label.
        x_index = np.argsort(x_labels.flatten())
        x_labels_aligned = x_labels[x_index]
        y_index = np.argsort(y_labels.flatten())
        y_labels_aligned = y_labels[y_index]

        x_aligned = x_normalized[x_index, :]
        y_aligned = y_normalized[y_index, :]
        print(x_labels_aligned[:20])
        print(y_labels_aligned[:20])

        # Data
        graph = GraphData(x_aligned, y_aligned, edges, pseudo)
        graph_dat_list = graph.data

        # Model training
        model = train(graph_dat_list,
                      lr_rate=lr_rate,
                      epoch=epoch,
                      batch=batch)

        # Save chunk results
        torch.save(model, result_model)
        print('Saved %s' % result_model)
        del y_aligned

        etime = time() - start_time
        print('Elapsed time: %f' % etime)
    finally:
        # Release the lock even when training fails; otherwise later runs
        # would keep reporting this computation as "already running".
        dist.unlock()
    del x_normalized

    # Save results -----------------------------------------------------------
    print('Saving normalization parameters.')
    norm_param = {
        'x_mean': x_mean,
        'y_mean': y_mean,
        'x_norm': x_norm,
        'y_norm': y_norm
    }
    save_targets = [u'x_mean', u'y_mean', u'x_norm', u'y_norm']
    for sv in save_targets:
        save_file = os.path.join(results_dir, sv + '.mat')
        # Existing parameter files are left untouched.
        if not os.path.exists(save_file):
            hdf5storage.savemat(save_file, {sv: norm_param[sv]},
                                format='7.3',
                                oned_as='column',
                                store_python_metadata=True)
            print('Saved %s' % save_file)

    if not save_chunk:
        # Merging results into a single model file is not available.
        raise NotImplementedError('Result merging is not implemented yet.')

    return None
def main():
    '''Run feature prediction for every (subject, ROI, feature) combination.

    For each combination the brain data are split into training, perception
    test and imagery test samples, ``feature_prediction`` is run, and the
    predictions (raw, label-averaged and with category-averaged reference
    features) are stored as a two-row DataFrame pickled into the results
    directory.
    '''
    # Settings ---------------------------------------------------------

    # Data settings
    subjects = config.subjects
    rois = config.rois
    num_voxel = config.num_voxel

    image_feature = config.image_feature_file
    features = config.features

    n_iter = 200

    results_dir = config.results_dir

    # Misc settings
    analysis_basename = os.path.basename(__file__)

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_all = {}
    for sbj in subjects:
        files = subjects[sbj]
        if len(files) == 1:
            data_all[sbj] = bdpy.BData(files[0])
            continue
        # Several files: concatenate them into one dataset
        suc_cols = ['Run', 'Block']
        data_all[sbj] = concat_dataset([bdpy.BData(f) for f in files],
                                       successive=suc_cols)

    data_feature = bdpy.BData(image_feature)

    # Add any additional processing to data here

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    result_columns = [
        'subject', 'roi', 'feature', 'test_type', 'true_feature',
        'predicted_feature', 'test_label', 'test_label_set',
        'true_feature_averaged', 'predicted_feature_averaged',
        'category_label_set', 'category_feature_averaged'
    ]

    for sbj, roi, feat in product(subjects, rois, features):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('ROI:        %s' % roi)
        print('Num voxels: %d' % num_voxel[roi])
        print('Feature:    %s' % feat)

        # Distributed computation
        analysis_id = '-'.join([analysis_basename, sbj, roi, feat])
        results_file = os.path.join(results_dir, analysis_id + '.pkl')

        if os.path.exists(results_file):
            print('%s is already done. Skipped.' % analysis_id)
            continue

        dist = DistComp(lockdir='tmp', comp_id=analysis_id)
        if dist.islocked():
            print('%s is already running. Skipped.' % analysis_id)
            continue

        dist.lock()

        # Prepare data
        print('Preparing data')
        dat = data_all[sbj]

        x = dat.select(rois[roi])            # Brain data
        datatype = dat.select('DataType')    # Data type
        labels = dat.select('stimulus_id')   # Image labels in brain data

        y = data_feature.select(feat)             # Image features
        y_label = data_feature.select('ImageID')  # Image labels

        # For quick demo, reduce the number of units from 1000 to 100
        y = y[:, :100]

        # Image features corresponding to brain data
        y_sorted = get_refdata(y, y_label, labels)

        # Sample masks: 1 = training, 2 = perception test, 3 = imagery test
        i_train = (datatype == 1).flatten()
        i_test_pt = (datatype == 2).flatten()
        i_test_im = (datatype == 3).flatten()
        i_test = np.logical_or(i_test_pt, i_test_im)

        # Feature prediction
        pred_y, true_y = feature_prediction(x[i_train, :],
                                            y_sorted[i_train, :],
                                            x[i_test, :],
                                            y_sorted[i_test, :],
                                            n_voxel=num_voxel[roi],
                                            n_iter=n_iter)

        # Category-averaged reference features (rows with FeatureType == 3)
        y_catlabels = data_feature.select('CatID')
        ind_catave = (data_feature.select('FeatureType') == 3).flatten()

        # Post-process perception and imagery tests in the same way
        rows = []
        for test_type, i_type in (('perception', i_test_pt),
                                  ('imagery', i_test_im)):
            within_test = i_type[i_test]  # Index within the test samples
            pred_part = pred_y[within_test, :]
            true_part = true_y[within_test, :]

            # Averaged predicted feature
            test_label = labels[i_type, :].flatten()
            pred_av, true_av, test_label_set = get_averaged_feature(
                pred_part, true_part, test_label)

            # Category averaged features
            catlabels = np.vstack([int(n) for n in test_label])
            catlabels_set = np.unique(catlabels)
            y_catave = get_refdata(y[ind_catave, :],
                                   y_catlabels[ind_catave, :],
                                   catlabels_set)

            rows.append({
                'subject': sbj,
                'roi': roi,
                'feature': feat,
                'test_type': test_type,
                'true_feature': true_part,
                'predicted_feature': pred_part,
                'test_label': test_label,
                'test_label_set': test_label_set,
                'true_feature_averaged': true_av,
                'predicted_feature_averaged': pred_av,
                'category_label_set': catlabels_set,
                'category_feature_averaged': y_catave
            })

        # Prepare result dataframe (one row per test type)
        results = pd.DataFrame(
            {col: [row[col] for row in rows] for col in result_columns})

        # Save results
        makedir_ifnot(os.path.dirname(results_file))
        with open(results_file, 'wb') as f:
            pickle.dump(results, f)

        print('Saved %s' % results_file)

        dist.unlock()
'feature': [feat, feat], 'test_type': ['perception', 'imagery'], 'true_feature': [true_y_pt, true_y_im], 'predicted_feature': [pred_y_pt, pred_y_im], 'test_label': [test_label_pt, test_label_im], 'test_label_set': [test_label_set_pt, test_label_set_im], 'true_feature_averaged': [true_y_pt_av, true_y_im_av], 'predicted_feature_averaged': [pred_y_pt_av, pred_y_im_av], 'category_label_set': [catlabels_set_pt, catlabels_set_im], 'category_feature_averaged': [y_catave_pt, y_catave_im] }) # print('catlabels_set_pt size',catlabels_set_pt.shape) # print('catlabels_set_im size',catlabels_set_im.shape) # print('true_y_pt_av size',true_y_pt_av.shape) # print('true_y_im_av size',true_y_im_av.shape) # print('pred_y_pt_av size',pred_y_pt_av.shape) # print('pred_y_im_av size',pred_y_im_av.shape) # print('y_catave_pt size',y_catave_pt.shape) # print('y_catave_im size',y_catave_im.shape) if pca: res = dir_path + '/results/feature-decoding-pca/' + subject + '_' + roi + '_' + feat + '_' + 'decode_results.pkl' else: res = dir_path + '/results/feature-decoding/' + subject + '_' + roi + '_' + feat + '_' + 'decode_results.pkl' makedir_ifnot(os.path.dirname(res)) with open(res, 'wb') as f: pickle.dump(results, f) print('Saved %s' % res)
def run(self):
    '''Run training.

    Fits ``self.model`` to ``self.X`` and ``self.Y``, one chunk of ``Y`` at
    a time when chunking is active, and saves each fitted model through
    ``__save_model``. Chunks whose outputs already exist, or that cannot be
    locked through the distributed-computation backend, are skipped. Once
    all outputs exist and ``self.save_path`` is a directory, the
    computation status is recorded in ``info.yaml`` inside it.

    Returns
    -------
    The object held in ``self.model``.
    '''
    if self.dtype is not None:
        self.X = self.X.astype(self.dtype)
        self.Y = self.Y.astype(self.dtype)

    # Chunking: only when a chunk axis is given and Y is not 2-D
    self.__chunking = self.chunk_axis is not None and self.Y.ndim != 2

    if self.__chunking:
        chunk_index = range(self.Y.shape[self.chunk_axis])
    else:
        chunk_index = [None]

    # Distributed computation setup
    if self.distcomp is None:
        # self.id may be None (the chunk loop below handles that case), so
        # fall back to a fixed database name instead of failing on
        # None + str.
        db_name = 'distcomp.db' if self.id is None else self.id + '.db'
        dist_db_path = os.path.join(os.path.dirname(self.save_path), db_name)
        makedir_ifnot(os.path.dirname(dist_db_path))
        distcomp = DistComp(backend='sqlite3', db_path=dist_db_path)
    else:
        distcomp = self.distcomp

    # X normalization
    if self.X_normalize is not None:
        print('Normalizing X')
        self.X = (self.X - self.X_normalize['mean']) / self.X_normalize['std']
        # Infinite values produced by the division are replaced with 0.
        self.X[np.isinf(self.X)] = 0

    # Model training loop
    time_elapsed = []
    output_files_all = []

    for i, i_chunk in enumerate(chunk_index):
        loop_start_time = time()

        if self.id is None:
            training_id_chunk = 'chunk%08d' % i
        else:
            training_id_chunk = '%s-chunk%08d' % (self.id, i)

        # Output file setting
        output_files = self.__output_file(chunk=i)
        output_files_all.extend(output_files)

        # Check chunk results
        if self.__is_done(output_files):
            if self.verbose >= 1:
                print('%s is already done. Skipped.' % training_id_chunk)
            continue

        # Parallel computation setup
        # DistComp.lock() returns True if the computation is not locked
        # and successfully locked.
        if not distcomp.lock(training_id_chunk):
            if self.verbose >= 1:
                print('%s is already running. Skipped.' % training_id_chunk)
            continue

        if self.__chunking:
            # Passing a list keeps the chunk axis (with length 1).
            Y = np.take(self.Y, [i_chunk], axis=self.chunk_axis)
        else:
            Y = self.Y

        # Y preprocessing
        if self.Y_normalize is not None:
            print('Normalizing Y')
            if self.__chunking:
                y_mean = np.take(self.Y_normalize['mean'], [i_chunk],
                                 axis=self.chunk_axis)
                y_norm = np.take(self.Y_normalize['std'], [i_chunk],
                                 axis=self.chunk_axis)
            else:
                y_mean = self.Y_normalize['mean']
                y_norm = self.Y_normalize['std']
            Y = (Y - y_mean) / y_norm
            Y[np.isinf(Y)] = 0

        if self.Y_sort is not None:
            print('Sorting Y')
            Y = Y[self.Y_sort['index'], :]

        # Training
        if self.verbose >= 1:
            print('Training: %s' % training_id_chunk)

        self.model.fit(self.X, Y, **self.model_parameters)

        # Save models
        self.__save_model(output_files)

        etime = time() - loop_start_time
        time_elapsed.append(etime)
        if self.verbose >= 1:
            print('Elapsed time: %f' % etime)

        distcomp.unlock(training_id_chunk)

        if len(chunk_index) > 1:
            # Estimate from the chunks computed so far in this call.
            etime_ave = np.mean(time_elapsed)
            est_time_left = etime_ave * (len(chunk_index) - (i + 1))
            est_time_end = time() + est_time_left
            print('')
            print('Average computation time/chunk: %f s' % etime_ave)
            print('Estimated remaining time:       %f s' % est_time_left)
            print('Estimated computation end time: %s' %
                  datetime.fromtimestamp(est_time_end).strftime(
                      '%Y-%m-%d %H:%M:%S'))
            print('')

    # Check outputs and add information
    if self.__is_done(output_files_all):
        if os.path.isdir(self.save_path):
            info_file = os.path.join(self.save_path, 'info.yaml')
            if os.path.exists(info_file):
                # An existing file that parses to None is re-read until it
                # yields content.
                while True:
                    with open(info_file, 'r') as f:
                        # safe_load: yaml.load() without an explicit Loader
                        # is rejected by current PyYAML and was unsafe on
                        # older versions.
                        info = yaml.safe_load(f)
                    if info is None:
                        print('Failed to load info from %s. Retrying ...'
                              % info_file)
                        sleep(1)
                    else:
                        print('Loaded info from %s' % info_file)
                        break
            else:
                info = {}

            if '_status' not in info:
                info.update({'_status': {}})
            info['_status'].update({
                'computation_id': self.id,
                'computation_status': 'done'
            })
            with open(info_file, 'w') as f:
                f.write(yaml.dump(info, default_flow_style=False))

    return self.model
def main():
    '''Decode DNN features from converted brain data and score the predictions.

    For every (subject, ROI, layer) combination the test brain data are
    averaged per image label, converted with ``test_ncconverter``, and fed
    to the pretrained decoders of the target subject via
    ``test_fastl2lir_div``. The predicted features are saved per image, and
    the per-unit correlation between predicted and true features is saved as
    ``accuracy.mat``.
    '''
    # Read settings ----------------------------------------------------

    # Brain data
    brain_dir = '/home/share/data/fmri_shared/datasets/Deeprecon/fmriprep'
    subjects_list = {'TH': 'TH_ImageNetTest_volume_native.h5'}

    rois_list = {
        'VC': 'ROI_VC = 1',
    }

    # Image features
    features_dir = '/home/ho/Documents/brain-decoding-examples/python/feature-prediction/data/features/ImageNetTest'
    network = 'caffe/VGG_ILSVRC_19_layers'
    # Full layer list; NOTE: it is overridden by the assignment right below,
    # so only fc8, fc7 and fc6 are processed.
    features_list = [
        'conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2',
        'conv3_3', 'conv3_4', 'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4',
        'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4', 'fc6', 'fc7', 'fc8'
    ][::-1]
    features_list = ['fc6', 'fc7', 'fc8'][::-1]

    # Subject whose pretrained decoders are applied to the converted data
    target_subject = 'AM'

    # Lambda and data_rep are only used to build the converter directory name
    Lambda = 0.1
    data_rep = 5

    # Model parameters
    gpu_device = 1

    # Results directory
    results_dir_root = './NCconverter_results'

    # Converter models
    nc_models_dir_root = os.path.join(results_dir_root,
                                      'pytorch_converter_training', 'model')
    selected_converter_type = 'conv5'

    # Misc settings
    analysis_basename = os.path.splitext(os.path.basename(__file__))[0]

    # Pretrained model metadata
    pre_results_dir_root = '/home/share/data/contents_shared/ImageNetTraining/derivatives/feature_decoders'
    pre_analysis_basename = 'deeprecon_fmriprep_rep5_500voxel_allunits_fastl2lir_alpha100'
    pre_models_dir_root = os.path.join(pre_results_dir_root,
                                       pre_analysis_basename)

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_brain = {
        sbj: bdpy.BData(os.path.join(brain_dir, dat_file))
        for sbj, dat_file in subjects_list.items()
    }
    data_features = Features(os.path.join(features_dir, network))

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir_root)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj, roi, feat in product(subjects_list, rois_list, features_list):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('ROI:        %s' % roi)

        # Distributed computation setup
        # -----------------------------
        subject_name = sbj + '2' + target_subject + '_' + str(
            data_rep * 20) + 'p' + '_lambda' + str(Lambda)
        analysis_id = analysis_basename + '-' + subject_name + '-' + roi + '-' + feat
        results_dir_prediction = os.path.join(results_dir_root,
                                              analysis_basename,
                                              'decoded_features', network,
                                              feat, subject_name, roi)
        results_dir_accuracy = os.path.join(results_dir_root,
                                            analysis_basename,
                                            'prediction_accuracy', network,
                                            feat, subject_name, roi)

        # The existence of the prediction directory marks the analysis as done.
        if os.path.exists(results_dir_prediction):
            print('%s is already done. Skipped.' % analysis_id)
            continue

        dist = DistComp(lockdir='tmp', comp_id=analysis_id)
        if dist.islocked_lock():
            print('%s is already running. Skipped.' % analysis_id)
            continue

        # Preparing data
        # --------------
        print('Preparing data')
        start_time = time()

        # Brain data and image labels in the brain data
        x_test = data_brain[sbj].select(rois_list[roi])
        x_test_labels = data_brain[sbj].select('image_index')

        # Target features and image labels (file names)
        y = data_features.get_features(feat)
        y_labels = data_features.index
        image_names = data_features.labels

        # Averaging brain data over samples sharing the same image label
        x_test_labels_unique = np.unique(x_test_labels)
        x_test_averaged = np.vstack([
            np.mean(x_test[(x_test_labels == lb).flatten(), :], axis=0)
            for lb in x_test_labels_unique
        ])

        print('Total elapsed time (data preparation): %f' %
              (time() - start_time))

        # Convert x_test_averaged
        nc_models_dir = os.path.join(nc_models_dir_root, subject_name, roi,
                                     'model')
        x_test_averaged = test_ncconverter(nc_models_dir, x_test_averaged,
                                           gpu_device)

        # Prediction
        # ----------
        print('Prediction')
        start_time = time()
        y_pred = test_fastl2lir_div(
            os.path.join(pre_models_dir_root, network, feat, target_subject,
                         roi, 'model'), x_test_averaged)
        print('Total elapsed time (prediction): %f' % (time() - start_time))

        # Calculate prediction accuracy
        # -----------------------------
        print('Prediction accuracy')
        start_time = time()

        y_pred_2d = y_pred.reshape([y_pred.shape[0], -1])
        y_true_2d = y.reshape([y.shape[0], -1])
        # Pick the true features matching the unique brain-data labels
        y_true_2d = get_refdata(y_true_2d, y_labels, x_test_labels_unique)

        # Correlation across samples, computed separately for every unit
        n_units = y_true_2d.shape[1]
        accuracy = np.array([
            np.corrcoef(y_pred_2d[:, i].flatten(),
                        y_true_2d[:, i].flatten())[0, 1]
            for i in range(n_units)
        ])
        accuracy = accuracy.reshape((1, ) + y_pred.shape[1:])

        print('Mean prediction accuracy: {}'.format(np.mean(accuracy)))
        print('Total elapsed time (prediction accuracy): %f' %
              (time() - start_time))

        # Save results
        # ------------
        print('Saving results')
        makedir_ifnot(results_dir_prediction)
        makedir_ifnot(results_dir_accuracy)
        start_time = time()

        # Predicted features
        for i, lb in enumerate(x_test_labels_unique):
            # Keep a leading axis so the saved array is 1 x M x N x ...
            # (a separate name avoids overwriting the layer name `feat`).
            feat_pred = np.array([y_pred[i, ]])

            # Image labels are one-based image indexes
            image_filename = image_names[int(lb) - 1]

            # Save file name
            save_file = os.path.join(results_dir_prediction,
                                     '%s.mat' % image_filename)

            # Save
            hdf5storage.savemat(save_file, {u'feat': feat_pred},
                                format='7.3',
                                oned_as='column',
                                store_python_metadata=True)

        print('Saved %s' % results_dir_prediction)

        # Prediction accuracy
        save_file = os.path.join(results_dir_accuracy, 'accuracy.mat')
        hdf5storage.savemat(save_file, {u'accuracy': accuracy},
                            format='7.3',
                            oned_as='column',
                            store_python_metadata=True)
        print('Saved %s' % save_file)

        print('Elapsed time (saving results): %f' % (time() - start_time))

        dist.unlock()

    print('%s finished.' % analysis_basename)
def main():
    '''Run feature prediction for every (subject, ROI, feature) combination.

    Behaviour depends on the module-level flags ``CAFFEflag`` and
    ``cboflag`` and on ``MODELOPTION``, none of which are defined in this
    function. With ``CAFFEflag`` set, the features used for training and
    testing come from a pickle file, while the category-averaged reference
    features are still taken from the BData feature file.

    NOTE(review): the source had lost its indentation; the nesting of the
    ``cboflag == False`` branch inside the loop is reconstructed -- confirm
    against the original file.
    '''
    # Settings ---------------------------------------------------------

    # Data settings
    subjects = config.subjects
    rois = config.rois
    num_voxel = config.num_voxel

    # Pickled feature file, only defined (and only used) when CAFFEflag is set
    if CAFFEflag:
        if cboflag:
            image_feature1 = '/home/akpapadim/Desktop/RemoteThesis/cbof-kamitani/data/ImageFeatures_caffe_cbof.pkl'
        else:
            image_feature1 = '/home/akpapadim/Desktop/RemoteThesis/cbof-kamitani/data/ImageFeatures_caffe.pkl'

    image_feature = config.image_feature_file
    features = config.features

    results_dir = config.results_dir

    # Misc settings
    analysis_basename = os.path.basename(__file__)

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_all = {}
    for sbj in subjects:
        if len(subjects[sbj]) == 1:
            data_all[sbj] = bdpy.BData(subjects[sbj][0])
        else:
            # Concatenate data
            suc_cols = ['Run', 'Block']
            data_all[sbj] = concat_dataset(
                [bdpy.BData(f) for f in subjects[sbj]], successive=suc_cols)

    # The BData feature file is always loaded, also when CAFFEflag is set
    data_feature = bdpy.BData(image_feature)

    # check which features file to open
    if CAFFEflag:
        data_feature1 = pd.read_pickle(image_feature1)
        print('From file ', image_feature1)
    elif cboflag == False:
        print('From file ', image_feature)

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj, roi, feat in product(subjects, rois, features):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('ROI:        %s' % roi)
        print('Num voxels: %d' % num_voxel[roi])
        print('Feature:    %s' % feat)

        # Distributed computation
        analysis_id = analysis_basename + '-' + sbj + '-' + roi + '-' + feat
        results_file = os.path.join(results_dir, analysis_id + '.pkl')

        # An existing result file marks the combination as done
        if os.path.exists(results_file):
            print('%s is already done. Skipped.' % analysis_id)
            continue

        dist = DistComp(lockdir='tmp', comp_id=analysis_id)
        if dist.islocked():
            print('%s is already running. Skipped.' % analysis_id)
            continue

        dist.lock()

        # Prepare data
        print('Preparing data')
        dat = data_all[sbj]

        x = dat.select(rois[roi])  # Brain data
        datatype = dat.select('DataType')  # Data type
        labels = dat.select('Label')  # Image labels in brain data

        if CAFFEflag:
            # yold keeps the BData features; it is used further below for
            # the category-averaged reference features
            yold = data_feature.select(feat)  # Image features
            y = data_feature1[feat]
            y_label = data_feature1['ImageID']
            if cboflag == False:
                # Stack the per-image entries of y into one 2-D array
                y = np.concatenate(y).reshape(
                    y.shape[0], y[0].shape[0])  # reshape to 1250, 1000
                y_label = y_label.reshape(y.shape[0], 1)
        else:
            y = data_feature.select(feat)  # Image features
            y_label = data_feature.select('ImageID')  # Image labels

        y_sorted = get_refdata(
            y, y_label, labels)  # Image features corresponding to brain data

        # alternative sorting method is the same as get_refdata
        # (the string below is an unused expression kept from the original)
        """ object_map = {} for i in range(len(y)): key = y_label[i][0] object_map[key]= y[i] y_sorted2 = [object_map[id[0]] for id in labels] """

        # Get training and test dataset
        i_train = (datatype == 1).flatten()  # Index for training
        i_test_pt = (datatype == 2).flatten()  # Index for perception test
        i_test_im = (datatype == 3).flatten()  # Index for imagery test
        # Adds the two masks to select all test samples
        i_test = i_test_pt + i_test_im

        x_train = x[i_train, :]
        x_test = x[i_test, :]

        y_train = y_sorted[i_train, :]
        y_test = y_sorted[i_test, :]

        # Feature prediction
        pred_y, true_y = feature_prediction(x_train,
                                            y_train,
                                            x_test,
                                            y_test,
                                            modeloption=MODELOPTION)
        print('Model: ', MODELOPTION)
        #pred_y = true_y # suppose ideal regression

        # Separate results for perception and imagery tests
        i_pt = i_test_pt[i_test]  # Index for perception test within test
        i_im = i_test_im[i_test]  # Index for imagery test within test

        pred_y_pt = pred_y[i_pt, :]
        pred_y_im = pred_y[i_im, :]

        true_y_pt = true_y[i_pt, :]
        true_y_im = true_y[i_im, :]

        # Get averaged predicted feature
        test_label_pt = labels[i_test_pt, :].flatten()
        test_label_im = labels[i_test_im, :].flatten()

        pred_y_pt_av, true_y_pt_av, test_label_set_pt \
            = get_averaged_feature(pred_y_pt, true_y_pt, test_label_pt)
        pred_y_im_av, true_y_im_av, test_label_set_im \
            = get_averaged_feature(pred_y_im, true_y_im, test_label_im)

        # Get category averaged features
        # int() truncates each test label to its integer part
        catlabels_pt = np.vstack([int(n) for n in test_label_pt
                                  ])  # Category labels (perception test)
        catlabels_im = np.vstack([int(n) for n in test_label_im
                                  ])  # Category labels (imagery test)
        catlabels_set_pt = np.unique(
            catlabels_pt)  # Category label set (perception test)
        catlabels_set_im = np.unique(
            catlabels_im)  # Category label set (imagery test)

        if CAFFEflag:
            # Reference features come from the BData file (yold), not from
            # the pickled features used for prediction
            yold_catlabels = data_feature.select(
                'CatID')  # Category labels in image features
            ind_catave = (data_feature.select('FeatureType') == 3
                          ).flatten()  # boolean mask of featuretype

            y_catave_pt = get_refdata(yold[ind_catave, :],
                                      yold_catlabels[ind_catave, :],
                                      catlabels_set_pt)
            y_catave_im = get_refdata(yold[ind_catave, :],
                                      yold_catlabels[ind_catave, :],
                                      catlabels_set_im)
        else:
            y_catlabels = data_feature.select(
                'CatID')  # Category labels in image features
            ind_catave = (data_feature.select('FeatureType') == 3
                          ).flatten()  # boolean mask of featuretype

            y_catave_pt = get_refdata(y[ind_catave, :],
                                      y_catlabels[ind_catave, :],
                                      catlabels_set_pt)
            y_catave_im = get_refdata(y[ind_catave, :],
                                      y_catlabels[ind_catave, :],
                                      catlabels_set_im)

        # Prepare result dataframe (row 0: perception, row 1: imagery)
        results = pd.DataFrame({
            'subject': [sbj, sbj],
            'roi': [roi, roi],
            'feature': [feat, feat],
            'test_type': ['perception', 'imagery'],
            'true_feature': [true_y_pt, true_y_im],
            'predicted_feature': [pred_y_pt, pred_y_im],
            'test_label': [test_label_pt, test_label_im],
            'test_label_set': [test_label_set_pt, test_label_set_im],
            'true_feature_averaged': [true_y_pt_av, true_y_im_av],
            'predicted_feature_averaged': [pred_y_pt_av, pred_y_im_av],
            'category_label_set': [catlabels_set_pt, catlabels_set_im],
            'category_feature_averaged': [y_catave_pt, y_catave_im]
        })

        # Save results
        makedir_ifnot(os.path.dirname(results_file))
        with open(results_file, 'wb') as f:
            pickle.dump(results, f)

        print('Saved %s' % results_file)

        dist.unlock()