def generate(mod, run, language, textgrid, overwrite=False):
    """Return the GloVe raw-features of one run, from disk when possible.

    Args:
        mod: Model object; must provide ``generate(run, language, textgrid)``
            and a ``param`` mapping holding ``'embedding-size'``.
        run: Path of the run file. The run name is the last ``_``-separated
            token of its basename once the extension is removed.
        language: Language name, used as a folder and in the CSV file name.
        textgrid: Object whose ``offsets.count()`` gives the number of rows
            kept (presumably a DataFrame of onsets/offsets -- TODO confirm
            against the caller).
        overwrite: If truthy, regenerate even when the CSV already exists.

    Returns:
        A 3-tuple ``(raw_features, columns2retrieve, save_all)``: the
        features truncated to ``textgrid.offsets.count()`` rows, the list of
        ``'embedding-<i>'`` column names, and the CSV path when the features
        were regenerated (``None`` when they were read from disk).
    """
    model_name = 'glove_model'
    # The run name is the last '_'-separated token of the file stem.
    run_name = os.path.basename(os.path.splitext(run)[0]).split('_')[-1]

    # Make sure the destination folder exists before building the CSV path.
    subfolders = ('fMRI/raw-features', language, model_name)
    check_folder(os.path.join(Paths().path2derivatives, *subfolders))
    csv_name = 'raw-features_{}_{}_{}.csv'.format(language, model_name,
                                                  run_name)
    path = os.path.join(Paths().path2derivatives, *subfolders, csv_name)

    if os.path.exists(path) and not overwrite:
        # An existing CSV is reused, so nothing is flagged for saving.
        raw_features = pd.read_csv(path)
        save_all = None
    else:
        raw_features = mod.generate(run, language, textgrid)
        save_all = path

    # One column per embedding dimension.
    embedding_size = mod.param['embedding-size']
    columns2retrieve = list(map('embedding-{}'.format, range(embedding_size)))

    n_rows = textgrid.offsets.count()
    return raw_features[:n_rows], columns2retrieve, save_all
def generate(mod, run, language, textgrid, overwrite=False):
    """Return the word-rate raw-features of one run, from disk when possible.

    The ``mod`` argument is not used: it is rebound to a
    ``model.Wordrate`` built from ``content_words``, ``function_words`` and
    ``wordrate``.

    Args:
        mod: Ignored (replaced inside the function).
        run: Path of the run file. The run name is the last ``_``-separated
            token of its basename once the extension is removed.
        language: Language name, used as a folder, in the CSV file name and
            when building the model.
        textgrid: Passed to ``mod.generate``; its ``offsets.count()`` gives
            the number of rows kept.
        overwrite: If truthy, regenerate even when the CSV already exists.

    Returns:
        A 3-tuple ``(raw_features, columns2retrieve, save_all)``: the
        features truncated to ``textgrid.offsets.count()`` rows, the names
        of the functions of a ``Wordrate`` model restricted to ``wordrate``,
        and the CSV path when the features were regenerated (``None`` when
        they were read from disk).
    """
    from .WORDRATE import model
    from .WORDRATE.utils import wordrate, function_words, content_words

    model_name = 'wordrate_model'
    # The run name is the last '_'-separated token of the file stem.
    run_name = os.path.basename(os.path.splitext(run)[0]).split('_')[-1]

    # Features are always generated with every word-rate function ...
    mod = model.Wordrate([content_words, function_words, wordrate], language)

    subfolders = ('fMRI/raw-features', language, model_name)
    check_folder(os.path.join(Paths().path2derivatives, *subfolders))
    csv_name = 'raw-features_{}_{}_{}.csv'.format(language, model_name,
                                                  run_name)
    path = os.path.join(Paths().path2derivatives, *subfolders, csv_name)

    # ... but only the studied parameters are reported as columns of interest.
    parameters = sorted([wordrate])

    if os.path.exists(path) and not overwrite:
        # An existing CSV is reused, so nothing is flagged for saving.
        raw_features = pd.read_csv(path)
        save_all = None
    else:
        raw_features = mod.generate(run, language, textgrid)
        save_all = path

    studied_model = model.Wordrate(parameters, language)
    columns2retrieve = [func.__name__ for func in studied_model.functions]

    n_rows = textgrid.offsets.count()
    return raw_features[:n_rows], columns2retrieve, save_all
def generate(mod, run, language, textgrid, overwrite=False):
    """Return the RMS raw-features of one run, from disk when possible.

    The ``mod`` argument is not used: it is rebound to a
    ``model.EnergySpectrum`` built from ``rms``. ``model`` and ``rms`` are
    looked up in the enclosing module.

    Args:
        mod: Ignored (replaced inside the function).
        run: Path of the run file. The run name is the last ``_``-separated
            token of its basename once the extension is removed.
        language: Language name, used as a folder, in the CSV file name and
            when building the model.
        textgrid: Passed to ``mod.generate``; its ``offsets.count()`` gives
            the number of rows kept.
        overwrite: If truthy, regenerate even when the CSV already exists.

    Returns:
        A 3-tuple ``(raw_features, columns2retrieve, save_all)``: the
        features truncated to ``textgrid.offsets.count()`` rows, the names
        of the functions of an ``EnergySpectrum`` model built on the studied
        parameters, and the CSV path when the features were regenerated
        (``None`` when they were read from disk).
    """
    model_name = 'rms_model'
    # The run name is the last '_'-separated token of the file stem.
    run_name = os.path.basename(os.path.splitext(run)[0]).split('_')[-1]

    mod = model.EnergySpectrum([rms], language)

    subfolders = ('fMRI/raw-features', language, model_name)
    check_folder(os.path.join(Paths().path2derivatives, *subfolders))
    csv_name = 'raw-features_{}_{}_{}.csv'.format(language, model_name,
                                                  run_name)
    path = os.path.join(Paths().path2derivatives, *subfolders, csv_name)

    # Parameters whose columns are reported as data of interest.
    parameters = sorted([rms])

    if os.path.exists(path) and not overwrite:
        # An existing CSV is reused, so nothing is flagged for saving.
        raw_features = pd.read_csv(path)
        save_all = None
    else:
        raw_features = mod.generate(run, language, textgrid,
                                    slice_period=10e-3)
        save_all = path

    studied_model = model.EnergySpectrum(parameters, language)
    columns2retrieve = [func.__name__ for func in studied_model.functions]

    n_rows = textgrid.offsets.count()
    return raw_features[:n_rows], columns2retrieve, save_all
def generate(mod, run, language, textgrid, overwrite=False):
    """Return the bottom-up raw-features of one run, from disk when possible.

    Args:
        mod: Model object; must provide ``generate(run, language, textgrid)``.
        run: Path of the run file. The run name is the last ``_``-separated
            token of its basename once the extension is removed.
        language: Language name, used in folder and file names.
        textgrid: Forwarded to ``mod.generate`` only. The number of rows
            returned is NOT derived from it but from the BOTTOMUP
            onsets-offsets CSV of the run (see below).
        overwrite: If truthy, regenerate even when the CSV already exists.

    Returns:
        A 3-tuple ``(raw_features, columns2retrieve, save_all)``: the
        features truncated to the ``offsets`` count of the run's BOTTOMUP
        onsets-offsets file, the list ``['bottomup']``, and the CSV path when
        the features were regenerated (``None`` when they were read from
        disk).
    """
    model_name = 'bottomup_model'
    # The run name is the last '_'-separated token of the file stem.
    run_name = os.path.basename(os.path.splitext(run)[0]).split('_')[-1]

    # A single settings instance serves both the derivatives and data roots.
    project_paths = Paths()

    features_dir = os.path.join(project_paths.path2derivatives,
                                'fMRI/raw-features', language, model_name)
    check_folder(features_dir)
    path = os.path.join(
        features_dir,
        'raw-features_{}_{}_{}.csv'.format(language, model_name, run_name))

    #### generating raw-features ####
    if os.path.exists(path) and not overwrite:
        raw_features = pd.read_csv(path)
        save_all = None
    else:
        raw_features = mod.generate(run, language, textgrid)
        save_all = path

    #### Retrieving data of interest ####
    columns2retrieve = ['bottomup']

    # The row count comes from the BOTTOMUP onsets-offsets file of this run;
    # a distinct name is used so the ``textgrid`` argument is not shadowed.
    onsets_offsets = pd.read_csv(
        os.path.join(
            project_paths.path2data, 'text', language, 'BOTTOMUP',
            'onsets-offsets',
            'text_{}_BOTTOMUP_onsets-offsets_{}.csv'.format(language,
                                                            run_name)))
    n_rows = onsets_offsets.offsets.count()
    return raw_features[:n_rows], columns2retrieve, save_all
def generate(mod, run, language, textgrid, overwrite=False):
    """Return the MFCC raw-features of one run, from disk when possible.

    Args:
        mod: Model object; must provide ``generate(run, language)`` and a
            ``num_cepstral`` attribute.
        run: Path of the run file. The run name is the last ``_``-separated
            token of its basename once the extension is removed.
        language: Language name, used in folder and file names.
        textgrid: Not used by this function; kept so the signature is
            unchanged. The number of rows returned is derived from the MFCC
            onsets-offsets CSV of the run instead.
        overwrite: If truthy, regenerate even when the CSV already exists.

    Returns:
        A 3-tuple ``(raw_features, columns2retrieve, save_all)``: the
        features truncated to the ``offsets`` count of the run's MFCC
        onsets-offsets file, the column names ``mfcc #k``, ``mfcc' #k`` and
        ``mfcc'' #k`` for each ``k`` below ``mod.num_cepstral``, and the CSV
        path when the features were regenerated (``None`` when they were
        read from disk).
    """
    model_name = 'mfcc_model'
    # The run name is the last '_'-separated token of the file stem.
    run_name = os.path.basename(os.path.splitext(run)[0]).split('_')[-1]

    # A single settings instance serves both the derivatives and data roots.
    project_paths = Paths()

    features_dir = os.path.join(project_paths.path2derivatives,
                                'fMRI/raw-features', language, model_name)
    check_folder(features_dir)
    path = os.path.join(
        features_dir,
        'raw-features_{}_{}_{}.csv'.format(language, model_name, run_name))

    #### generating raw-features ####
    if os.path.exists(path) and not overwrite:
        raw_features = pd.read_csv(path)
        save_all = None
    else:
        raw_features = mod.generate(run, language)
        save_all = path

    #### Retrieving data of interest ####
    # Three consecutive columns per coefficient index, with the suffixes
    # '', "'" and "''" in that order.
    columns2retrieve = [
        "mfcc{} #{}".format(suffix, coefficient)
        for coefficient in range(mod.num_cepstral)
        for suffix in ("", "'", "''")
    ]

    # The row count comes from the MFCC onsets-offsets file of this run;
    # a distinct name is used so the ``textgrid`` argument is not shadowed.
    onsets_offsets = pd.read_csv(
        os.path.join(
            project_paths.path2data, 'wave', language, 'MFCC',
            'onsets-offsets',
            'wave_{}_MFCC_onsets-offsets_{}.csv'.format(language, run_name)))
    n_rows = onsets_offsets.offsets.count()
    return raw_features[:n_rows], columns2retrieve, save_all
def generate(mod, run, language, textgrid, overwrite=False):
    """Return the sentence-onset raw-features of one run, from disk when possible.

    The ``mod`` argument is not used: it is rebound to a ``model.Other``
    built from ``sentence_onset``.

    Args:
        mod: Ignored (replaced inside the function).
        run: Path of the run file. The run name is the last ``_``-separated
            token of its basename once the extension is removed.
        language: Language name, used as a folder, in the CSV file name and
            when building the model.
        textgrid: Passed to ``mod.generate``; its ``offsets.count()`` gives
            the number of rows kept.
        overwrite: If truthy, regenerate even when the CSV already exists.

    Returns:
        A 3-tuple ``(raw_features, columns2retrieve, save_all)``: the
        features truncated to ``textgrid.offsets.count()`` rows, the names
        of the model's functions, and the CSV path when the features were
        regenerated (``None`` when they were read from disk).
    """
    from .OTHER import model
    from .OTHER.utils import sentence_onset

    model_name = 'other_sentence_onset'
    # The run name is the last '_'-separated token of the file stem.
    run_name = os.path.basename(os.path.splitext(run)[0]).split('_')[-1]

    mod = model.Other([sentence_onset], language)

    subfolders = ('fMRI/raw-features', language, model_name)
    check_folder(os.path.join(Paths().path2derivatives, *subfolders))
    csv_name = 'raw-features_{}_{}_{}.csv'.format(language, model_name,
                                                  run_name)
    path = os.path.join(Paths().path2derivatives, *subfolders, csv_name)

    if os.path.exists(path) and not overwrite:
        # An existing CSV is reused, so nothing is flagged for saving.
        raw_features = pd.read_csv(path)
        save_all = None
    else:
        raw_features = mod.generate(run, language, textgrid)
        save_all = path

    columns2retrieve = [func.__name__ for func in mod.functions]

    n_rows = textgrid.offsets.count()
    return raw_features[:n_rows], columns2retrieve, save_all
#plt.xlim(0,0.2) #plt.ylim(0,0.2) plt.plot([ max([np.min(x), np.min(y)]), min([np.max(x), np.max(y)]) ], [ max([np.min(x), np.min(y)]), min([np.max(x), np.max(y)]) ], c='blue') plt.axhline(y=0., color='blue', linestyle='-') plt.legend() save_folder = os.path.join(paths.path2derivatives, source, 'analysis', language, 'scatter_plots', analysis_name) check_folder(save_folder) plt.savefig( os.path.join( save_folder, analysis['title'] + ' - ' + labels[index_mask + 1] + ' - ' + subject + '.png')) plt.close() i += 1 ########################################################################## ############################## Check models ############################## ########################################################################## if 'check_model' in args.analysis[0]: # retrieve default atlas (= set of ROI) atlas = datasets.fetch_atlas_harvard_oxford(params.atlas) labels = atlas['labels']
# $ cd $LePetitPrince/code/
# $ python create_data_architecture.py
################################################################################

import os

from utilities.settings import Subjects, Paths, Params
from utilities.utils import check_folder

# Project settings: subject lists, root paths and global parameters.
subjects = Subjects()
paths = Paths()
params = Params()
languages = params.languages

##################### ROOT #####################
# Top-level folders, checked relative to the project root.
os.chdir(paths.path2root)
for root_folder in ('data', 'derivatives', 'paradigm', 'oldstuff'):
    check_folder(root_folder)

##################### DATA #####################
# Everything below is checked relative to the data folder.
os.chdir(paths.path2data)

## fMRI ##
check_folder('fMRI')
for language in languages:
    check_folder('fMRI/{}'.format(language))
    for subject in subjects.get_all(language):
        # Subject folder first, then its 'anat' and 'func' sub-folders.
        for template in ('fMRI/{}/{}', 'fMRI/{}/{}/anat', 'fMRI/{}/{}/func'):
            check_folder(template.format(language, subject))
default=False, action='store_true', help="Precise if we want to run code in parallel") args = parser.parse_args() input_data_type = 'raw-features' output_data_type = 'features' extension = '.csv' source = 'fMRI' model = args.model output_parent_folder = get_output_parent_folder(source, output_data_type, args.language, model) check_folder( output_parent_folder ) # check if the output_parent_folder exists and create it if not raw_features = get_data(args.language, input_data_type, model=model, source='fMRI') if not args.parallel: for i, run in enumerate(raw_features): compute_features(run, output_parent_folder, output_data_type, args.language, model, extension, args.tr, args.overwrite) else: Parallel(n_jobs=-2)(delayed(compute_features) \ (run, output_parent_folder, output_data_type, args.language, model, extension, args.tr, args.overwrite) for run in raw_features)