def task_features():
    """Step 2: Generate features (=fMRI regressors) from raw features.

    Raw features are csv files with 3 columns (onset-amplitude-duration);
    ``features.py`` turns them into features by convolution with an hrf
    kernel.

    Yields one task dictionary per (language, model) pair:

    * ``file_dep``: ``features.py`` plus one raw-features csv per run;
    * ``targets``: one features csv per run;
    * ``actions``: a single ``python features.py ...`` command line built
      from the module-level ``tr``, ``optional`` and ``optional_parallel``.

    NOTE(review): the dictionary keys look like doit sub-task definitions;
    the naming rule below assumes doit semantics - confirm.
    """
    input_data_type = 'raw-features'
    output_data_type = 'features'
    extension = '.csv'
    source = 'fMRI'

    # Sub-task names have to be unique within one task generator. The bare
    # model name repeats for every language, so it is only kept when a single
    # language is configured (keeps existing task names stable); otherwise
    # the language is prepended.
    several_languages = len(languages) > 1

    for language in languages:
        for model in models:
            input_parent_folder = get_output_parent_folder(
                source, input_data_type, language, model)
            output_parent_folder = get_output_parent_folder(
                source, output_data_type, language, model)

            dependencies = [
                get_path2output(input_parent_folder, input_data_type,
                                language, model, run_name, extension)
                for run_name in run_names
            ]
            targets = [
                get_path2output(output_parent_folder, output_data_type,
                                language, model, run_name, extension)
                for run_name in run_names
            ]

            if several_languages:
                name = '{}-{}'.format(language, model)
            else:
                name = model

            command = (
                'python features.py --tr {} --language {} --model {} '.format(
                    tr, language, model)
                + optional + optional_parallel
            )

            yield {
                'name': name,
                'file_dep': ['features.py'] + dependencies,
                'targets': targets,
                'actions': [command],
            }
def task_design_matrices():
    """Step 3: Generate design matrices from features in a given language.

    Iterates over the module-level ``aggregated_models``; each entry is a
    ``'+'``-separated string of model names. Yields one task dictionary per
    (language, aggregated model) pair:

    * ``file_dep``: ``design-matrices.py`` plus, for every model of the
      aggregate, one features csv per run;
    * ``targets``: one design-matrices csv per run;
    * ``actions``: a single ``python design-matrices.py ...`` command line
      where the model names are passed space-separated.

    NOTE(review): the dictionary keys look like doit sub-task definitions;
    the naming rule below assumes doit semantics - confirm.
    """
    input_data_type = 'features'
    output_data_type = 'design-matrices'
    extension = '.csv'
    source = 'fMRI'

    # Sub-task names have to be unique within one task generator. The bare
    # aggregate name repeats for every language, so it is only kept when a
    # single language is configured (keeps existing task names stable);
    # otherwise the language is prepended.
    several_languages = len(languages) > 1

    for language in languages:
        # Named ``aggregate`` (not ``models``) so the module-level ``models``
        # list is not shadowed inside this function.
        for aggregate in aggregated_models:
            model_names = aggregate.split('+')

            dependencies = []
            for model in model_names:
                input_parent_folder = get_output_parent_folder(
                    source, input_data_type, language, model)
                dependencies += [
                    get_path2output(input_parent_folder, input_data_type,
                                    language, model, run_name, extension)
                    for run_name in run_names
                ]

            output_parent_folder = get_output_parent_folder(
                source, output_data_type, language, aggregate)
            targets = [
                get_path2output(output_parent_folder, output_data_type,
                                language, aggregate, run_name, extension)
                for run_name in run_names
            ]

            if several_languages:
                name = '{}-{}'.format(language, aggregate)
            else:
                name = aggregate

            command = (
                'python design-matrices.py --language {} --models {} '.format(
                    language, ' '.join(model_names))
                + optional
            )

            yield {
                'name': name,
                'file_dep': ['design-matrices.py'] + dependencies,
                'targets': targets,
                'actions': [command],
            }
def task_raw_features():
    """Step 1: Generate raw features from raw data (text, wave) model predictions.

    The kind of raw input is chosen from the model name: when the part
    before the first underscore is ``rms``, ``f0`` or ``mfcc`` the inputs
    are ``.wav`` files of type ``wave``; otherwise they are ``.txt`` files
    of type ``text``.

    Yields one task dictionary per (language, model) pair:

    * ``file_dep``: ``raw_features.py`` plus one raw input file per run,
      located under ``paths.path2data/<type>/<language>/<category>``;
    * ``targets``: one raw-features csv per run;
    * ``actions``: a single ``python raw_features.py ...`` command line.

    NOTE(review): the dictionary keys look like doit sub-task definitions;
    the naming rule below assumes doit semantics - confirm.
    """
    extension = '.csv'
    output_data_type = 'raw-features'
    source = 'fMRI'
    wave_prefixes = ('rms', 'f0', 'mfcc')

    # Sub-task names have to be unique within one task generator. The bare
    # model name repeats for every language, so it is only kept when a single
    # language is configured (keeps existing task names stable); otherwise
    # the language is prepended.
    several_languages = len(languages) > 1

    for language in languages:
        for model in models:
            if model.split('_')[0] in wave_prefixes:
                input_data_type = 'wave'
                extension_input = '.wav'
            else:
                input_data_type = 'text'
                extension_input = '.txt'

            # Looked up once per model and reused for the folder, the file
            # names and the command line (assumes get_category is a pure
            # lookup).
            category = params.get_category(model)

            input_parent_folder = join(
                paths.path2data,
                '{0}/{1}/{2}'.format(input_data_type, language, category))
            dependencies = [
                join(
                    input_parent_folder,
                    '{0}_{1}_{2}_{3}'.format(
                        input_data_type, language, category, run_name)
                    + extension_input)
                for run_name in run_names
            ]

            output_parent_folder = get_output_parent_folder(
                source, output_data_type, language, model)
            targets = [
                get_path2output(output_parent_folder, output_data_type,
                                language, model, run_name, extension)
                for run_name in run_names
            ]

            if several_languages:
                name = '{}-{}'.format(language, model)
            else:
                name = model

            # Adjacent literals are joined before .format() is applied.
            command = (
                'python raw_features.py --language {} --model_name {} '
                '--model_category {} '.format(language, model, category)
                + optional + optional_parallel
            )

            yield {
                'name': name,
                'file_dep': ['raw_features.py'] + dependencies,
                'targets': targets,
                'actions': [command],
            }
action='store_true', help="Precise if we overwrite existing files") parser.add_argument("--parallel", default=False, action='store_true', help="Precise if we want to run code in parallel") args = parser.parse_args() input_data_type = 'raw-features' output_data_type = 'features' extension = '.csv' source = 'fMRI' model = args.model output_parent_folder = get_output_parent_folder(source, output_data_type, args.language, model) check_folder( output_parent_folder ) # check if the output_parent_folder exists and create it if not raw_features = get_data(args.language, input_data_type, model=model, source='fMRI') if not args.parallel: for i, run in enumerate(raw_features): compute_features(run, output_parent_folder, output_data_type, args.language, model, extension, args.tr, args.overwrite) else: