def task_data():
    """Yield one task per function collected from the ``src.data`` package.

    Each collected function becomes the sole action of its own task. The
    task's targets are whatever ``target_filenames`` derives for that
    function under ``data/processed``, and the source file reported
    alongside the function is its only file dependency.

    Yields:
        dict: task metadata with the keys ``name``, ``doc``, ``actions``,
        ``targets``, ``file_dep`` and ``clean`` (the key set used by doit
        task dictionaries).
    """
    target_dir = r"data/processed"
    for task_name, action, module_file in collect_functions('src.data'):
        # suffix='_' is forwarded as-is; its meaning is defined by
        # target_filenames, which is not part of this function.
        produced = target_filenames(target_dir, action, suffix='_')
        yield dict(
            name=task_name,
            doc=action.__doc__,
            actions=[action],
            targets=produced,
            file_dep=[module_file],
            clean=True,
        )
def task_figures():
    """Yield one task per plotting function found in ``src.visualization``.

    Modules listed in ``exclude_module`` (here: ``utils``) are passed to
    ``collect_functions`` to be left out. Each remaining function becomes
    the sole action of its own task; its targets are derived by
    ``target_filenames`` under ``reports/figures`` with the ``png`` suffix
    (presumably ``reports/figures/<func_name>.png`` - confirm in
    ``target_filenames``).

    The plotting functions are expected to read their inputs from
    ``data/processed/<arg>.pkl``; that convention is not enforced here.

    Paths in this docstring use forward slashes on purpose: backslashes in
    a non-raw string literal are interpreted as escape sequences and put
    control characters into the text shown as task documentation.

    Yields:
        dict: task metadata with the keys ``name``, ``actions``, ``doc``,
        ``targets``, ``file_dep`` and ``clean`` (the key set used by doit
        task dictionaries).
    """
    figures = collect_functions('src.visualization', exclude_module=['utils'])
    outpath = r'reports/figures'
    for name, func, src in figures:
        outfiles = target_filenames(outpath, func, suffix='png')
        yield {
            'name': name,
            'actions': [func],
            'doc': func.__doc__,
            'targets': outfiles,
            # The defining source file is the only declared dependency.
            'file_dep': [src],
            'clean': True,
        }
def task_features():
    """Yield one task per function collected from ``src.features``.

    Each collected function is the single action of its task. Targets are
    derived by ``target_filenames`` under ``data/processed``, and the
    function's reported source file is the only file dependency.

    Yields:
        dict: task metadata with the keys ``name``, ``actions``, ``doc``,
        ``targets``, ``file_dep`` and ``clean`` (the key set used by doit
        task dictionaries).
    """
    processed_dir = r'data/processed'
    collected = collect_functions('src.features')
    for entry in collected:
        feature_name, feature_func, feature_src = entry
        # suffix='_' is forwarded unchanged; target_filenames decides what
        # it means for the generated file names.
        feature_targets = target_filenames(
            processed_dir, feature_func, suffix='_'
        )
        task = {'name': feature_name}
        task['actions'] = [feature_func]
        task['doc'] = feature_func.__doc__
        task['targets'] = feature_targets
        task['file_dep'] = [feature_src]
        task['clean'] = True
        yield task
def task_models():
    """Yield one task per function collected from the ``src.models`` package.

    ``pure_tensorflow`` is passed to ``collect_functions`` as the module to
    exclude. Each collected function is the single action of its task;
    targets are derived by ``target_filenames`` under ``models``, and the
    function's reported source file is the only file dependency.

    Yields:
        dict: task metadata with the keys ``name``, ``actions``, ``doc``,
        ``targets``, ``file_dep`` and ``clean`` (the key set used by doit
        task dictionaries).
    """
    models_dir = r'models'
    # NOTE(review): exclude_module is a bare string here, while task_figures
    # passes a list - confirm collect_functions accepts both forms.
    for model_name, trainer, trainer_src in collect_functions(
        'src.models', exclude_module='pure_tensorflow'
    ):
        artefacts = target_filenames(models_dir, trainer, suffix='_')
        yield dict(
            name=model_name,
            actions=[trainer],
            doc=trainer.__doc__,
            targets=artefacts,
            file_dep=[trainer_src],
            clean=True,
        )