def params_dir(basedir, params, method):
    """Return the directory path for ``params`` under ``basedir/method``.

    The final path component is ``util.hash_yaml_dict`` applied to the
    parameters, followed by a trailing ``'/'``.  When ``method`` is
    ``'model'`` the ``'metrics'`` entry is left out of the hash; this is
    done on a deep copy, so the caller's dict is never modified.
    """
    hashed_params = params
    if method == 'model' and 'metrics' in params:
        # Work on a copy so the caller still sees its 'metrics' entry.
        hashed_params = deepcopy(params)
        hashed_params.pop('metrics')

    digest = util.hash_yaml_dict(hashed_params)
    return os.path.join(basedir, method, digest + '/')
def grid_search(params, outputdir, drakefile, drakein=None, tag=None, python_args=None, overwrite_tag=False, preview=False):
    """Write the Drakefile steps for a grid of data/transform/model settings.

    Args:
        params: mapping with the keys ``'data'``, ``'transforms'``,
            ``'models'`` and ``'metrics'``; each value is expanded with
            ``list_dict_product``.
        outputdir: base directory passed to ``drake_step`` and
            ``params_dir``; tag directories are created below
            ``outputdir/tag``.
        drakefile: object with a ``write`` method that receives the
            generated text.
        drakein: optional path of an existing Drakefile.  When given, a
            ``BASE`` variable and a ``%include`` line pointing at it are
            written first.
        tag: optional tag name.  When given, ``outputdir/tag/<tag>`` is
            prepared and each model step receives a ``tagdir`` below it.
        python_args: extra arguments placed after ``python`` in the
            generated commands.  ``None`` means no extra arguments.
        overwrite_tag: remove an existing tag directory before recreating
            it (ignored when ``preview`` is true).
        preview: forwarded to ``drake_step``; when true nothing is removed
            from or created on disk by this function.
    """
    data = list_dict_product(params['data'])
    transforms = list_dict_product(params['transforms'])
    models = list_dict_product(params['models'])
    metrics = list_dict_product(params['metrics'])

    if drakein is not None:
        dirname, basename = os.path.split(os.path.abspath(drakein))
        drakefile.write("BASE={}\n".format(dirname))
        drakefile.write("%include $[BASE]/{}\n".format(basename))

    # TODO include a project specific Drakefile via cmd arg
    bindir = os.path.abspath(os.path.dirname(sys.argv[0]))
    # Formatting None directly would emit "python None <script>", which
    # makes the interpreter look for a script literally named "None".
    interpreter_args = '' if python_args is None else python_args
    drakefile.write((
        "\nPYTHONUNBUFFERED=Y\n\n"
        "data()\n"
        "    python {python_args} {bindir}/read_write_data.py $INPUT $OUTPUT\n"
        "model()\n"
        "    python {python_args} {bindir}/run_model.py $INPUT $OUTPUT $INPUT1 \n\n"
    ).format(bindir=bindir, python_args=interpreter_args))

    # data steps
    for d in data:
        drakefile.write(drake_step(outputdir, {'data': d}, 'data', preview=preview))

    tagroot = None
    if tag is not None:
        tagroot = os.path.join(outputdir, 'tag', tag)
        if not preview:
            # Only remove what is actually there: rmtree raises on a
            # missing path, e.g. the first run with overwrite_tag=True.
            if overwrite_tag and os.path.exists(tagroot):
                shutil.rmtree(tagroot)
            if not os.path.exists(tagroot):
                os.makedirs(tagroot)

    # model steps
    for d, t, m in itertools.product(data, transforms, models):
        p = {'data': d, 'transform': t, 'model': m, 'metrics': metrics}
        # use data dir for drake dependency
        datadir = os.path.join(params_dir(outputdir, {'data': d}, 'data'), 'output/')
        if tagroot is not None:
            tagdir = os.path.join(tagroot, util.hash_yaml_dict(p))
        else:
            tagdir = None
        drakefile.write(drake_step(outputdir, p, 'model', inputs=[datadir], tagdir=tagdir, preview=preview))