コード例 #1
0
ファイル: explore.py プロジェクト: pombredanne/drain
def read_data(row, basedir, transform=True):
    """Read the data object stored in ``row`` from its step output directory.

    The directory is ``<params_dir(basedir, {'data': ...}, 'data')>/output``,
    built from the ``'data'`` entry of ``row['params']``.

    Args:
        row: mapping with a ``'data'`` object exposing ``read`` and
            ``transform`` methods, and a ``'params'`` mapping holding
            ``'data'`` and ``'transform'`` entries.
        basedir: base directory handed to ``params_dir``.
        transform: when true, call ``row['data'].transform`` with
            ``row['params']['transform']`` expanded as keyword arguments.
    """
    data_params = {'data': row['params']['data']}
    step_dir = params_dir(basedir, data_params, 'data')
    output_dir = os.path.join(step_dir, 'output')
    row['data'].read(output_dir)

    # Nothing more to do when the caller only wants the raw read.
    if not transform:
        return

    transform_kwargs = row['params']['transform']
    row['data'].transform(**transform_kwargs)
コード例 #2
0
ファイル: grid_search.py プロジェクト: pombredanne/drain
def grid_search(params, outputdir, drakefile, drakein=None, tag=None, python_args=None, overwrite_tag=False, preview=False):
    """Write Drake steps for every data/transform/model combination.

    One ``data`` step is written per entry produced from ``params['data']``
    and one ``model`` step per element of the product of the data,
    transform and model entries.

    Args:
        params: mapping with ``'data'``, ``'transforms'``, ``'models'`` and
            ``'metrics'`` entries, each expanded with ``list_dict_product``.
        outputdir: base directory passed to ``params_dir`` / ``drake_step``;
            tag directories are created under ``<outputdir>/tag/<tag>``.
        drakefile: object with a ``write`` method receiving the generated text.
        drakein: optional path of a Drakefile to ``%include``; its directory
            is written as ``BASE``.
        tag: optional tag name; when given, each model step is passed a
            ``tagdir`` of ``<outputdir>/tag/<tag>/<hash of its params>``.
        python_args: extra text placed right after ``python`` in the
            generated method bodies; ``None`` means no extra arguments.
        overwrite_tag: when true (and not ``preview``), remove an existing
            tag directory before recreating it.
        preview: when true, this function does not touch the tag directory;
            the flag is also forwarded to ``drake_step``.
    """
    data = list_dict_product(params['data'])
    transforms = list_dict_product(params['transforms'])
    models = list_dict_product(params['models'])
    metrics = list_dict_product(params['metrics'])

    if drakein is not None:
        dirname, basename = os.path.split(os.path.abspath(drakein))
        drakefile.write("BASE={}\n".format(dirname))
        drakefile.write("%include $[BASE]/{}\n".format(basename))

    # str.format would render None as the literal text "None", which would
    # end up as a bogus argument on the generated python command line.
    if python_args is None:
        python_args = ''

    #TODO include a project specific Drakefile via cmd arg
    bindir = os.path.abspath(os.path.dirname(sys.argv[0]))
    drakefile.write("""
PYTHONUNBUFFERED=Y\n
data()
    python {python_args} {bindir}/read_write_data.py $INPUT $OUTPUT
model()
    python {python_args} {bindir}/run_model.py $INPUT $OUTPUT $INPUT1 \n
""".format(bindir=bindir, python_args=python_args))

    # data steps
    for d in data:
        drakefile.write(drake_step(outputdir, {'data': d}, 'data', preview=preview))

    tag_root = None
    if tag is not None:
        tag_root = os.path.join(outputdir, 'tag', tag)
        if not preview:
            # Only remove the tag directory if it is actually there;
            # rmtree raises on a missing path.
            if overwrite_tag and os.path.exists(tag_root):
                shutil.rmtree(tag_root)
            if not os.path.exists(tag_root):
                os.makedirs(tag_root)

    # model steps
    for d, t, m in itertools.product(data, transforms, models):
        p = {'data': d, 'transform': t, 'model': m, 'metrics': metrics}
        # use data dir for drake dependency
        datadir = os.path.join(params_dir(outputdir, {'data': d}, 'data'), 'output/')

        tagdir = None
        if tag_root is not None:
            tagdir = os.path.join(tag_root, util.hash_yaml_dict(p))

        drakefile.write(drake_step(outputdir, p, 'model', inputs=[datadir], tagdir=tagdir, preview=preview))
コード例 #3
0
ファイル: grid_search.py プロジェクト: pombredanne/drain
def drake_step(basedir, params, method, inputs=None, tagdir=None, preview=False):
    """Prepare a step directory and return the Drakefile rule text for it.

    The step directory is ``params_dir(basedir, params, method)``. Unless
    ``preview`` is set, the directory is created if missing, ``params.yaml``
    is (re)written when ``params_new`` reports a change, and ``tagdir`` is
    created as a symlink to the step directory if nothing exists at that
    path yet.

    Args:
        basedir: base directory passed to ``params_dir``.
        params: parameter mapping; ``params[method]['name']`` is resolved
            with ``util.get_attr``.
        method: step kind, used as the key into ``params`` and as the Drake
            ``method`` option of the rule.
        inputs: optional list of extra input paths for the rule.
        tagdir: optional path at which to symlink the step directory.
        preview: when true, make no filesystem changes.

    Returns:
        The rule as a string: ``!<dir>/output/ <- !<dir>/params.yaml`` followed
        by ``, !<input>`` for each input, ``[method:<method>]`` and a blank line.
    """
    # None instead of a shared mutable default list.
    if inputs is None:
        inputs = []

    d = params_dir(basedir, params, method)

    if not os.path.exists(d) and not preview:
        os.makedirs(d)

    dirname = os.path.join(d, 'output/')
    params_file = os.path.join(d, 'params.yaml')

    if params_new(params, params_file) and not preview:
        with open(params_file, 'w') as f:
            yaml.dump(params, f)

    # lexists (unlike exists) is also true for a dangling symlink, which
    # would otherwise make os.symlink fail with "file exists".
    if tagdir is not None and not preview and not os.path.lexists(tagdir):
        # A relative link target is resolved against the link's own
        # directory, not the cwd, so point the link at an absolute path.
        os.symlink(os.path.abspath(d), tagdir)

    cls = util.get_attr(params[method]['name'])
    if hasattr(cls, 'DEPENDENCIES'):
        inputs = inputs + cls.DEPENDENCIES

    # Every input is prefixed with '!' in the generated rule.
    inputs_text = ', !' + ', !'.join(inputs) if inputs else ''

    return '!{} <- !{}{} [method:{}]\n\n'.format(dirname, params_file, inputs_text, method)
コード例 #4
0
ファイル: explore.py プロジェクト: pombredanne/drain
def read_estimator(row, basedir):
    """Load the saved estimator for ``row`` into ``row['estimator']``.

    The file read is ``output/estimator.pkl`` inside
    ``params_dir(basedir, row['params'], 'model')``.

    Args:
        row: mapping with a ``'params'`` entry; receives the loaded object
            under the ``'estimator'`` key.
        basedir: base directory handed to ``params_dir``.
    """
    model_step_dir = params_dir(basedir, row['params'], 'model')
    pickle_path = os.path.join(model_step_dir, 'output', 'estimator.pkl')
    row['estimator'] = joblib.load(pickle_path)