class Task3(d6tflow.tasks.TaskCache):
    """Example task that reads upstream metadata and records its own.

    ``run`` looks up the ``'upstream1'`` entry of ``self.metaLoad()``,
    prints that entry and its ``'columns'`` value, and then calls
    ``self.metaSave`` with a fixed ``{'columns': 100}`` dict.
    """

    # Declared on the task but not read anywhere in run().
    multiplier = d6tflow.IntParameter(default=2)

    def run(self):
        """Print the 'upstream1' metadata and save this task's metadata."""
        upstream_meta = self.metaLoad()['upstream1']
        print(upstream_meta)
        print(upstream_meta['columns'])

        own_meta = {'columns': 100}
        self.metaSave(own_meta)
class Task3(d6tflow.tasks.TaskCache):
    """Join the 'input1' and 'input2' inputs and add a scaled column.

    The joined frame gets a new column ``'b'`` equal to column ``'a1'``
    multiplied by the ``multiplier`` parameter, and is then saved.
    """

    multiplier = d6tflow.IntParameter(default=2)

    def run(self):
        """Load both inputs, join them, compute ``'b'`` and save the result."""
        left = self.input()['input1'].load()
        right = self.input()['input2'].load()

        # Overlapping column names are disambiguated with '1' / '2' suffixes,
        # which is where the 'a1' column used below comes from
        # (assumes both inputs carry a column named 'a' -- TODO confirm).
        joined = left.join(right, lsuffix='1', rsuffix='2')

        scaled = joined['a1'] * self.multiplier
        joined['b'] = scaled
        self.save(joined)
class Task3(d6tflow.tasks.TaskCache):
    """Join two positional inputs after checking that they are equal.

    The first input is indexed by ``'a1'`` before loading; the second is
    loaded directly. ``multiplier`` has no default, so it must be supplied
    when the task is instantiated.
    """

    multiplier = d6tflow.IntParameter()

    def run(self):
        """Load inputs 0 and 1, assert equality, join, add ``'b'`` and save."""
        first = self.input()[0]['a1'].load()
        second = self.input()[1].load()

        # Sanity check carried by the example: both inputs must hold equal data.
        assert second.equals(first)

        combined = first.join(second, lsuffix='1', rsuffix='2')
        combined['b'] = combined['a1'] * self.multiplier
        self.save(combined)
class TradingSignals(d6tflow.tasks.TaskPqPandas):
    """Derive a +1 / -1 ``'position'`` signal from the ``'CPGDPAI'`` column.

    The signal is +1 where the ``lookback_period``-step difference of
    ``'CPGDPAI'`` is strictly positive and -1 everywhere else.
    """

    lookback_period = d6tflow.IntParameter()

    def run(self):
        """Load the input, build the long/short signal frame and save it."""
        gdp = self.inputLoad()

        # True where the series rose over the lookback window.
        # NOTE(review): assuming a numeric pandas Series, rows whose diff is
        # NaN (e.g. the first `lookback_period` rows) compare as False and
        # therefore end up as -1 below -- confirm that is intended.
        change = gdp['CPGDPAI'].diff(self.lookback_period)
        is_rising = change > 0

        signal = is_rising.to_frame(name='position')
        signal['position'] = np.where(signal['position'], 1, -1)
        self.save(signal)
class ModelTrainLGBM(d6tflow.tasks.TaskPickle):
    """Fit a ``lightgbm.LGBMRegressor`` on the task inputs and save the model.

    ``max_depth`` and ``learning_rate`` are task parameters that are passed
    straight through to the regressor's constructor.
    """

    max_depth = d6tflow.IntParameter(default=2)
    learning_rate = d6tflow.FloatParameter(default=0.1)

    def run(self):
        """Load (X, Y), train the regressor on ``Y[cfg_col_Y]`` and save it."""
        # inputLoad() is unpacked into exactly two objects: features, targets.
        features, targets = self.inputLoad()

        regressor = lightgbm.LGBMRegressor(
            max_depth=self.max_depth,
            learning_rate=self.learning_rate,
        )
        # cfg_col_Y is defined outside this block and selects the target column.
        target_column = targets[cfg_col_Y]
        regressor.fit(features, target_column)

        self.save(regressor)
class Task3(d6tflow.tasks.TaskCache):
    """Print upstream metadata, join two upstream inputs, save data and metadata.

    ``run`` reads the ``'upstream1'`` entry of ``self.metaLoad()``, joins the
    ``'upstream1'`` and ``'upstream2'`` inputs, adds column ``'b'`` as
    ``'a1'`` times ``multiplier``, saves the frame and finally saves
    ``{'columns': 100}`` as this task's metadata.
    """

    multiplier = d6tflow.IntParameter(default=2)

    def run(self):
        """Execute the metadata read, the join and both save calls in order."""
        upstream_meta = self.metaLoad()['upstream1']
        print(upstream_meta)
        print(upstream_meta['columns'])

        left = self.input()['upstream1'].load()
        right = self.input()['upstream2'].load()

        # Suffixes '1' / '2' disambiguate overlapping column names; 'a1' below
        # is presumably the left frame's 'a' column -- TODO confirm.
        joined = left.join(right, lsuffix='1', rsuffix='2')
        joined['b'] = joined['a1'] * self.multiplier

        # Data is saved before the metadata, matching the original order.
        self.save(joined)
        self.metaSave({'columns': 100})
def test_functional_Flow():
    """Exercise a four-step functional ``Flow`` end to end.

    Steps ``get_data0`` and ``get_data1`` take no inputs, ``get_data2``
    requires ``get_data0``, and ``use_data`` requires ``get_data1`` (as
    ``"a"``) and ``get_data2`` (as ``"b"``). The flow is run twice with
    ``multiplier=42`` and the loaded output of ``use_data`` is checked.
    """
    import d6tflow
    import pandas as pd
    from d6tflow.functional import Flow

    flow = Flow()

    @flow.step(d6tflow.tasks.TaskCache)
    @flow.persists(['a1', 'a2'])
    def get_data0(task):
        frame = pd.DataFrame({'a': range(3)})
        task.save({'a1': frame, 'a2': frame})

    @flow.step(d6tflow.tasks.TaskCache)
    @flow.persists(['a1', 'a2'])
    def get_data1(task):
        frame = pd.DataFrame({'a': range(3)})
        task.save({'a1': frame, 'a2': frame})

    @flow.step(d6tflow.tasks.TaskCache)
    @flow.requires(get_data0)
    def get_data2(task):
        upstream = task.inputLoad(as_dict=True)
        frame = pd.DataFrame({'a': range(3)})
        # 'b2' carries the whole upstream payload so the final step can
        # reach get_data0's 'a1' through it.
        task.save({'b1': frame, 'b2': upstream})

    @flow.step(d6tflow.tasks.TaskCache)
    @flow.requires({"a": get_data1, "b": get_data2})
    @flow.persists(['aa'])
    def use_data(task):
        loaded = task.inputLoad(as_dict=True)
        expected = pd.DataFrame({'a': range(3)})

        # Outputs of get_data1, addressed through the "a" requirement.
        assert loaded["a"]["a1"].equals(expected)
        assert loaded["a"]["a2"].equals(expected)

        # Outputs of get_data2 ("b"), including the nested get_data0 payload.
        assert loaded["b"]["b1"].equals(expected)
        assert loaded["b"]["b2"]["a1"].equals(expected)

        # The run-time params passed to flow.run override the default of 0.
        assert task.multiplier == 42

        result = pd.DataFrame({'a': range(4)})
        task.save({'aa': result})

    flow.add_params({'multiplier': d6tflow.IntParameter(default=0)})

    flow.run(
        [use_data, get_data0],
        forced_all_upstream=True,
        confirm=False,
        params={'multiplier': 42},
    )
    flow.run(
        use_data,
        forced_all_upstream=True,
        confirm=False,
        params={'multiplier': 42},
    )

    expected_output = pd.DataFrame({'a': range(4)})
    loaded_output = flow.outputLoad(use_data, params={'multiplier': 42})
    assert loaded_output[0].equals(expected_output)
@flow.task(d6tflow.tasks.TaskPqPandas) def get_data1(task): df = pd.DataFrame({'a': range(3)}) task.save(df) @flow.task(d6tflow.tasks.TaskCache) @flow.persists(['b']) def get_data2(task): df = pd.DataFrame({'b': range(3)}) task.save({'b': df}) @flow.task(d6tflow.tasks.TaskPqPandas) @flow.requires({"input1": get_data1, "input2": get_data2}) @flow.params(multiplier=d6tflow.IntParameter(default=0)) def usedata(task): data = task.inputLoad() df1 = data['input1'] df2 = data['input2'] df3 = df1.join(df2, lsuffix='1', rsuffix='2') df3['b'] = df3['a'] * task.multiplier # use task parameter task.save(df3) flow.add_global_params(multiplier=d6tflow.IntParameter(default=0)) use_params = {'exp1': {'multiplier': 40}, 'exp2': {'multiplier': 42}} flow.preview(usedata, params=use_params) flow.run(usedata,
class Task1(d6tflow.tasks.TaskCache):
    """Minimal task whose ``run`` saves the constant dict ``{1: 1}``."""

    # Declared with significant=False and no default; run() never reads it.
    param = d6tflow.IntParameter(significant=False)

    def run(self):
        """Save a fixed one-entry dict as the task output."""
        payload = {1: 1}
        self.save(payload)
class Task1(d6tflow.tasks.TaskCache):
    """Minimal task that saves its ``param1`` value under the key ``'hello'``."""

    param1 = d6tflow.IntParameter(default=0)

    def run(self):
        """Save ``{'hello': <param1>}`` as the task output."""
        payload = {'hello': self.param1}
        self.save(payload)