class GetDataEcon(d6tflow.tasks.TaskPqPandas):
    """Download the 'CPGDPAI' series from the 'fred' source and persist it.

    The result of ``pddr.DataReader`` is handed to ``self.save`` unchanged.
    """

    # Task parameters bounding the requested date window.
    date_start = d6tflow.DateParameter()
    date_end = d6tflow.DateParameter()

    def run(self):
        """Fetch the series for [date_start, date_end] and save it."""
        # presumably `pddr` is pandas_datareader.data -- confirm against imports
        gdp_frame = pddr.DataReader(
            'CPGDPAI',
            'fred',
            self.date_start,
            self.date_end,
        )
        self.save(gdp_frame)
class TaskGetData(d6tflow.tasks.TaskPqPandas):
    """Build a training dataframe from the scikit-learn iris dataset.

    Output is saved as a dataframe through the parquet/pandas task base,
    see https://d6tflow.readthedocs.io/en/latest/targets.html
    """

    # Workflow parameters with defaults taken from `cfg`.
    # See https://d6tflow.readthedocs.io/en/latest/advparam.html
    dt_start = d6tflow.DateParameter(default=cfg.dt_start)
    dt_end = d6tflow.DateParameter(default=cfg.dt_end)

    def run(self):
        """Load iris, label the four feature columns, add target `y`, save."""
        dataset = sklearn.datasets.load_iris()
        feature_names = [f'feature{idx}' for idx in range(4)]

        frame = pd.DataFrame(dataset.data, columns=feature_names)
        frame['y'] = dataset.target
        # Optional date filter (not applied; iris has no 'date' column):
        #   frame[frame['date'] >= self.dt_start]

        self.save(frame)
class Task_getPlot(TaskPickle):
    """Render a plot for one ticker from upstream ticker and price data.

    Expects ``self.input()`` to yield two targets: a ticker table and a data
    payload. The upstream wiring is not visible in this class.
    """

    runDate = d6tflow.DateParameter()
    plottype = d6tflow.Parameter()

    def run(self):
        """Load inputs, short-circuit on failure, otherwise build and save."""
        tickers_target, data_target = self.input()
        ticker_table = tickers_target.load().set_index('Ticker symbol')
        payload = data_target.load()

        status = payload['status']
        if status == 'failure':
            # Propagate the failure with an empty plot slot.
            self.save({'status': status, 'plot': None})
            return

        # The payload carries the timeseries as a JSON-serialised dataframe.
        performance_data = pd.read_json(payload['dataframe']).set_index('Date')

        # NOTE(review): `stockticker` is not declared as a parameter in this
        # class; presumably it is inherited from the upstream task wiring --
        # confirm, otherwise this lookup raises AttributeError.
        company_name = ticker_table.at[self.stockticker, 'Company']

        figure = getPlot(
            plottype=self.plottype,
            performance_data=performance_data,
            status=status,
            ticker=company_name,
        )
        self.save(figure)
class Task_getDates(TaskJson):
    """Extract the distinct dates of an upstream timeseries, newest first."""

    runDate = d6tflow.DateParameter()
    type = d6tflow.Parameter()

    def run(self):
        """Save a JSON payload with the unique dates as 'YYYY-MM-DD' strings.

        On upstream failure, saves ``{'status': ..., 'dates': None}`` instead.
        """
        upstream = self.input().load()
        status = upstream['status']
        raw_series = upstream['timeseries']

        if status == 'failure':
            # NOTE(review): the failure payload uses key 'dates' while the
            # success payload uses 'timeseries' -- confirm consumers expect this.
            self.save({'status': status, 'dates': None})
            return

        series_frame = pd.read_json(raw_series)
        newest_first = sorted(series_frame['date'].unique(), reverse=True)

        date_frame = pd.DataFrame({'date': newest_first})
        date_frame['date'] = date_frame['date'].apply(
            lambda stamp: stamp.strftime('%Y-%m-%d')
        )

        self.save({
            'timeseries': date_frame.to_json(),
            'status': 'success',
        })
class Task_getTickers(TaskCSVPandas):
    """Persist the result of ``getTickers()`` as this task's output."""

    # Declared task parameter; not read inside run().
    runDate = d6tflow.DateParameter()

    def run(self):
        """Call ``getTickers`` and save whatever it returns."""
        self.save(getTickers())
class Task_getData(TaskJson):
    """Persist the result of ``getTimeseries_T`` for the configured ``type``."""

    # Declared task parameter; not read inside run().
    runDate = d6tflow.DateParameter()
    # Passed straight through to getTimeseries_T.
    type = d6tflow.Parameter()

    def run(self):
        """Call ``getTimeseries_T(self.type)`` and save its return value."""
        self.save(getTimeseries_T(self.type))
class Task_getData(TaskJson):
    """Persist the result of ``getData`` for a single stock ticker."""

    # Declared task parameter; not read inside run().
    runDate = d6tflow.DateParameter()
    # Ticker symbol forwarded to getData.
    stockticker = d6tflow.Parameter()

    def run(self):
        """Call ``getData(stockticker=...)`` and save its return value."""
        # A membership check of the ticker against an upstream ticker list
        # was sketched here previously but is disabled.
        result = getData(stockticker=self.stockticker)
        self.save(result)
class Task_getCountries(TaskJson):
    """Persist the result of ``getCountries()`` as this task's output."""

    # Declared task parameter; not read inside run().
    runDate = d6tflow.DateParameter()

    def run(self):
        """Call ``getCountries`` and save whatever it returns."""
        self.save(getCountries())