class Task_getPlot(TaskPickle):
    runDate = d6tflow.DateParameter()
    plottype = d6tflow.Parameter()
    stockticker = d6tflow.Parameter()  # referenced below via self.stockticker

    def run(self):
        tickers, Data = self.input()
        tickers, Data = tickers.load().set_index('Ticker symbol'), Data.load()
        status = Data['status']
        if status == 'failure':
            self.save({'status': status, 'plot': None})
            return None
        company_timeseries = pd.read_json(Data['dataframe'])
        performance_data = company_timeseries.set_index('Date')
        # status, company_timeseries = Data['status'], Data['company_timeseries']
        # performance_data = pd.read_sql(
        #     company_timeseries.statement,
        #     company_timeseries.session.bind).set_index('Date')
        # performance_data = self.input().load().set_index('Date')
        myplot = getPlot(
            plottype=self.plottype,
            performance_data=performance_data,
            status=status,
            ticker=tickers.at[self.stockticker, 'Company'],
        )
        self.save(myplot)
class Task_getDates(TaskJson):
    runDate = d6tflow.DateParameter()
    type = d6tflow.Parameter()

    def run(self):
        inputs = self.input().load()
        status, timeseries = inputs['status'], inputs['timeseries']
        if status == 'failure':
            self.save({'status': status, 'dates': None})
            return None
        timeseries = pd.read_json(timeseries)
        options_date_map = sorted(timeseries['date'].unique(), reverse=True)
        options_date_map = pd.DataFrame({'date': options_date_map})
        options_date_map['date'] = options_date_map['date'].apply(
            lambda x: x.strftime('%Y-%m-%d'))
        out = {
            'timeseries': options_date_map.to_json(),
            'status': 'success'
        }
        self.save(out)
class Task_getData(TaskJson):
    runDate = d6tflow.DateParameter()
    type = d6tflow.Parameter()

    def run(self):
        timeseries = getTimeseries_T(self.type)
        self.save(timeseries)
class Task_getData(TaskJson):
    runDate = d6tflow.DateParameter()
    stockticker = d6tflow.Parameter()

    def run(self):
        # tickers = self.input().load()
        # assert self.stockticker in tickers.keys(), 'Symbol should be in ticker list'
        out = getData(stockticker=self.stockticker)
        self.save(out)
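# The dashboard tasks above only show their run() bodies; the dependency
# wiring (d6tflow.requires) and the execution side live elsewhere in the
# project. The sketch below is a hedged usage example, not original code:
# the parameter values are illustrative and Task_getPlot is presumed to
# declare its upstream tasks somewhere else.
import datetime

import d6tflow

if __name__ == '__main__':
    task = Task_getPlot(runDate=datetime.date.today(),
                        plottype='price',       # illustrative value
                        stockticker='AAPL')     # illustrative value
    d6tflow.preview(task)     # inspect which upstream tasks still need to run
    d6tflow.run(task)         # runs the upstream data tasks first, then this one
    plot = task.outputLoad()  # load the cached pickle output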
class TaskTrain(d6tflow.tasks.TaskPickle):  # save output as pickle
    model = d6tflow.Parameter(default='ols')  # parameter for model selection

    def run(self):
        df_train = self.input().load()
        if self.model == 'ols':
            model = sklearn.linear_model.LogisticRegression()
        elif self.model == 'svm':
            model = sklearn.svm.SVC()
        else:
            raise ValueError('invalid model selection')
        model.fit(df_train.drop('y', axis=1), df_train['y'])
        self.save(model)
class ModelTrain(d6tflow.tasks.TaskPickle):  # save output as pickle
    model = d6tflow.Parameter(default='ols')  # parameter for model selection

    def run(self):
        df_train = self.input().load()
        if self.model == 'ols':
            model = sklearn.linear_model.LinearRegression()
        elif self.model == 'gbm':
            model = sklearn.ensemble.GradientBoostingRegressor()
        else:
            raise ValueError('invalid model selection')
        model.fit(df_train.drop('y', axis=1), df_train['y'])
        self.save(model)
        self.saveMeta(
            {'score': model.score(df_train.drop('y', axis=1), df_train['y'])})
class ModelTrain(d6tflow.tasks.TaskPickle):
    model = d6tflow.Parameter()  # parameter for model selection

    def run(self):
        df_trainX, df_trainY = self.inputLoad()  # quickly load input data
        if self.model == 'ols':  # select model based on parameter
            model = sklearn.linear_model.LinearRegression()
        elif self.model == 'gbm':
            model = sklearn.ensemble.GradientBoostingRegressor()
        else:
            raise ValueError('invalid model selection')

        # fit and save model with training score
        model.fit(df_trainX, df_trainY)
        self.save(model)  # persist/cache model
        self.saveMeta({'score': model.score(df_trainX, df_trainY)})  # save model score
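# Hedged usage sketch for the training task above: run both model variants
# and compare the training scores persisted via saveMeta(). The upstream
# data task is assumed to be wired in elsewhere, and metaLoad() is assumed
# to be available as the read counterpart of saveMeta() in the installed
# d6tflow version.
import d6tflow

if __name__ == '__main__':
    tasks = [ModelTrain(model='ols'), ModelTrain(model='gbm')]
    d6tflow.run(tasks)  # trains both variants; shared upstream tasks run once and are cached
    for task in tasks:
        meta = task.metaLoad()  # load the metadata dict saved via saveMeta()
        print(task.model, meta['score'])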