def genf() : db = pg.PostgreDB() sql_s = "select tkr,csvh from tkrprices order by tkr" sql_sql = sql.text(sql_s) result = db.query(sql_sql) if not result.rowcount: sys.exit(1) for row in result: print(row.tkr) # converting DataFrame as it is feat_df = pd.read_csv(io.StringIO(row.csvh), names=('cdate','cp')) # But first, I should calculate the dependent variable: feat_df['pct_lead'] = 100.0*((feat_df.cp.shift(-1) - feat_df.cp) / feat_df.cp).fillna(0) # Now, I should get 'lag' features from price: feat_df['pct_lag1'] = 100.0*((feat_df.cp - feat_df.cp.shift(1))/feat_df.cp.shift(1)).fillna(0) feat_df['pct_lag2'] = 100.0*((feat_df.cp - feat_df.cp.shift(2))/feat_df.cp.shift(2)).fillna(0) feat_df['pct_lag4'] = 100.0*((feat_df.cp - feat_df.cp.shift(4))/feat_df.cp.shift(4)).fillna(0) feat_df['pct_lag8'] = 100.0*((feat_df.cp - feat_df.cp.shift(8))/feat_df.cp.shift(8)).fillna(0) # Now, I should calculate 'slope' features from price: for slope_i in [3,4,5,6,7,8,9]: rollx = feat_df.rolling(window=slope_i) col_s = 'slope'+str(slope_i) slope_sr = 100.0 * (rollx.mean().cp - rollx.mean().cp.shift(1))/rollx.mean().cp feat_df[col_s] = slope_sr # Now, I should calculate 'date' features from cdate: dt_sr = pd.to_datetime(feat_df.cdate) # normalize dow_l = [float(dt.strftime('%w' ))/100.0 for dt in dt_sr] moy_l = [float(dt.strftime('%m'))/100.0 for dt in dt_sr] #dom_l = [float(dt.strftime('%d')) for dt in dt_sr] # maybe use later #wom_l = [round(dom/5)/100.0 for dom in dom_l] # maybe use later feat_df['dow'] = dow_l #day feat_df['moy'] = moy_l #month feat_df.to_csv('/tmp/tmp.csv',index=False, float_format="%.3f") csv0_s = feat_df.to_csv(index=False,header=True,float_format='%.3f') csv_s = "'"+csv0_s+"'" tkr_s = "'"+row.tkr+"'" sql_s = "insert into features(tkr,csv)values("+tkr_s+","+csv_s+")" db.query(sql_s) 'bye'
def copy2db(tkrdir) : tkrhdir = tkrdir + '\\history' tkrddir = tkrdir + '\\div' tkrsdir = tkrdir + '\\split' # gathering csv to import into database for csvf_s in sorted(glob.glob(tkrhdir+'/*.csv')): sz_i = os.path.getsize(csvf_s) #print(csvf_s, sz_i) if (sz_i > 140): tkr0_s = csvf_s.split('\\')[-1].split('.')[0] # get ticker name try : csv_df = pd.read_csv(csvf_s) except : print(csvf_s + ' failed to read as a csv file') # I should convert to String and pick only two columns: csv0_s = csv_df.to_csv(index=False,header=False,columns=('Date','Close'),float_format='%.3f') csvh_s = "'"+csv0_s+"'" tkr_s = "'"+tkr0_s+"'" csvfd_s = tkrddir+'\\'+tkr0_s+'.csv' csvfs_s = tkrsdir+'\\'+tkr0_s+'.csv' try : csvd0_s = pd.read_csv(csvfd_s).sort_values('Date').to_csv(index=False,header=False) except: print(csvf_s + ' failed to read as a csv file') try: csvs0_s = pd.read_csv(csvfs_s).sort_values('Date').to_csv(index=False, header=False) except: print(csvfs_s + ' failed to read as a csv file') csvd_s = "'"+csvd0_s+"'" csvs_s = "'"+csvs0_s+"'" db = pg.PostgreDB() sql_s = "insert into tkrprices(tkr,csvd,csvh,csvs)values("+tkr_s+","+csvd_s+","+csvh_s+","+csvs_s+")" db.query(sql_s) # sql_check = "select tkr, csvh from tkrprices limit 1;" # result = db.execu(sql_check) 'bye'
def load_predict_keraslinear(tkr='FB', yrs=3, mnth='2017-08', features='pct_lag1,slope4,moy'): """This function should demo how to predict from a model in the db.""" learn_predict_keraslinear(tkr, yrs, mnth, features) # Store a model in th db. db = pg.PostgreDB('postgres://*****:*****@127.0.0.1/tkrapi') sql_s = '''SELECT tkr,yrs,mnth,features,algo,algo_params, kmodel_h5 FROM predictions WHERE tkr = %s AND yrs = %s AND mnth = %s AND features = %s LIMIT 1''' result = db.execute_select_sql(sql_s, [tkr, yrs, mnth, features]) if not result.rowcount: return ['no result'] # Probably, a problem. myrow = [row for row in result][0] kmodel_h5 = (bytes(myrow.kmodel_h5)) with open('/tmp/kmodel2.h5', 'wb') as fh: fh.write(kmodel_h5) kmodel = keras.models.load_model('/tmp/kmodel2.h5') xtrain_a, ytrain_a, xtest_a, out_df = pgdb.get_train_and_test_data( tkr, yrs, mnth, features) if ((xtrain_a.size == 0) or (ytrain_a.size == 0) or (xtest_a.size == 0)): return out_df # probably empty too. # Start using Keras here. # I should predict xtest_a then update out_df predictions_a = np.round(kmodel.predict(xtest_a), 3) # Done with Keras, I should pass along the predictions. predictions_l = [p_f[0] for p_f in predictions_a] # I want a list out_df['prediction'] = predictions_l out_df['effectiveness'] = np.sign( out_df.pct_lead * out_df.prediction) * np.abs(out_df.pct_lead) out_df['accuracy'] = (1 + np.sign(out_df.effectiveness)) / 2 algo = 'keraslinear' return out_df
logging.debug('root project path : %s', config.APP_CONFIG['HOME']) mode = os.environ.get('mode', None) if not mode: raise ValueError('You must have "mode" variable') logging.debug('mode : %s', mode) if mode == 'init-environment' : logging.debug('initialize environment ') import core.persist.pg as pg # setup db connection db = pg.PostgreDB() # drop & create table db.script_execution(config.APP_CONFIG['HOME']+config.DATABASE_CONFIG['init-script']) elif mode == 'crawl' : print('crawl') ticker = os.environ.get('ticker', None) if ticker is None : crawl() else: #crawl('BAC') #crawl('DIA') crawl(ticker) elif mode == 'import-to-db' :