Example #1
import io
import sys
import pandas as pd
from sqlalchemy import sql          # assumed: sql.text() below is SQLAlchemy's text()
import core.persist.pg as pg        # project-local Postgres wrapper (see Example #4)

def genf():

  db = pg.PostgreDB()

  sql_s   = "select tkr,csvh from tkrprices order by tkr"
  sql_sql = sql.text(sql_s)
  result  = db.query(sql_sql)

  if not result.rowcount:
    sys.exit(1)

  for row in result:

    print(row.tkr)

    # Read the stored price-history CSV into a DataFrame (close date, close price):
    feat_df = pd.read_csv(io.StringIO(row.csvh), names=('cdate','cp'))

    # But first, I should calculate the dependent variable:
    feat_df['pct_lead'] = 100.0*((feat_df.cp.shift(-1) - feat_df.cp) / feat_df.cp).fillna(0)

    # Now, I should get 'lag' features from price:
    feat_df['pct_lag1'] = 100.0*((feat_df.cp - feat_df.cp.shift(1))/feat_df.cp.shift(1)).fillna(0)
    feat_df['pct_lag2'] = 100.0*((feat_df.cp - feat_df.cp.shift(2))/feat_df.cp.shift(2)).fillna(0)
    feat_df['pct_lag4'] = 100.0*((feat_df.cp - feat_df.cp.shift(4))/feat_df.cp.shift(4)).fillna(0)
    feat_df['pct_lag8'] = 100.0*((feat_df.cp - feat_df.cp.shift(8))/feat_df.cp.shift(8)).fillna(0)

    # Now, I should calculate 'slope' features from price:
    for slope_i in [3,4,5,6,7,8,9]:
      col_s          = 'slope'+str(slope_i)
      # Rolling mean of the closing price; computed once instead of three times per loop:
      roll_mean_sr   = feat_df.cp.rolling(window=slope_i).mean()
      slope_sr       = 100.0 * (roll_mean_sr - roll_mean_sr.shift(1)) / roll_mean_sr
      feat_df[col_s] = slope_sr

    # Now, I should calculate 'date' features from cdate:
    dt_sr = pd.to_datetime(feat_df.cdate)

    # Normalize day-of-week and month-of-year into small fractions:
    dow_l = [float(dt.strftime('%w'))/100.0 for dt in dt_sr]
    moy_l = [float(dt.strftime('%m'))/100.0 for dt in dt_sr]

    #dom_l = [float(dt.strftime('%d'))       for dt in dt_sr] # maybe use later
    #wom_l = [round(dom/5)/100.0             for dom in dom_l] # maybe use later

    feat_df['dow'] = dow_l # day of week
    feat_df['moy'] = moy_l # month of year
    feat_df.to_csv('/tmp/tmp.csv',index=False, float_format="%.3f")

    csv0_s = feat_df.to_csv(index=False,header=True,float_format='%.3f')
    csv_s  = "'"+csv0_s+"'"
    tkr_s  = "'"+row.tkr+"'"

    sql_s  = "insert into features(tkr,csv)values("+tkr_s+","+csv_s+")"
    db.query(sql_s)

    'bye'
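
A minimal sketch (toy prices, not from the database) of what the lead/lag percent-change features in genf() look like on a tiny series:

import pandas as pd

toy_df = pd.DataFrame({'cdate': ['2017-01-03','2017-01-04','2017-01-05','2017-01-06'],
                       'cp':    [100.0, 102.0, 101.0, 103.0]})
# Dependent variable: percent change to the next observation ('lead').
toy_df['pct_lead'] = 100.0*((toy_df.cp.shift(-1) - toy_df.cp) / toy_df.cp).fillna(0)
# One lag feature: percent change since the previous observation.
toy_df['pct_lag1'] = 100.0*((toy_df.cp - toy_df.cp.shift(1)) / toy_df.cp.shift(1)).fillna(0)
print(toy_df)
# Row 0 has pct_lead == 2.0 (100 -> 102); row 1 has pct_lag1 == 2.0 for the same move.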
Example #2
import glob
import os
import pandas as pd
import core.persist.pg as pg   # project-local Postgres wrapper (see Example #4)

def copy2db(tkrdir):

    tkrhdir = tkrdir + '\\history'
    tkrddir = tkrdir + '\\div'
    tkrsdir = tkrdir + '\\split'

    # gathering csv to import into database
    for csvf_s in sorted(glob.glob(tkrhdir+'/*.csv')):

      sz_i = os.path.getsize(csvf_s)
      #print(csvf_s, sz_i)

      if (sz_i > 140): # skip files too small to hold real price history
        tkr0_s = csvf_s.split('\\')[-1].split('.')[0] # get ticker name

        try:
            csv_df = pd.read_csv(csvf_s)
        except:
            print(csvf_s + ' failed to read as a csv file')
            continue  # skip this ticker if its history file cannot be parsed


        # I should convert to String and pick only two columns:
        csv0_s = csv_df.to_csv(index=False,header=False,columns=('Date','Close'),float_format='%.3f')
        csvh_s  = "'"+csv0_s+"'"
        tkr_s   = "'"+tkr0_s+"'"

        csvfd_s = tkrddir+'\\'+tkr0_s+'.csv'
        csvfs_s = tkrsdir+'\\'+tkr0_s+'.csv'

        # Empty defaults so a missing dividend/split file does not leave these undefined:
        csvd0_s = ''
        csvs0_s = ''
        try:
            csvd0_s = pd.read_csv(csvfd_s).sort_values('Date').to_csv(index=False, header=False)
        except:
            print(csvfd_s + ' failed to read as a csv file')
        try:
            csvs0_s = pd.read_csv(csvfs_s).sort_values('Date').to_csv(index=False, header=False)
        except:
            print(csvfs_s + ' failed to read as a csv file')

        csvd_s  = "'"+csvd0_s+"'"
        csvs_s  = "'"+csvs0_s+"'"

        db = pg.PostgreDB()

        sql_s   = "insert into tkrprices(tkr,csvd,csvh,csvs)values("+tkr_s+","+csvd_s+","+csvh_s+","+csvs_s+")"
        db.query(sql_s)

        # sql_check = "select tkr, csvh from tkrprices limit 1;"
        # result = db.execu(sql_check)

        'bye'
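
A minimal sketch (made-up rows, not a real download) of the two-column CSV string that copy2db() builds for each ticker before quoting it into the insert statement:

import pandas as pd

toy_df = pd.DataFrame({'Date':   ['2017-01-03', '2017-01-04'],
                       'Close':  [100.1234, 101.5678],
                       'Volume': [900, 950]})
# Keep only Date and Close, drop header and index, round prices to 3 decimals:
csv0_s = toy_df.to_csv(index=False, header=False, columns=('Date','Close'), float_format='%.3f')
print(csv0_s)
# 2017-01-03,100.123
# 2017-01-04,101.568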
Example #3
def load_predict_keraslinear(tkr='FB',
                             yrs=3,
                             mnth='2017-08',
                             features='pct_lag1,slope4,moy'):
    """This function should demo how to predict from a model in the db."""
    learn_predict_keraslinear(tkr, yrs, mnth,
                              features)  # Store a model in the db.

    db = pg.PostgreDB('postgres://*****:*****@127.0.0.1/tkrapi')

    sql_s = '''SELECT tkr,yrs,mnth,features,algo,algo_params, kmodel_h5
    FROM predictions
    WHERE tkr      = %s 
    AND   yrs      = %s
    AND   mnth     = %s
    AND   features = %s
    LIMIT 1'''

    result = db.execute_select_sql(sql_s, [tkr, yrs, mnth, features])

    if not result.rowcount:
        return ['no result']  # Probably, a problem.

    myrow = [row for row in result][0]
    kmodel_h5 = bytes(myrow.kmodel_h5)

    with open('/tmp/kmodel2.h5', 'wb') as fh:
        fh.write(kmodel_h5)
    kmodel = keras.models.load_model('/tmp/kmodel2.h5')

    xtrain_a, ytrain_a, xtest_a, out_df = pgdb.get_train_and_test_data(
        tkr, yrs, mnth, features)
    if ((xtrain_a.size == 0) or (ytrain_a.size == 0) or (xtest_a.size == 0)):
        return out_df  # probably empty too.
    # Start using Keras here.
    # I should predict xtest_a then update out_df
    predictions_a = np.round(kmodel.predict(xtest_a), 3)
    # Done with Keras, I should pass along the predictions.
    predictions_l = [p_f[0] for p_f in predictions_a]  # I want a list
    out_df['prediction'] = predictions_l
    out_df['effectiveness'] = np.sign(
        out_df.pct_lead * out_df.prediction) * np.abs(out_df.pct_lead)
    out_df['accuracy'] = (1 + np.sign(out_df.effectiveness)) / 2
    algo = 'keraslinear'

    return out_df
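
A small sketch with toy numbers of the effectiveness and accuracy columns computed above: effectiveness keeps the magnitude of the actual move (pct_lead) with the sign of the agreement between prediction and outcome, and accuracy is 1 when the signs agree, else 0:

import numpy as np
import pandas as pd

toy_df = pd.DataFrame({'pct_lead':   [ 1.2, -0.5,  0.8],
                       'prediction': [ 0.9,  0.4, -0.3]})
toy_df['effectiveness'] = np.sign(toy_df.pct_lead * toy_df.prediction) * np.abs(toy_df.pct_lead)
toy_df['accuracy']      = (1 + np.sign(toy_df.effectiveness)) / 2
print(toy_df)
# Row 0: signs agree  -> effectiveness  1.2, accuracy 1.0
# Rows 1 and 2: signs differ -> effectiveness -0.5 and -0.8, accuracy 0.0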
Example #4
    logging.debug('root project path : %s', config.APP_CONFIG['HOME'])

    mode = os.environ.get('mode', None)

    if not mode:
        raise ValueError('You must set the "mode" environment variable')

    logging.debug('mode : %s', mode)
    if mode == 'init-environment':
        logging.debug('initialize environment')

        import core.persist.pg as pg

        # setup db connection
        db = pg.PostgreDB()

        # drop & create table
        db.script_execution(config.APP_CONFIG['HOME']+config.DATABASE_CONFIG['init-script'])

    elif mode == 'crawl':
        print('crawl')
        ticker = os.environ.get('ticker', None)
        if ticker is None:
            crawl()
        else:
            #crawl('BAC')
            #crawl('DIA')
            crawl(ticker)

    elif mode == 'import-to-db':
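
A minimal sketch of driving the dispatch above from Python; the entry-point file name main.py is an assumption, since this excerpt does not show which script contains the block:

import os
import subprocess

# The script reads its behaviour from environment variables, so set them for the child process:
env = dict(os.environ, mode='crawl', ticker='BAC')
subprocess.run(['python', 'main.py'], env=env, check=True)  # main.py is hypothetical here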