예제 #1
0
    def insert_hist_trade(self):
        """Download one day of history per stock and insert it into ``trade_hist``.

        Calls ``self.set_data()``, reads every ``(code, name)`` pair from the
        ``stock_code`` table, fetches the daily bar for ``self.nowdate`` through
        ``td.get_hist_data`` and bulk-inserts the resulting rows.  A failing
        insert is logged and the loop moves on to the next stock.
        """
        self.set_data()
        db = Db()

        engine = db._get_engine()
        sql_stocklist = "select code,name from stock_code"
        codes = pd.read_sql_query(sql_stocklist, engine)
        codes = codes.to_dict('records')
        for i, row in enumerate(codes, 1):
            gta = td.get_hist_data(code=row['code'],
                                   start=self.nowdate,
                                   end=self.nowdate,
                                   ktype='D',
                                   retry_count=3,
                                   pause=0.001)

            # NOTE(review): presumably td.get_hist_data can return None when
            # the provider has no data for the code -- confirm.  Without this
            # guard the column assignments below would raise outside the
            # try/except and abort the whole run.
            if gta is None:
                log.info('%s stock skipped, no data returned,%s', i,
                         row['code'])
                continue

            # Stamp every fetched row with the load time and stock identity;
            # the frame index is stored as the trade-date column.
            gta['datain_date'] = self.nowtime
            gta['code'] = row['code']
            gta['name'] = row['name']
            gta['c_yearmonthday'] = gta.index

            gta = gta.to_dict('records')
            try:
                db.insertmany(
                    """INSERT INTO trade_hist(c_yearmonthday,code,name,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover,datain_date)
                VALUES (%(c_yearmonthday)s,%(code)s,%(name)s,%(open)s,%(high)s,%(close)s,%(low)s,%(volume)s,%(price_change)s,%(p_change)s,%(ma5)s,%(ma10)s,%(ma20)s,%(v_ma5)s,%(v_ma10)s,%(v_ma20)s,%(turnover)s,%(datain_date)s)""",
                    gta)
            except Exception as e:
                # Best effort: one failing stock must not stop the batch.
                log.error('insert error:%s ', e)

            log.info('%s stock insert finished,%s,%s', i, row['code'],
                     row['name'].decode('utf-8'))
예제 #2
0
파일: stock.py 프로젝트: datablood/stock
    def insert_hist_trade(self):
        """Download one day of history per stock and insert it into ``trade_hist``.

        Calls ``self.set_data()``, reads every ``(code, name)`` pair from the
        ``stock_code`` table, fetches the daily bar for ``self.nowdate`` through
        ``td.get_hist_data`` and bulk-inserts the resulting rows.  A failing
        insert is logged and the loop moves on to the next stock.
        """
        self.set_data()
        db = Db()

        engine = db._get_engine()
        sql_stocklist = "select code,name from stock_code"
        codes = pd.read_sql_query(sql_stocklist, engine)
        codes = codes.to_dict('records')
        for i, row in enumerate(codes, 1):
            gta = td.get_hist_data(code=row['code'],
                                   start=self.nowdate,
                                   end=self.nowdate,
                                   ktype='D',
                                   retry_count=3,
                                   pause=0.001)

            # NOTE(review): presumably td.get_hist_data can return None when
            # the provider has no data for the code -- confirm.  Without this
            # guard the column assignments below would raise outside the
            # try/except and abort the whole run.
            if gta is None:
                log.info('%s stock skipped, no data returned,%s', i,
                         row['code'])
                continue

            # Stamp every fetched row with the load time and stock identity;
            # the frame index is stored as the trade-date column.
            gta['datain_date'] = self.nowtime
            gta['code'] = row['code']
            gta['name'] = row['name']
            gta['c_yearmonthday'] = gta.index

            gta = gta.to_dict('records')
            try:
                db.insertmany(
                    """INSERT INTO trade_hist(c_yearmonthday,code,name,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover,datain_date)
                VALUES (%(c_yearmonthday)s,%(code)s,%(name)s,%(open)s,%(high)s,%(close)s,%(low)s,%(volume)s,%(price_change)s,%(p_change)s,%(ma5)s,%(ma10)s,%(ma20)s,%(v_ma5)s,%(v_ma10)s,%(v_ma20)s,%(turnover)s,%(datain_date)s)""",
                    gta)
            except Exception as e:
                # Best effort: one failing stock must not stop the batch.
                log.error('insert error:%s ', e)

            log.info('%s stock insert finished,%s,%s', i, row['code'],
                     row['name'].decode('utf-8'))
예제 #3
0
파일: trade.py 프로젝트: datablood/stock
def get_predict_acc1(debug=False):
    """Join the first two trade rows with the first two prediction rows.

    Both queries are ordered by ``c_yearmonthday`` descending and truncated
    to two rows before being merged on ``code``.  The returned frame gets an
    ``acc`` column that is 1.0 where ``p_change`` is positive and 0.0
    otherwise.  ``debug`` is accepted but has no effect.
    """
    engine = Db()._get_engine()
    trade_query = (
        "select code,name,p_change from trade_hist where code in "
        "(select code from predict_head where c_yearmonthday in "
        "(select max(c_yearmonthday) from predict_head) ) "
        "order by c_yearmonthday desc")
    predict_query = (
        "select code,predict from predict_head "
        "order by c_yearmonthday desc")

    top_trades = pd.read_sql_query(trade_query, engine).head(2)
    top_predictions = pd.read_sql_query(predict_query, engine).head(2)

    merged = pd.merge(top_trades, top_predictions, on='code')
    merged['acc'] = (merged['p_change'] > 0).astype(float)
    return merged
예제 #4
0
파일: trade.py 프로젝트: datablood/stock
def get_hist_orgindata(debug=False):
    """Load ``trade_hist`` rows and enrich them with derived indicators.

    Only codes having more than 100 rows with non-zero ``high`` and ``low``
    are selected; with ``debug`` set the query is narrowed to three fixed
    codes.  Returns ``(frame, codes)`` where ``codes`` holds the unique
    codes of the raw query result and ``frame`` has been passed through
    ``add_volatility`` and ``get_technique``.
    """
    engine = Db()._get_engine()
    query = "select  * from trade_hist where code in (select code  from trade_hist  where high<>0.0 and low <>0.0 group by code having count(code)>100)"
    if debug:
        query = query + " and code in ('002717','601888','002405')"

    history = pd.read_sql_query(query, engine)
    codes = history['code'].unique()

    # Add technical indicators.
    history = get_technique(add_volatility(history))
    return history, codes
예제 #5
0
def get_predict_acc1(debug=False):
    """Join the first two trade rows with the first two prediction rows.

    Both queries are ordered by ``c_yearmonthday`` descending and truncated
    to two rows before being merged on ``code``.  The returned frame gets an
    ``acc`` column that is 1.0 where ``p_change`` is positive and 0.0
    otherwise.  ``debug`` is accepted but has no effect.
    """
    engine = Db()._get_engine()
    trade_query = (
        "select code,name,p_change from trade_hist where code in "
        "(select code from predict_head where c_yearmonthday in "
        "(select max(c_yearmonthday) from predict_head) ) "
        "order by c_yearmonthday desc")
    predict_query = (
        "select code,predict from predict_head "
        "order by c_yearmonthday desc")

    top_trades = pd.read_sql_query(trade_query, engine).head(2)
    top_predictions = pd.read_sql_query(predict_query, engine).head(2)

    merged = pd.merge(top_trades, top_predictions, on='code')
    merged['acc'] = (merged['p_change'] > 0).astype(float)
    return merged
예제 #6
0
def get_hist_orgindata(debug=False):
    """Load ``trade_hist`` rows and enrich them with derived indicators.

    Only codes having more than 100 rows with non-zero ``high`` and ``low``
    are selected; with ``debug`` set the query is narrowed to three fixed
    codes.  Returns ``(frame, codes)`` where ``codes`` holds the unique
    codes of the raw query result and ``frame`` has been passed through
    ``add_volatility`` and ``get_technique``.
    """
    engine = Db()._get_engine()
    query = "select  * from trade_hist where code in (select code  from trade_hist  where high<>0.0 and low <>0.0 group by code having count(code)>100)"
    if debug:
        query = query + " and code in ('002717','601888','002405')"

    history = pd.read_sql_query(query, engine)
    codes = history['code'].unique()

    # Add technical indicators.
    history = get_technique(add_volatility(history))
    return history, codes
예제 #7
0
파일: trade.py 프로젝트: datablood/stock
def get_predict(debug=False):
    """Summarise the most recently loaded rows of the ``predicts`` table.

    Selects the rows whose ``datain_date`` equals the table maximum, ordered
    by ``predict`` descending.  Returns ``(summary, top_two)``: the
    transposed ``describe()`` output with its eight statistics renamed to
    ``p_*`` columns, and the first two rows of the query result.  ``debug``
    is accepted but has no effect.
    """
    engine = Db()._get_engine()
    query = "select * from predicts where datain_date in(select  max(datain_date) from predicts) order by predict desc"
    predictions = pd.read_sql_query(query, engine)

    top_two = predictions.head(2)

    stat_names = ['p_cnt', 'p_mean', 'p_std', 'p_min',
                  'p25', 'p50', 'p75', 'p_max']
    summary = predictions.describe().transpose()
    summary.columns = stat_names
    return summary, top_two
예제 #8
0
def get_predict(debug=False):
    """Summarise the most recently loaded rows of the ``predicts`` table.

    Selects the rows whose ``datain_date`` equals the table maximum, ordered
    by ``predict`` descending.  Returns ``(summary, top_two)``: the
    transposed ``describe()`` output with its eight statistics renamed to
    ``p_*`` columns, and the first two rows of the query result.  ``debug``
    is accepted but has no effect.
    """
    engine = Db()._get_engine()
    query = "select * from predicts where datain_date in(select  max(datain_date) from predicts) order by predict desc"
    predictions = pd.read_sql_query(query, engine)

    top_two = predictions.head(2)

    stat_names = ['p_cnt', 'p_mean', 'p_std', 'p_min',
                  'p25', 'p50', 'p75', 'p_max']
    summary = predictions.describe().transpose()
    summary.columns = stat_names
    return summary, top_two
예제 #9
0
파일: trade.py 프로젝트: datablood/stock
def get_predict_acc2(debug=False):
    """Build a one-row accuracy summary from the ``acc1`` table.

    ``h_p_acc`` is the mean of ``acc`` over all rows and ``h_p_change`` is
    the sum of ``p_change`` over all rows divided by 2.  ``p_acc`` and
    ``p_change`` are the same sums over the two rows with the largest
    ``c_yearmonthday`` (grouped by that column first), each divided by 2.
    ``debug`` is accepted but has no effect.
    """
    engine = Db()._get_engine()
    history = pd.read_sql_query("select  * from acc1", engine)

    # Two rows with the largest c_yearmonthday, summed per date.
    latest = history.sort_values('c_yearmonthday', ascending=False).head(2)
    latest = latest.groupby('c_yearmonthday').sum()

    acc_column = history['acc']
    hist_acc = acc_column.sum() / float(acc_column.count())
    hist_change = history['p_change'].sum() / 2.0
    latest_acc = latest['acc'].sum() / 2.0
    latest_change = latest['p_change'].sum() / 2.0

    summary = pd.DataFrame()
    summary['h_p_acc'] = [hist_acc]
    summary['h_p_change'] = [hist_change]
    summary['p_acc'] = [latest_acc]
    summary['p_change'] = [latest_change]
    return summary
예제 #10
0
def get_predict_acc2(debug=False):
    """Build a one-row accuracy summary from the ``acc1`` table.

    ``h_p_acc`` is the mean of ``acc`` over all rows and ``h_p_change`` is
    the sum of ``p_change`` over all rows divided by 2.  ``p_acc`` and
    ``p_change`` are the same sums over the two rows with the largest
    ``c_yearmonthday`` (grouped by that column first), each divided by 2.
    ``debug`` is accepted but has no effect.
    """
    engine = Db()._get_engine()
    history = pd.read_sql_query("select  * from acc1", engine)

    # Two rows with the largest c_yearmonthday, summed per date.
    latest = history.sort_values('c_yearmonthday', ascending=False).head(2)
    latest = latest.groupby('c_yearmonthday').sum()

    acc_column = history['acc']
    hist_acc = acc_column.sum() / float(acc_column.count())
    hist_change = history['p_change'].sum() / 2.0
    latest_acc = latest['acc'].sum() / 2.0
    latest_change = latest['p_change'].sum() / 2.0

    summary = pd.DataFrame()
    summary['h_p_acc'] = [hist_acc]
    summary['h_p_change'] = [hist_change]
    summary['p_acc'] = [latest_acc]
    summary['p_change'] = [latest_change]
    return summary
예제 #11
0
파일: trade.py 프로젝트: datablood/stock
def get_histdata(split=0.15, seg_len=3, debug=False, datatype='cnn'):
    """Build train/validation samples from ``trade_record`` and pickle them.

    For each stock code the trade days are sorted in descending order.  Every
    day is labelled with ``to_cate01(changepercent)`` and paired with the
    feature rows of the next ``seg_len`` entries of that descending list
    (i.e. the preceding days, assuming ``c_yearmonthday`` sorts
    chronologically).  The first ``round(split * n_days)`` samples of each
    code go to the validation set, the rest to the training set.

    Args:
        split: Fraction of each code's days assigned to validation.
        seg_len: Number of day rows per sample.
        debug: Restrict the query to two fixed codes and print the arrays.
        datatype: When ``'cnn'`` each sample is wrapped in an extra list
            (adds a leading axis); also used in the output file name.

    Returns:
        None.  The result ``((X_train, Y_train, ID_train),
        (X_valid, Y_valid, ID_valid))`` is written to
        ``<datatype>_seg<seg_len>.pkl`` in the current directory.
    """
    db = Db()
    engine = db._get_engine()
    sql_stocklist = "select  * from trade_record where code in (select code  from trade_record  where high<>0.0 and low <>0.0 group by code having count(code)=(select count(distinct c_yearmonthday) from trade_record))"
    if debug:
        sql_stocklist += " and code in ('300138','002372')"
    df = pd.read_sql_query(sql_stocklist, engine)
    stockcodes = df['code'].unique()

    feature_columns = [
        'changepercent', 'trade', 'open', 'high', 'low', 'settlement',
        'volume', 'turnoverratio', 'amount', 'per', 'pb', 'mktcap', 'nmc',
        'deltat'
    ]

    X_train = []
    X_valid = []
    Y_train = []
    Y_valid = []
    ID_train = []
    ID_valid = []
    log.info('begin generate train data and validate data.')
    # NOTE: time.clock only exists on Python 2 / Python < 3.8.
    begin_time = time.clock()
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]

        # Descending list of this code's trade days.
        tradedaylist = temp_df.copy(deep=True)['c_yearmonthday'].values
        tradedaylist.sort()
        tradedaylist = tradedaylist[::-1]
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue

        validdays = np.round(split * len(tradedaylist))

        # i is the 1-based position of `day`, so tradedaylist[i:] starts at
        # the entry right after it.
        for i, day in enumerate(tradedaylist, 1):
            segdays = tradedaylist[i:i + seg_len]
            if len(segdays) < seg_len:
                # Not enough remaining days for a full segment.
                break
            SEG_X = []
            for segday in segdays:
                rows = temp_df[temp_df.c_yearmonthday == segday][
                    feature_columns]
                SEG_X.append(rows.values[0])
            # SEG_X=np.array(SEG_X).T
            if datatype == 'cnn':
                SEG_X = [SEG_X]
            data_tag = temp_df[temp_df.c_yearmonthday == day][
                ['code', 'name', 'changepercent']]
            temp_y = to_cate01(data_tag['changepercent'].values[0])
            temp_id = data_tag['code'].values[0]
            if i <= validdays:
                X_valid.append(SEG_X)
                ID_valid.append(temp_id)
                Y_valid.append(temp_y)
            else:
                X_train.append(SEG_X)
                ID_train.append(temp_id)
                Y_train.append(temp_y)
        k += 1
        if k % 500 == 0:
            log.info('%s stock finished ', k)

    log.info('generate data finished ,cost time:%s', time.clock() - begin_time)
    log.info('X_train shape is :%s', np.asarray(X_train).shape)
    log.info('Y_train shape is :%s', np.asarray(Y_train).shape)
    log.info('X_valid shape is :%s', np.asarray(X_valid).shape)
    log.info('Y_valid shape is :%s', np.asarray(Y_valid).shape)

    # X_train=normalize(X_train)
    # X_valid=normalize(X_valid)

    if debug:
        print(np.asarray(X_train), np.asarray(Y_train),
              np.asarray(ID_train)), (np.asarray(X_valid), np.asarray(Y_valid),
                                      np.asarray(ID_valid))
        print(np.asarray(X_train[0][0][0]))

    dataset = (
        (np.asarray(X_train), np.asarray(Y_train), np.asarray(ID_train)),
        (np.asarray(X_valid), np.asarray(Y_valid), np.asarray(ID_valid)))
    # Use a context manager so the output file is flushed and closed.
    with open(datatype + '_seg' + str(seg_len) + '.pkl', 'wb') as out_file:
        pickle.dump(dataset, out_file)
예제 #12
0
파일: trade.py 프로젝트: datablood/stock
def get_hist6years(split=0.2,
                   seg_len=3,
                   debug=False,
                   datatype='cnn',
                   datafile=None,
                   predict_days=18):
    """Yield train/validation batches built from ``trade_hist``.

    For each stock code the trade days are sorted in descending order.  For
    the day at position ``p`` the label is
    ``to_cate01((close[p] - close[p + predict_days]) / close[p + predict_days])``
    and the features are the rows whose ``c_yearmonthday`` lies between the
    last and first of the ``seg_len`` list entries that follow position
    ``p + predict_days``.  The first ``round(split * n_days)`` samples of a
    code go to validation, the rest to training.

    Args:
        split: Fraction of each code's days assigned to validation.
        seg_len: Number of list entries spanned by one feature segment.
        debug: Restrict the query to three fixed codes.
        datatype: When ``'cnn'`` each sample is wrapped in an extra list.
        datafile: Unused.
        predict_days: Offset (in list positions) between the labelled day
            and its reference close / feature segment.

    Yields:
        ``((X_train, Y_train, ID_train), (X_valid, Y_valid, ID_valid))`` as
        numpy arrays after every 12 processed codes, then once more with
        whatever has accumulated since the last batch.
    """
    log = logger.log
    db = Db()
    engine = db._get_engine()
    sql_stocklist = "select  * from trade_hist where code in (select code  from trade_hist  where high<>0.0 and low <>0.0 group by code having count(code)>100)"
    if debug:
        sql_stocklist += " and code in ('002717','601888','002405')"
    df = pd.read_sql_query(sql_stocklist, engine)
    # Add technical indicators.
    df = add_volatility(df)
    stockcodes = df['code'].unique()
    df = get_technique(df, stockcodes)

    feature_columns = [
        'open', 'high', 'close', 'low', 'volume', 'price_change',
        'p_change', 'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20',
        'turnover', 'deltat', 'BIAS_B', 'BIAS_S', 'BOLL_B', 'BOLL_S',
        'CCI_B', 'CCI_S', 'DMI_B', 'DMI_HL', 'DMI_IF1', 'DMI_IF2',
        'DMI_MAX1', 'DMI_S', 'KDJ_B', 'KDJ_S', 'KD_B', 'KD_S', 'MACD',
        'MACD_B', 'MACD_DEA', 'MACD_DIFF', 'MACD_EMA_12', 'MACD_EMA_26',
        'MACD_EMA_9', 'MACD_S', 'MA_B', 'MA_S', 'PSY_B', 'PSY_MYPSY1',
        'PSY_S', 'ROC_B', 'ROC_S', 'RSI_B', 'RSI_S', 'VR_B', 'VR_IF1',
        'VR_IF2', 'VR_IF3', 'VR_S', 'XYYH_B', 'XYYH_B1', 'XYYH_B2',
        'XYYH_B3', 'XYYH_CC', 'XYYH_DD'
    ]
    # Number of processed codes per yielded batch.
    samples = 12

    X_train = []
    X_valid = []
    Y_train = []
    Y_valid = []
    ID_train = []
    ID_valid = []
    log.info('begin generate train data and validate data.')
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]
        temp_df1 = temp_df.copy(deep=True)
        temp_df1 = temp_df1.sort_values(by='c_yearmonthday', ascending=1)

        # Descending list of this code's trade days.
        tradedaylist = temp_df1['c_yearmonthday'].values
        tradedaylist.sort()
        tradedaylist = tradedaylist[::-1]

        # Index by day so a segment can be taken with one label slice.
        temp_df1 = temp_df1.set_index('c_yearmonthday')
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue

        validdays = np.round(split * len(tradedaylist))
        # validdays = 2

        # i is the 1-based position of `day` in the descending list.
        for i, day in enumerate(tradedaylist, 1):
            segdays = tradedaylist[i + predict_days:i + predict_days + seg_len]
            # Check the length BEFORE indexing: an empty slice would
            # otherwise raise IndexError on segdays[-1].
            if len(segdays) < seg_len:
                break
            segbegin = segdays[len(segdays) - 1]
            segend = segdays[0]
            data = temp_df1.loc[segbegin:segend, feature_columns].values
            if datatype == 'cnn':
                data = [data]
            d3 = tradedaylist[i + predict_days - 1]
            data_tag = temp_df[temp_df.c_yearmonthday == day][
                ['code', 'name', 'p_change', 'close']]
            data_tag3 = temp_df[temp_df.c_yearmonthday == d3][
                ['code', 'name', 'p_change', 'close']]
            temp_y = data_tag['close'].values[0]
            temp_y3 = data_tag3['close'].values[0]
            # Relative close change between the labelled day and day d3.
            temp_y = (temp_y - temp_y3) / temp_y3
            temp_y = to_cate01(temp_y)
            temp_id = data_tag['code'].values[0]
            if i <= validdays:
                X_valid.append(data)
                ID_valid.append(temp_id)
                Y_valid.append(temp_y)
            else:
                X_train.append(data)
                ID_train.append(temp_id)
                Y_train.append(temp_y)
        k += 1
        if k % samples == 0:
            print(k)
            log.info('%s stock finished ', k)
            yield ((np.asarray(X_train), np.asarray(Y_train),
                    np.asarray(ID_train)),
                   (np.asarray(X_valid), np.asarray(Y_valid),
                    np.asarray(ID_valid)))
            # Start a fresh batch.
            X_train = []
            X_valid = []
            Y_train = []
            Y_valid = []
            ID_train = []
            ID_valid = []

    yield ((np.asarray(X_train), np.asarray(Y_train), np.asarray(ID_train)),
           (np.asarray(X_valid), np.asarray(Y_valid), np.asarray(ID_valid)))
예제 #13
0
파일: trade.py 프로젝트: datablood/stock
def get_today(split=0.2,
              seg_len=3,
              debug=False,
              datatype='cnn',
              datafile=None):
    """Build one prediction sample per stock from its latest trade days.

    For each code in ``trade_hist`` (restricted to three fixed codes when
    ``debug`` is set) the feature rows between the ``seg_len``-th largest and
    the largest ``c_yearmonthday`` are collected.  Codes with fewer than
    ``seg_len`` days are skipped.

    Args:
        split: Unused.
        seg_len: Number of most recent list entries spanned by a sample.
        debug: Restrict the query to three fixed codes.
        datatype: When ``'cnn'`` each sample is wrapped in an extra list.
        datafile: Unused.

    Returns:
        ``(X_predict, ID_predict, NAME_predict)`` as numpy arrays holding the
        feature segments, codes and names.
    """
    log = logger.log
    db = Db()
    engine = db._get_engine()
    sql_stocklist = "select  * from trade_hist where code in (select code  from trade_hist  where high<>0.0 and low <>0.0 group by code having count(code)>100)"
    if debug:
        sql_stocklist += " and code in ('002717','601888','002405')"
    df = pd.read_sql_query(sql_stocklist, engine)
    df = add_volatility(df)
    stockcodes = df['code'].unique()
    df = get_technique(df)
    print(stockcodes)

    feature_columns = [
        'open', 'high', 'close', 'low', 'volume', 'price_change',
        'p_change', 'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20',
        'turnover', 'deltat', 'BIAS_B', 'BIAS_S', 'BOLL_B', 'BOLL_S',
        'CCI_B', 'CCI_S', 'DMI_B', 'DMI_HL', 'DMI_IF1', 'DMI_IF2',
        'DMI_MAX1', 'DMI_S', 'KDJ_B', 'KDJ_S', 'KD_B', 'KD_S', 'MACD',
        'MACD_B', 'MACD_DEA', 'MACD_DIFF', 'MACD_EMA_12', 'MACD_EMA_26',
        'MACD_EMA_9', 'MACD_S', 'MA_B', 'MA_S', 'PSY_B', 'PSY_MYPSY1',
        'PSY_S', 'ROC_B', 'ROC_S', 'RSI_B', 'RSI_S', 'VR_B', 'VR_IF1',
        'VR_IF2', 'VR_IF3', 'VR_S', 'XYYH_B', 'XYYH_B1', 'XYYH_B2',
        'XYYH_B3', 'XYYH_CC', 'XYYH_DD'
    ]

    X_predict = []
    ID_predict = []
    NAME_predict = []
    log.info('begin generate train data and validate data.')
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]
        temp_df1 = temp_df.copy(deep=True)
        temp_df1 = temp_df1.sort_values(by='c_yearmonthday', ascending=1)

        # Descending list of this code's trade days.
        tradedaylist = temp_df1['c_yearmonthday'].values
        tradedaylist.sort()
        tradedaylist = tradedaylist[::-1]

        # Index by day so the segment can be taken with one label slice.
        temp_df1 = temp_df1.set_index('c_yearmonthday')
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue

        # The check above guarantees a full segment here, so the original
        # second length check (which would have aborted the whole loop) was
        # unreachable and has been dropped.
        segdays = tradedaylist[:seg_len]
        segbegin = segdays[len(segdays) - 1]
        segend = segdays[0]
        data = temp_df1.loc[segbegin:segend, feature_columns].values
        if datatype == 'cnn':
            data = [data]
        data_tag = temp_df[temp_df.c_yearmonthday == tradedaylist[0]][
            ['code', 'name', 'p_change']]
        temp_id = data_tag['code'].values[0]
        temp_name = data_tag['name'].values[0]
        X_predict.append(data)
        ID_predict.append(temp_id)
        NAME_predict.append(temp_name)
        k += 1
        log.info('%s stock finished ', k)
    return (np.asarray(X_predict), np.asarray(ID_predict),
            np.asarray(NAME_predict))
예제 #14
0
def get_histdata(split=0.15, seg_len=3, debug=False, datatype='cnn'):
    """Build train/validation samples from ``trade_record`` and pickle them.

    For each stock code the trade days are sorted in descending order.  Every
    day is labelled with ``to_cate01(changepercent)`` and paired with the
    feature rows of the next ``seg_len`` entries of that descending list
    (i.e. the preceding days, assuming ``c_yearmonthday`` sorts
    chronologically).  The first ``round(split * n_days)`` samples of each
    code go to the validation set, the rest to the training set.

    Args:
        split: Fraction of each code's days assigned to validation.
        seg_len: Number of day rows per sample.
        debug: Restrict the query to two fixed codes and print the arrays.
        datatype: When ``'cnn'`` each sample is wrapped in an extra list
            (adds a leading axis); also used in the output file name.

    Returns:
        None.  The result ``((X_train, Y_train, ID_train),
        (X_valid, Y_valid, ID_valid))`` is written to
        ``<datatype>_seg<seg_len>.pkl`` in the current directory.
    """
    db = Db()
    engine = db._get_engine()
    sql_stocklist = "select  * from trade_record where code in (select code  from trade_record  where high<>0.0 and low <>0.0 group by code having count(code)=(select count(distinct c_yearmonthday) from trade_record))"
    if debug:
        sql_stocklist += " and code in ('300138','002372')"
    df = pd.read_sql_query(sql_stocklist, engine)
    stockcodes = df['code'].unique()

    feature_columns = [
        'changepercent', 'trade', 'open', 'high', 'low', 'settlement',
        'volume', 'turnoverratio', 'amount', 'per', 'pb', 'mktcap', 'nmc',
        'deltat'
    ]

    X_train = []
    X_valid = []
    Y_train = []
    Y_valid = []
    ID_train = []
    ID_valid = []
    log.info('begin generate train data and validate data.')
    # NOTE: time.clock only exists on Python 2 / Python < 3.8.
    begin_time = time.clock()
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]

        # Descending list of this code's trade days.
        tradedaylist = temp_df.copy(deep=True)['c_yearmonthday'].values
        tradedaylist.sort()
        tradedaylist = tradedaylist[::-1]
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue

        validdays = np.round(split * len(tradedaylist))

        # i is the 1-based position of `day`, so tradedaylist[i:] starts at
        # the entry right after it.
        for i, day in enumerate(tradedaylist, 1):
            segdays = tradedaylist[i:i + seg_len]
            if len(segdays) < seg_len:
                # Not enough remaining days for a full segment.
                break
            SEG_X = []
            for segday in segdays:
                rows = temp_df[temp_df.c_yearmonthday == segday][
                    feature_columns]
                SEG_X.append(rows.values[0])
            # SEG_X=np.array(SEG_X).T
            if datatype == 'cnn':
                SEG_X = [SEG_X]
            data_tag = temp_df[temp_df.c_yearmonthday == day][
                ['code', 'name', 'changepercent']]
            temp_y = to_cate01(data_tag['changepercent'].values[0])
            temp_id = data_tag['code'].values[0]
            if i <= validdays:
                X_valid.append(SEG_X)
                ID_valid.append(temp_id)
                Y_valid.append(temp_y)
            else:
                X_train.append(SEG_X)
                ID_train.append(temp_id)
                Y_train.append(temp_y)
        k += 1
        if k % 500 == 0:
            log.info('%s stock finished ', k)

    log.info('generate data finished ,cost time:%s', time.clock() - begin_time)
    log.info('X_train shape is :%s', np.asarray(X_train).shape)
    log.info('Y_train shape is :%s', np.asarray(Y_train).shape)
    log.info('X_valid shape is :%s', np.asarray(X_valid).shape)
    log.info('Y_valid shape is :%s', np.asarray(Y_valid).shape)

    # X_train=normalize(X_train)
    # X_valid=normalize(X_valid)

    if debug:
        print(np.asarray(X_train), np.asarray(Y_train),
              np.asarray(ID_train)), (np.asarray(X_valid), np.asarray(Y_valid),
                                      np.asarray(ID_valid))
        print(np.asarray(X_train[0][0][0]))

    dataset = (
        (np.asarray(X_train), np.asarray(Y_train), np.asarray(ID_train)),
        (np.asarray(X_valid), np.asarray(Y_valid), np.asarray(ID_valid)))
    # Use a context manager so the output file is flushed and closed.
    with open(datatype + '_seg' + str(seg_len) + '.pkl', 'wb') as out_file:
        pickle.dump(dataset, out_file)
예제 #15
0
def get_hist6years(split=0.2,
                   seg_len=3,
                   debug=False,
                   datatype='cnn',
                   datafile=None,
                   predict_days=18):
    """Yield train/validation batches built from ``trade_hist``.

    For each stock code the trade days are sorted in descending order.  For
    the day at position ``p`` the label is
    ``to_cate01((close[p] - close[p + predict_days]) / close[p + predict_days])``
    and the features are the rows whose ``c_yearmonthday`` lies between the
    last and first of the ``seg_len`` list entries that follow position
    ``p + predict_days``.  The first ``round(split * n_days)`` samples of a
    code go to validation, the rest to training.

    Args:
        split: Fraction of each code's days assigned to validation.
        seg_len: Number of list entries spanned by one feature segment.
        debug: Restrict the query to three fixed codes.
        datatype: When ``'cnn'`` each sample is wrapped in an extra list.
        datafile: Unused.
        predict_days: Offset (in list positions) between the labelled day
            and its reference close / feature segment.

    Yields:
        ``((X_train, Y_train, ID_train), (X_valid, Y_valid, ID_valid))`` as
        numpy arrays after every 12 processed codes, then once more with
        whatever has accumulated since the last batch.
    """
    log = logger.log
    db = Db()
    engine = db._get_engine()
    sql_stocklist = "select  * from trade_hist where code in (select code  from trade_hist  where high<>0.0 and low <>0.0 group by code having count(code)>100)"
    if debug:
        sql_stocklist += " and code in ('002717','601888','002405')"
    df = pd.read_sql_query(sql_stocklist, engine)
    # Add technical indicators.
    df = add_volatility(df)
    stockcodes = df['code'].unique()
    df = get_technique(df, stockcodes)

    feature_columns = [
        'open', 'high', 'close', 'low', 'volume', 'price_change',
        'p_change', 'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20',
        'turnover', 'deltat', 'BIAS_B', 'BIAS_S', 'BOLL_B', 'BOLL_S',
        'CCI_B', 'CCI_S', 'DMI_B', 'DMI_HL', 'DMI_IF1', 'DMI_IF2',
        'DMI_MAX1', 'DMI_S', 'KDJ_B', 'KDJ_S', 'KD_B', 'KD_S', 'MACD',
        'MACD_B', 'MACD_DEA', 'MACD_DIFF', 'MACD_EMA_12', 'MACD_EMA_26',
        'MACD_EMA_9', 'MACD_S', 'MA_B', 'MA_S', 'PSY_B', 'PSY_MYPSY1',
        'PSY_S', 'ROC_B', 'ROC_S', 'RSI_B', 'RSI_S', 'VR_B', 'VR_IF1',
        'VR_IF2', 'VR_IF3', 'VR_S', 'XYYH_B', 'XYYH_B1', 'XYYH_B2',
        'XYYH_B3', 'XYYH_CC', 'XYYH_DD'
    ]
    # Number of processed codes per yielded batch.
    samples = 12

    X_train = []
    X_valid = []
    Y_train = []
    Y_valid = []
    ID_train = []
    ID_valid = []
    log.info('begin generate train data and validate data.')
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]
        temp_df1 = temp_df.copy(deep=True)
        temp_df1 = temp_df1.sort_values(by='c_yearmonthday', ascending=1)

        # Descending list of this code's trade days.
        tradedaylist = temp_df1['c_yearmonthday'].values
        tradedaylist.sort()
        tradedaylist = tradedaylist[::-1]

        # Index by day so a segment can be taken with one label slice.
        temp_df1 = temp_df1.set_index('c_yearmonthday')
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue

        validdays = np.round(split * len(tradedaylist))
        # validdays = 2

        # i is the 1-based position of `day` in the descending list.
        for i, day in enumerate(tradedaylist, 1):
            segdays = tradedaylist[i + predict_days:i + predict_days + seg_len]
            # Check the length BEFORE indexing: an empty slice would
            # otherwise raise IndexError on segdays[-1].
            if len(segdays) < seg_len:
                break
            segbegin = segdays[len(segdays) - 1]
            segend = segdays[0]
            data = temp_df1.loc[segbegin:segend, feature_columns].values
            if datatype == 'cnn':
                data = [data]
            d3 = tradedaylist[i + predict_days - 1]
            data_tag = temp_df[temp_df.c_yearmonthday == day][[
                'code', 'name', 'p_change', 'close'
            ]]
            data_tag3 = temp_df[temp_df.c_yearmonthday == d3][[
                'code', 'name', 'p_change', 'close'
            ]]
            temp_y = data_tag['close'].values[0]
            temp_y3 = data_tag3['close'].values[0]
            # Relative close change between the labelled day and day d3.
            temp_y = (temp_y - temp_y3) / temp_y3
            temp_y = to_cate01(temp_y)
            temp_id = data_tag['code'].values[0]
            if i <= validdays:
                X_valid.append(data)
                ID_valid.append(temp_id)
                Y_valid.append(temp_y)
            else:
                X_train.append(data)
                ID_train.append(temp_id)
                Y_train.append(temp_y)
        k += 1
        if k % samples == 0:
            print(k)
            log.info('%s stock finished ', k)
            yield ((np.asarray(X_train), np.asarray(Y_train),
                    np.asarray(ID_train)), (np.asarray(X_valid),
                                            np.asarray(Y_valid),
                                            np.asarray(ID_valid)))
            # Start a fresh batch.
            X_train = []
            X_valid = []
            Y_train = []
            Y_valid = []
            ID_train = []
            ID_valid = []

    yield ((np.asarray(X_train), np.asarray(Y_train), np.asarray(ID_train)),
           (np.asarray(X_valid), np.asarray(Y_valid), np.asarray(ID_valid)))
예제 #16
0
def get_today(split=0.2,
              seg_len=3,
              debug=False,
              datatype='cnn',
              datafile=None):
    """Build one prediction sample per stock from its latest trade days.

    For each code in ``trade_hist`` (restricted to three fixed codes when
    ``debug`` is set) the feature rows between the ``seg_len``-th largest and
    the largest ``c_yearmonthday`` are collected.  Codes with fewer than
    ``seg_len`` days are skipped.

    Args:
        split: Unused.
        seg_len: Number of most recent list entries spanned by a sample.
        debug: Restrict the query to three fixed codes.
        datatype: When ``'cnn'`` each sample is wrapped in an extra list.
        datafile: Unused.

    Returns:
        ``(X_predict, ID_predict, NAME_predict)`` as numpy arrays holding the
        feature segments, codes and names.
    """
    log = logger.log
    db = Db()
    engine = db._get_engine()
    sql_stocklist = "select  * from trade_hist where code in (select code  from trade_hist  where high<>0.0 and low <>0.0 group by code having count(code)>100)"
    if debug:
        sql_stocklist += " and code in ('002717','601888','002405')"
    df = pd.read_sql_query(sql_stocklist, engine)
    df = add_volatility(df)
    stockcodes = df['code'].unique()
    df = get_technique(df)
    print(stockcodes)

    feature_columns = [
        'open', 'high', 'close', 'low', 'volume', 'price_change',
        'p_change', 'ma5', 'ma10', 'ma20', 'v_ma5', 'v_ma10', 'v_ma20',
        'turnover', 'deltat', 'BIAS_B', 'BIAS_S', 'BOLL_B', 'BOLL_S',
        'CCI_B', 'CCI_S', 'DMI_B', 'DMI_HL', 'DMI_IF1', 'DMI_IF2',
        'DMI_MAX1', 'DMI_S', 'KDJ_B', 'KDJ_S', 'KD_B', 'KD_S', 'MACD',
        'MACD_B', 'MACD_DEA', 'MACD_DIFF', 'MACD_EMA_12', 'MACD_EMA_26',
        'MACD_EMA_9', 'MACD_S', 'MA_B', 'MA_S', 'PSY_B', 'PSY_MYPSY1',
        'PSY_S', 'ROC_B', 'ROC_S', 'RSI_B', 'RSI_S', 'VR_B', 'VR_IF1',
        'VR_IF2', 'VR_IF3', 'VR_S', 'XYYH_B', 'XYYH_B1', 'XYYH_B2',
        'XYYH_B3', 'XYYH_CC', 'XYYH_DD'
    ]

    X_predict = []
    ID_predict = []
    NAME_predict = []
    log.info('begin generate train data and validate data.')
    k = 0
    for codes in stockcodes:
        temp_df = df[df.code == codes]
        temp_df1 = temp_df.copy(deep=True)
        temp_df1 = temp_df1.sort_values(by='c_yearmonthday', ascending=1)

        # Descending list of this code's trade days.
        tradedaylist = temp_df1['c_yearmonthday'].values
        tradedaylist.sort()
        tradedaylist = tradedaylist[::-1]

        # Index by day so the segment can be taken with one label slice.
        temp_df1 = temp_df1.set_index('c_yearmonthday')
        if len(tradedaylist) < seg_len:
            log.info('not enough trade days ,code is :%s', codes)
            continue

        # The check above guarantees a full segment here, so the original
        # second length check (which would have aborted the whole loop) was
        # unreachable and has been dropped.
        segdays = tradedaylist[:seg_len]
        segbegin = segdays[len(segdays) - 1]
        segend = segdays[0]
        data = temp_df1.loc[segbegin:segend, feature_columns].values
        if datatype == 'cnn':
            data = [data]
        data_tag = temp_df[temp_df.c_yearmonthday == tradedaylist[0]][[
            'code', 'name', 'p_change'
        ]]
        temp_id = data_tag['code'].values[0]
        temp_name = data_tag['name'].values[0]
        X_predict.append(data)
        ID_predict.append(temp_id)
        NAME_predict.append(temp_name)
        k += 1
        log.info('%s stock finished ', k)
    return (np.asarray(X_predict), np.asarray(ID_predict),
            np.asarray(NAME_predict))