Exemple #1
0
def backTest(trainEndDate, code, testDate, predictDate):
    '''
    Fit an AdaBoost regressor on one stock's history, then load test rows.

    trainEndDate: last date (inclusive) of the history_data rows used for
        training
    code: stock code whose rows are selected
    testDate: date substituted into config.sql_history_data_by_code_date
    predictDate: not used by the code visible in this function

    Returns None on every path. When fewer than 500 training rows are
    available the function returns before fitting.
    '''
    conn = db.get_history_data_db('D')
    rng = np.random.RandomState(1)
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                              n_estimators=1000,
                              random_state=rng,
                              loss='square')
    # Bind the values as query parameters instead of formatting them into
    # the SQL text.
    df = pd.read_sql_query(
        "select * from history_data where date([date])<=? and code=? "
        "order by code, date([date]) asc",
        conn,
        params=(trainEndDate, code))
    # The regression target is the close price two rows further on.
    df['target'] = df['close'].shift(-2)
    # NaN > -1000 is False, so rows without a target are dropped here too.
    data = df[df['target'] > -1000]

    # .loc does the same label-based slicing that the removed .ix did here.
    X_train = data.loc[:, 'code':'turnover']
    y_train = data.loc[:, 'target']
    if len(X_train) < 500:
        return
    print(len(X_train))
    model.fit(X_train, y_train)
    # Load the rows for the test date; errors are only printed.
    try:
        df = pd.read_sql_query(
            config.sql_history_data_by_code_date % (code, testDate), conn)
    except Exception as e:
        print(e)
Exemple #2
0
def backTest(trainEndDate, code, testDate, predictDate):
    '''
    Fit an AdaBoost regressor on one stock's history, then load test rows.

    trainEndDate: last date (inclusive) of the history_data rows used for
        training
    code: stock code whose rows are selected
    testDate: date substituted into config.sql_history_data_by_code_date
    predictDate: not used by the code visible in this function

    Returns None on every path. When fewer than 500 training rows are
    available the function returns before fitting.
    '''
    conn = db.get_history_data_db('D')
    rng = np.random.RandomState(1)
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                              n_estimators=1000,
                              random_state=rng,
                              loss='square')
    # Bind the values as query parameters instead of formatting them into
    # the SQL text.
    df = pd.read_sql_query(
        "select * from history_data where date([date])<=? and code=? "
        "order by code, date([date]) asc",
        conn,
        params=(trainEndDate, code))
    # The regression target is the close price two rows further on.
    df['target'] = df['close'].shift(-2)
    # NaN > -1000 is False, so rows without a target are dropped here too.
    data = df[df['target'] > -1000]

    # .loc does the same label-based slicing that the removed .ix did here.
    X_train = data.loc[:, 'code':'turnover']
    y_train = data.loc[:, 'target']
    if len(X_train) < 500:
        return
    print(len(X_train))
    model.fit(X_train, y_train)
    # Load the rows for the test date; errors are only printed.
    try:
        df = pd.read_sql_query(
            config.sql_history_data_by_code_date % (code, testDate), conn)
    except Exception as e:
        print(e)
Exemple #3
0
def get_all_stock_code():
    '''
    Return the codes of all stocks in stock_basics whose timeToMarket is not 0.

    Returns the 'code' column of the query result as a pandas Series. If the
    query fails, the error is logged and an empty Series is returned, so
    callers that iterate over the result get nothing to process instead of
    failing on None.
    '''
    conn = db.get_history_data_db()
    conn.text_factory = str
    try:
        df = pd.read_sql_query(
            'select * from stock_basics where [timeToMarket] !=0', conn)
        return df['code']
    except Exception as e:
        log.error(e)
        return pd.Series([], dtype=object, name='code')
Exemple #4
0
def get_all_stock_code():
    '''
    Return the codes of all stocks in stock_basics whose timeToMarket is not 0.

    Returns the 'code' column of the query result as a pandas Series. If the
    query fails, the error is logged and an empty Series is returned, so
    callers that iterate over the result get nothing to process instead of
    failing on None.
    '''
    conn = db.get_history_data_db()
    conn.text_factory = str
    try:
        df = pd.read_sql_query(
            'select * from stock_basics where [timeToMarket] !=0', conn)
        return df['code']
    except Exception as e:
        log.error(e)
        return pd.Series([], dtype=object, name='code')
Exemple #5
0
def download_history_data_fq(autype='qfq', startTime=None):
    '''
    Download price-adjusted daily k-line history for every stock.

    autype: adjustment type handed to ts.get_h_data; it is also the suffix
        of the target table, history_data_<autype>
    startTime: start date handed to ts.get_h_data; when None,
        utils.today_last_year(6) is used

    Rows are appended to the target table. A failure for one stock is logged
    and the loop moves on to the next stock. Returns None.
    '''
    conn = db.get_history_data_db('D')
    start = startTime
    if startTime is None:
        start = utils.today_last_year(6)
    table = 'history_data_%s' % autype

    try:
        for code in get_all_stock_code():
            try:
                # Pass autype through so the fetched data matches the table
                # it is written to.
                df = ts.get_h_data(code, start=start, autype=autype,
                                   drop_factor=False)
                if df is None:
                    continue
                df.insert(0, 'code', code)
                sql.to_sql(df, name=table, con=conn, index=True,
                           if_exists='append')
                log.info('%s,%s history %s data download ok.' %
                         (code, start, autype))
            except Exception as e:
                log.error('error:code:%s,start:%s,msg:%s' % (code, start, e))
    finally:
        conn.close()
Exemple #6
0
def updata_all_stock_basics():
    '''
    Refresh the stock_basics table with the basic data of all stocks.

    ts.get_stock_basics() is tried up to three times. The first non-empty
    result replaces the stock_basics table and ends the loop; an exception
    is logged and counts as a failed attempt. Nothing is returned.

    Columns of the fetched data, as listed in the original documentation:
           code, stock code
           name, name
           industry, industry segment
           area, region
           pe, price/earnings ratio
           outstanding, tradable shares
           totals, total shares (10k)
           totalAssets, total assets (10k)
           liquidAssets, current assets
           fixedAssets, fixed assets
           reserved, reserve fund
           reservedPerShare, reserve fund per share
           eps, earnings per share
           bvps, net assets per share
           pb, price/book ratio
           timeToMarket, listing date
    '''
    conn = db.get_history_data_db()
    conn.text_factory = str
    max_attempts = 3
    for _ in range(max_attempts):
        try:
            basics = ts.get_stock_basics()
            if basics.empty:
                # Nothing usable came back; spend another attempt.
                continue
            sql.to_sql(basics.loc[:, :], name='stock_basics', con=conn,
                       index=True, if_exists='replace')
            log.info('all stock basics updated, total size=%d' % len(basics))
            break
        except Exception as err:
            log.error(err)
    conn.close()
Exemple #7
0
def updata_all_stock_basics():
    '''
    Refresh the stock_basics table with the basic data of all stocks.

    ts.get_stock_basics() is tried up to three times. The first non-empty
    result replaces the stock_basics table and ends the loop; an exception
    is logged and counts as a failed attempt. Nothing is returned.

    Columns of the fetched data, as listed in the original documentation:
           code, stock code
           name, name
           industry, industry segment
           area, region
           pe, price/earnings ratio
           outstanding, tradable shares
           totals, total shares (10k)
           totalAssets, total assets (10k)
           liquidAssets, current assets
           fixedAssets, fixed assets
           reserved, reserve fund
           reservedPerShare, reserve fund per share
           eps, earnings per share
           bvps, net assets per share
           pb, price/book ratio
           timeToMarket, listing date
    '''
    conn = db.get_history_data_db()
    conn.text_factory = str
    max_attempts = 3
    for _ in range(max_attempts):
        try:
            basics = ts.get_stock_basics()
            if basics.empty:
                # Nothing usable came back; spend another attempt.
                continue
            sql.to_sql(basics.loc[:, :], name='stock_basics', con=conn,
                       index=True, if_exists='replace')
            log.info('all stock basics updated, total size=%d' % len(basics))
            break
        except Exception as err:
            log.error(err)
    conn.close()
Exemple #8
0
def download_history_data_fq(autype='qfq', startTime=None):
    '''
    Download price-adjusted daily k-line history for every stock.

    autype: adjustment type handed to ts.get_h_data; it is also the suffix
        of the target table, history_data_<autype>
    startTime: start date handed to ts.get_h_data; when None,
        utils.today_last_year(6) is used

    Rows are appended to the target table. A failure for one stock is logged
    and the loop moves on to the next stock. Returns None.
    '''
    conn = db.get_history_data_db('D')
    start = startTime
    if startTime is None:
        start = utils.today_last_year(6)
    table = 'history_data_%s' % autype

    try:
        for code in get_all_stock_code():
            try:
                # Pass autype through so the fetched data matches the table
                # it is written to.
                df = ts.get_h_data(code, start=start, autype=autype,
                                   drop_factor=False)
                if df is None:
                    continue
                df.insert(0, 'code', code)
                sql.to_sql(df, name=table, con=conn, index=True,
                           if_exists='append')
                log.info('%s,%s history %s data download ok.' %
                         (code, start, autype))
            except Exception as e:
                log.error('error:code:%s,start:%s,msg:%s' % (code, start, e))
    finally:
        conn.close()
Exemple #9
0
 def __init__(self):
     '''
     Store utils.mylog and a history-data DB connection on the instance.

     The connection comes from db.get_history_data_db() and has its
     text_factory set to str.
     '''
     self.log = utils.mylog
     connection = db.get_history_data_db()
     self.history_data_db_conn = connection
     connection.text_factory = str
Exemple #10
0
def download_index_history_data(start=None, end=None, init_run=False):
    '''
    Download daily index quotes and append them to the all_index table.

    start: start date, yyyyMMdd. When None it becomes the day after the
        latest date returned by config.sql_last_date_index_all, or
        default_start_time if there is no such date or the lookup fails.
    end: end date, yyyyMMdd. None means today.
    init_run: when true, start is forced to default_start_time.

    Returns None. Nothing is downloaded when end is not later than start.
    Each index in `indices` is tried up to `retry` times; errors are logged.
    The database connection is closed on every exit path.
    '''
    cur_time = datetime.now()
    conn = db.get_history_data_db()
    try:
        if init_run:
            start = default_start_time
        if start is None:
            start = default_start_time
            try:
                onerow = conn.execute(
                    config.sql_last_date_index_all).fetchone()
                if onerow is not None and onerow[0] is not None:
                    # Resume on the day after the newest stored date.
                    dt = datetime.strptime(onerow[0],
                                           '%Y-%m-%d') + timedelta(days=1)
                    start = datetime.strftime(dt, '%Y%m%d')
            except Exception as e:
                log.error(e)
                start = default_start_time

        if end is None:
            end = datetime.today().date().strftime('%Y%m%d')
        print(start, end)
        if int(end) <= int(start):
            return None

        head = [
            'date', 'open', 'close', 'chg', 'chg_r', 'low', 'high',
            'vibration', 'volume', 'amount'
        ]
        for code in indices.keys():
            for i in range(retry):
                try:
                    url = sohu_history_api % (code, start, end)

                    text = request.urlopen(url, timeout=10).read()
                    text = text.decode('GBK')
                    log.info('url=%s,size=%d, try=%d' % (url, len(text), i))
                    if len(text) < 20:
                        # Response too short to hold data; try again.
                        continue
                    j = demjson.decode(text, 'utf-8')
                    # Per the original comment each API row is: date, open,
                    # close, change, change ratio, low, high, volume (lots),
                    # amount (10k), turnover rate.
                    data = []
                    for x in j[0].get('hq'):
                        (date, open_, close, change, _, low, high, volume,
                         amount, _) = x
                        # chg_r and vibration are recomputed relative to the
                        # open price and rounded to 4 decimals.
                        chg_r = '%.4f' % (
                            (float(close) - float(open_)) / float(open_))
                        vibration = '%.4f' % (
                            (float(high) - float(low)) / float(open_))
                        data.append([
                            date,
                            float(open_),
                            float(close),
                            float(change),
                            float(chg_r),
                            float(low),
                            float(high),
                            float(vibration),
                            float(volume),
                            float(amount)
                        ])

                    df = pd.DataFrame(data, columns=head)
                    if not df.empty:
                        df.insert(1, 'code', code)
                        sql.to_sql(df,
                                   name='all_index',
                                   con=conn,
                                   index=False,
                                   if_exists='append')
                        log.info('%s,%s index history download ok.' %
                                 (code, start))
                        break
                except Exception as e:
                    log.error(e)
    finally:
        conn.close()
    log.info('index history data download complete. cost %d s' %
             (datetime.now() - cur_time).seconds)
Exemple #11
0
def download_history_data(ktype='D', start=None, end=None, init_run=False):
    '''
    Download unadjusted k-line history for every stock into history_data.

    ktype: passed to db.get_history_data_db to select the database
    start: start date, yyyyMMdd, used for every stock. When None, each stock
        starts on the day after its own latest date (from
        get_laste_update_dt, then
        config.sql_last_date_history_data_by_code), or at
        default_start_time if neither yields a date.
    end: end date, yyyyMMdd. None means today.
    init_run: when true, start is forced to default_start_time.

    Returns None. Each stock is tried up to `retry` times; errors are logged
    and a 'UNIQUE constraint' error stops the retries for that stock.
    '''
    if init_run:
        start = default_start_time
    if end is None:
        end = datetime.today().date().strftime('%Y%m%d')

    conn = db.get_history_data_db(ktype)
    cur_time = datetime.now()
    head = [
        'date', 'open', 'close', 'chg', 'chg_r', 'low', 'high',
        'vibration', 'volume', 'amount', 'turnover'
    ]

    try:
        for code in get_all_stock_code():
            began = datetime.now()
            # Resolve the start date per stock. Writing it back to `start`
            # would make every later stock reuse the first stock's date.
            code_start = start
            if code_start is None:
                last = get_laste_update_dt(code)
                if last is None:
                    row = conn.execute(
                        config.sql_last_date_history_data_by_code %
                        code).fetchone()
                    if row is not None:
                        last = row[0]
                if last is None:
                    code_start = default_start_time
                else:
                    dt = datetime.strptime(last,
                                           '%Y-%m-%d') + timedelta(days=1)
                    code_start = datetime.strftime(dt, '%Y%m%d')

            for i in range(retry):
                try:
                    url = sohu_history_api % ('cn_' + code, code_start, end)
                    text = request.urlopen(url, timeout=10).read()
                    text = text.decode('GBK')
                    log.info('url=%s,size=%d, try=%d' % (url, len(text), i))
                    if len(text) < 20:
                        # Response too short to hold data; try again.
                        continue
                    j = demjson.decode(text, 'utf-8')
                    # Per the original comment each API row is: date, open,
                    # close, change, change ratio, low, high, volume (lots),
                    # amount (10k), turnover rate.
                    data = []
                    for x in j[0].get('hq'):
                        (date, open_, close, change, _, low, high, volume,
                         amount, turnover) = x
                        if '-' == turnover:
                            turnover = '0.0%'
                        # chg_r and vibration are recomputed relative to the
                        # open price and rounded to 4 decimals.
                        chg_r = float('%.4f' % (
                            (float(close) - float(open_)) / float(open_)))
                        vibration = float('%.4f' % (
                            (float(high) - float(low)) / float(open_)))
                        data.append([
                            date,
                            float(open_),
                            float(close),
                            float(change),
                            chg_r,
                            float(low),
                            float(high),
                            vibration,
                            float(volume),
                            float(amount),
                            # drop the trailing '%' sign
                            float(turnover[:-1])
                        ])

                    df = pd.DataFrame(data, columns=head)
                    if not df.empty:
                        df.insert(1, 'code', code)
                        sql.to_sql(df,
                                   name='history_data',
                                   con=conn,
                                   index=False,
                                   if_exists='append')
                        log.info('%s,%s,%d history data download ok.' %
                                 (code, str(code_start), len(df)))
                        slog.info('%s,%s' % (code, data[0][0]))
                        break
                except Exception as e:
                    log.error('error:code=%s,start=%s,msg=%s' %
                              (code, code_start, e))
                    # Rows already stored: retrying cannot succeed.
                    if 'UNIQUE constraint' in str(e):
                        break
            log.debug('%s,costs:%d s' %
                      (code, (datetime.now() - began).seconds))
    finally:
        conn.close()
    log.info('history data download complete. cost %d s' %
             (datetime.now() - cur_time).seconds)
Exemple #12
0
import talib
import numpy as np
from talib.abstract import *
# Function API: 5-period simple moving average over 100 random values.
close = np.random.random(100)
output = talib.SMA(close, timeperiod=5)
from org.tradesafe.db import sqlite_db as db
import pandas as pd
import numpy as np

# Abstract API: the input is a dict of ndarrays that must all have the same
# length. No 'close' series is supplied; the SMA below runs on 'open'.
inputs = dict(
    (field, np.random.random(100))
    for field in ('high', 'open', 'low', 'volume')
)
output = SMA(inputs, timeperiod=25, price='open')

# 15-period SMA of the 'high' column for stock 600022, oldest row first.
conn = db.get_history_data_db('D')
df = pd.read_sql_query(
    "select * from history_data where code='%s' order by date([date]) asc"
    % '600022',
    conn)
df['sma15'] = talib.SMA(df['high'].values, timeperiod=15)
print(df.head(30))
if __name__ == '__main__':
    pass
Exemple #13
0
 def __init__(self):
     '''
     Store utils.mylog and a history-data DB connection on the instance.

     The connection comes from db.get_history_data_db() and has its
     text_factory set to str.
     '''
     self.log = utils.mylog
     connection = db.get_history_data_db()
     self.history_data_db_conn = connection
     connection.text_factory = str
Exemple #14
0
def download_index_history_data(start=None, end=None, init_run=False):
    '''
    Download daily index quotes and append them to the all_index table.

    start: start date, yyyyMMdd. When None it becomes the day after the
        latest date returned by config.sql_last_date_index_all, or
        default_start_time if there is no such date or the lookup fails.
    end: end date, yyyyMMdd. None means today.
    init_run: when true, start is forced to default_start_time.

    Returns None. Nothing is downloaded when end is not later than start.
    Each index in `indices` is tried up to `retry` times; errors are logged.
    The database connection is closed on every exit path.
    '''
    cur_time = datetime.now()
    conn = db.get_history_data_db()
    try:
        if init_run:
            start = default_start_time
        if start is None:
            start = default_start_time
            try:
                onerow = conn.execute(
                    config.sql_last_date_index_all).fetchone()
                if onerow is not None and onerow[0] is not None:
                    # Resume on the day after the newest stored date.
                    dt = datetime.strptime(onerow[0],
                                           '%Y-%m-%d') + timedelta(days=1)
                    start = datetime.strftime(dt, '%Y%m%d')
            except Exception as e:
                log.error(e)
                start = default_start_time

        if end is None:
            end = datetime.today().date().strftime('%Y%m%d')
        print(start, end)
        if int(end) <= int(start):
            return None

        head = [
            'date', 'open', 'close', 'chg', 'chg_r', 'low', 'high',
            'vibration', 'volume', 'amount'
        ]
        for code in indices.keys():
            for i in range(retry):
                try:
                    url = sohu_history_api % (code, start, end)

                    text = request.urlopen(url, timeout=10).read()
                    text = text.decode('GBK')
                    log.info('url=%s,size=%d, try=%d' % (url, len(text), i))
                    if len(text) < 20:
                        # Response too short to hold data; try again.
                        continue
                    j = demjson.decode(text, 'utf-8')
                    # Per the original comment each API row is: date, open,
                    # close, change, change ratio, low, high, volume (lots),
                    # amount (10k), turnover rate.
                    data = []
                    for x in j[0].get('hq'):
                        (date, open_, close, change, _, low, high, volume,
                         amount, _) = x
                        # chg_r and vibration are recomputed relative to the
                        # open price and rounded to 4 decimals.
                        chg_r = '%.4f' % (
                            (float(close) - float(open_)) / float(open_))
                        vibration = '%.4f' % (
                            (float(high) - float(low)) / float(open_))
                        data.append([
                            date,
                            float(open_),
                            float(close),
                            float(change),
                            float(chg_r),
                            float(low),
                            float(high),
                            float(vibration),
                            float(volume),
                            float(amount)
                        ])

                    df = pd.DataFrame(data, columns=head)
                    if not df.empty:
                        df.insert(1, 'code', code)
                        sql.to_sql(df,
                                   name='all_index',
                                   con=conn,
                                   index=False,
                                   if_exists='append')
                        log.info('%s,%s index history download ok.' %
                                 (code, start))
                        break
                except Exception as e:
                    log.error(e)
    finally:
        conn.close()
    log.info('index history data download complete. cost %d s' %
             (datetime.now() - cur_time).seconds)
Exemple #15
0
def download_history_data(ktype='D', start=None, end=None, init_run=False):
    '''
    Download unadjusted k-line history for every stock into history_data.

    ktype: passed to db.get_history_data_db to select the database
    start: start date, yyyyMMdd, used for every stock. When None, each stock
        starts on the day after its own latest date (from
        get_laste_update_dt, then
        config.sql_last_date_history_data_by_code), or at
        default_start_time if neither yields a date.
    end: end date, yyyyMMdd. None means today.
    init_run: when true, start is forced to default_start_time.

    Returns None. Each stock is tried up to `retry` times; errors are logged
    and a 'UNIQUE constraint' error stops the retries for that stock.
    '''
    if init_run:
        start = default_start_time
    if end is None:
        end = datetime.today().date().strftime('%Y%m%d')

    conn = db.get_history_data_db(ktype)
    cur_time = datetime.now()
    head = [
        'date', 'open', 'close', 'chg', 'chg_r', 'low', 'high',
        'vibration', 'volume', 'amount', 'turnover'
    ]

    try:
        for code in get_all_stock_code():
            began = datetime.now()
            # Resolve the start date per stock. Writing it back to `start`
            # would make every later stock reuse the first stock's date.
            code_start = start
            if code_start is None:
                last = get_laste_update_dt(code)
                if last is None:
                    row = conn.execute(
                        config.sql_last_date_history_data_by_code %
                        code).fetchone()
                    if row is not None:
                        last = row[0]
                if last is None:
                    code_start = default_start_time
                else:
                    dt = datetime.strptime(last,
                                           '%Y-%m-%d') + timedelta(days=1)
                    code_start = datetime.strftime(dt, '%Y%m%d')

            for i in range(retry):
                try:
                    url = sohu_history_api % ('cn_' + code, code_start, end)
                    text = request.urlopen(url, timeout=10).read()
                    text = text.decode('GBK')
                    log.info('url=%s,size=%d, try=%d' % (url, len(text), i))
                    if len(text) < 20:
                        # Response too short to hold data; try again.
                        continue
                    j = demjson.decode(text, 'utf-8')
                    # Per the original comment each API row is: date, open,
                    # close, change, change ratio, low, high, volume (lots),
                    # amount (10k), turnover rate.
                    data = []
                    for x in j[0].get('hq'):
                        (date, open_, close, change, _, low, high, volume,
                         amount, turnover) = x
                        if '-' == turnover:
                            turnover = '0.0%'
                        # chg_r and vibration are recomputed relative to the
                        # open price and rounded to 4 decimals.
                        chg_r = float('%.4f' % (
                            (float(close) - float(open_)) / float(open_)))
                        vibration = float('%.4f' % (
                            (float(high) - float(low)) / float(open_)))
                        data.append([
                            date,
                            float(open_),
                            float(close),
                            float(change),
                            chg_r,
                            float(low),
                            float(high),
                            vibration,
                            float(volume),
                            float(amount),
                            # drop the trailing '%' sign
                            float(turnover[:-1])
                        ])

                    df = pd.DataFrame(data, columns=head)
                    if not df.empty:
                        df.insert(1, 'code', code)
                        sql.to_sql(df,
                                   name='history_data',
                                   con=conn,
                                   index=False,
                                   if_exists='append')
                        log.info('%s,%s,%d history data download ok.' %
                                 (code, str(code_start), len(df)))
                        slog.info('%s,%s' % (code, data[0][0]))
                        break
                except Exception as e:
                    log.error('error:code=%s,start=%s,msg=%s' %
                              (code, code_start, e))
                    # Rows already stored: retrying cannot succeed.
                    if 'UNIQUE constraint' in str(e):
                        break
            log.debug('%s,costs:%d s' %
                      (code, (datetime.now() - began).seconds))
    finally:
        conn.close()
    log.info('history data download complete. cost %d s' %
             (datetime.now() - cur_time).seconds)