Esempio n. 1
0
def OLSV2A(code):
    """Fit an OLS model of a stock's daily percentage change on lagged features.

    The dependent variable is the daily percentage change of ``close``
    (``cpct``).  The regressors are, in order: the previous day's change
    (``lpct``), its square (``llpct``), its sine (``sina``), the benchmark
    index's daily percentage change (``indpct``), the daily percentage change
    of ``volume`` (``vpct``) and, when usable, the ``turnover`` column.  A
    constant term is prepended by ``sm.add_constant``.

    Args:
        code: Stock code string.  Its first character selects the benchmark
            index: '6'/'9' -> '000001', '0'/'2'/'3' -> '399001'.

    Returns:
        The fitted results object returned by ``sm.OLS(...).fit()``.

    Raises:
        ValueError: If the first character of ``code`` is not one of the
            supported prefixes.
    """
    # Validate the prefix before any download; previously an unsupported
    # prefix left `ticker` unbound and failed later with UnboundLocalError.
    if code[0] in ('6', '9'):
        ticker = '000001'
    elif code[0] in ('0', '2', '3'):
        ticker = '399001'
    else:
        raise ValueError('Unsupported stock code prefix: %r' % (code,))

    df = wt.get_h_data(code, start='2001-01-01')
    df = df.sort_index()
    df.index = pd.to_datetime(df.index)
    # keep=False drops *every* row that has an exact duplicate.
    df = df.drop_duplicates(keep=False)
    print ('\n,Caculating the analysing 1/4 statistics:\n')
    print (df.describe(), '\n')

    dfi = wt.get_h_data(ticker, index=True, start='2001-01-01')
    dfi = dfi.sort_index()
    dfi.index = pd.to_datetime(dfi.index)

    df['cpct'] = df['close'].pct_change() * 100
    df['lpct'] = df['cpct'].shift(1)
    df['llpct'] = df['lpct'] ** 2
    df['sina'] = np.sin(df['lpct'])
    df['vpct'] = df['volume'].pct_change() * 100
    dfi['indpct'] = dfi['close'].pct_change() * 100

    # Column 0 is the dependent variable; the rest are regressors.
    base_columns = [df['cpct'], df['lpct'], df['llpct'], df['sina'],
                    dfi['indpct'], df['vpct']]
    try:
        rets = pd.concat(base_columns + [df['turnover']], axis=1)
        rets = rets.dropna(how='any')
        X = sm.add_constant(np.array(rets.iloc[:, 1:]))
    except Exception:
        # Best-effort fallback: retry without 'turnover' (e.g. when the
        # column is absent from `df`).  `Exception` rather than a bare
        # `except` so KeyboardInterrupt/SystemExit are not swallowed.
        rets = pd.concat(base_columns, axis=1)
        rets = rets.dropna(how='any')
        X = sm.add_constant(np.array(rets.iloc[:, 1:]))

    Y = np.array(rets.iloc[:, 0])
    model = sm.OLS(Y, X)
    results = model.fit()
    print (results.summary())

    print ("The params for the model:", results.params)
    print ("The std for the model:", results.bse)

    return results
Esempio n. 2
0
def _get_index_data(code):
    """Create or incrementally update the cached CSV for an index.

    The cache lives at ``./Quandl/YAHOO/SS_<code>.csv`` when ``code`` starts
    with '0' and at ``./Quandl/YAHOO/SZ_<code>.csv`` otherwise.  If the file
    does not exist, the history from 2000-01-01 is downloaded and written
    with capitalised column names.  Otherwise, when the last cached date is
    earlier than ``bftoday``, rows from the following day up to ``today`` are
    downloaded and appended to the file without a header.

    Args:
        code: Index code; per the original note only '000001' and '399001'
            are expected.

    Returns:
        None.  The function works purely through its effect on the CSV file.
    """
    if code[0] == '0':
        ticker = "YAHOO/SS_" + code
    else:
        ticker = 'YAHOO/SZ_' + code
    fn = './Quandl/' + ticker + '.csv'

    if not os.path.exists(fn):
        print('\nDownloading the data %s:' % code)
        df = wt.get_h_data(code, index=True, start='2000-01-01', end=today1)
        df.sort_index(ascending=True, inplace=True)
        df.rename(columns={
            'open': 'Open',
            'close': 'Close',
            'low': 'Low',
            'high': 'High',
            'volume': 'Volume',
            'amount': 'Amount',
            'date': 'Date'
        },
                  inplace=True)
        df.to_csv(fn)
        return

    cached = pd.read_csv(fn)
    # `rename(columns=...)` above does not rename the index, and `to_csv`
    # writes the index as the first column, so the date column may not be
    # called 'Date' on read-back.  Fall back to the first column then.
    date_col = 'Date' if 'Date' in cached.columns else cached.columns[0]
    tem = cached.iloc[-1][date_col]
    if tem >= bftoday:
        # Cache already reaches `bftoday`; nothing to fetch.
        return

    print('\nUpdating data from %s for %s:' % (tem, code))
    bd = datetime.datetime.strptime(tem, "%Y-%m-%d") + datetime.timedelta(days=1)
    if bd.weekday() == 5:
        # The next day is a Saturday: jump to the following Monday.
        bd = bd + datetime.timedelta(days=2)
        print('It is Sat')
    bday = bd.strftime('%Y-%m-%d')

    # index=True matches the initial download; without it the update would
    # request data for `code` as a non-index instrument.
    new_rows = wt.get_h_data(code, index=True, autype=None, start=bday, end=today)
    if new_rows is not None and not new_rows.empty:
        print(new_rows.head(1))
        new_rows = new_rows.sort_index(ascending=True)
        new_rows.to_csv(fn, header=None, mode='a')
    return
Esempio n. 3
0
def get_h_hdf5(code):
    """Return historical data for ``code``, cached in an HDF5 store.

    The store is ``./testh5df/stockdata.h5``; each stock is kept under the
    key ``M/sz<code>`` (prefix '0', '2' or '3') or ``M/ss<code>`` (prefix
    '6' or '9').

    * If the key is missing (or holds no rows), ``wt.get_h_data(code)`` is
      called with its default date range and the result is appended to the
      store.
    * If the last cached date differs from ``today`` and today is a weekday,
      rows from the day after the last cached date up to ``today`` are
      downloaded and appended.
    * Otherwise the cached frame is returned as is.

    Args:
        code: Stock code string.

    Returns:
        DataFrame whose index has been converted with ``pd.to_datetime``.

    Raises:
        ValueError: If the first character of ``code`` is not a supported
            prefix.
    """
    h5path = './testh5df/stockdata.h5'
    if code[0] in ('0', '2', '3'):
        label = 'M/sz' + code
    elif code[0] in ('6', '9'):
        label = 'M/ss' + code
    else:
        # Previously `label` stayed unbound and failed with UnboundLocalError.
        raise ValueError('Unsupported stock code prefix: %r' % (code,))

    mode = 'a' if os.path.exists(h5path) else 'w'
    h5 = pd.HDFStore(h5path, mode, complevel=4, complib='blosc')
    try:
        try:
            df = h5[label]
        except KeyError:
            # Key not present in the store yet.
            df = None

        if df is None or df.empty:
            df = wt.get_h_data(code)
            h5.append(label, df, data_columns=df.columns)
        else:
            tem = str(df.index[-1])[0:10]
            is_weekday = datetime.datetime.today().isoweekday() in (1, 2, 3, 4, 5)
            if tem != today and is_weekday:
                bd = datetime.datetime.strptime(tem, '%Y-%m-%d') + datetime.timedelta(days=1)
                bday = bd.strftime('%Y-%m-%d')
                df1 = wt.get_h_data(code, start=bday, end=today)
                if df1 is not None and not df1.empty:
                    # Append only the new rows: the stored table already
                    # contains `df`, so appending the combined frame would
                    # duplicate every cached row.
                    h5.append(label, df1, data_columns=df1.columns)
                    df = pd.concat([df, df1])
    finally:
        # Always release the file handle, including on errors.
        h5.close()

    df.index = pd.to_datetime(df.index)
    return df
Esempio n. 4
0
def get_history_data_mp(code):
    """Return unadjusted historical data for ``code``, cached in HDF5.

    The store is ``./testh5df/stockdata_history.h5`` and the frame is kept
    under the key ``code``.

    * If the cached frame's last date equals ``bftoday`` it is returned
      unchanged.
    * Otherwise rows from the day after the last cached date up to ``today``
      are downloaded with ``autype=None``; if any arrive they are
      concatenated to the cached frame, written back and returned, else the
      cached frame is returned.
    * If anything in the steps above raises, the error is printed and the
      full history from 2000-01-01 is downloaded, stored and returned.

    Args:
        code: Stock code string, also used as the store key.

    Returns:
        The (possibly updated) DataFrame for ``code``.
    """
    h5path = './testh5df/stockdata_history.h5'
    mode = 'a' if os.path.exists(h5path) else 'w'
    h5 = pd.HDFStore(h5path, mode, complevel=4, complib='blosc')
    try:
        try:
            ddf = h5[code]
            tem = str(ddf.index[-1])[0:10]
            if tem == bftoday:
                # Cache is current; previously this path returned None.
                return ddf

            print('\nUpdating data from %s for %s:' % (tem, code))
            bd = datetime.datetime.strptime(tem, "%Y-%m-%d") + datetime.timedelta(days=1)
            bday = bd.strftime('%Y-%m-%d')
            all_data = wt.get_h_data(code, autype=None, start=bday, end=today)
            if all_data is None or all_data.empty:
                # Nothing new to add; previously this path returned None.
                return ddf

            # pd.concat replaces DataFrame.append, removed in pandas 2.0.
            df = pd.concat([ddf, all_data])
            h5[code] = df
            return df
        except Exception as e:
            # Best-effort fallback kept from the original: report the
            # problem and rebuild the cache entry from scratch.
            print(e)
            print('\nDownloading the data %s:' % code)
            df = wt.get_h_data(code, autype=None, start='2000-01-01', end=today)
            h5[code] = df
            return df
    finally:
        # Runs on every return path so the file handle is not leaked.
        h5.close()
Esempio n. 5
0
def get_open_h_hdf5(code, h5):
    """Return the full trading history of a stock, cached in an open store.

    The frame is kept in ``h5`` under ``M/sz<code>`` (prefix '0', '2' or
    '3') or ``M/ss<code>`` (prefix '6' or '9').  If the last cached date is
    earlier than ``today``, rows from the following day up to ``today`` are
    downloaded, concatenated and written back.  If reading or updating the
    cache fails for any reason, the history is downloaded from the stock's
    ``timeToMarket`` date (looked up via ``wf.get_stock_basics()``) and
    stored.

    Args:
        code: Stock code string.
        h5: An already opened store supporting ``h5[key]`` get/set
            (presumably a ``pd.HDFStore``; the caller owns and closes it).

    Returns:
        The DataFrame for ``code``; may be None if the fallback download
        returns None.

    Raises:
        ValueError: If the first character of ``code`` is not a supported
            prefix.
    """
    if code[0] in ('0', '2', '3'):
        label = 'M/sz' + code
    elif code[0] in ('6', '9'):
        label = 'M/ss' + code
    else:
        # Previously `label` stayed unbound and failed with UnboundLocalError.
        raise ValueError('Unsupported stock code prefix: %r' % (code,))

    try:
        df = h5[label]
        tem = str(df.index[-1])[0:10]
        if tem < today:
            bd = datetime.datetime.strptime(tem, '%Y-%m-%d') + datetime.timedelta(days=1)
            bday = bd.strftime('%Y-%m-%d')
            df1 = wt.get_h_data(code, start=bday, end=today)
            if df1 is not None and not df1.empty:
                # pd.concat replaces DataFrame.append (removed in pandas
                # 2.0); with the old call the resulting AttributeError was
                # swallowed below and forced a full re-download every time.
                df = pd.concat([df, df1])
            h5[label] = df
    except Exception:
        # Best-effort fallback: rebuild the entry from the listing date.
        tem = wf.get_stock_basics()
        date = tem.loc[code]['timeToMarket']
        t = time.strptime(str(date), '%Y%m%d')
        startt = time.strftime('%Y-%m-%d', t)
        df = wt.get_h_data(code, start=startt, end=today)
        if df is not None:
            h5[label] = df
    return df
Esempio n. 6
0
def get_h_csv(code, index=False, autype='qfq'):
    """Return historical data for ``code``, cached as a CSV file.

    Files live under ``./stockdata/data/history/``: ``SS_<code>.csv`` or
    ``SZ_<code>.csv`` for indexes (``SS_`` when ``code`` starts with '0'),
    and ``<code>.csv`` for stocks.  A missing file triggers a full download
    (from 1995-01-01 for indexes, from the ``timeToMarket`` date for stocks).
    An existing file is read and, when its last date is behind
    (``bftoday`` for indexes, ``today`` for stocks), the missing rows are
    downloaded, appended to the file and added to the returned frame.

    Args:
        code: Stock or index code string.
        index: False to fetch a non-index instrument, True to fetch an index.
        autype: Price-adjustment type passed to ``wt.get_h_data`` for
            stocks: None for unadjusted, 'qfq' for forward-adjusted, 'hfq'
            for backward-adjusted.  Not used when ``index`` is True.

    Returns:
        DataFrame of the cached plus newly downloaded rows.

    NOTE(review): the index is converted with ``pd.to_datetime`` only on the
    update paths; on a fresh download or an already current cache it is
    returned as produced by ``wt.get_h_data`` / ``pd.read_csv``.  Kept as is
    to avoid changing what callers receive.
    """
    one_day = datetime.timedelta(days=1)

    if index:
        if code[0] == '0':
            tick = 'SS_' + code
        else:
            tick = 'SZ_' + code
        fn = './stockdata/data/history/' + tick + '.csv'
        if not os.path.exists(fn):
            df = wt.get_h_data(code, index=True, start='1995-01-01', end=today)
            df = df.sort_index(ascending=True)
            df.to_csv(fn)
        else:
            df = pd.read_csv(fn, index_col='date')
            tem = str(df.index[-1])[0:10]
            if tem < bftoday:
                print('\nUpdating data from %s for %s:' % (tem, code))
                bd = datetime.datetime.strptime(tem, "%Y-%m-%d") + one_day
                if bd.weekday() == 5:
                    # The next day is a Saturday: jump to Monday.
                    bd = bd + datetime.timedelta(days=2)
                bday = bd.strftime('%Y-%m-%d')
                all_data = wt.get_h_data(code,
                                         index=True,
                                         start=bday,
                                         end=today)
                if all_data is not None:
                    all_data = all_data.sort_index(ascending=True)
                    all_data.to_csv(fn, header=None, mode='a')
                    # pd.concat replaces DataFrame.append (removed in
                    # pandas 2.0).
                    df = pd.concat([df, all_data])
                df.index = pd.to_datetime(df.index)
    else:
        h5path = './stockdata/data/history/' + code + '.csv'
        if not os.path.exists(h5path):
            tem = wf.get_stock_basics()
            date = tem.loc[code]['timeToMarket']
            t = time.strptime(str(date), '%Y%m%d')
            startt = time.strftime('%Y-%m-%d', t)
            df = wt.get_h_data(code, autype=autype, start=startt, end=today)
            df.to_csv(h5path)
        else:
            df = pd.read_csv(h5path, index_col='date')
            tem = str(df.index[-1])[0:10]
            if tem < today:
                bd = datetime.datetime.strptime(tem, '%Y-%m-%d') + one_day
                bday = bd.strftime('%Y-%m-%d')
                df1 = wt.get_h_data(code, autype=autype, start=bday, end=today)
                if df1 is not None:
                    df1 = df1.sort_index(ascending=True)
                    df = pd.concat([df, df1])
                    df1.to_csv(h5path, mode='a', header=None)
                df.index = pd.to_datetime(df.index)
    return df