Esempio n. 1
0
def get_duration_filter(code, df=None, dtype='d', start=None, end=None, dl=None, filter=True, ptype='low', power=True):
    if start is not None and end is None and filter:
        index_d = cct.day8_to_day10(start)
        start = tdd.get_duration_price_date(code, ptype=ptype, dt=start, df=df, dl=dl, power=power)
        log.debug("start is not None start: %s  index_d:%s" % (start, index_d))
    elif end is not None and filter:
        df = tdd.get_tdx_append_now_df_api(code, start=start, end=end, df=df, dl=dl, power=power).sort_index(ascending=True)
        index_d = cct.day8_to_day10(start)
        start = tdd.get_duration_price_date(code, ptype=ptype, dt=start, df=df, dl=dl, power=power)
        df = df[df.index >= start]
        if len(df) > 2 and dl is None:
            if df.index.values[0] < index_d:
                df = df[df.index >= index_d]
    if dl is not None:
        if power:
            start, index_d, df = tdd.get_duration_price_date(
                code, ptype=ptype, dl=dl, filter=False, df=df, power=True)
        else:
            start, index_d = tdd.get_duration_price_date(
                code, ptype=ptype, dl=dl, filter=False, df=df, power=False)
        log.debug("dl not None code:%s start: %s  index_d:%s" % (code, start, index_d))

    if len(df) > 0 and df is not None:
        df = df.sort_index(ascending=True)
        df = df[df.index >= start]

    if len(df) == 0 or df is None:
        if start is not None and len(start) > 8 and int(start[:4]) > 2500:
            log.warn("code:%s ERROR:%s" % (code, start))
            start = '2016-01-01'
        df = tdd.get_tdx_append_now_df_api(
            code, start, end).sort_index(ascending=True)
        if start is None:
            start = df.index.values[0]
        if len(df) > 2 and dl is None and start is not None and filter:
            if df.index.values[0] < index_d:
                df = df[df.index >= index_d]

    if not dtype == 'd':
        df = tdd.get_tdx_stock_period_to_type(
            df, dtype).sort_index(ascending=True)
    return df
Esempio n. 2
0
def get_linear_model_histogramDouble(code, ptype='f', dtype='d', start=None, end=None, vtype='close', filter='n',
                                     df=None):
    # 399001','cyb':'zs399006','zxb':'zs399005
    # code = '999999'
    # code = '601608'
    # code = '000002'
    # asset = ts.get_hist_data(code)['close'].sort_index(ascending=True)
    # df = tdd.get_tdx_Exp_day_to_df(code, 'f').sort_index(ascending=True)
    # vtype='close'
    # if vtype == 'close' or vtype==''
    # ptype=
    if start is not None and filter == 'y':
        if code not in ['999999', '399006', '399001']:
            index_d, dl = tdd.get_duration_Index_date(dt=start)
            log.debug("index_d:%s dl:%s" % (str(index_d), dl))
        else:
            index_d = cct.day8_to_day10(start)
            log.debug("index_d:%s" % (index_d))
        start = tdd.get_duration_price_date(code, ptype='low', dt=index_d)
        log.debug("start:%s" % (start))
    if df is None:
        # df = tdd.get_tdx_append_now_df(code, ptype, start, end).sort_index(ascending=True)
        df = tdd.get_tdx_append_now_df_api(code, ptype, start, end).sort_index(ascending=True)
    if not dtype == 'd':
        df = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)
    asset = df[vtype]
    log.info("df:%s" % asset[:1])
    asset = asset.dropna()
    dates = asset.index

    if not code.startswith('999') or not code.startswith('399'):
        if code[:1] in ['5', '6', '9']:
            code2 = '999999'
        elif code[:1] in ['3']:
            code2 = '399006'
        else:
            code2 = '399001'
        df1 = tdd.get_tdx_append_now_df_api(code2, ptype, start, end).sort_index(ascending=True)
        # df1 = tdd.get_tdx_append_now_df(code2, ptype, start, end).sort_index(ascending=True)
        if not dtype == 'd':
            df1 = tdd.get_tdx_stock_period_to_type(df1, dtype).sort_index(ascending=True)
        asset1 = df1.loc[asset.index, vtype]
        startv = asset1[:1]
        # asset_v=asset[:1]
        # print startv,asset_v
        asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))
        # print asset1[:4]

    # 画出价格随时间变化的图像
    # _, ax = plt.subplots()
    # fig = plt.figure()
    fig = plt.figure(figsize=(16, 10))
    # fig = plt.figure(figsize=(16, 10), dpi=72)
    # fig.autofmt_xdate() #(no fact)

    # plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
    plt.subplots_adjust(left=0.05, bottom=0.08, right=0.95, top=0.95, wspace=0.15, hspace=0.25)
    # set (gca,'Position',[0,0,512,512])
    # fig.set_size_inches(18.5, 10.5)
    # fig=plt.fig(figsize=(14,8))
    ax1 = fig.add_subplot(321)
    # asset=asset.apply(lambda x:round( x/asset[:1],2))
    ax1.plot(asset)
    # ax1.plot(asset1,'-r', linewidth=2)
    ticks = ax1.get_xticks()
    # start, end = ax1.get_xlim()
    # print start, end, len(asset)
    # print ticks, ticks[:-1]
    # (ticks[:-1] if len(asset) > end else np.append(ticks[:-1], len(asset) - 1))
    ax1.set_xticklabels([dates[i] for i in (np.append(ticks[:-1], len(asset) - 1))],
                        rotation=15)  # Label x-axis with dates
    # 拟合
    X = np.arange(len(asset))
    x = sm.add_constant(X)
    model = regression.linear_model.OLS(asset, x).fit()
    a = model.params[0]
    b = model.params[1]
    # log.info("a:%s b:%s" % (a, b))
    log.info("X:%s a:%s b:%s" % (len(asset), a, b))
    Y_hat = X * b + a

    # 真实值-拟合值,差值最大最小作为价值波动区间
    # 向下平移
    i = (asset.values.T - Y_hat).argmin()
    c_low = X[i] * b + a - asset.values[i]
    Y_hatlow = X * b + a - c_low

    # 向上平移
    i = (asset.values.T - Y_hat).argmax()
    c_high = X[i] * b + a - asset.values[i]
    Y_hathigh = X * b + a - c_high
    plt.plot(X, Y_hat, 'k', alpha=0.9);
    plt.plot(X, Y_hatlow, 'r', alpha=0.9);
    plt.plot(X, Y_hathigh, 'r', alpha=0.9);
    # plt.xlabel('Date', fontsize=12)
    plt.ylabel('Price', fontsize=12)
    plt.title(code + " | " + str(dates[-1])[:11], fontsize=14)
    plt.legend([asset.iat[-1]], fontsize=12, loc=4)
    plt.grid(True)

    # plt.legend([code]);
    # plt.legend([code, 'Value center line', 'Value interval line']);
    # fig=plt.fig()
    # fig.figsize = [14,8]
    scale = 1.1
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax1, base_scale=scale)
    figPan = zp.pan_factory(ax1)

    ax2 = fig.add_subplot(323)
    # ax2.plot(asset)
    # ticks = ax2.get_xticks()
    ax2.set_xticklabels([dates[i] for i in (np.append(ticks[:-1], len(asset) - 1))], rotation=15)
    # plt.plot(X, Y_hat, 'k', alpha=0.9)
    n = 5
    d = (-c_high + c_low) / n
    c = c_high
    while c <= c_low:
        Y = X * b + a - c
        plt.plot(X, Y, 'r', alpha=0.9);
        c = c + d
    # asset=asset.apply(lambda x:round(x/asset[:1],2))
    ax2.plot(asset)
    # ax2.plot(asset1,'-r', linewidth=2)
    # plt.xlabel('Date', fontsize=12)
    plt.ylabel('Price', fontsize=12)
    plt.grid(True)

    # plt.title(code, fontsize=14)
    # plt.legend([code])

    # 将Y-Y_hat股价偏离中枢线的距离单画出一张图显示,对其边界线之间的区域进行均分,大于0的区间为高估,小于0的区间为低估,0为价值中枢线。
    ax3 = fig.add_subplot(322)
    # distance = (asset.values.T - Y_hat)
    distance = (asset.values.T - Y_hat)[0]
    if code.startswith('999') or code.startswith('399'):
        ax3.plot(asset)
        plt.plot(distance)
        ticks = ax3.get_xticks()
        ax3.set_xticklabels([dates[i] for i in (np.append(ticks[:-1], len(asset) - 1))], rotation=15)
        n = 5
        d = (-c_high + c_low) / n
        c = c_high
        while c <= c_low:
            Y = X * b + a - c
            plt.plot(X, Y - Y_hat, 'r', alpha=0.9);
            c = c + d
        ax3.plot(asset)
        # plt.xlabel('Date', fontsize=12)
        plt.ylabel('Price-center price', fontsize=14)
        plt.grid(True)
    else:
        as3 = asset.apply(lambda x: round(x / asset[:1], 2))
        ax3.plot(as3)
        ax3.plot(asset1, '-r', linewidth=2)
        plt.grid(True)
        zp3 = zoompan.ZoomPan()
        figZoom = zp3.zoom_factory(ax3, base_scale=scale)
        figPan = zp3.pan_factory(ax3)
    # plt.title(code, fontsize=14)
    # plt.legend([code])



    # 统计出每个区域内各股价的频数,得到直方图,为了更精细的显示各个区域的频数,这里将整个边界区间分成100份。

    ax4 = fig.add_subplot(325)
    log.info("assert:len:%s %s" % (len(asset.values.T - Y_hat), (asset.values.T - Y_hat)[0]))
    # distance = map(lambda x:int(x),(asset.values.T - Y_hat)/Y_hat*100)
    # now_distanse=int((asset.iat[-1]-Y_hat[-1])/Y_hat[-1]*100)
    # log.debug("dis:%s now:%s"%(distance[:2],now_distanse))
    # log.debug("now_distanse:%s"%now_distanse)
    distance = (asset.values.T - Y_hat)
    now_distanse = asset.iat[-1] - Y_hat[-1]
    # distance = (asset.values.T-Y_hat)[0]
    pd.Series(distance).plot(kind='hist', stacked=True, bins=100)
    # plt.plot((asset.iat[-1].T-Y_hat),'b',alpha=0.9)
    plt.axvline(now_distanse, hold=None, label="1", color='red')
    # plt.axhline(now_distanse,hold=None,label="1",color='red')
    # plt.axvline(asset.iat[0],hold=None,label="1",color='red',linestyle="--")
    plt.xlabel('Undervalue ------------------------------------------> Overvalue', fontsize=12)
    plt.ylabel('Frequency', fontsize=14)
    # plt.title('Undervalue & Overvalue Statistical Chart', fontsize=14)
    plt.legend([code, asset.iat[-1], str(dates[-1])[5:11]], fontsize=12)
    plt.grid(True)

    # plt.show()
    # import os
    # print(os.path.abspath(os.path.curdir))


    ax5 = fig.add_subplot(326)
    # fig.figsize=(5, 10)
    log.info("assert:len:%s %s" % (len(asset.values.T - Y_hat), (asset.values.T - Y_hat)[0]))
    # distance = map(lambda x:int(x),(asset.values.T - Y_hat)/Y_hat*100)
    distance = (asset.values.T - Y_hat) / Y_hat * 100
    now_distanse = ((asset.iat[-1] - Y_hat[-1]) / Y_hat[-1] * 100)
    log.debug("dis:%s now:%s" % (distance[:2], now_distanse))
    log.debug("now_distanse:%s" % now_distanse)
    # n, bins = np.histogram(distance, 50)
    # print n, bins[:2]
    pd.Series(distance).plot(kind='hist', stacked=True, bins=100)
    # plt.plot((asset.iat[-1].T-Y_hat),'b',alpha=0.9)
    plt.axvline(now_distanse, hold=None, label="1", color='red')
    # plt.axhline(now_distanse,hold=None,label="1",color='red')
    # plt.axvline(asset.iat[0],hold=None,label="1",color='red',linestyle="--")
    plt.xlabel('Undervalue ------------------------------------------> Overvalue', fontsize=14)
    plt.ylabel('Frequency', fontsize=12)
    # plt.title('Undervalue & Overvalue Statistical Chart', fontsize=14)
    plt.legend([code, asset.iat[-1]], fontsize=12)
    plt.grid(True)

    ax6 = fig.add_subplot(324)
    h = df.loc[:, ['open', 'close', 'high', 'low']]
    highp = h['high'].values
    lowp = h['low'].values
    openp = h['open'].values
    closep = h['close'].values
    lr = LinearRegression()
    x = np.atleast_2d(np.linspace(0, len(closep), len(closep))).T
    lr.fit(x, closep)
    LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
    xt = np.atleast_2d(np.linspace(0, len(closep) + 200, len(closep) + 200)).T
    yt = lr.predict(xt)
    bV = []
    bP = []
    for i in range(1, len(highp) - 1):
        if highp[i] <= highp[i - 1] and highp[i] < highp[i + 1] and lowp[i] <= lowp[i - 1] and lowp[i] < lowp[i + 1]:
            bV.append(lowp[i])
            bP.append(i)

    d, p = LIS(bV)

    idx = []
    for i in range(len(p)):
        idx.append(bP[p[i]])
    lr = LinearRegression()
    X = np.atleast_2d(np.array(idx)).T
    Y = np.array(d)
    lr.fit(X, Y)
    estV = lr.predict(xt)
    ax6.plot(closep, linewidth=2)
    ax6.plot(idx, d, 'ko')
    ax6.plot(xt, estV, '-r', linewidth=3)
    ax6.plot(xt, yt, '-g', linewidth=3)
    plt.grid(True)

    # plt.tight_layout()
    zp2 = zoompan.ZoomPan()
    figZoom = zp2.zoom_factory(ax6, base_scale=scale)
    figPan = zp2.pan_factory(ax6)
    # plt.ion()
    plt.show(block=False)
             ptype = "high"
         elif p_t == "y":
             filter = "y"
         elif p_t == "n":
             filter = "n"
         else:
             print ("arg error:%s" % p_t)
     elif len(dl) == 4:
         dt = dl[1]
         ptype = dl[2]
         filter = dl[3]
     else:
         dt = ""
     if len(str(dt)) > 0:
         if len(str(dt)) < 8:
             duration_date = tdd.get_duration_price_date("999999", dl=dt, ptype=ptype)
         else:
             duration_date = dt
         top_all = pd.DataFrame()
         time_s = time.time()
         status = False
         lastpTDX_DF = ""
 elif st == "w" or st == "a":
     codew = (top_dd.index).tolist()
     if st == "a":
         cct.write_to_blocknew(block_path, codew)
         # sl.write_to_blocknew(all_diffpath, codew)
     else:
         cct.write_to_blocknew(block_path, codew, False)
         # sl.write_to_blocknew(all_diffpath, codew, False)
     print "wri ok:%s" % block_path
Esempio n. 4
0
def get_linear_model_status(code, df=None, dtype='d', type='m', start=None, end=None, days=1, filter='n',
                            dl=None, countall=True, ptype='low', power=True):
    """[pct test get_linear_model_status]

    [description]

    Arguments:
        code {[type]} -- [description]

    Keyword Arguments:
        df {[type]} -- [description] (default: {None})
        dtype {str} -- [description] (default: {'d'})
        type {str} -- [description] (default: {'m'})
        start {[type]} -- [description] (default: {None})
        end {[type]} -- [description] (default: {None})
        days {number} -- [description] (default: {1})
        filter {str} -- [description] (default: {'n'})
        dl {[type]} -- [description] (default: {None})
        countall {bool} -- [description] (default: {True})
        ptype {str} -- [description] (default: {'low'})
        power {bool} -- [description] (default: {True})

    Returns:
        [type] -- [description]
    """
    if start is not None and end is None and filter == 'y':
        index_d = cct.day8_to_day10(start)
        start = tdd.get_duration_price_date(code, ptype=ptype, dt=start, df=df, dl=dl, power=power)
        log.debug("start is not None start: %s  index_d:%s" % (start, index_d))
    elif end is not None and filter == 'y':
        df = tdd.get_tdx_append_now_df_api(code, start=start, end=end, df=df, dl=dl, power=power).sort_index(ascending=True)
        index_d = cct.day8_to_day10(start)
        start = tdd.get_duration_price_date(code, ptype=ptype, dt=start, df=df, dl=dl, power=power)
        df = df[df.index >= start]
        if len(df) > 2 and dl is None:
            if df.index.values[0] < index_d:
                df = df[df.index >= index_d]
    if dl is not None:
        if power:
            start, index_d, df = tdd.get_duration_price_date(
                code, ptype=ptype, dl=dl, filter=False, df=df, power=power)
        else:
            start, index_d = tdd.get_duration_price_date(
                code, ptype=ptype, dl=dl, filter=False, df=df, power=power)
        log.debug("dl not None code:%s start: %s  index_d:%s" % (code, start, index_d))

    if len(df) > 0 and df is not None:
        df = df.sort_index(ascending=True)
        df = df[df.index >= start]
    if len(df) == 0 or df is None:
        if start is not None and len(start) > 8 and int(start[:4]) > 2500:
            log.warn("code:%s ERROR:%s" % (code, start))
            start = '2016-01-01'
        df = tdd.get_tdx_append_now_df_api(
            code, start, end).sort_index(ascending=True)
        if start is None:
            start = df.index.values[0]
        if len(df) > 2 and dl is None and start is not None and filter == 'y':
            if df.index.values[0] < index_d:
                df = df[df.index >= index_d]

    if not dtype == 'd':
        df = tdd.get_tdx_stock_period_to_type(
            df, dtype).sort_index(ascending=True)

    df = df.fillna(0)
    if len(df) > 1 + days:
        if days != 0:
            asset = df[:-days]
        else:
            asset = df
    else:
        asset = df
    if len(asset) > 1:
        operationcount = 0
        # ratio_l = []
        if countall:
            assetratio = asset
            nowpratio = df['close'][-days] if len(df) > 1 + days else None
            # print assetratio
            op, ratio_l, status, sdl = get_linear_model_rule(code, df=assetratio, nowP=nowpratio, ptype=ptype)
            # print op,ratio,status,sdl
            # ratio_l.append(round(ratio, 2))
            operationcount += op
        else:
            assetratio = asset
            nowpratio = df['close'][-days] if len(df) > 1 + days else None
            op, ratio_l, status, sdl = get_linear_model_rule(code, df=assetratio, nowP=nowpratio, ptype=ptype)
            # ratio_l.append(round(ratio, 2))
            operationcount += op

        return operationcount, (ratio_l), df[:1].index.values[0], [len(df), df[:1]]

    elif len(asset) == 1:
        ## log.error("powerCompute code:%s"%(code))
        if ptype == 'high':
            if df.close[-1] >= df.high[-1] * 0.99 and df.close[-1] >= df.open[-1]:
                return 12, 0, df.index.values[0], [len(df), df[:1]]

            elif df.close[-1] > df.open[-1]:
                if df.close[-1] > df.high[-1] * 0.97:
                    if len(df) > 2 and df.close[-1] > df.close[-2]:
                        return 10, 0, df.index.values[0], [len(df), df[:1]]
                    else:
                        return 11, 0, df.index.values[0], [len(df), df[:1]]
                else:
                    return 9, 0, df.index.values[0], [len(df), df[:1]]
            else:
                if len(df) >= 2:
                    if df.close[-1] > df.close[-2] * 1.01:
                        return 9, 0, df.index.values[0], [len(df), df[:1]]
                    elif df.close[-1] > df.close[-2]:
                        return 8, 0, df.index.values[0], [len(df), df[:1]]
                    elif df.low[-1] > df.low[-2]:
                        return 6, 0, df.index.values[0], [len(df), df[:1]]
                    else:
                        return 3, 0, df.index.values[0], [len(df), df[:1]]
                else:
                    return 1, 0, df.index.values[0], [len(df), df[:1]]
        else:
            return -10, 0, df.index.values[0], [len(df), df[:1]]
    else:
        if ptype == 'high':
            return 13, 1, cct.get_today(), [len(df), df[:1]]
        else:
            return -10, -10, cct.get_today(), [len(df), df[:1]]
Esempio n. 5
0
def get_linear_model_histogram(code,
                               ptype='low',
                               dtype='d',
                               start=None,
                               end=None,
                               vtype='f',
                               filter='n',
                               df=None):
    # 399001','cyb':'zs399006','zxb':'zs399005
    # code = '999999'
    # code = '601608'
    # code = '000002'
    # asset = get_kdate_data(code)['close'].sort_index(ascending=True)
    # df = tdd.get_tdx_Exp_day_to_df(code, 'f').sort_index(ascending=True)
    # ptype='close'
    # if ptype == 'close' or ptype==''
    # ptype=

    if start is not None and filter == 'y':
        if code not in ['999999', '399006', '399001']:
            index_d, dl = tdd.get_duration_Index_date(dt=start)
            log.debug("index_d:%s dl:%s" % (str(index_d), dl))
        else:
            index_d = cct.day8_to_day10(start)
            log.debug("index_d:%s" % (index_d))
        start = tdd.get_duration_price_date(code, ptype='low', dt=index_d)
        log.debug("start:%s" % (start))
    if df is None:
        # df = tdd.get_tdx_append_now_df(code, ptype, start, end).sort_index(ascending=True)
        df = tdd.get_tdx_append_now_df_api(code, start,
                                           end).sort_index(ascending=True)
    if not dtype == 'd':
        df = tdd.get_tdx_stock_period_to_type(df,
                                              dtype).sort_index(ascending=True)
    asset = df[ptype]
    log.info("df:%s" % asset[:1])
    asset = asset.dropna()
    dates = asset.index

    if not code.startswith('999') and not code.startswith('399'):
        # print "code:",code
        if code[:1] in ['5', '6', '9']:
            code2 = '999999'
        elif code[:2] in ['30']:
            # print "cyb"
            code2 = '399006'
        else:
            code2 = '399001'
        df1 = tdd.get_tdx_append_now_df_api(code2, start,
                                            end).sort_index(ascending=True)
        # df1 = tdd.get_tdx_append_now_df(code2, ptype, start, end).sort_index(ascending=True)
        if not dtype == 'd':
            df1 = tdd.get_tdx_stock_period_to_type(
                df1, dtype).sort_index(ascending=True)
            # if len(asset) < len(df1):
            # asset1 = df1.loc[asset.index, ptype]
            # else:
            # asset1 = df1.loc[asset.index, ptype]
        # startv = asset1[:1]
        # asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))
        # print asset[:1].index[0] , df1[:1].index[0]
        if asset[:1].index[0] > df1[:1].index[0]:
            asset1 = df1.loc[asset.index, ptype]
            startv = asset1[:1]
            asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))
        else:
            df = df[df.index >= df1.index[0]]
            asset = df[ptype]
            asset = asset.dropna()
            dates = asset.index
            asset1 = df1.loc[df.index, ptype]
            asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))

    else:
        if code.startswith('399001'):
            code2 = '999999'
        elif code.startswith('399006'):
            code2 = '399005'
        else:
            code2 = '399001'
        df1 = tdd.get_tdx_append_now_df_api(code2, start,
                                            end).sort_index(ascending=True)
        # print df1[:1]
        # df1 = tdd.get_tdx_append_now_df(code2, ptype, start, end).sort_index(ascending=True)
        if not dtype == 'd':
            df1 = tdd.get_tdx_stock_period_to_type(
                df1, dtype).sort_index(ascending=True)
        if len(asset) < len(df1):
            asset1 = df1.loc[asset.index, ptype]
            asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))
        else:

            df = df[df.index >= df1.index[0]]
            asset = df[ptype]
            asset = asset.dropna()
            dates = asset.index
            asset1 = df1.loc[df.index, ptype]
            asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))
    # print len(df),len(asset),len(df1),len(asset1)

    if end is not None:
        # print asset[-1:]
        asset = asset[:-1]
        dates = asset.index
        asset1 = asset1[:-1]
        asset1 = asset1.apply(lambda x: round(x / asset1[:1], 2))

    # 画出价格随时间变化的图像
    # _, ax = plt.subplots()
    # fig = plt.figure()
    fig = plt.figure(figsize=(16, 5))
    # fig = plt.figure(figsize=(16, 10), dpi=72)
    # fig.autofmt_xdate() #(no fact)

    # plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
    plt.subplots_adjust(left=0.05,
                        bottom=0.08,
                        right=0.95,
                        top=0.95,
                        wspace=0.15,
                        hspace=0.25)
    # set (gca,'Position',[0,0,512,512])
    # fig.set_size_inches(18.5, 10.5)
    # fig=plt.fig(figsize=(14,8))
    ax1 = fig.add_subplot(121)
    # asset=asset.apply(lambda x:round( x/asset[:1],2))
    ax1.plot(asset)
    # ax1.plot(asset1,'-r', linewidth=2)
    ticks = ax1.get_xticks()
    # start, end = ax1.get_xlim()
    # print start, end, len(asset)
    # print ticks, ticks[:-1]
    # (ticks[:-1] if len(asset) > end else np.append(ticks[:-1], len(asset) - 1))
    ax1.set_xticklabels(
        [dates[int(i)] for i in (np.append(ticks[:-1],
                                           len(asset) - 1))],
        rotation=15)  # Label x-axis with dates
    # 拟合
    X = np.arange(len(asset))
    x = sm.add_constant(X)
    model = regression.linear_model.OLS(asset, x).fit()
    a = model.params[0]
    b = model.params[1]
    # log.info("a:%s b:%s" % (a, b))
    log.info("X:%s a:%s b:%s" % (len(asset), a, b))
    Y_hat = X * b + a

    # 真实值-拟合值,差值最大最小作为价值波动区间
    # 向下平移
    i = (asset.values.T - Y_hat).argmin()
    c_low = X[i] * b + a - asset.values[i]
    Y_hatlow = X * b + a - c_low

    # 向上平移
    i = (asset.values.T - Y_hat).argmax()
    c_high = X[i] * b + a - asset.values[i]
    Y_hathigh = X * b + a - c_high
    plt.plot(X, Y_hat, 'k', alpha=0.9)
    plt.plot(X, Y_hatlow, 'r', alpha=0.9)
    plt.plot(X, Y_hathigh, 'r', alpha=0.9)
    # plt.xlabel('Date', fontsize=12)
    plt.ylabel('Price', fontsize=12)
    plt.title(code + " | " + str(dates[-1])[:11], fontsize=14)
    plt.legend([asset.iat[-1]], fontsize=12, loc=4)
    plt.grid(True)

    # plt.legend([code]);
    # plt.legend([code, 'Value center line', 'Value interval line']);
    # fig=plt.fig()
    # fig.figsize = [14,8]
    scale = 1.1
    zp = zoompan.ZoomPan()
    figZoom = zp.zoom_factory(ax1, base_scale=scale)
    figPan = zp.pan_factory(ax1)

    # 将Y-Y_hat股价偏离中枢线的距离单画出一张图显示,对其边界线之间的区域进行均分,大于0的区间为高估,小于0的区间为低估,0为价值中枢线。
    ax3 = fig.add_subplot(122)
    # distance = (asset.values.T - Y_hat)
    distance = (asset.values.T - Y_hat)[0]
    # if code.startswith('999') or code.startswith('399'):
    if len(asset) > len(df1):
        ax3.plot(asset)
        plt.plot(distance)
        ticks = ax3.get_xticks()
        ax3.set_xticklabels(
            [dates[int(i)] for i in (np.append(ticks[:-1],
                                               len(asset) - 1))],
            rotation=15)
        n = 5
        d = (-c_high + c_low) / n
        c = c_high
        while c <= c_low:
            Y = X * b + a - c
            plt.plot(X, Y - Y_hat, 'r', alpha=0.9)
            c = c + d
        ax3.plot(asset)
        ## plt.xlabel('Date', fontsize=12)
        plt.ylabel('Price-center price', fontsize=14)
        plt.grid(True)
    else:
        as3 = asset.apply(lambda x: round(x / asset[:1], 2))
        ax3.plot(as3)
        ax3.plot(asset1, '-r', linewidth=2)

        # assvol = df.loc[asset.index]['vol']
        # assvol = assvol.apply(lambda x: round(x / assvol[:1], 2))
        # ax3.plot(assvol, '-g', linewidth=2)

        plt.grid(True)
        zp3 = zoompan.ZoomPan()
        figZoom = zp3.zoom_factory(ax3, base_scale=scale)
        figPan = zp3.pan_factory(ax3)
    # plt.title(code, fontsize=14)
    if 'name' in df.columns:
        plt.legend([df.name[-1], df1.name[-1]], loc=0)
    else:
        dm = tdd.get_sina_data_df(code)
        if 'name' in dm.columns:
            cname = dm.name[0]
        else:
            cname = '-'
        # plt.legend([code, code2], loc=0)
        plt.legend([cname, code2], loc=0)
    plt.show(block=False)
Esempio n. 6
0
def get_roll_mean_all(single=True,
                      tdx=False,
                      app=True,
                      duration=100,
                      ma_250_l=1.02,
                      ma_250_h=1.11):
    # df = tdd.search_Tdx_multi_data_duration('tdx_all_df_300', 'all_300', df=None,code_l=code_list, start=start, end=None, freq=None, col=None, index='date')
    block_path = tdd.get_tdx_dir_blocknew() + '060.blk'

    if not app and cct.get_file_size(
            block_path) > 100 and cct.creation_date_duration(block_path) == 0:
        print "It's Today Update"
        return True
    code_list = sina_data.Sina().market('all').index.tolist()
    print "all code:", len(code_list)
    if duration < 300:
        h5_fname = 'tdx_all_df' + '_' + str(300)
        h5_table = 'all' + '_' + str(300)
    else:
        h5_fname = 'tdx_all_df' + '_' + str(900)
        h5_table = 'all' + '_' + str(900)
    # df = tdd.search_Tdx_multi_data_duration('tdx_all_df_300', 'all_300', df=None,code_l=code_list, start='20150501', end=None, freq=None, col=None, index='date')
    df = tdd.search_Tdx_multi_data_duration(h5_fname,
                                            h5_table,
                                            df=None,
                                            code_l=code_list,
                                            start=None,
                                            end=None,
                                            freq=None,
                                            col=None,
                                            index='date')
    # df = tdd.search_Tdx_multi_data_duration(h5_fname, h5_table, df=None,code_l=code_list, start=None, end=None, freq=None, col=None, index='date',tail=1)

    code_uniquelist = df.index.get_level_values('code').unique()

    code_select = code_uniquelist[random.randint(0, len(code_uniquelist) - 1)]
    print round(time.time() - time_s, 2), df.index.get_level_values(
        'code').unique().shape, code_select, df.loc[code_select].shape
    # df.groupby(level=[0]),df.index.get_level_values(0)
    # len(df.index.get_level_values('code').unique())
    # df = df[~df.index.duplicated(keep='first')]
    dfs = df

    def get_groupby_mean_median_close(dfs):

        groupd = dfs.groupby(level=[0])
        df = groupd['close'].agg({'median': 'median', 'mean': 'mean'})
        df['close'] = groupd.tail(1).reset_index().set_index(['code'])['close']
        # dfs['mean'] = groupd['close'].agg('mean')
        # dfs['median'] = groupd['close'].agg('median')

        # dfs = dfs.fillna(0)
        # idx = pd.IndexSlice
        # mask = ( (dfs['mean'] > dfs['median'])
        #         & (dfs['close'] > dfs['mean'])
        #         )
        # df=dfs.loc[idx[mask, :]]

        df = df[(df['mean'] > df['median']) & (df['close'] > df['mean'])]

        # dt_low = None
        # if dl == 1:
        #     dfs = groupd.tail(1)
        #     print("dfs tail1")
        # else:
        #     dl = 30
        #     dindex = tdd.get_tdx_Exp_day_to_df(
        #         '999999', dl=dl).sort_index(ascending=False)
        #     dt = tdd.get_duration_price_date('999999', df=dindex)
        #     dt = dindex[dindex.index >= dt].index.values
        #     dt_low = dt[-1]
        #     dtlen = len(dt) if len(dt) >0 else 1
        #     dfs = groupd.tail(dtlen)
        #     print("dfs tail:%s dt:%s"%(dtlen,dt))
        #     dfs = get_multi_date_duration(dfs,dt[-1])
        return df

    groupd = dfs.groupby(level=[0])

    # rollma = ['5','10','60','100','200']
    # rollma = ['5','10','250']
    if duration < 300:
        rollma = ['10']
    else:
        rollma = ['10', '250']

    rollma.extend([str(duration)])

    # import ipdb;ipdb.set_trace()
    # df.loc['300130'][:2]

    # dfs['mean'] = groupd['close'].agg('mean')
    # dfs['median'] = groupd['close'].agg('median')

    for da in rollma:
        cumdays = int(da)
        dfs['ma%d' % cumdays] = groupd['close'].apply(pd.rolling_mean, cumdays)
        if cumdays == 10:
            dfs['upper'] = dfs['ma%d' % cumdays].apply(lambda x: round(
                (1 + 11.0 / 100) * x, 1))
            dfs['lower'] = dfs['ma%d' % cumdays].apply(lambda x: round(
                (1 - 9.0 / 100) * x, 1))
            dfs['ene'] = map(lambda x, y: round((x + y) / 2, 1), dfs['upper'],
                             dfs['lower'])
        # df['upper'] = map(lambda x: round((1 + 11.0 / 100) * x, 1), df.ma10d)
        # df['lower'] = map(lambda x: round((1 - 9.0 / 100) * x, 1), df.ma10d)
        # df['ene'] = map(lambda x, y: round((x + y) / 2, 1), df.upper, df.lower)
        # dfs['amount%d'%cumdays] = groupd['amount'].apply(pd.rolling_mean, cumdays)
    # df.ix[df.index.levels[0]]
    #df.ix[df.index[len(df.index)-1][0]] #last row
    # dfs = tdd.search_Tdx_multi_data_duration(df=dfs,code_l=code_list, start='20170918', end='20170918', freq=None, col=None, index='date')

    # print dfs[:1],len(dfs)
    # groupd.agg({'low': 'min'})
    # '''idx mask filter'''
    # '''
    dt_low = None
    df_idx = None
    if single:
        dfs = groupd.tail(1)
        print("dfs tail1")
    else:
        dl = 30
        dindex = tdd.get_tdx_Exp_day_to_df('999999',
                                           dl=dl).sort_index(ascending=False)
        dt = tdd.get_duration_price_date('999999', df=dindex)
        dt = dindex[dindex.index >= dt].index.values
        dt_low = dt[-1]
        dtlen = len(dt) if len(dt) > 0 else 1
        dfs = groupd.tail(dtlen)
        # import ipdb;ipdb.set_trace()
        df_idx = get_groupby_mean_median_close(dfs)

        print("dfs tail:%s dt:%s" % (dtlen, dt))
        dfs = get_multi_date_duration(dfs, dt[-1])

        # groupd2 = dfs.groupby(level=[0])
        # dfs['ma%d'%cumdays] = groupd['close'].apply(pd.rolling_mean, cumdays)

        # dfs.reset_index().groupby(['code'])['date'].transform('count')
        single = True

    dfs = dfs.fillna(0)
    idx = pd.IndexSlice
    # mask = (dfs[('ma%s')%(rollma[0])] > dfs[('ma%s')%(rollma[1])]) & (dfs[('ma%s')%(rollma[-1])] > 0) & (dfs[('close')] > dfs[('ma%s')%(rollma[0])])  & (dfs[('close')] > dfs[('ma%s')%(rollma[-1])])
    # mask = (dfs[('ma%s')%(rollma[0])] > dfs[('ma%s')%(rollma[1])]) & (dfs[('ma%s')%(rollma[-1])] > 0) & (dfs[('close')] > dfs[('ma%s')%(rollma[1])])  & (dfs[('close')] > dfs[('ma%s')%(rollma[-1])])
    # mask = (dfs[('ma%s')%(rollma[0])] > dfs[('ma%s')%(rollma[1])]) & (dfs[('ma%s')%(rollma[-1])] > 0) &  (dfs[('close')] > dfs[('ma%s')%(rollma[-1])])

    # mask = ( (dfs[('ma%s')%(rollma[0])] > 0) & (dfs[('ma%s')%(rollma[-1])] > 0) & (dfs[('close')] > dfs[('ma%s')%(rollma[-1])]) & (dfs[('close')] > dfs[('ma%s')%(rollma[0])]))

    # mask = ( (dfs[('ma%s')%(rollma[0])] > 0) & (dfs[('ma%s')%(rollma[-1])] > 0)
    #         & (dfs[('close')] > dfs[('ma%s')%(rollma[-1])]*ma_250_l)
    #         & (dfs[('close')] < dfs[('ma%s')%(rollma[-1])]*ma_250_h)
    #         & (dfs[('close')] > dfs[('ma%s')%(rollma[0])]))

    # & (dfs['mean'] > dfs['median'])
    # & (dfs['close'] > dfs['mean'])

    if len(rollma) > 1:

        mask = ((dfs[('ma%s') % (rollma[0])] > 0) & (dfs[('ma%s') %
                                                         (rollma[-1])] > 0)
                & (dfs[('ma%s') % (rollma[0])] > dfs[('ma%s') % (rollma[-1])])
                & (dfs[('close')] > dfs[('ma%s') % (rollma[0])])
                & (dfs[('close')] > dfs[('ma%s') % (rollma[-1])] * ma_250_h)
                & ((dfs[('close')] > dfs['ene']) |
                   (dfs[('close')] > dfs['upper'])))
    else:
        mask = ((dfs[('ma%s') % (rollma[0])] > 0)
                & (dfs[('close')] > dfs[('ma%s') % (rollma[0])])
                & ((dfs[('close')] > dfs['ene']) |
                   (dfs[('close')] > dfs['upper'])))

    # mask = ((dfs[('close')] > dfs[('ma%s')%(rollma[-1])]))
    df = dfs.loc[idx[mask, :]]
    df = get_multi_code_count(df)
    print(df.couts[:5])

    # import ipdb;ipdb.set_trace()
    # df.sort_values(by='couts',ascending=0)
    # groupd.first()[:2],groupd.last()[:2]
    # groupd = df250.groupby(level=[0])
    # '''
    # groupd.transform(lambda x: x.iloc[-1])
    # groupd.last()
    # groupd.apply(lambda x: x.close > x.ma250)
    # df.shape,df.sort_index(ascending=False)[:5]
    # ?groupd.agg
    # groupd = df.groupby(level=[0])
    # groupd['close'].apply(pd.rolling_mean, 250, min_periods=1)
    #ex:# Group df by df.platoon, then apply a rolling mean lambda function to df.casualties
    # df.groupby('Platoon')['Casualties'].apply(lambda x:x.rolling(center=False,window=2).mean())

    code_uniquelist = df.index.get_level_values('code').unique()
    code_select = code_uniquelist[random.randint(0, len(code_uniquelist) - 1)]

    if app:
        print round(time.time() - time_s, 2), 's', df.index.get_level_values(
            'code').unique().shape, code_select, df.loc[code_select][-1:]

    if single:
        # groupd = df.groupby(level=[0])
        if tdx:
            # block_path = tdd.get_tdx_dir_blocknew() + '060.blk'
            # if cct.get_work_time():
            #     codew = df[df.date == cct.get_today()].index.tolist()

            if dt_low is not None:
                groupd2 = df.groupby(level=[0])
                df = groupd2.tail(1)
                df = df.reset_index().set_index('code')
                # import ipdb;ipdb.set_trace()

                # df = df[(df.date >= dt_low) & (df.date <= cct.get_today())]
                dd = df[(df.date == dt_low)]
                df = df[(df.date >= cct.last_tddate(1))]
                # import ipdb;ipdb.set_trace()
                print("df:%s df_idx:%s" % (len(df), len(df_idx)))

                if df_idx is not None and len(df_idx) > 0:
                    df = df.loc[df_idx.index, :].dropna()
                print("Main Down dd :%s MainUP df:%s couts std:%0.1f " %
                      (len(dd), len(df), df.couts.std()))
                # print df.date.mode()[0]
                df = df.sort_values(by='couts', ascending=1)
                df = df[df.couts > df.couts.std()]
                # df = df[(df.date >= df.date.mode()[0]) & (df.date <= cct.get_today())]
                codew = df.index.tolist()

                if app:
                    print round(time.time() - time_s, 2), 'groupd2', len(df)

            else:
                df = df.reset_index().set_index('code')
                df = df[(df.date >= cct.last_tddate(days=10))
                        & (df.date <= cct.get_today())]
                codew = df.index.tolist()

            top_temp = tdd.get_sina_datadf_cnamedf(codew, df)
            top_temp = top_temp[(~top_temp.index.str.contains('688'))
                                & (~top_temp.name.str.contains('ST'))]
            codew = top_temp.index.tolist()

            #clean st and 688

            if app:
                hdf5_wri = cct.cct_raw_input("rewrite code [Y] or append [N]:")
                if hdf5_wri == 'y' or hdf5_wri == 'Y':
                    append_status = False
                else:
                    append_status = True
            else:
                append_status = False

            if len(codew) > 10:
                cct.write_to_blocknew(block_path,
                                      codew,
                                      append_status,
                                      doubleFile=False,
                                      keep_last=0)
                print "write:%s block_path:%s" % (len(codew), block_path)
            else:
                print "write error:%s block_path:%s" % (len(codew), block_path)

        # df['date'] = df['date'].apply(lambda x:(x.replace('-','')))
        # df['date'] = df['date'].astype(int)
        # print df.loc[code_select].T,df.shape
        MultiIndex = False
    else:
        MultiIndex = True
    h5a.write_hdf_db('all300',
                     df,
                     table='roll200',
                     index=False,
                     baseCount=500,
                     append=False,
                     MultiIndex=MultiIndex)
    return df
Esempio n. 7
0
def get_linear_model_status(code, ptype="f", df=None, dtype="d", type="m", start=None, end=None, days=1, filter="y"):
    if start is not None and filter == "y":
        if code not in ["999999", "399006", "399001"]:
            index_d, dl = tdd.get_duration_Index_date(dt=start)
            log.debug("index_d:%s dl:%s" % (str(index_d), dl))
        else:
            index_d = cct.day8_to_day10(start)
            log.debug("index_d:%s" % (index_d))
        start = tdd.get_duration_price_date(code, ptype="low", dt=index_d)
        log.debug("start: %s" % (start))
    if df is None:
        # df = tdd.get_tdx_append_now_df(code,ptype, start, end).sort_index(ascending=True)
        df = tdd.get_tdx_append_now_df_api(code, ptype, start, end).sort_index(ascending=True)
    log.info("Code:%s start:%s end:%s" % (code, start, df[-1:].index.values[0]))
    if not dtype == "d":
        df = tdd.get_tdx_stock_period_to_type(df, dtype).sort_index(ascending=True)
    # df = tdd.get_tdx_Exp_day_to_df(code, 'f').sort_index(ascending=True)
    def get_linear_model_ratio(asset):
        log.info("asset:%s" % asset[-1:])
        duration = asset[-1:].index.values[0]
        log.debug("duration:%s" % duration)
        log.debug("duration:%s" % cct.get_today_duration(duration))
        # log.debug("duration:%s"%cct.get_duration_date(duration))
        asset = asset.dropna()
        X = np.arange(len(asset))
        x = sm.add_constant(X)
        model = regression.linear_model.OLS(asset, x).fit()
        a = model.params[0]
        b = model.params[1]
        log.info("X:%s a:%0.1f b:%0.1f" % (len(asset), a, b))
        Y = np.append(X, X[-1] + int(days))
        log.debug("X:%s Y:%s" % (X[-1], Y[-1]))
        # print ("X:%s" % (X[-1]))
        Y_hat = X * b + a
        # Y_hat_t = Y * b + a
        # log.info("Y_hat:%s " % (Y_hat))
        # log.info("asset:%s " % (asset.values))
        ratio = b / a * 100
        operation = 0
        if Y_hat[-1] > Y_hat[1]:
            log.debug("u-Y_hat[-1]:%0.1f" % (Y_hat[-1]))
            log.debug("price:%0.1f" % asset.iat[-1])
            log.debug("u:%0.1f" % Y_hat[1])
            log.debug("price:%0.1f" % asset.iat[1])
            if type.upper() == "M":
                Y_Future = Y * b + a
                # ratio = b/a*100
                log.info("ratio: %0.1f %0.1f Y_Mid: %0.1f" % (b, ratio, Y_Future[-1]))
                # diff = asset.iat[-1] - Y_hat[-1]
                # if diff > 0:
                # return True, len(asset), diff
                # else:
                # return False, len(asset), diff
            elif type.upper() == "L":
                i = (asset.values.T - Y_hat).argmin()
                c_low = X[i] * b + a - asset.values[i]
                # Y_hatlow = X * b + a - c_low
                Y_Future = Y * b + a - c_low
                log.info("b: %0.1f ratio:%0.1f Y_Mid: %0.1f" % (b, ratio, Y_Future[-1]))
                # diff = asset.iat[-1] - Y_hatlow[-1]
                # if asset.iat[-1] - Y_hatlow[-1] > 0:
                # return True, len(asset), diff
                # else:
                # return False, len(asset), diff
            elif type.upper() == "H":
                i = (asset.values.T - Y_hat).argmax()
                c_high = X[i] * b + a - asset.values[i]
                # Y_hathigh = X * b + a - c_high
                Y_Future = Y * b + a - c_high
                log.info("ratio: %0.1f %0.1f Y_Mid: %0.1f" % (b, ratio, Y_Future[-1]))
            diff = asset[-1] - Y_Future[-1]
            # print ("as:%s Y:%s"%(asset[-1] ,Y_Future[-1]))
            if diff > 0:
                operation += 1
                log.info("UP !!%s Y_Future: %0.1f b:%0.1f ratio:%0.1f " % (type.upper(), Y_Future[-1], b, ratio))
            else:
                log.info("Down %s Y_Future: %0.1f b:%0.1f ratio:%0.1f" % (type.upper(), Y_Future[-1], b, ratio))
            return operation, ratio
        else:
            log.debug("down !!! d:%s" % Y_hat[1])
            print ("down !!! d:%s" % Y_hat[1])
            return 0, 0

    # print "high:",
    operationcount = 0
    ratio_l = []
    for co in ["high", "close", "low"]:
        op, ratio = get_linear_model_ratio(df[co])
        ratio_l.append(round(ratio, 2))
        operationcount += op
    log.info("op:%s min:%s ratio_l:%s" % (operationcount, min(ratio_l), ratio_l))
    return operationcount, ratio.min()