Esempio n. 1
0
def get_sd(id, label_start_date, fea_delta=240):
    """Collect per-day "top 10 ticks" features for a stock and dump them to CSV.

    For every date produced by ``dt_tool.dt_range(label_start_date,
    -fea_delta)`` the tick data is fetched, the ten rows with the largest
    ``amount`` are kept and flattened into a single row whose columns are
    named ``<rank>_<field>`` (rank 0-9), and a ``date`` column is added.
    The rows gathered so far are written to ``data/ticks.csv``.

    Args:
        id: Stock code, passed straight through to ``ts.get_tick_data``.
        label_start_date: Anchor date handed to ``dt_tool.dt_range``.
        fea_delta: How much history to prepare features for.  It is negated
            before being passed to ``dt_tool.dt_range`` (presumably a number
            of days looking backwards -- confirm against ``dt_tool``).

    Returns:
        None.  The only output of the code shown here is the CSV file.
    """
    # Fetch the tick data for every date in the feature window.
    ticks = None
    date_list = dt_tool.dt_range(label_start_date, -fea_delta)
    for date in date_list:
        try:
            tick = ts.get_tick_data(id, date)
        except Exception as e:
            # Best effort: report the failed day and carry on with the rest.
            print(e)
            continue
        if tick is None:
            continue

        # Map the textual trade type to its id so the column can be stacked
        # together with the other fields.
        tick['type'] = tick['type'].apply(type2id)

        # Ten largest trades of the day, without the bookkeeping columns.
        top = (tick.sort('amount', ascending=False)
                   .head(10)
                   .reset_index()
                   .drop(['index', 'time', 'change'], 1))

        # Flatten (rank, field) -> value into one wide row whose columns are
        # labelled "<rank>_<field>".
        ft = top.stack().reset_index()
        ft.index = ft.level_0.map(str) + '_' + ft.level_1
        fT = ft.drop(['level_0', 'level_1'], 1).T
        fT['date'] = dt_tool.format(date)

        if ticks is None:
            ticks = fT
        else:
            ticks = ticks.append(fT)
        # Rewritten after every processed day, so the file on disk always
        # holds the rows gathered so far.
        ticks.to_csv('data/ticks.csv', index=None)
Esempio n. 2
0
def get_tick_feature(id, end_str, delta):
    """Build one feature row per day from the largest ticks of a stock.

    Walks backwards day by day from ``end_str``.  For each day with tick
    data, the ten rows with the largest ``amount`` are flattened into a
    single row whose columns are named ``<rank>_<field>`` (rank 0-9), and a
    ``date`` column is added.

    Args:
        id: Stock code, passed straight through to ``ts.get_tick_data``.
        end_str: Last day of the window, either a ``'YYYY-MM-DD'`` string or
            a ``date``/``datetime`` object.
        delta: Currently unused -- the window is fixed at 90 calendar days.
            NOTE(review): callers pass a value here; confirm whether it was
            meant to control the window length.

    Returns:
        A DataFrame with one row per day that had tick data, or ``None``
        when no day yielded any data.
    """
    # Accept date objects as-is and parse everything else; previously any
    # non-``str`` argument left ``end`` unbound.
    if isinstance(end_str, dt.date):
        end = end_str
    else:
        end = dt.datetime.strptime(end_str, '%Y-%m-%d')
    date_list = [end - dt.timedelta(days=x) for x in range(0, 90)]

    ticks = None
    for date in date_list:
        date_str = date.strftime('%Y-%m-%d')
        try:
            tick = ts.get_tick_data(id, date_str)
        except Exception:
            # Best effort: skip days that cannot be fetched, but do not
            # swallow KeyboardInterrupt / SystemExit.
            continue
        if tick is None:
            continue

        # Ten largest trades of the day, stacked into (rank, field, value).
        ft = tick.sort('amount', ascending=False).head(10).reset_index().drop(['index', 'time'], 1).stack().reset_index()
        # Label each value "<rank>_<field>" and keep only the value column,
        # so the transpose is a single row.  Keeping a helper label column
        # here would add a second row that just repeats the labels.
        ft.index = ft.level_0.map(str) + '_' + ft.level_1
        fT = ft.drop(['level_0', 'level_1'], 1).T
        fT['date'] = dt_tool.format(date_str)

        if ticks is None:
            ticks = fT
        else:
            ticks = ticks.append(fT)
    return ticks
Esempio n. 3
0
def get_sd(id):
    """Merge price history with tick features and return it in long format.

    Fetches forward-adjusted history from ``ts.get_h_data`` and the tick
    features from ``get_tick_feature``, left-joins the features onto the
    history by date, and stacks the result into a three-column frame.
    Intermediate frames are dumped to ``hist.csv``, ``tick.csv`` and
    ``merged.csv`` in the working directory.

    Args:
        id: Stock code, passed through to the data-fetching calls.

    Returns:
        A DataFrame whose columns are named ``date``, ``type`` and
        ``value``, or ``None`` when the history or the tick features are
        unavailable.
    """
    try:
        hist = ts.get_h_data(id, autype='qfq', start='2005-06-10')
    except Exception:
        return None
    if hist is None:
        return None

    # Re-key the history by the project's formatted date so that it lines up
    # with the 'date' column of the tick features.
    ls = hist.index.format()
    hist.index = [dt_tool.format(x) for x in ls]

    fea_tick = get_tick_feature(id, '2015-06-10', 1500)
    if fea_tick is None:
        # get_tick_feature returns None when no day produced tick data.
        return None

    print('fea %s' % (fea_tick.columns,))
    print('hist %s' % (hist.columns,))
    hist.to_csv('hist.csv')
    fea_tick.to_csv('tick.csv')

    h = hist.merge(fea_tick, how='left', left_index=True, right_on='date')
    h.to_csv('merged.csv')

    # Stack the wide frame into a series, then expose it as a long table.
    s = h.stack()
    spd = pd.DataFrame(s)
    sd = spd.reset_index()
    sd.columns = ('date', 'type', 'value')
    return sd
Esempio n. 4
0
        fT['date'] = dt_tool.format(date)
        if ticks is None:
            ticks = fT
        else:
            ticks = ticks.append(fT)
        ticks.to_csv('data/ticks.csv', index=None)
    
    # 获取历史数据矩阵
    # 将数据stack为series, 方便处理
    try:
        #hist = ts.get_h_data(id, autype='qfq', start='2005-06-10')
        hist = ts.get_hist_data(id)
    except Exception, e:
        print e
    ls = hist.index.format()
    hist['date'] = [dt_tool.format(x) for x in ls]
    hist.to_csv('data/hist.csv', index=None)

    if hist is None or ticks is None:
        return None
   
    # 聚合两份数据
    h = hist.merge(ticks, how='left',  on='date')
    h.to_csv('data/merged.csv', index=None)
    h.index = h.date
    h = h.drop('date', 1)
    s = h.stack()
    spd = pd.DataFrame(s)
    sd = spd.reset_index()
    sd.columns = ('date', 'type', 'value')
    sd.to_csv('data/sd.csv', index=None)