Exemple #1
0
def get_(folder):
    """Load a saved DataView and return the inputs used for signal analysis.

    Args:
        folder: Path of the saved dataview, passed to ``dv.load_dataview``.

    Returns:
        A 7-tuple ``(signal, price, mask, group, can_enter, can_exit,
        benchmark)`` where:

        * ``signal``, ``price`` and ``group`` are the "pb", "close_adj" and
          "sw1" series read from the dataview;
        * ``mask`` is True where "index_member" equals 0 (non-constituents,
          i.e. the entries to filter out);
        * ``can_enter`` / ``can_exit`` are True where "trade_status" equals 1
          and the up-limit / down-limit flag is below 1;
        * ``benchmark`` is ``dv.data_benchmark``.
    """
    # Step 2: factor data pre-processing -- load the saved dataview.
    dv = DataView()
    dv.load_dataview(folder)

    signal = dv.get_ts("pb")
    price = dv.get_ts("close_adj")
    group = dv.get_ts("sw1")

    # Signal filter: flag everything that is not an index constituent.
    mask = dv.get_ts('index_member') == 0

    # Tradability: not suspended and not at the daily price limit.
    tradable = dv.get_ts('trade_status') == 1

    # Limit-up flag: open is more than 9.5% above the previous close.
    hit_up_limit = dv.add_formula(
        'up_limit',
        '(open - Delay(close, 1)) / Delay(close, 1) > 0.095',
        is_quarterly=False)
    # Limit-down flag: open is more than 9.5% below the previous close.
    hit_down_limit = dv.add_formula(
        'down_limit',
        '(open - Delay(close, 1)) / Delay(close, 1) < -0.095',
        is_quarterly=False)

    can_enter = np.logical_and(hit_up_limit < 1, tradable)
    can_exit = np.logical_and(hit_down_limit < 1, tradable)

    return signal, price, mask, group, can_enter, can_exit, dv.data_benchmark
Exemple #2
0
def test_q_add_formula():
    """Check that formulas can be added to the dataview at ``quarterly_path``.

    Two formulas are added with ``add_data=True``; the second one refers to
    the first by its field name (``myvar1``). Each resulting series must be
    non-empty.
    """
    dv = DataView()
    # Removed dead locals from the original: a daily-frequency folder path
    # that was never passed to the loader, and shape/security counts that
    # were computed but never read.
    dv.load_dataview(folder_path=quarterly_path)

    formula = 'total_oper_rev / close'
    dv.add_formula('myvar1', formula, is_quarterly=False, add_data=True)
    df1 = dv.get_ts('myvar1')
    assert not df1.empty

    formula2 = 'Delta(oper_exp * myvar1 - open, 3)'
    dv.add_formula('myvar2', formula2, is_quarterly=False, add_data=True)
    df2 = dv.get_ts('myvar2')
    assert not df2.empty
Exemple #3
0
def test_load():
    """Load the dataview at ``daily_path`` and check its basic accessors.

    Verifies the start date and symbol universe, then the shape and labels
    returned by ``get_snapshot`` and ``get_ts`` for a two-symbol query.
    """
    dv = DataView()
    dv.load_dataview(folder_path=daily_path)

    assert dv.start_date == 20160601
    assert set(dv.symbol) == set('000001.SZ,600030.SH,000063.SZ'.split(','))

    queried_symbols = {'600030.SH', '000063.SZ'}

    # get_snapshot: one row per symbol, one column per requested field.
    snapshot = dv.get_snapshot(20170504,
                               symbol='600030.SH,000063.SZ',
                               fields='close,pb')
    assert snapshot.shape == (2, 2)
    assert set(snapshot.columns.values) == {'close', 'pb'}
    assert set(snapshot.index.values) == queried_symbols

    # get_ts: one row per date in the range, one column per symbol.
    close_ts = dv.get_ts('close',
                         symbol='600030.SH,000063.SZ',
                         start_date=20170101,
                         end_date=20170302)
    assert close_ts.shape == (38, 2)
    assert set(close_ts.columns.values) == queried_symbols
    assert close_ts.index.values[-1] == 20170302
        'freq': 1
    }

    dv.init_from_config(props, ds)
    dv.prepare_data()
    dv.save_dataview(dataview_folder)  # 保存数据文件到指定路径,方便下次直接加载


save_dataview()

# Load the data
dv = DataView()
dv.load_dataview(dataview_folder)

print(dv.fields)  # show the fields available in dv
print(dv.get_ts("pb").head())  # preview the "pb" series (price-to-book, per the original note)

# 3. Factor analysis
# step 1: collect and pre-process the factor
import pandas as pd
from datetime import datetime

factor = dv.get_ts("pb")
factor.index = pd.Index(
    map(lambda x: datetime.strptime(str(x), "%Y%m%d"), factor.index))
# Re-index by time: parse the integer YYYYMMDD dates into datetime objects with datetime.strptime (note the capital I in pd.Index)
factor = factor.stack()  # stack into a MultiIndex-ed Series (the original note says Alphalens factor analysis needs this form)


#这里先定义一个函数,方便下次的转换
def change_index(df):
# 1. Initialisation: load the saved dataview and add a "momentum" field.
from jaqs_fxdayu.data import DataView
import warnings

warnings.filterwarnings("ignore")
dataview_folder = 'G:/data/hs300_2'
dv = DataView()
dv.load_dataview(dataview_folder)
dv.add_formula("momentum",
               "Return(close_adj, 20)",
               is_quarterly=False,
               add_data=True)  # add a new factor with a built-in formula function; it may already have been added earlier
dv.get_ts("momentum").head()
print(dv.fields)  # show the fields available in dv

import numpy as np


# Filter conditions
def mask_index_member():
    """Return the signal filter mask for non-index constituents.

    Reads "index_member" from the module-level ``dv`` and returns a boolean
    frame that is True wherever the value is not greater than 0.
    """
    is_member = dv.get_ts('index_member') > 0
    return ~is_member


def limit_up_down():
    """Build the tradability frames: not suspended and not at a price limit.

    Reads "trade_status" from the module-level ``dv`` and stores two helper
    fields in it (``add_data=True``): 'up_limit' and 'down_limit', which flag
    a close-to-close move above +9.5% / below -9.5%.

    Returns:
        ``(can_enter, can_exit)``: ``can_enter`` is True where 'up_limit' is
        below 1 and the stock is not suspended; ``can_exit`` is the same
        with 'down_limit'.
    """
    # The original body was two versions of this function spliced together:
    # an unterminated add_formula( call followed by a repeated preamble, so
    # it could not be parsed. Only the complete variant is kept, which
    # treats any trade_status other than 1 as suspended.
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status != 1
    # Limit-up
    dv.add_formula('up_limit', '(close - Delay(close, 1)) / Delay(close, 1) > 0.095', is_quarterly=False, add_data=True)
    # Limit-down
    dv.add_formula('down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False, add_data=True)
    can_enter = np.logical_and(dv.get_ts('up_limit') < 1, ~mask_sus)  # not limit-up and not suspended
    can_exit = np.logical_and(dv.get_ts('down_limit') < 1, ~mask_sus)  # not limit-down and not suspended
    return can_enter, can_exit

# Build the filter mask and the enter/exit frames, then print their shapes
# next to the shape of the "close" series for comparison.
mask = mask_index_member()
can_enter,can_exit = limit_up_down()

print(mask.shape)
print(can_enter.shape)
print(dv.get_ts('close').shape)

from jaqs_fxdayu.research.signaldigger import multi_factor

ic = dict()
factors_dict = {signal:dv.get_ts(signal) for signal in FactorList}
Period=[20]
for period in Period:
    ic[period]=multi_factor.get_factors_ic_df(factors_dict,
                                              price=dv.get_ts("close_adj"),
                                              high=dv.get_ts("high_adj"), # 可为空
                                              low=dv.get_ts("low_adj"),# 可为空
                                              n_quantiles=5,# quantile分类数
                                              mask=mask,# 过滤条件
                                              can_enter=can_enter,# 是否能进场
                                              can_exit=can_exit,# 是否能出场
Exemple #7
0
# Collect the distinct values found under the 'name' item of df.
# The lookup is done once here; the original re-evaluated
# df.loc[:, :, 'name'].T.values twice on every iteration.
name_rows = df.loc[:, :, 'name'].T.values
FactorList = set()
for row in name_rows:
    FactorList |= set(row)
FactorList = list(FactorList)

# Add every collected name to the dataview as a field, printing progress.
i = 0  # keeps `i` defined (as 0) when FactorList is empty, as before
for i, name in enumerate(FactorList, 1):
    dv.add_field(name)
    print('导入进度%s' % (i / len(FactorList)))
    print('总体进度%s' % (i / (3 * len(FactorList))))

# Names that actually ended up among the dataview's fields.
alpha_signal = list(set(dv.fields) & set(FactorList))
dv.add_field('sw1')

sw1 = dv.get_ts('sw1')
dict_classify = {
    '480000': '银行',
    '430000': '房地产',
    '460000': '休闲服务',
    '640000': '机械设备',
    '240000': '有色金属',
    '510000': '综合',
    '410000': '公用事业',
    '450000': '商业贸易',
    '730000': '通信',
    '330000': '家用电器',
    '720000': '传媒',
    '630000': '电气设备',
    '270000': '电子',
    '490000': '非银金融',
jaqs_fxdayu.patch_all()
from jaqs_fxdayu.data import DataView
from jaqs_fxdayu.data import RemoteDataService
import os
import numpy as np
import warnings

warnings.filterwarnings("ignore")

dv = DataView()
dataview_folder = 'G:/data/hs300'
dv.load_dataview(dataview_folder)

# 2. Factor performance; this example uses roe_pb as the factor
factor = dv.get_ts('roe_pb')  # custom roe/pb factor built earlier (the preceding roe_pb section must be run first)
print(factor.tail())

# Read the data:
mask = dv.get_ts('mask_index_member')  # index-membership filter (original note: "whether it is an index constituent")
can_enter = dv.get_ts('can_enter')  # whether it can be bought
can_exit = dv.get_ts('can_exit')  # whether it can be sold
price = dv.get_ts('close_adj')  # price
group = dv.get_ts('group')  # classification info

print(can_enter.shape)
print(group.shape)

# Define functions:
import matplotlib.pyplot as plt
from jaqs.research import SignalDigger  # SignalDigger, the signal-analysis tool that ships with jaqs.research
Exemple #9
0
# Build a DataView from the local data service for the requested
# symbols, fields and date range.
dv = DataView()
ds = LocalDataService(fp=dataview_folder)

dv_props = {'start_date': start, 'end_date': end, 'symbol':','.join(stock_symbol),
         'fields': check_factor,
         'freq': 1,
         "prepare_fields": True}

# Configure and fetch once. The original ran this pair twice back-to-back
# with identical arguments, repeating the same configuration and fetch.
dv.init_from_config(dv_props, data_api=ds)
dv.prepare_data()
dv.add_field('sw1')

# Map the sw1 industry codes to their display names.
sw1 = dv.get_ts('sw1')
dict_classify = {
    '480000': '银行', '430000': '房地产', '460000': '休闲服务', '640000': '机械设备',
    '240000': '有色金属', '510000': '综合', '410000': '公用事业', '450000': '商业贸易',
    '730000': '通信', '330000': '家用电器', '720000': '传媒', '630000': '电气设备',
    '270000': '电子', '490000': '非银金融', '370000': '医药生物', '710000': '计算机',
    '280000': '汽车', '340000': '食品饮料', '220000': '化工', '210000': '采掘',
    '230000': '钢铁', '650000': '国防军工', '110000': '农林牧渔', '420000': '交通运输',
    '620000': '建筑装饰', '350000': '纺织服装', '610000': '建筑材料', '360000': '轻工制造',
}

sw1_name = sw1.replace(dict_classify)
sw1_name.tail()

# Extra fields pulled from the same data service, in the original order.
for field_name in ('close', 'high', 'low', 'turnover', 'turnover_ratio',
                   'price_div_dps', 'oper_rev', 'roa', 'total_share',
                   'pe_ttm'):
    dv.add_field(field_name, ds)
Exemple #10
0
from jaqs_fxdayu.data import DataView
import warnings

# Load the saved dataview, mask out suspended-trading rows in the adjusted
# OHLC series, compute a CCI indicator from them and append it to dv as 'CCI'.
warnings.filterwarnings("ignore")
dataview_folder = '../Factor'
dv = DataView()
dv.load_dataview(dataview_folder)

from jaqs_fxdayu.research.signaldigger import process

Open = dv.get_ts("open_adj")
High = dv.get_ts("high_adj")
Low = dv.get_ts("low_adj")
Close = dv.get_ts("close_adj")
trade_status = dv.get_ts('trade_status')
mask_sus = trade_status == 0
# Drop the data from suspension periods before computing the indicator
open_masked = process._mask_df(Open, mask=mask_sus)
high_masked = process._mask_df(High, mask=mask_sus)
low_masked = process._mask_df(Low, mask=mask_sus)
close_masked = process._mask_df(Close, mask=mask_sus)

from jaqs_fxdayu.data import signal_function_mod as sfm
cci = sfm.ta(ta_method='CCI',
             ta_column=0,
             Open=open_masked,
             High=high_masked,
             Low=low_masked,
             Close=close_masked,
             Volume=None)
dv.append_df(cci, 'CCI')
Exemple #11
0
        'freq': 1,
        'timeout': 180
    }

    dv.init_from_config(props, ds)
    dv.prepare_data()
    dv.save_dataview(dataview_folder)  # 保存数据文件到指定路径,方便下次直接加载


save_dataview()
# Load the data

dv = DataView()
dv.load_dataview(dataview_folder)

factor = dv.get_ts("ps")
factor.index = pd.Index(
    map(lambda x: datetime.strptime(str(x), "%Y%m%d"),
        factor.index))  # convert the index to datetime objects
factor = factor.stack()  # stack into MultiIndex form (the format alphalens factor analysis requires, per the original note)

print(factor.head())


def change_index(df):
    """Replace the YYYYMMDD index of ``df`` with parsed datetimes, in place.

    Each index label is converted to ``str`` and parsed with the
    ``"%Y%m%d"`` format. The frame is mutated and the same object is
    returned.
    """
    parsed_dates = [
        datetime.strptime(str(label), "%Y%m%d") for label in df.index
    ]
    df.index = pd.Index(parsed_dates)
    return df

Exemple #12
0
        'timeout': 180
    }

    dv.init_from_config(props, ds)
    dv.prepare_data()
    dv.save_dataview(dataview_folder)  # 保存数据文件到指定路径,方便下次直接加载


save_dataview()

# Load the data

dv = DataView()
dv.load_dataview(dataview_folder)

# Preview the first rows of the "pe" series.
print(dv.get_ts("pe").head())

import numpy as np


# Signal filter condition: not an index constituent
def mask_index_member():
    """Return a boolean frame that is True where "index_member" equals 0.

    The data is read from the module-level ``dv``.
    """
    return dv.get_ts('index_member') == 0


# 定义可买卖条件——未停牌、未涨跌停
def limit_up_down():
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == 0
Exemple #13
0
import numpy as np
import warnings

warnings.filterwarnings("ignore")

dataview_folder = 'G:/data/hs300_2'  # data folder path
dv = DataView()
dv.load_dataview(dataview_folder)  # load from the folder; per the original note, "Dataview loaded successfully" is shown on success

print(dv.fields)  # show the fields available in dv

# 2. Filter suspended / limit-up / limit-down stocks (tradability): mask_index_member is True for entries to filter out; can_enter and can_exit are True where trading is possible
from jaqs_fxdayu.util import dp
from jaqs.data.dataapi import DataApi

A = dv.get_ts('index_member')  # a table; per the original note, 1 = index constituent, 0 = not a constituent
B = dv.get_ts('trade_status')  # a table from which one can tell whether a stock can trade (or is suspended)

def mask_index():
    """Return the signal filter mask for non-index constituents.

    Reads "index_member" from the module-level ``dv``; the result is True
    wherever that value equals 0.
    """
    membership = dv.get_ts('index_member')
    not_in_index = membership == 0
    return not_in_index


def limit_up_down():
    # 定义可买卖条件——未停牌、未涨跌停
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == u'停牌'
    # 涨停:
    dv.remove_field('up_limit')
:param high: dataFrame (N) 最高价 用于计算上行收益空间 
:param low: dataFrame (N) 最低价 用于计算下行收益空间 
:param benchmark_price: dataFrame (N) 基准价格 若不为空收益计算模式为相对benchmark的收益 
:param period: int (5) 选股持有期 
:param n_quantiles: int (5) 
:param mask: 过滤条件 dataFrame (N) 
:param can_enter: dataFrame (N) 是否能进场 
:param can_exit: dataFrame (N) 是否能出场 
:param forward: bool(True) 是否forward return 
:param commission: float(0.0008) 手续费率 
:param is_event: bool(False) 是否是事件(0/1因子) 
:param is_quarterly: bool(False) 是否是季度因子 
'''

# step 1: factor parameter optimisation -- gather the price inputs
price = dv.get_ts('close_adj')
high = dv.get_ts('high_adj')
low = dv.get_ts('low_adj')
price_bench = dv.data_benchmark # index close price (per the original note)
optimizer = Optimizer(dataview=dv,
                      formula='- Correlation(vwap_adj, volume, LEN)', #formula:这里是需要优化的因子,
                      #这里的定义是成交量加权的平均价格与成交量的相关关系的负值;LEN是参数,需要优化,表示时间长度,
                      params={"LEN":range(2,15,1)}, #设置时间长度,这里认为这些时间长度都有可能,我们就是要找到哪些参数是优秀的
                      name='divert', #因子的名称
                      price=price,
                      high=high,
                      low=low,
                      benchmark_price=None, #=None求绝对收益 #=price_bench求相对收益
                      period=30,
                      n_quantiles=5,
                      mask=mask,
Exemple #15
0
dv.init_from_config(zz800_props, ds)
dv.load_dataview(dataview_folder)

import talib as ta
import numpy as np
import pandas as pd
from datetime import datetime


def change_time(df):
    """Re-index ``df`` by datetime, parsing its YYYYMMDD labels in place.

    Returns the same (mutated) frame.
    """

    def _parse_day(day):
        # Labels are stringified first, then parsed as YYYYMMDD.
        return datetime.strptime(str(day), "%Y%m%d")

    df.index = pd.Index(map(_parse_day, df.index))
    return df


# Smooth "close" with a span-5 exponentially weighted mean three times in a
# row and store the final result in dv as 'EMA3'.
EMA = dv.get_ts('close').ewm(span=5, adjust=False).mean()
EMA2 = EMA.ewm(span=5, adjust=False).mean()
EMA3 = EMA2.ewm(span=5, adjust=False).mean()
dv.append_df(EMA3, 'EMA3')
# 'a': close relative to the previous close, minus 1.
dv.add_formula('a',
               'close/Delay(close,1)-1',
               is_quarterly=False,
               add_data=True)
# 'b': (close / EMA3 delayed by 19) raised to 1/20, minus 1.
dv.add_formula('b',
               '(close/Delay(EMA3,19))^(1/20)-1',
               is_quarterly=False,
               add_data=True)
Factor1 = dv.add_formula(
    'Factor1',
    '-Log((Ts_Sum(If(a>b,1,0),20)-1)*Ts_Sum(If(a<b,(a-b)^2,0),20)/(Ts_Sum(If(a<b,1,0),20))*Ts_Sum(If(a>b,(a-b)^2,0),20))',
    is_quarterly=False,
Exemple #16
0
from jaqs_fxdayu.data import DataView
import warnings

warnings.filterwarnings("ignore")
dataview_folder = '../Factor'
dv = DataView()
dv.load_dataview(dataview_folder)

# Evaluate the formula without add_data and preview the returned result.
dv.add_formula("Divert", "Corr(volume,close_adj,20)",
               is_quarterly=False).head()

# Add it to the dv dataset so the computed result can be reused afterwards
dv.add_formula("Divert",
               "Corr(volume,close_adj,20)",
               is_quarterly=False,
               add_data=True)
dv.get_ts("Divert").head()
print(group2_code.tail()
      )  # original note: value="industry1_code" classifies by industry2_name but returns the code form (category code) -- NOTE(review): the call that built group2_code is not visible here; confirm which level is meant

group3 = dp.daily_sec_industry(api,
                               symbols,
                               start,
                               end,
                               source='zz',
                               value="industry1_name")
print(group3.tail())  # per the original note, source='zz' selects the CSI (中证) classification; only the classification standard differs

group3_code = dp.daily_sec_industry(api,
                                    symbols,
                                    start,
                                    end,
                                    source='zz',
                                    value="industry1_code")
print(group3_code.tail()
      )  # per the original note, source='zz' selects the CSI (中证) classification; industry1_code returns the category code (code form)

# 3. Append the data and save
dv = DataView()
dataview_folder = 'G:/data/hs300'  # data folder path
dv.load_dataview(dataview_folder)  # load from the folder
dv.append_df(group, 'group')  # append `group` to dv; 'group' is the field name it is stored under
dv.save_dataview('G:/data/hs300')  # save

# Read the data back
print(dv.get_ts('group').tail())
A = dv.get_ts('group')  # easier to inspect this way
Exemple #18
0
        'fields': "volume,pb,pe,ps,float_mv,sw1",
        'freq': 1
    }

    dv.init_from_config(props, ds)
    dv.prepare_data()
    dv.save_dataview(dataview_folder)  # 保存数据文件到指定路径,方便下次直接加载


save_dataview()

# Load the data
dv = DataView()
dv.load_dataview(dataview_folder)  # load the data
print(dv.fields)  # show the fields available in dv
print(dv.get_ts("pb").head())  # preview the "pb" series (price-to-book, per the original note)

import numpy as np


# Signal filter condition: not an index constituent
def mask_index_member():
    """Return a boolean frame flagging entries outside the index.

    Reads "index_member" from the module-level ``dv``. Per the original
    note, 1 means the stock is in the CSI 300 (沪深300) and 0 means it is
    not; the returned frame is True where the value equals 0.
    """
    index_flags = dv.get_ts('index_member')
    outside_index = index_flags == 0  # entries to filter out
    return outside_index


# 定义可买卖条件——未停牌、未涨跌停
def limit_up_down():
    trade_status = dv.get_ts('trade_status')