def fundfilter(start_date, end_date):
    """Return the fund codes that survive every screening filter.

    Loads fund values and the '000300.SH' index values for the window, runs
    the ``sf`` filters (scale, setup time, Jensen, Sortino, PPW, stability)
    and keeps only the codes that appear in all six results.

    Args:
        start_date: Window start; passed through to ``data`` and to
            ``sf.fundsetuptimefilter``.
        end_date: Window end; passed through to ``data``.

    Returns:
        list: Codes present in every filter result. The order is the
        iteration order of the scale-filter code set (i.e. not sorted).

    Note:
        Every ``sf`` filter except the setup-time one is consumed as an
        iterable of ``(code, value)`` pairs; the setup-time filter is
        consumed as an iterable of codes. The numeric thresholds
        (2/3, 0.5) are forwarded as-is -- presumably "keep this fraction";
        confirm against ``sf``.
    """
    funddf = data.fund_value(start_date, end_date)
    indexdf = data.index_value(start_date, end_date, '000300.SH')

    # Filter by fund size.
    scale_data = sf.scalefilter(2.0 / 3)
    # Filter by fund setup (inception) time.
    setuptime_data = sf.fundsetuptimefilter(funddf.columns, start_date, data.establish_data())
    # Filter by Jensen measure.
    jensen_data = sf.jensenfilter(funddf, indexdf, rf, 0.5)
    # Filter by Sortino ratio.
    sortino_data = sf.sortinofilter(funddf, rf, 0.5)
    # Filter by PPW measure.
    ppw_data = sf.ppwfilter(funddf, indexdf, rf, 0.5)
    # Filter by stability.
    stability_data = sf.stabilityfilter(funddf, 2.0 / 3)

    # Only the codes matter here; the indicator values are not used.
    scale_set = set(k for k, _ in scale_data)
    required_sets = [
        set(setuptime_data),
        set(k for k, _ in jensen_data),
        set(k for k, _ in sortino_data),
        set(k for k, _ in ppw_data),
        set(k for k, _ in stability_data),
    ]

    return [
        code for code in scale_set
        if all(code in passed for passed in required_sets)
    ]
def tagstockfund(start_date, end_date, codes):
    """Select stock funds whose preference matches their fitness and tag them.

    A fund is selected when it appears in at least one of these pairs of
    result sets: (positionprefer, risefitness), (largecapprefer,
    largecapfitness), (smallcapprefer, smallcapfitness), (growthcapprefer,
    growthfitness), (valuecapprefer, valuefitness).

    Args:
        start_date: Window start, passed through to ``data``.
        end_date: Window end, passed through to ``data``.
        codes: Candidate fund codes; must all be columns of
            ``data.fund_value(start_date, end_date)``.

    Returns:
        tuple: ``(final_codes, fund_tags)`` where ``final_codes`` is a list
        of the selected codes and ``fund_tags`` maps each of 'largecap',
        'smallcap', 'risefitness', 'declinefitness', 'oscillationfitness',
        'growthfitness', 'valuefitness' to the list of selected codes found
        in the corresponding fitness result.

    Note:
        Each fitness/prefer helper is consumed as an iterable of
        ``(code, value)`` pairs; only the codes are used. The 0.5 argument
        is forwarded unchanged -- presumably a keep-ratio; confirm against
        the helpers.
    """
    funddf = data.fund_value(start_date, end_date)
    funddf = funddf[codes]

    capindexdf = data.index_value(start_date, end_date, ['399314.SZ', '399316.SZ'])
    largecapindexdf = data.index_value(start_date, end_date, ['399314.SZ'])
    smallcapindexdf = data.index_value(start_date, end_date, ['399316.SZ'])
    hs300indexdf = data.index_value(start_date, end_date, ['000300.SH'])
    growthvalueindexdf = data.index_value(
        start_date, end_date,
        ['399372.SZ', '399373.SZ', '399376.SZ', '399377.SZ'])

    # Position data is only requested for candidates that actually have it.
    positiondf = data.fund_position(start_date, end_date)
    position_columns = set(positiondf.columns)
    codes = [code for code in codes if code in position_columns]
    positiondf = positiondf[codes]

    def code_set(result):
        # Keep only the code from each (code, value) pair.
        return set(k for k, _ in result)

    # Helpers are invoked in the same order as before.
    largecapfitness_set = code_set(largecapfitness(funddf, capindexdf, 0.5))
    smallcapfitness_set = code_set(smallcapfitness(funddf, capindexdf, 0.5))
    risefitness_set = code_set(risefitness(funddf, hs300indexdf, 0.5))
    declinefitness_set = code_set(declinefitness(funddf, hs300indexdf, 0.5))
    oscillation_set = code_set(oscillationfitness(funddf, hs300indexdf, 0.5))
    growthfitness_set = code_set(growthfitness(funddf, growthvalueindexdf, 0.5))
    valuefitness_set = code_set(valuefitness(funddf, growthvalueindexdf, 0.5))

    positionprefer_set = code_set(positionprefer(positiondf, 0.5))
    largecapprefer_set = code_set(largecapprefer(funddf, largecapindexdf, 0.5))
    smallcapprefer_set = code_set(smallcapprefer(funddf, smallcapindexdf, 0.5))
    growthcapprefer_set = code_set(growthcapprefer(funddf, growthvalueindexdf, 0.5))
    valuecapprefer_set = code_set(valuecapprefer(funddf, growthvalueindexdf, 0.5))

    # A fund qualifies when a preference set and its paired fitness set agree.
    prefer_fitness_pairs = (
        (positionprefer_set, risefitness_set),
        (largecapprefer_set, largecapfitness_set),
        (smallcapprefer_set, smallcapfitness_set),
        (growthcapprefer_set, growthfitness_set),
        (valuecapprefer_set, valuefitness_set),
    )
    final_codes = set()
    for prefer_set, fitness_set in prefer_fitness_pairs:
        final_codes.update(code for code in prefer_set if code in fitness_set)

    # Tag every selected fund with each fitness category it belongs to.
    tag_sources = (
        ('largecap', largecapfitness_set),
        ('smallcap', smallcapfitness_set),
        ('risefitness', risefitness_set),
        ('declinefitness', declinefitness_set),
        ('oscillationfitness', oscillation_set),
        ('growthfitness', growthfitness_set),
        ('valuefitness', valuefitness_set),
    )
    fund_tags = {}
    for tag_name, fitness_set in tag_sources:
        fund_tags[tag_name] = [code for code in fitness_set if code in final_codes]

    return list(final_codes), fund_tags
def stockfundfilter(start_date, end_date):
    """Screen stock funds, collect their indicators and dump both to ./tmp.

    Runs the scale, setup-time, Jensen, Sortino, PPW and stability filters
    and keeps the codes present in all of them.

    Side effects:
        * Writes './tmp/stock_indicator_<end_date>.csv' with one row per
          fund that passed the scale and setup-time filters; indicators
          missing for a fund are written as empty (None).
        * Writes './tmp/stockfilter_codes_<end_date>.csv' with one selected
          code per line.

    Args:
        start_date: Window start, passed through to ``data`` and the
            setup-time filter.
        end_date: Window end; must be a ``str`` because it is concatenated
            into the output file names.

    Returns:
        tuple: ``(codes, indicator)`` where ``codes`` is the list of funds
        passing every filter and ``indicator`` maps each such code to a dict
        with keys 'sharpe', 'jensen', 'sortino', 'ppw', 'stability'.

    Raises:
        KeyError: If a selected code has no entry in the Sharpe data.
    """
    funddf = data.fund_value(start_date, end_date)
    indexdf = data.index_value(start_date, end_date, '000300.SH')

    # Filter by fund size.
    scale_data = scalefilter(2.0 / 3)
    # Filter by fund setup (inception) time.
    setuptime_data = fundsetuptimefilter(funddf.columns, start_date, data.establish_data())
    # Filter by Jensen measure.
    jensen_data = jensenfilter(funddf, indexdf, rf, 0.5)
    # Filter by Sortino ratio.
    sortino_data = sortinofilter(funddf, rf, 0.5)
    # Filter by PPW measure.
    ppw_data = ppwfilter(funddf, indexdf, rf, 0.5)
    # Filter by stability.
    stability_data = stabilityfilter(funddf, 2.0 / 3)
    sharpe_data = fi.fund_sharp_annual(funddf)

    # code -> indicator value; the key set doubles as "passed this filter".
    sharpe_dict = dict((k, v) for k, v in sharpe_data)
    jensen_dict = dict((k, v) for k, v in jensen_data)
    sortino_dict = dict((k, v) for k, v in sortino_data)
    ppw_dict = dict((k, v) for k, v in ppw_data)
    stability_dict = dict((k, v) for k, v in stability_data)

    scale_set = set(k for k, _ in scale_data)
    setuptime_set = set(setuptime_data)

    codes = [
        code for code in scale_set
        if code in setuptime_set
        and code in jensen_dict
        and code in sortino_dict
        and code in ppw_dict
        and code in stability_dict
    ]

    indicator = {}
    for code in codes:
        indicator[code] = {
            'sharpe': sharpe_dict[code],
            'jensen': jensen_dict[code],
            'sortino': sortino_dict[code],
            'ppw': ppw_dict[code],
            'stability': stability_dict[code],
        }

    # The indicator CSV covers the wider population that passed only the
    # scale and setup-time filters. Built as a set first so the row order
    # matches what this function has always produced.
    indicator_codes = list(
        set(code for code in scale_set if code in setuptime_set))
    # .get() reads without inserting None placeholders into the lookup dicts.
    indicator_datas = [
        [
            sharpe_dict.get(code),
            jensen_dict.get(code),
            sortino_dict.get(code),
            ppw_dict.get(code),
            stability_dict.get(code),
        ]
        for code in indicator_codes
    ]
    indicator_df = pd.DataFrame(
        indicator_datas,
        index=indicator_codes,
        columns=['sharpe', 'jensen', 'sortino', 'ppw', 'stability'])
    indicator_df.to_csv('./tmp/stock_indicator_' + end_date + '.csv')

    # Context manager guarantees the handle is closed even if a write fails.
    with open('./tmp/stockfilter_codes_' + end_date + '.csv', 'w') as f:
        for code in codes:
            f.write(str(code) + '\n')

    return codes, indicator
#for i in range(0 ,len(train_start_date)): for i in range(4 ,5): ##################################################### #训练和评测数据时间 train_start = train_start_date[i] train_end = train_end_date[i] test_start = test_start_date[i] test_end = test_end_date[i] #################################################### ################################################### #评测数据 funddf = data.fund_value(train_start, test_end) codes = funddf.columns evaluationdf = data.fund_value(test_start, test_end) evaluationdf = evaluationdf[codes] ################################################### #################################################################### #筛选基金池,基金打标签 codes = fundfilter(train_start, train_end) fund_codes, fund_tags = st.tagfunds(train_start, train_end, codes) #################################################################### ##################################################################################### #blacklitterman 资产配置
def _bucket_fund_weights(funddf, funds, bucket_weight):
    """Return ``{code: markowitz_weight * bucket_weight}`` for ``funds``."""
    bounds = boundlimit(len(funds))
    _, _, ws, _ = markowitz(funddf[funds], bounds)
    weights = {}
    for i, code in enumerate(funds):
        weights[code] = ws[i] * bucket_weight
    return weights


def asset_allocation(start_date, end_date, largecap_fund, smallcap_fund, P, Q):
    """Allocate weights to large-cap and small-cap funds.

    Two stages:
      1. ``fin.black_litterman`` is run on the two indices
         ``const.largecap_code`` / ``const.smallcap_code`` (equal prior
         weights 0.5/0.5, delta=2.5, tau=0.05); the resulting weights are
         rescaled to sum to 1.
      2. Within each bucket, ``markowitz`` weights the funds, and each fund
         weight is multiplied by its bucket's index weight.

    Args:
        start_date: Window start, passed through to ``data``.
        end_date: Window end, passed through to ``data``.
        largecap_fund: Sequence of fund codes for the large-cap bucket.
        smallcap_fund: Sequence of fund codes for the small-cap bucket.
        P: Nested sequence converted to an array and passed to
            ``fin.black_litterman`` as ``P`` (presumably the view pick
            matrix -- confirm against ``fin``).
        Q: Nested sequence converted to an array and passed as ``Q``
            (presumably the view returns -- confirm against ``fin``).

    Returns:
        tuple: ``(fund_codes, ws)`` -- two parallel lists of fund codes and
        their final weights. A code that appears in both buckets gets the
        sum of its two weights.
    """
    delta = 2.5
    tau = 0.05

    P = np.array([np.array(p) for p in P])
    Q = np.array([np.array(q) for q in Q])

    indexdf = data.index_value(
        start_date, end_date, [const.largecap_code, const.smallcap_code])
    indexdfr = indexdf.pct_change().fillna(0.0)
    indexrs = [indexdfr[code].values for code in indexdfr.columns]
    sigma = np.cov(indexrs)

    weq = np.array([0.5, 0.5])
    tauV = tau * sigma
    # Element-wise product with the identity keeps only the diagonal.
    Omega = np.dot(np.dot(P, tauV), P.T) * np.eye(Q.shape[0])
    _, indexws, _ = fin.black_litterman(delta, weq, sigma, tau, P, Q, Omega)

    # Rescale the index weights in place so they sum to 1. The accumulator
    # is deliberately not called ``sum`` so the builtin is not shadowed.
    total = 0
    for w in indexws:
        total = total + w
    for i in range(len(indexws)):
        indexws[i] = 1.0 * indexws[i] / total

    funddf = data.fund_value(start_date, end_date)
    largecap_fund_w = _bucket_fund_weights(funddf, largecap_fund, indexws[0])
    smallcap_fund_w = _bucket_fund_weights(funddf, smallcap_fund, indexws[1])

    # Merge the two buckets; a fund present in both accumulates both weights.
    fundws = {}
    for bucket_codes, bucket_w in (
        (largecap_fund, largecap_fund_w),
        (smallcap_fund, smallcap_fund_w),
    ):
        for code in bucket_codes:
            fundws[code] = fundws.get(code, 0) + bucket_w[code]

    fund_codes = list(fundws.keys())
    ws = list(fundws.values())
    return fund_codes, ws
def _equal_weight_return(dfr, d, codes):
    """Equal-weighted average of ``dfr.loc[d, code]`` over ``codes`` (0 if empty)."""
    total = 0
    for code in codes:
        total = total + 1.0 / len(codes) * dfr.loc[d, code]
    return total


def stockLabelAsset(dates, interval, funddf, indexdf):
    """Track per-label fund returns while re-selecting funds every ``interval`` steps.

    Starting at index ``interval + 156`` of ``dates``, the fund selection is
    refreshed whenever ``(i - 156) % interval == 0``: funds are filtered on
    the trailing 52 entries, tagged, and one fund per label is chosen from
    the trailing ``interval`` entries. On every date the return of each
    labelled fund is recorded, together with the equal-weighted return of
    four nested populations (all funds, filtered funds, tagged pool, selected
    funds).

    Side effects:
        * Prints one line per date: the date followed by the seven label
          returns, space separated.
        * Writes './tmp/stocklabelasset.csv' and './tmp/stockselectasset.csv'.

    Args:
        dates: Sequence of date-like objects supporting ``strftime``; also
            used as index labels into the return frames.
        interval: Number of entries between re-selections (must be > 0).
        funddf: Fund value DataFrame; its percentage changes supply the
            per-label returns.
        indexdf: Not used by the computation; kept so existing callers keep
            working.

    Returns:
        pandas.DataFrame: One row per processed date with columns
        'largecap', 'smallcap', 'rise', 'oscillation', 'decline', 'growth',
        'value'.
    """
    df = data.funds()
    dfr = df.pct_change().fillna(0.0)
    funddfr = funddf.pct_change().fillna(0.0)

    label_keys = [
        'largecap', 'smallcap', 'rise', 'oscillation', 'decline', 'growth',
        'value'
    ]

    tag = {}
    result_dates = []
    result_datas = []
    select_datas = []

    allcodes = []
    filtercodes = []
    poolcodes = []
    selectcodes = []

    for i in range(interval + 156, len(dates)):

        # Re-select funds; this fires on the first iteration, so ``tag`` is
        # always populated before it is read below.
        if (i - 156) % interval == 0:
            start_date = dates[i - 52].strftime('%Y-%m-%d')
            end_date = dates[i].strftime('%Y-%m-%d')
            allocation_start_date = dates[i - interval].strftime('%Y-%m-%d')

            allocationdf = data.fund_value(allocation_start_date, end_date)
            alldf = data.fund_value(start_date, end_date)

            codes, _ = FundFilter.stockfundfilter(start_date, end_date)
            fund_pool, fund_tags = st.tagstockfund(start_date, end_date, codes)

            allocationdf = allocationdf[fund_pool]
            fund_code, tag = fund_selector.select_stock(allocationdf, fund_tags)

            allcodes = alldf.columns
            filtercodes = codes
            poolcodes = fund_pool
            selectcodes = fund_code

        d = dates[i]
        # Look each label return up once and reuse it for storage and logging.
        label_returns = [funddfr.loc[d, tag[key]] for key in label_keys]

        result_dates.append(d)
        result_datas.append(label_returns)

        # Single-argument print: same output under Python 2 and Python 3.
        print(' '.join([d.strftime('%Y-%m-%d')] + [str(r) for r in label_returns]))

        select_datas.append([
            _equal_weight_return(dfr, d, allcodes),
            _equal_weight_return(dfr, d, filtercodes),
            _equal_weight_return(dfr, d, poolcodes),
            _equal_weight_return(dfr, d, selectcodes),
        ])

    result_df = pd.DataFrame(result_datas, index=result_dates, columns=label_keys)
    result_df.to_csv('./tmp/stocklabelasset.csv')

    select_df = pd.DataFrame(
        select_datas,
        index=result_dates,
        columns=['allcodes', 'filtercodes', 'poolcode', 'selectcode'])
    select_df.to_csv('./tmp/stockselectasset.csv')

    return result_df
#基金的最大回撤 def fund_maxdrawdown(funddf): return 0 if __name__ == '__main__': start_date = '2015-04-20' end_date = '2016-04-22' funddf = data.fund_value(start_date, end_date) indexdf = data.index_value(start_date, end_date, '000300.SH') #df = funddf['000398.OF'] #print np.mean(df.pct_change()) * 52 #按照规模过滤 scale_data = sf.scalefilter(3.0 / 3) #按照基金创立时间过滤 setuptime_data = sf.fundsetuptimefilter(funddf.columns, start_date, data.establish_data()) #按照jensen测度过滤 jensen_data = sf.jensenfilter(funddf, indexdf, const.rf, 1.0) #按照索提诺比率过滤
def fundfilter(start_date, end_date):
    """Screen funds and return the survivors together with their indicators.

    The scale, setup-time, Jensen, Sortino, PPW and stability filters from
    ``sf`` are applied to the funds of the given window; a fund is kept only
    if it shows up in every filter result.

    Args:
        start_date: Window start, passed through to ``data`` and the
            setup-time filter.
        end_date: Window end, passed through to ``data``.

    Returns:
        tuple: ``(codes, indicator)``. ``codes`` lists the surviving fund
        codes; ``indicator`` maps each of them to a dict with the keys
        'sharpe', 'jensen', 'sortino', 'ppw' and 'stability'.
    """
    funddf = data.fund_value(start_date, end_date)
    indexdf = data.index_value(start_date, end_date, '000300.SH')

    # Filter by fund size.
    scale_data = sf.scalefilter(2.0 / 3)
    # Filter by fund setup (inception) time.
    setuptime_data = sf.fundsetuptimefilter(funddf.columns, start_date, data.establish_data())
    # Filter by Jensen measure.
    jensen_data = sf.jensenfilter(funddf, indexdf, rf, 0.5)
    # Filter by Sortino ratio.
    sortino_data = sf.sortinofilter(funddf, rf, 0.5)
    # Filter by PPW measure.
    ppw_data = sf.ppwfilter(funddf, indexdf, rf, 0.5)
    # Filter by stability.
    stability_data = sf.stabilityfilter(funddf, 2.0 / 3)
    sharpe_data = fi.fund_sharp_annual(funddf)

    # code -> indicator value for each (code, value) result.
    jensen_by_code = dict((k, v) for k, v in jensen_data)
    sortino_by_code = dict((k, v) for k, v in sortino_data)
    ppw_by_code = dict((k, v) for k, v in ppw_data)
    stability_by_code = dict((k, v) for k, v in stability_data)
    sharpe_by_code = dict((k, v) for k, v in sharpe_data)

    scale_codes = set()
    for pair in scale_data:
        k, _ = pair
        scale_codes.add(k)
    setuptime_codes = set(setuptime_data)

    # Having a value in a lookup table is the same as passing that filter.
    must_contain = (
        setuptime_codes,
        jensen_by_code,
        sortino_by_code,
        ppw_by_code,
        stability_by_code,
    )
    codes = []
    for candidate in scale_codes:
        if all(candidate in passed for passed in must_contain):
            codes.append(candidate)

    indicator = {}
    for selected in codes:
        indicator[selected] = {
            'sharpe': sharpe_by_code[selected],
            'jensen': jensen_by_code[selected],
            'sortino': sortino_by_code[selected],
            'ppw': ppw_by_code[selected],
            'stability': stability_by_code[selected],
        }

    return codes, indicator
#change_position_index = i start_date = dates[i - 52].strftime('%Y-%m-%d') allocation_start_date = dates[i - 13].strftime('%Y-%m-%d') end_date = dates[i].strftime('%Y-%m-%d') future_end_date = dates[-1].strftime('%Y-%m-%d') if i + 13 >= len(dates): future_end_date = dates[-1].strftime('%Y-%m-%d') else: future_end_date = dates[i + 13].strftime('%Y-%m-%d') codes, indicator = fundfilter(start_date, end_date) fund_pool, fund_tags = st.tagfunds(start_date, end_date, codes) allocation_funddf = data.fund_value(allocation_start_date, end_date)[fund_pool] #allocation_funddfr = allocation_funddf.pct_change().fillna(0.0) fund_codes, tag = fs.select_fund(allocation_funddf, fund_tags) allocation_funddf = allocation_funddf[fund_codes] #fund_codes = list(fund_pool) #print fund_pool tags = {} for key in fund_tags.keys(): cs = fund_tags[key] for c in cs: ts = tags.setdefault(c,[]) ts.append(key)