def default_china_equity_universe_mask(unmask, asset_finder=None):
    """Build a pipeline filter selecting the default China equity universe.

    The universe starts from every row of the tushare "basic" table, removes
    the codes listed in the "sme", "gem" and "st" tables, and then removes
    the labels given in ``unmask``.

    Args:
        unmask: Index label(s) of the "basic" table to exclude in addition
            to the SME/GEM/ST codes. ``None`` excludes nothing extra.
        asset_finder: Optional object exposing ``retrieve_asset(sid)``. When
            omitted, the module-level ``sid`` lookup is used instead.

    Returns:
        An ``IsInDefaultChinaUniverse`` filter instance. Its ``compute``
        marks an asset True when the asset's symbol is still present in the
        remaining universe.
    """
    info = load_tushare_df("basic")
    excluded = pd.concat([
        load_tushare_df("sme"),
        load_tushare_df("gem"),
        load_tushare_df("st"),
    ])

    # Exclude ST, SME-board and GEM stocks: the conservative universe keeps
    # main-board stocks only.
    # errors='ignore' skips labels that are absent from `info` instead of
    # aborting the whole drop; previously a single unknown code left the
    # mask undefined and the filter failed later inside compute().
    maskdf = info.drop(excluded['code'].unique(), axis=0, errors='ignore')
    if unmask is not None:
        maskdf = maskdf.drop(unmask, axis=0, errors='ignore')
    universe_symbols = maskdf.index

    def _sid(asset_id):
        return asset_finder.retrieve_asset(asset_id)

    class IsInDefaultChinaUniverse(CustomFilter):
        """Filter that is True for assets whose symbol is in the universe."""

        inputs = []
        window_length = 1

        def compute(self, today, asset_ids, out, *inputs):
            # Resolve through the supplied asset finder when there is one,
            # otherwise fall back to the module-level `sid` helper.
            lookup = _sid if asset_finder is not None else sid
            symbols = [lookup(asset_id).symbol for asset_id in asset_ids]
            out[:] = pd.Series(symbols).isin(universe_symbols)

    return IsInDefaultChinaUniverse()
def get_sector(sector_dict=None):
    """Build a pipeline classifier that maps each asset to a sector number.

    Args:
        sector_dict: Mapping from industry name to integer sector number.
            When ``None``, the result of ``get_sector_class()`` is used.

    Returns:
        A ``Sector`` classifier instance. Assets whose symbol or industry
        cannot be resolved receive ``Sector.missing_value`` (0).
    """
    if sector_dict is None:
        sector_dict = get_sector_class()
    basic = load_tushare_df("basic")

    # A CustomClassifier produces integer labels (a factor would be float).
    class Sector(CustomClassifier):
        inputs = []
        window_length = 1
        dtype = np.int64
        missing_value = 0

        def findSector(self, assets):
            """Return the sector number for every asset id in ``assets``."""
            sector_list = []
            for msid in assets:
                stock = sid(msid).symbol
                try:
                    industry = basic.loc[stock].industry
                    sector_no = sector_dict[industry]
                except (KeyError, TypeError):
                    # KeyError: symbol missing from the table or industry
                    # missing from the mapping. TypeError: the lookup gave
                    # an unhashable value (e.g. several rows for one symbol).
                    sector_no = self.missing_value
                sector_list.append(sector_no)
            return sector_list

        def compute(self, today, assets, out, *inputs):
            out[:] = self.findSector(assets)

    return Sector()
def get_sector_class(limit_size=Sector_TOPN, umask=Sector_Umask):
    """Number the most populated industries of the tushare "basic" table.

    Args:
        limit_size: How many of the largest industries (by row count) to
            keep.
        umask: Industry names to exclude before counting.

    Returns:
        dict mapping industry name to a sector number. Numbers start at
        ``Sector_StartNo`` and increase by one in descending order of
        industry size.
    """
    df = load_tushare_df("basic")
    # Exclude the rows whose industry is listed in `umask`. Boolean masks
    # must be negated with `~`; unary minus on a boolean Series is rejected
    # by current numpy/pandas.
    df = df[~df['industry'].isin(umask)]

    top_counts = (
        df.groupby('industry').industry.value_counts().nlargest(limit_size)
    )
    # The counted Series carries an (industry, industry) MultiIndex, so each
    # index entry is a tuple whose first element is the industry name.
    return {
        key[0]: Sector_StartNo + offset
        for offset, key in enumerate(top_counts.index)
    }
def get_sectors_no(mids):
    """Return the sector number of every asset id in ``mids``.

    Args:
        mids: Iterable of asset ids accepted by the module-level ``sid``
            lookup.

    Returns:
        list of sector numbers in the same order as ``mids``; 0 for any
        asset whose symbol or industry cannot be resolved.
    """
    basic = load_tushare_df("basic")
    sector_info = get_sector_class()
    # get_sector_class() returns the industry -> number mapping itself.
    # Unpacking it into two names would iterate the dict's keys, so take the
    # mapping directly (a (mapping, reverse) pair is tolerated as well).
    _class = sector_info[0] if isinstance(sector_info, tuple) else sector_info

    missing_value = 0
    no_ = []
    for msid in mids:
        stock = sid(msid).symbol
        try:
            industry = basic.loc[stock].industry
            sector_no = _class[industry]
        except (KeyError, TypeError):
            # KeyError: unknown symbol or industry. TypeError: the lookup
            # gave an unhashable value (e.g. several rows for one symbol).
            sector_no = missing_value
        no_.append(sector_no)
    return no_
def get_sector_by_onehot(sector_dict=None, mask=None, asset_finder=None):
    """Build a multi-output factor that one-hot encodes each asset's sector.

    Args:
        sector_dict: Mapping from industry name to sector number used to
            resolve an asset's industry. When ``None``, the mapping from
            ``get_sector_class()`` is used.
        mask: Optional pipeline filter passed to the factor as ``mask``.
        asset_finder: Optional object exposing ``retrieve_asset(sid)``. When
            omitted, the module-level ``sid`` lookup is used instead.

    Returns:
        tuple ``(factor, sector_names)``: the ``OneHotSector`` instance and
        the sorted list of industry names that are its outputs.
    """
    sector_info = get_sector_class()
    # get_sector_class() returns the industry -> number mapping itself.
    # Unpacking it into two names would iterate the dict's keys, so take the
    # mapping directly (a (mapping, reverse) pair is tolerated as well).
    sector_indict = (
        sector_info[0] if isinstance(sector_info, tuple) else sector_info
    )
    if sector_dict is None:
        sector_dict = sector_indict
    basic = load_tushare_df("basic")

    def _sid(asset_id):
        return asset_finder.retrieve_asset(asset_id)

    def _onehot_sectors(sector_keys):
        """Return a DataFrame of binary labels, one row per sector key."""
        # LabelBinarizer did not like float values, so labels are converted
        # to strings. A real list is required: the labels are consumed
        # several times and indexed below.
        strlbls = [str(key) for key in sector_keys]
        sector_binarizer = preprocessing.LabelBinarizer()
        sector_binarizer.fit(strlbls)
        sector_labels_bin = sector_binarizer.transform(strlbls)
        # NOTE(review): with exactly two labels LabelBinarizer is documented
        # to emit a single column, which compute() below does not handle.
        n_cols = len(sector_labels_bin[0])
        col_names = ["S_L_" + label for label in strlbls[:n_cols]]
        return pd.DataFrame(data=sector_labels_bin,
                            index=strlbls,
                            columns=col_names)

    # sorted() gives a stable list on both Python 2 and 3 (dict.keys() has
    # no .sort() on Python 3).
    sector_indict_keys = sorted(sector_indict)
    onehot_sector = _onehot_sectors(sector_indict_keys)

    # A CustomFactor produces floats (a CustomClassifier would be int).
    class OneHotSector(CustomFactor):
        inputs = []
        window_length = 1
        outputs = sector_indict_keys

        def _find_sector(self, asset):
            """Return ``(sector_no, industry)``; ``(0, "")`` if unresolved."""
            lookup = _sid if asset_finder is not None else sid
            stock = lookup(asset).symbol
            try:
                industry = basic.loc[stock].industry
                sector_no = sector_dict[industry]
            except (KeyError, TypeError):
                # Unknown symbol/industry, or an unhashable lookup result.
                return 0, ""
            return sector_no, industry

        def compute(self, today, assets, out):
            for idx, asset in enumerate(assets):
                sno, sname = self._find_sector(asset)
                # Sector number 0 means "no sector": every output stays 0.
                onehots = onehot_sector.loc[sname].values if sno != 0 else None
                for i, output in enumerate(self.outputs):
                    if onehots is not None:
                        out[idx][output] = int(onehots[i])
                    else:
                        out[idx][output] = 0

    if mask is not None:
        return OneHotSector(mask=mask), sector_indict_keys
    return OneHotSector(), sector_indict_keys
def Fundamental(asset_finder=None):
    """Build a multi-output factor exposing tushare "basic" fundamentals.

    Args:
        asset_finder: Optional object exposing ``retrieve_asset(sid)``. When
            omitted, the module-level ``sid`` lookup is used instead.

    Returns:
        A ``Fundamental`` factor instance with one output per entry of
        ``columns``. ``outstanding`` and ``totals`` are multiplied by the
        latest close and 1e8, ``totalAssets`` by 1e4; every other output is
        copied from the table unchanged.
    """
    def _sid(asset_id):
        return asset_finder.retrieve_asset(asset_id)

    columns = [
        'pe',                # price/earnings ratio
        'outstanding',       # circulating share capital (100 million)
        'totals',            # total share capital (100 million)
        'totalAssets',       # total assets (10 thousand)
        'liquidAssets',      # liquid assets
        'fixedAssets',       # fixed assets
        'reserved',          # reserve fund
        'reservedPerShare',  # reserve fund per share
        'esp',               # earnings per share
        'bvps',              # net assets per share
        'pb',                # price/book ratio
        'timeToMarket',      # listing date, 0: not listed
        'undp',              # undistributed profit
        'perundp',           # undistributed profit per share
        'rev',               # revenue, year over year (%)
        'profit',            # profit, year over year (%)
        'gpr',               # gross profit ratio (%)
        'npr',               # net profit ratio
        'holders',           # number of shareholders
    ]
    # Outputs scaled by the latest close price and a unit factor.
    price_scaled = {'outstanding': 1.0e+8, 'totals': 1.0e+8}
    # Outputs scaled by a unit factor only.
    unit_scaled = {'totalAssets': 1.0e+4}

    info = load_tushare_df("basic")

    class Fundamental(CustomFactor):
        outputs = columns
        inputs = [USEquityPricing.close]
        window_length = 1
        window_safe = True

        def handle(self, assets):
            """Return the fundamentals rows for ``assets``, in order."""
            lookup = _sid if asset_finder is not None else sid
            stocks = [lookup(msid).symbol for msid in assets]
            # reindex keeps one row per requested symbol and fills unknown
            # symbols with NaN, as the removed `.ix` indexer used to do.
            return info.reindex(stocks)[columns]

        def compute(self, today, assets, out, close):
            df = self.handle(assets)
            last_close = close[-1]
            for name in columns:
                values = df[name].values
                if name in price_scaled:
                    values = last_close * values * price_scaled[name]
                elif name in unit_scaled:
                    values = values * unit_scaled[name]
                out[name][:] = values

    return Fundamental()