예제 #1
0
def default_china_equity_universe_mask(unmask, asset_finder=None):
    """Build a pipeline filter selecting the default China equity universe.

    The universe starts from the rows of the tushare "basic" table and
    removes every code listed in the "sme", "gem" and "st" tables, then
    removes the labels given in ``unmask``.

    Args:
        unmask: Label or list of labels to drop from the universe in
            addition to the sme/gem/st codes. ``None`` drops nothing extra.
        asset_finder: Optional object exposing ``retrieve_asset(sid)``.
            When omitted, the module-level ``sid`` lookup is used instead.

    Returns:
        An ``IsInDefaultChinaUniverse`` filter instance whose output is
        True for assets whose symbol is part of the remaining universe.
    """
    info = load_tushare_df("basic")
    excluded = pd.concat([
        load_tushare_df("sme"),
        load_tushare_df("gem"),
        load_tushare_df("st"),
    ])

    # Exclude st, sme and gem stocks: the conservative universe keeps only
    # main-board stocks.
    # errors='ignore' drops the labels that exist and skips the rest. The
    # previous blanket try/except abandoned the whole drop on the first
    # unknown label and left `maskdf` undefined, which only surfaced later
    # as a NameError inside compute().
    # NOTE(review): assumes the "basic" table is indexed by stock code/symbol
    # (the index is compared against asset symbols below) -- confirm.
    maskdf = info.drop(list(excluded['code']), axis=0, errors='ignore')
    if unmask is not None:
        maskdf = maskdf.drop(unmask, axis=0, errors='ignore')

    # The allowed symbols never change after construction, so resolve them once.
    allowed_symbols = maskdf.index

    class IsInDefaultChinaUniverse(CustomFilter):
        """Filter that is True for assets belonging to the default universe."""
        inputs = []
        window_length = 1

        def compute(self, today, asset_ids, out, *inputs):
            # Resolve the lookup lazily so the global `sid` is only required
            # when no asset_finder was supplied.
            if asset_finder is not None:
                lookup = asset_finder.retrieve_asset
            else:
                lookup = sid
            symbols = [lookup(asset_id).symbol for asset_id in asset_ids]
            out[:] = pd.Series(symbols).isin(allowed_symbols)

    return IsInDefaultChinaUniverse()
예제 #2
0
def get_sector(sector_dict=None):
    """Build a classifier mapping each asset to an integer sector number.

    Args:
        sector_dict: Mapping from industry name to sector number. When
            ``None``, the mapping returned by ``get_sector_class()`` is used.

    Returns:
        A ``Sector`` classifier instance. Assets whose industry cannot be
        resolved are assigned ``Sector.missing_value`` (0).
    """
    if sector_dict is None:
        sector_dict = get_sector_class()
    basic = load_tushare_df("basic")

    # A CustomClassifier produces ints, whereas a factor produces floats.
    class Sector(CustomClassifier):
        inputs = []
        window_length = 1
        dtype = np.int64
        missing_value = 0

        def findSector(self, assets):
            """Return the sector number for every asset id in ``assets``."""
            sector_list = []
            for msid in assets:
                stock = sid(msid).symbol
                try:
                    sector_no = sector_dict[basic.loc[stock].industry]
                except Exception:
                    # Best effort: a stock missing from the basic table or an
                    # industry missing from sector_dict maps to
                    # missing_value. `Exception` (not a bare except) lets
                    # KeyboardInterrupt/SystemExit propagate.
                    sector_no = self.missing_value
                sector_list.append(sector_no)
            return sector_list

        def compute(self, today, assets, out, *inputs):
            out[:] = self.findSector(assets)

    return Sector()
예제 #3
0
def get_sector_class(limit_size=Sector_TOPN, umask=Sector_Umask):
    """Number the most populated industries of the tushare "basic" table.

    Args:
        limit_size: How many of the largest industries (by row count) to keep.
        umask: Industry names to exclude before counting.

    Returns:
        A dict mapping industry name to a sector number. Numbers start at
        ``Sector_StartNo`` and increase by one in descending order of
        industry size.
    """
    df = load_tushare_df("basic")
    # Exclude the given rows. `~` is the boolean negation; unary `-` on a
    # boolean mask is rejected by current NumPy/pandas.
    df = df[~df['industry'].isin(umask)]

    # One count per industry, iterated in sorted industry order, so nlargest
    # sees the same values in the same order as the previous
    # groupby(...).industry.value_counts() chain, but with a plain
    # single-level index instead of a MultiIndex that had to be unpacked
    # with industry[0].
    largest = df.groupby('industry').size().nlargest(limit_size)

    return {
        industry: no
        for no, industry in enumerate(largest.index, Sector_StartNo)
    }
예제 #4
0
def get_sectors_no(mids):
    """Return the sector number of every asset id in ``mids``.

    Args:
        mids: Iterable of asset ids accepted by the module-level ``sid``.

    Returns:
        A list, parallel to ``mids``, of sector numbers. Assets whose
        industry cannot be resolved get 0.
    """
    basic = load_tushare_df("basic")
    # NOTE(review): this unpacks a 2-tuple, so it expects a
    # get_sector_class() that returns (mapping, <second value>) rather than
    # a bare dict -- confirm against the version in scope.
    _class, _ = get_sector_class()
    missing_value = 0

    no_ = []
    for msid in mids:
        stock = sid(msid).symbol
        try:
            sector_no = _class[basic.loc[stock].industry]
        except Exception:
            # Best effort: unknown stock or industry maps to missing_value.
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            sector_no = missing_value
        no_.append(sector_no)
    return no_
예제 #5
0
def get_sector_by_onehot(sector_dict=None, mask=None, asset_finder=None):
    """Build a multi-output factor holding a one-hot encoding of the sector.

    Args:
        sector_dict: Mapping from industry name to sector number, used to
            decide whether an asset belongs to a known sector. When
            ``None``, the first element returned by ``get_sector_class()``
            is used.
        mask: Optional pipeline filter passed to the factor as ``mask``.
        asset_finder: Optional object exposing ``retrieve_asset(sid)``.
            When omitted, the module-level ``sid`` lookup is used instead.

    Returns:
        A tuple ``(factor, sector_keys)`` where ``factor`` is a
        ``OneHotSector`` instance and ``sector_keys`` is the sorted list of
        sector keys, which is also the factor's ``outputs``.
    """
    if sector_dict is None:
        sector_dict, _ = get_sector_class()
    basic = load_tushare_df("basic")

    def _onehot_sectors(sector_keys):
        """Return a DataFrame of binary sector labels, one row per key."""
        # LabelBinarizer didn't like float values, so convert to strings.
        # A real list is required: the labels are fitted, transformed and
        # indexed, which a one-shot map() iterator cannot support on
        # Python 3.
        labels = [str(key) for key in sector_keys]
        sector_binarizer = preprocessing.LabelBinarizer()
        sector_binarizer.fit(labels)
        # One binary column per category.
        sector_labels_bin = sector_binarizer.transform(labels)

        # TODO: columns are named after the first N labels, which matches
        # the encoder's column order only when it emits one column per label.
        n_cols = len(sector_labels_bin[0])
        col_names = ["S_L_" + label for label in labels[:n_cols]]
        return pd.DataFrame(data=sector_labels_bin,
                            index=labels,
                            columns=col_names)

    # TODO: the outputs are derived from get_sector_class() even when a
    # custom sector_dict is supplied; consider an ordered mapping.
    sector_indict, _unused = get_sector_class()
    # sorted() works for both a Python 2 key list and a Python 3 key view.
    sector_indict_keys = sorted(sector_indict)
    onehot_sector = _onehot_sectors(sector_indict_keys)

    # A CustomClassifier produces ints, whereas a factor produces floats.
    class OneHotSector(CustomFactor):
        inputs = []
        window_length = 1
        outputs = sector_indict_keys

        def _find_sector(self, asset):
            """Return ``(sector_no, industry)`` or ``(0, "")`` if unknown."""
            if asset_finder is not None:
                stock = asset_finder.retrieve_asset(asset).symbol
            else:
                stock = sid(asset).symbol
            try:
                industry = basic.loc[stock].industry
                return sector_dict[industry], industry
            except Exception:
                # Best effort: stock or industry not in the default sector
                # set. `Exception` (not a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                return 0, ""

        def compute(self, today, assets, out):
            for idx, asset in enumerate(assets):
                sno, sname = self._find_sector(asset)
                # Sector number 0 means "no sector": every output stays 0.
                onehots = onehot_sector.loc[sname].values if sno != 0 else None
                for i, output in enumerate(self.outputs):
                    if onehots is not None:
                        out[idx][output] = int(onehots[i])
                    else:
                        out[idx][output] = 0

    # Identity test: `!=` may be overloaded on pipeline terms.
    if mask is not None:
        return OneHotSector(mask=mask), sector_indict_keys
    return OneHotSector(), sector_indict_keys
예제 #6
0
def Fundamental(asset_finder=None):
    """Build a multi-output factor exposing tushare "basic" fundamentals.

    Args:
        asset_finder: Optional object exposing ``retrieve_asset(sid)``.
            When omitted, the module-level ``sid`` lookup is used instead.

    Returns:
        A factor instance with one output per entry in ``columns``.
        ``outstanding`` and ``totals`` are written as
        ``latest close * value * 1e8`` and ``totalAssets`` as
        ``value * 1e4``; every other output is copied from the table as-is.
    """
    columns = ['pe',  # price/earnings ratio
               'outstanding',  # tradable shares (in 100 millions)
               'totals',  # total shares (in 100 millions)
               'totalAssets',  # total assets (in 10 thousands)
               'liquidAssets',  # current assets
               'fixedAssets',  # fixed assets
               'reserved',  # capital reserve
               'reservedPerShare',  # capital reserve per share
               'esp',  # earnings per share
               'bvps',  # book value per share
               'pb',  # price/book ratio
               'timeToMarket',  # listing date, 0 means not listed
               'undp',  # undistributed profit
               'perundp',  # undistributed profit per share
               'rev',  # revenue, year-over-year (%)
               'profit',  # profit, year-over-year (%)
               'gpr',  # gross profit margin (%)
               'npr',  # net profit margin
               'holders',  # number of shareholders
               ]
    # Outputs written with an explicit scaling in compute(); everything
    # else is a straight copy.
    scaled = ('outstanding', 'totals', 'totalAssets')
    passthrough = [name for name in columns if name not in scaled]
    info = load_tushare_df("basic")

    class Fundamental(CustomFactor):
        outputs = columns
        inputs = [USEquityPricing.close]
        window_length = 1
        window_safe = True

        def handle(self, assets):
            """Return the fundamentals rows for ``assets``, in asset order."""
            if asset_finder is not None:
                lookup = asset_finder.retrieve_asset
            else:
                lookup = sid
            stocks = [lookup(msid).symbol for msid in assets]
            # reindex() replaces the removed DataFrame.ix: rows come back in
            # `stocks` order and symbols missing from the table yield NaN
            # rows instead of raising.
            return info.reindex(stocks)[columns]

        def compute(self, today, assets, out, close):
            df = self.handle(assets)
            last_close = close[-1]

            for name in passthrough:
                out[name][:] = df[name].values

            # NOTE(review): close * share count presumably yields a market
            # value; the 1e8 / 1e4 factors undo the table's units -- confirm.
            out['outstanding'][:] = last_close * df['outstanding'].values * 1.0e+8
            out['totals'][:] = last_close * df['totals'].values * 1.0e+8
            out['totalAssets'][:] = df['totalAssets'].values * 1.0e+4

    return Fundamental()