Ejemplo n.º 1
0
    def get_negative_net_asset_stock_date(self, date):
        """Build the forbidden pool of stocks whose net assets are negative.

        Reads the "TotalShareHoldeRequityDaily" factor from the
        "mfc_primary" H5 store, keeps the non-missing entries of column
        ``date`` that are below zero, and writes them to
        ``NegativeNetAssetStockPool_<date>.csv`` under
        ``self.data_path_forbid_pool``.

        Args:
            date: column label of the factor table to inspect.

        Returns:
            list: sorted, de-duplicated index labels (stock codes) whose
            value on ``date`` is negative.
        """
        pool_name = "NegativeNetAssetStockPool"

        h5_path = StockFactorData().get_h5_path("mfc_primary")
        equity = StockFactorData().read_factor_h5(
            factor_name="TotalShareHoldeRequityDaily", path=h5_path)

        # One-column snapshot for the requested date, ascending by value.
        snapshot = pd.DataFrame(equity[date]).dropna().sort_values(by=[date])
        negative = snapshot[snapshot[date] < 0.0]

        filter_stock_list = sorted(set(negative.index))

        out_dir = os.path.join(self.data_path_forbid_pool, pool_name)
        out_file = os.path.join(out_dir, "%s_%s.csv" % (pool_name, date))
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # The CSV keeps the offending value next to each stock code.
        pool = pd.DataFrame([], index=filter_stock_list, columns=[pool_name])
        pool.loc[filter_stock_list, pool_name] = negative.loc[
            filter_stock_list, date]
        pool.to_csv(out_file)

        return filter_stock_list
Ejemplo n.º 2
0
    def get_negative_income_ttm_stock_date(self, date):
        """Build the forbidden pool of stocks with negative TTM operating income.

        The "OperatingIncomeTotal" factor is converted with
        ``change_single_quarter_to_ttm_quarter`` and evaluated at the
        quarter returned by ``Date().get_last_stock_quarter_date(date)``.
        Non-missing values below zero are written to
        ``NegativeIncomeTTMStockPool_<date>.csv`` under
        ``self.data_path_forbid_pool``.

        Args:
            date: reference date; used to pick the quarter and to name the
                output file.

        Returns:
            list: sorted, de-duplicated index labels (stock codes) whose
            TTM value for that quarter is negative.
        """
        pool_name = "NegativeIncomeTTMStockPool"

        h5_path = StockFactorData().get_h5_path("mfc_primary")
        income = StockFactorData().read_factor_h5(
            factor_name="OperatingIncomeTotal", path=h5_path)
        income_ttm = StockFactorOperate().change_single_quarter_to_ttm_quarter(
            income)

        quarter_date = Date().get_last_stock_quarter_date(date)

        snapshot = pd.DataFrame(income_ttm[quarter_date]).dropna()
        negative = snapshot[snapshot[quarter_date] < 0.0]

        filter_stock_list = sorted(set(negative.index))

        out_dir = os.path.join(self.data_path_forbid_pool, pool_name)
        # The file is named after the requested date, not the quarter date.
        out_file = os.path.join(out_dir, "%s_%s.csv" % (pool_name, date))
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        pool = pd.DataFrame([], index=filter_stock_list, columns=[pool_name])
        pool.loc[filter_stock_list, pool_name] = negative.loc[
            filter_stock_list, quarter_date]
        pool.to_csv(out_file)

        return filter_stock_list
Ejemplo n.º 3
0
    def get_freemv_ratio_stock_date(self, ratio, date):
        """Build the forbidden pool of the smallest stocks by "Mkt_freeshares".

        The factor values on ``date`` are sorted ascending and the first
        ``int(count * ratio)`` rows are selected. The selection is written
        to ``SmallFreeMVRatioStockPool_<date>.csv`` under
        ``self.data_path_forbid_pool``.

        Args:
            ratio: fraction of the non-missing stocks to select, counted
                from the smallest value upwards.
            date: column label of the factor table to inspect.

        Returns:
            list: sorted, de-duplicated index labels (stock codes) in the
            selected bottom slice.
        """
        pool_name = "SmallFreeMVRatioStockPool"

        h5_path = StockFactorData().get_h5_path("mfc_primary")
        free_mv = StockFactorData().read_factor_h5(factor_name="Mkt_freeshares",
                                                   path=h5_path)

        # In-place rescale by 1e8, kept as in the original.
        free_mv /= 100000000
        ranked = pd.DataFrame(free_mv[date]).dropna().sort_values(by=[date])

        cutoff = int(len(ranked) * ratio)
        smallest = ranked.iloc[:cutoff, :]

        filter_stock_list = sorted(set(smallest.index))

        out_dir = os.path.join(self.data_path_forbid_pool, pool_name)
        out_file = os.path.join(out_dir, "%s_%s.csv" % (pool_name, date))
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # Every row carries the same descriptive label.
        pool = pd.DataFrame([], index=filter_stock_list, columns=[pool_name])
        pool[pool_name] = "自由流通市值后%s" % ratio
        pool.to_csv(out_file)

        return filter_stock_list
Ejemplo n.º 4
0
    def replace_suspension_with_nan(self, data):
        """Return a copy of ``data`` with NaN wherever "TradingStatus" is 1.0.

        ``data`` and the "TradingStatus" factor are first passed through
        ``FactorOperate().make_same_index_columns`` so that both share the
        same index and columns. Judging by the method name, a status of
        1.0 presumably marks a suspended stock (confirm against the data
        definition).

        Args:
            data: factor DataFrame; it is copied, not modified.

        Returns:
            DataFrame: the aligned copy of ``data`` with the flagged cells
            set to NaN.
        """
        status = StockFactorData().read_factor_h5("TradingStatus")
        factor, status = FactorOperate().make_same_index_columns(
            [data.copy(), status])

        # Vectorised comparison: same boolean frame as the former
        # ``applymap(lambda x: x == 1.0)``, without a Python call per cell
        # and without relying on the deprecated ``applymap``.
        is_flagged = status == 1.0
        factor[is_flagged] = np.nan
        return factor
Ejemplo n.º 5
0
    def get_bad_goodwill_ratio_stock_date(self, date):
        """Build the forbidden pool of stocks with an excessive goodwill ratio.

        A stock is flagged when, on ``date``, its goodwill / total-assets
        ratio is above 0.30 AND exceeds the median ratio of its industry
        ("industry_citic1" factor) by more than 0.30. Note that the code
        uses the industry median, not the mean.

        The result is written to ``BadGoodwillRatioStockPool_<date>.csv``
        under ``self.data_path_forbid_pool``.

        Args:
            date: column label of the factor tables to inspect.

        Returns:
            list: sorted index labels (stock codes) of the flagged stocks.
            Empty when the computation fails (for example when ``date`` is
            missing from a factor table); a warning is emitted in that case.
        """
        import warnings

        goodwill = StockFactorData().read_factor_h5("goodwillDaily")
        totalasset = StockFactorData().read_factor_h5("TotalAssetDaily")
        industry = StockFactorData().read_factor_h5("industry_citic1")

        try:
            goodwill_date = pd.DataFrame(goodwill[date])
            totalasset_date = pd.DataFrame(totalasset[date])
            industry_date = pd.DataFrame(industry[date])
            data = pd.concat([goodwill_date, totalasset_date, industry_date],
                             axis=1)
            data = data.dropna()
            data.columns = ['goodwill', 'totalasset', 'industry']
            data['ratio'] = data['goodwill'] / data['totalasset']

            # Per-row industry median, aligned on the stock index. This
            # replaces a merge that combined ``on=`` with ``right_index=True``,
            # which current pandas rejects.
            industry_median = data.groupby('industry')['ratio'].transform(
                'median')
            ratio_diff = data['ratio'] - industry_median

            flagged = data[(ratio_diff > 0.30) & (data['ratio'] > 0.30)]
            list_code = sorted(flagged.index)
        except Exception as e:
            # Best effort: still write an empty pool file, but do not hide
            # the failure.
            warnings.warn(
                "BadGoodwillRatioStockPool could not be computed for %s: %r"
                % (date, e))
            list_code = []

        name = "BadGoodwillRatioStockPool"
        sub_path = os.path.join(self.data_path_forbid_pool, name)
        file = os.path.join(sub_path, "%s_%s.csv" % (name, date))

        if not os.path.exists(sub_path):
            os.makedirs(sub_path)

        data_pd = pd.DataFrame([], index=list_code, columns=[name])
        data_pd.loc[list_code, name] = "商誉占比大于30%且超过行业30%"
        data_pd.to_csv(file)

        # Return the pool, as the sibling get_*_stock_date methods do.
        return list_code
Ejemplo n.º 6
0
    def get_trade_amount_ratio_stock_date(self, ratio, date):
        """Build the forbidden pool of the least-traded stocks.

        The "TradeAmount" factor is transposed, and the rows from
        ``Date().get_trade_date_offset(date, -60)`` up to ``date`` are
        averaged per stock. Zero amounts are treated as missing first; they
        most likely mean the stock was unlisted or suspended. Stocks are
        then sorted ascending by mean amount and the first
        ``int(count * ratio)`` are selected.

        The selection is written to
        ``SmallTradeAmountRatioStockPool_<date>.csv`` under
        ``self.data_path_forbid_pool``.

        Args:
            ratio: fraction of the ranked stocks to select, counted from
                the smallest mean amount upwards.
            date: end date of the averaging window.

        Returns:
            list: sorted, de-duplicated stock codes in the selected slice.
        """
        pool_name = "SmallTradeAmountRatioStockPool"

        h5_path = StockFactorData().get_h5_path("mfc_primary")
        amount = StockFactorData().read_factor_h5(factor_name="TradeAmount",
                                                  path=h5_path).T
        # In-place rescale by 1e8, kept as in the original.
        amount /= 100000000

        beg_date = Date().get_trade_date_offset(date, -60)
        amount = amount.replace(0.0, np.nan)
        window = amount.loc[beg_date:date, :]

        mean_amount = pd.DataFrame(window.mean()).dropna()
        mean_amount.columns = ['TradeAmount']
        mean_amount = mean_amount.loc[mean_amount['TradeAmount'] > 0.0, :]
        mean_amount = mean_amount.sort_values(by=["TradeAmount"])

        cutoff = int(len(mean_amount) * ratio)
        smallest = mean_amount.iloc[:cutoff, :]

        filter_stock_list = sorted(set(smallest.index))

        out_dir = os.path.join(self.data_path_forbid_pool, pool_name)
        out_file = os.path.join(out_dir, "%s_%s.csv" % (pool_name, date))
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # Every row carries the same descriptive label.
        pool = pd.DataFrame([], index=filter_stock_list, columns=[pool_name])
        pool[pool_name] = "最近交易额后%s" % ratio
        pool.to_csv(out_file)

        return filter_stock_list
Ejemplo n.º 7
0
    def fillna_with_mad_market(self, data):
        """Fill missing factor values with the per-column median.

        A cell is filled only when it is NaN in ``data`` AND the matching
        "TradingStatus" cell is 0.0 or 1.0. All other cells keep their
        value (or stay NaN). Despite "mad" in the name, the fill value is
        the plain median of each column of the aligned factor table.

        ``data`` and the status table are aligned with
        ``FactorOperate().make_same_index_columns`` before anything is
        computed. The raw values are read from the aligned frame, so the
        mask, the medians and the values always share one shape and
        ordering.

        Args:
            data: factor DataFrame; it is copied, not modified.

        Returns:
            DataFrame: the filled table, carrying the aligned index and
            columns.
        """
        status = StockFactorData().read_factor_h5("TradingStatus")
        factor, status = FactorOperate().make_same_index_columns(
            [data.copy(), status])

        # Take the values AFTER alignment. Capturing them beforehand could
        # pair them with a mask of a different shape or ordering.
        factor_val = factor.values

        # Same result as ``applymap(lambda x: x in [0.0, 1.0])``; NaN
        # status cells are False either way.
        status_known = status.isin([0.0, 1.0])
        mask_val = (status_known & factor.isnull()).values

        # One median per column, broadcast down the rows.
        column_median = factor.median(axis=0).values
        filled_val = np.where(mask_val, column_median[np.newaxis, :],
                              factor_val)

        return pd.DataFrame(filled_val,
                            index=factor.index,
                            columns=factor.columns)
Ejemplo n.º 8
0
    def get_bad_pledge_ratio_stock_date(self, date):
        """Intended forbidden pool of stocks with an excessive share-pledge ratio.

        Intended rule, from the original description: among pledge records
        not yet released, flag companies whose pledged share ratio relative
        to the holdings of the top three shareholders exceeds 90%, and
        which rank in the top 10% by pledged ratio relative to the top 1,
        2 and 3 shareholders.

        NOTE: the implementation is unfinished. It filters the pledge
        records that are active on ``date`` and loads the
        "SharePledgeRatio" factor, but uses neither, and always returns an
        empty list.

        Args:
            date: reference date, compared against the pledge date columns.

        Returns:
            list: always ``[]``.
        """
        pledge = StockStatic().get_stock_pledge()
        pledge = pledge[[
            'wind_code', 'pledged_shares', 'pledge_start_date',
            'pledge_end_date', 'pledge_termination_date'
        ]]
        # Explicit copy: the column assignments below must write to an
        # independent frame, not to a slice of the source table.
        pledge = pledge[pledge['pledge_start_date'] <= date].copy()

        # A record without an end or termination date counts as still open.
        pledge['pledge_termination_date'] = pledge[
            'pledge_termination_date'].replace("None", "20991231")
        pledge['pledge_end_date'] = pledge['pledge_end_date'].replace(
            "None", "20991231")
        pledge = pledge[pledge['pledge_termination_date'] > date]
        pledge = pledge[pledge['pledge_end_date'] > date]

        # Loaded for the pending implementation; not used yet.
        path = StockFactorData().get_h5_path("mfc_primary")
        netasset = StockFactorData().read_factor_h5(
            factor_name="SharePledgeRatio", path=path)
        return []
Ejemplo n.º 9
0
    def get_trade_amount_threshold_stock_date(self, threshold, date):
        """Build the forbidden pool of stocks traded below a fixed amount.

        The "TradeAmount" factor is transposed, divided by 1e8 and averaged
        per stock over the rows from
        ``Date().get_trade_date_offset(date, -60)`` up to ``date``. Zero
        amounts are treated as missing first; they most likely mean the
        stock was unlisted or suspended. Stocks whose mean amount is below
        ``threshold`` are selected.

        Rationale for a typical threshold, from the original notes: a fund
        of 500 million holding 1% per stock and turning over 30% at a time
        trades about 1.5 million. If that must stay under 5% of the
        stock's turnover, the minimum average daily amount is about 30
        million, i.e. 0.3 (hundred million).

        The selection is written to
        ``SmallTradeAmountThresholdStockPool_<date>.csv`` under
        ``self.data_path_forbid_pool``.

        Args:
            threshold: upper bound on the mean amount, in the rescaled
                units (the output label states hundred millions).
            date: end date of the averaging window.

        Returns:
            list: sorted, de-duplicated stock codes whose mean amount is
            below ``threshold``.
        """
        pool_name = "SmallTradeAmountThresholdStockPool"

        h5_path = StockFactorData().get_h5_path("mfc_primary")
        amount = StockFactorData().read_factor_h5(factor_name="TradeAmount",
                                                  path=h5_path).T
        amount /= 100000000

        beg_date = Date().get_trade_date_offset(date, -60)
        amount = amount.replace(0.0, np.nan)
        window = amount.loc[beg_date:date, :]

        mean_amount = pd.DataFrame(window.mean()).dropna()
        mean_amount.columns = ['TradeAmount']
        mean_amount = mean_amount.sort_values(by=["TradeAmount"])
        below = mean_amount.loc[mean_amount['TradeAmount'] < threshold, :]

        filter_stock_list = sorted(set(below.index))

        out_dir = os.path.join(self.data_path_forbid_pool, pool_name)
        out_file = os.path.join(out_dir, "%s_%s.csv" % (pool_name, date))
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # Every row carries the same descriptive label.
        pool = pd.DataFrame([], index=filter_stock_list, columns=[pool_name])
        pool[pool_name] = "最近交易额小于%s亿" % threshold
        pool.to_csv(out_file)

        return filter_stock_list
Ejemplo n.º 10
0
    def get_bad_accounts_receivable_stock_date(self, date):
        """Build the forbidden pool of stocks with excessive accounts receivable.

        The ratio is "AccountsReceivables" divided by
        "TotalShareHoldeRequity", evaluated at the quarter returned by
        ``Date().get_last_stock_quarter_date(date)``. A stock is flagged
        when either:

        * its ratio is above 2.0; or
        * its ratio is above 1.5 and its ``NATURE`` text from
          ``StockStatic().get_nature_info()`` does not contain "国有"
          (state-owned).

        NOTE(review): the original description and the output labels say
        "total assets", but the denominator read here is the shareholder
        equity factor. Confirm which is intended.

        The result is written to
        ``BadAccountsReceivableStockPool_<date>.csv`` under
        ``self.data_path_forbid_pool``.

        Args:
            date: reference date; used to pick the quarter and to name the
                output file.

        Returns:
            list: sorted stock codes flagged by either rule.
        """
        pool_name = "BadAccountsReceivableStockPool"

        company_nature = StockStatic().get_nature_info()
        path = StockFactorData().get_h5_path("mfc_primary")
        recep = StockFactorData().read_factor_h5(
            factor_name="AccountsReceivables", path=path)

        path = StockFactorData().get_h5_path("mfc_primary")
        netasset = StockFactorData().read_factor_h5(
            factor_name="TotalShareHoldeRequity", path=path)

        ratio = recep.div(netasset)
        quarter_date = Date().get_last_stock_quarter_date(date)
        quarter_ratio = pd.DataFrame(ratio[quarter_date]).dropna()

        # Rule 1: ratio above 200%, whatever the company nature.
        over_200 = quarter_ratio[quarter_ratio[quarter_date] > 2.0]
        list_bigger_200 = list(over_200.index)

        # Rule 2: ratio above 150%, with known nature that is not state-owned.
        over_150 = quarter_ratio[quarter_ratio[quarter_date] > 1.5]
        over_150 = pd.concat([over_150, company_nature], axis=1).dropna()
        over_150['if_gq'] = over_150['NATURE'].map(lambda x: "国有" in x)
        non_state = over_150[~over_150['if_gq']]
        list_bigger_150 = list(non_state.index)

        list_code = sorted(set(list_bigger_150) | set(list_bigger_200))

        out_dir = os.path.join(self.data_path_forbid_pool, pool_name)
        out_file = os.path.join(out_dir, "%s_%s.csv" % (pool_name, date))
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # The second label overwrites the first for stocks caught by both rules.
        pool = pd.DataFrame([], index=list_code, columns=[pool_name])
        pool.loc[list_bigger_200, pool_name] = "国企应收账款/总资产比例大于超过200%"
        pool.loc[list_bigger_150, pool_name] = "非国企应收账款/总资产比例大于超过150%"
        pool.to_csv(out_file)

        return list_code
Ejemplo n.º 11
0
            if i_date == 0:
                result = res
            else:
                result = pd.concat([result, res], axis=1)
        return result


if __name__ == '__main__':

    from quant.stock.stock_factor_data import StockFactorData

    beg_date = "20180101"

    # Single-quarter values -> TTM, then expanded to daily frequency.
    income_quarter = StockFactorData().read_factor_h5("OperatingIncomeTotal")
    report_data = StockFactorData().read_factor_h5("ReportDateDaily")
    income_ttm = StockFactorOperate().change_single_quarter_to_ttm_quarter(
        income_quarter)

    # Daily series keyed by disclosure date.
    income_daily = (
        StockFactorOperate().change_quarter_to_daily_with_disclosure_date(
            income_ttm, report_data, beg_date='20161231'))
    print(income_daily)

    # The report-date conversion is applied to the result of the previous
    # step, exactly as before.
    income_daily = StockFactorOperate().change_quarter_to_daily_with_report_date(
        income_daily, beg_date=beg_date)
    print(income_daily)

    # Year-to-date cumulative values (quarterly and semi-annual reports)
    # -> TTM.
    ebit_cum = StockFactorData().read_factor_h5("EBIT")
    ebit_ttm = StockFactorOperate().change_cum_quarter_to_ttm_quarter(
        ebit_cum, "quarter")
    print(ebit_ttm)
Ejemplo n.º 12
0
    def __init__(self):
        """Run the initialiser of each factor-processing base class on ``self``.

        The bases are initialised explicitly, in this fixed order:
        FactorStandard, FactorRemoveValue, FactorOperate, FactorFillNa.
        """
        for base in (FactorStandard, FactorRemoveValue, FactorOperate,
                     FactorFillNa):
            base.__init__(self)


if __name__ == '__main__':

    # Data
    ###################################################################
    from quant.stock.stock_factor_data import StockFactorData
    name = 'EP_Roll'
    data_pandas = StockFactorData().read_factor_h5(
        name,
        StockFactorData().get_h5_path(type='mfc_alpha'))
    data_series = data_pandas["20171229"]
    data_second = pd.DataFrame([],
                               columns=["20171228", "20171229"],
                               index=["000001.SZ", "600000.SH"])
    data_second_series = data_second["20171229"]

    # Series
    ###################################################################

    [same_one, same_two] = FactorPreProcess().make_same_index_columns(
        [data_series, data_second_series])
    # print(same_one, '\n', same_two)

    remove_std_series = FactorPreProcess().remove_extreme_value_std(
Ejemplo n.º 13
0
        std = factor.std(axis=1)
        factor = factor.sub(mean_weight_free_mv, axis='index')
        factor = factor.div(std, axis="index")
        factor = factor.T
        return factor


if __name__ == '__main__':

    # Data
    ###################################################################
    from quant.stock.stock_factor_data import StockFactorData

    data = StockFactorData().read_factor_h5(
        'BP',
        StockFactorData().get_h5_path(type='my_alpha'))
    data_date = data["20171229"]
    free_mv = StockFactorData().read_factor_h5("Mkt_freeshares") / 100000000
    free_mv_date = free_mv["20171228"]

    # Series
    ###################################################################

    inv_normalization_series = FactorStandard().inv_normalization(data_date)
    standardization_series = FactorStandard().standardization(data_date)
    standardization_series_mv = FactorStandard().standardization_free_mv(
        data_date, free_mv_date)

    result = pd.concat([
        data_date, inv_normalization_series, standardization_series,
Ejemplo n.º 14
0
    def __init__(self):
        """Run the initialiser of each base class on ``self``.

        The bases are initialised explicitly, in this fixed order:
        StockFactorOperate, StockFactorData, FactorPreProcess.
        """
        for base in (StockFactorOperate, StockFactorData, FactorPreProcess):
            base.__init__(self)
Ejemplo n.º 15
0
from quant.utility.factor_preprocess import FactorPreProcess
from quant.stock.stock_factor_data import StockFactorData
from quant.stock.stock_factor_operate import StockFactorOperate


class StockFactor(StockFactorData, StockFactorOperate, FactorPreProcess):
    """Single entry point that combines three capabilities.

    - reading and writing stock factor data (StockFactorData)
    - stock factor operations (StockFactorOperate)
    - pre-processing of factor data (FactorPreProcess)
    """

    def __init__(self):
        """Initialise each base explicitly, in the order listed below."""
        for base in (StockFactorOperate, StockFactorData, FactorPreProcess):
            base.__init__(self)


if __name__ == '__main__':
    # Read a stock factor from the "mfc_primary" H5 store.
    source_path = StockFactorData().get_h5_path('mfc_primary')
    close_adjust = StockFactorData().read_factor_h5("PriceCloseAdjust",
                                                    source_path)
    print(close_adjust)

    # Write the same factor into the "my_alpha" H5 store.
    target_path = StockFactorData().get_h5_path('my_alpha')
    factor_name = "PriceCloseAdjust"
    StockFactorData().write_factor_h5(close_adjust, factor_name, target_path)