Example #1
def load_data(code, type, period, index_name):
    """
    按照周期加载三类数据:基金/股票;指数(市场);国债(无风险)
    :param code: 代码
    :param type: stock|fund
    :param period: 期间:day,week,month,year
    :param index_name:指数名称,中文的
    :return:
    """
    # 加载基金/股票数据
    if type == const.FUND:
        data = data_utils.load_fund_data(code)
        if data is None:
            logger.warning("[%s]数据有问题,忽略它...", code)
            return -999, None, None
        data_rate = calculate_rate(data, const.COL_ACCUMULATIVE_NET, period,
                                   'price')
    elif type == const.STOCK:
        data = data_utils.load_stock_data(code)
        data_rate = calculate_rate(data, 'close', period, 'price')
    else:
        raise ValueError("type不合法:" + type)

    # Load index (market) data
    index_data = data_utils.load_index_data_by_name(index_name, period)
    index_rate = data_utils.calculate_rate(index_data, 'close', period)

    # Load the risk-free rate (divide by the number of periods per year, e.g. /365 = daily rate)
    bond_rate = data_utils.load_bond_interest_data() / PERIOD_NUM[period]
    bond_rate = calculate_rate(bond_rate, '收盘', period, 'rate')

    return data_rate, index_rate, bond_rate
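
Note: calculate_rate is a helper defined elsewhere in the repository and is not shown in this example. As a rough illustration of the underlying idea only, a hypothetical helper that turns a price column into period-over-period returns with pandas resampling could look like the sketch below (the name, signature, and period mapping are assumptions, not the repository's actual code):

import pandas as pd

def periodic_returns(data: pd.DataFrame, col: str, period: str = 'week') -> pd.Series:
    # data is expected to be indexed by date (DatetimeIndex).
    # Hypothetical mapping from the project's period names to pandas resample rules.
    rules = {'day': 'D', 'week': 'W', 'month': 'M', 'year': 'Y'}
    # Take the last price of each period, then compute period-over-period returns.
    prices = data[col].resample(rules[period]).last().dropna()
    return prices.pct_change().dropna()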
Example #2
def calculate_one_fund_by_period(fund, period):
    # # Skip funds that only started this year
    # if fund.start_date > datetime.strptime('2020-1-1', DATE_FORMAT).date():
    #     logger.debug("This fund started on [%r], too new to be worth analyzing", fund.start_date)
    #     return None

    start_year = fund.start_date.year
    end_year = datetime.now().date().year

    periods = []
    for year in range(start_year, end_year + 1):
        periods += date_utils.get_peroid(year, period)

    trade_data = data_utils.load_fund_data(fund.code)
    if trade_data is None:
        return None

    data = filter_trade_by_period(trade_data, periods)
    logger.debug("过滤出%d条基金净值记录,%r~%r", len(data), data.index[0],
                 data.index[-1])

    bond_interests = data_utils.load_bond_interest_data(data.index)
    bond_interests = calculate_rate(bond_interests, '收盘', period)
    logger.debug("过滤出%d条基准利率记录", len(bond_interests))

    # assert len(data) == len(bond_interests), "number of fund NAV records != number of benchmark rate records"
    sharpe_ratio = calculate_sharpe(data, bond_interests, period)

    return sharpe_ratio
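
calculate_sharpe is likewise defined elsewhere and not shown here. For reference, a minimal sketch of the standard annualized Sharpe ratio, under the assumption that returns and risk_free are aligned per-period series (the name and signature are hypothetical):

import numpy as np
import pandas as pd

def annualized_sharpe(returns: pd.Series, risk_free: pd.Series, periods_per_year: int) -> float:
    # Excess return over the risk-free rate for each period.
    excess = returns - risk_free
    # Annualize: sqrt(N) * mean(excess) / std(excess).
    return float(np.sqrt(periods_per_year) * excess.mean() / excess.std())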
Example #3
def test_KS_test():
    """
    https://www.cnblogs.com/eat-drink-breathe-hard/p/13798547.html

    As Stijn pointed out, the k-s test returns a D statistic and a p-value corresponding to the D statistic.
    The D statistic is the absolute max distance (supremum) between the CDFs of the two samples.
    The closer this number is to 0 the more likely it is that the two samples were drawn from the same distribution.
    Check out the Wikipedia page for the k-s test.
    It provides a good explanation: https://en.m.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test

    The p-value returned by the k-s test has the same interpretation as other p-values.
    You reject the null hypothesis that the two samples were drawn from the same distribution
    if the p-value is less than your significance level.
    You can find tables online for the conversion of the D statistic into a p-value if you are interested in the procedure.
    """
    data = data_utils.load_fund_data('519778')
    data = data[const.COL_DAILY_RATE]  # use the 1-D Series; kstest expects a 1-D sample
    data = data.dropna()
    test_stat = stats.kstest(data, 'norm', args=(data.mean(), data.std()))
    logger.debug("KS检验结果:%r", test_stat)
    if test_stat.pvalue < 0.05:
        logger.debug("KS检验%d条数据,p=%.2f<0.05,拒绝原假设(是正态分布),不是正态分布", len(data),
                     test_stat.pvalue)
    else:
        logger.debug("KS检验%d条数据,p=%.2f>0.05,不拒绝原假设(是正态分布),应该是正态分布", len(data),
                     test_stat.pvalue)
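
The docstring above describes the two-sample form of the test (D is the largest distance between the two empirical CDFs), while the function itself runs a one-sample kstest against a fitted normal. A self-contained sketch of the two-sample variant with scipy.stats.ks_2samp, using synthetic data instead of the fund files:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
a = rng.normal(loc=0.0, scale=1.0, size=500)   # sample from a normal distribution
b = rng.uniform(low=-2.0, high=2.0, size=500)  # sample from a uniform distribution

# D is the maximum distance between the two empirical CDFs;
# a small p-value suggests the samples come from different distributions.
result = stats.ks_2samp(a, b)
print("D=%.3f, p=%.4f" % (result.statistic, result.pvalue))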
Example #4
def main(args):
    codes = args.codes.split(",")
    data_list = [data_utils.load_fund_data(code) for code in codes]
    data = data_utils.merge_by_date(data_list,
                                    [COL_DAILY_RATE] * len(data_list), codes)
    calculated_data = calculate(data, args.sample)
    show(calculated_data)
Example #5
    def crawle_one(self, code, force=False, period=None):
        total_data = load_fund_data(code)
        if force:
            logger.info("强制重新爬取 基金[%s]", code)
            total_data = None

        start_date, end_date = self.get_start_end_date(code, total_data)

        if start_date is None and end_date is None:
            logger.info("爬取失败[%s],原因:无法获得起止日期", code)
            return

        if start_date == end_date:
            logger.info("无需爬取[%s],原因:开始和结束日期[%r]一样", code, start_date)
            return

        logger.info("准备爬取 [%s] --> [%s] 的数据", start_date, end_date)

        page_num = self.get_page_num(code, start_date, end_date)

        for i in range(1, page_num + 1):

            html = self.get_content(code, i, NUM_PER_PAGE, start_date,
                                    end_date)

            data = self.parse_html(html)

            if data is None:
                continue

            # Convert column types
            data[COL_DATE] = pd.to_datetime(data[COL_DATE],
                                            format=const.DATE_FORMAT)
            data.set_index([COL_DATE],
                           inplace=True)  # set the index up front for compatibility with previously saved data
            data[COL_UNIT_NET] = data[COL_UNIT_NET].astype(float)
            data[COL_ACCUMULATIVE_NET] = data[COL_ACCUMULATIVE_NET].astype(
                float)
            data[COL_DAILY_RATE] = data[COL_DAILY_RATE].str.strip('%').astype(
                float)

            if total_data is None:
                total_data = data
                logger.debug("No existing data for fund [%s], creating [%d] records", code, len(data))
            else:
                # DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement
                total_data = pd.concat([total_data, data])
                logger.debug("Appended [%d] records to fund [%s], [%d] in total", len(data), code,
                             len(total_data))

            time.sleep(random.random() * 1)
            logger.info("已爬完第%d页数据,准备爬取第%d页", i, i + 1)

        if total_data is None:
            logger.error("代码 [%s] 爬取失败!!!")
            return

        data_path = save_fund_data(code, total_data)
        logger.info("保存%d行所有数据,到[%s]中", len(total_data), data_path)
Example #6
def random_caculate(args):
    files = os.listdir(const.FUND_DATA_DIR)
    random.shuffle(files)

    if args.code:
        num = 1
        files = [args.code + ".csv"]
    else:
        num = args.num

    result = None
    counter = 0

    for f in files:
        code, _ = os.path.splitext(f)
        data = data_utils.load_fund_data(code)

        if data is None: continue

        if data.index[0] > date_utils.str2date(args.start) or \
                data.index[-1] < date_utils.str2date(args.end):
            continue

        # logger.debug("start:%r/%r",data.index[0], date_utils.str2date(args.start))
        # logger.debug("end:%r/%r", data.index[-1], date_utils.str2date(args.end))

        if counter >= num: break  # stop once num funds have been collected

        data = data[[const.COL_DAILY_RATE]]  # keep only the daily-rate column
        data.columns = [code]  # name the column after the fund code

        if result is None:
            result = data
        else:
            result = pd.concat([data, result], axis=1)
            result = result.dropna(how="any", axis=0)

            # logger.debug("-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-")
            # logger.debug("结果2:%r", result)

        counter += 1

    # logger.debug("最终结果:\n%r", result)
    logger.debug("=============================================")
    logger.debug("描述    :\n%r", result.describe())
    logger.debug("=============================================")
    logger.debug('信息    :\n%r', result.info())
    logger.debug("=============================================")
    logger.debug('协方差  :\n%r', result.cov())
    logger.debug("=============================================")
    logger.debug('相关系数:\n%r', result.corr())
    # plot(result[const.COL_DAILY_RATE])
    logger.debug("=============================================")
    logger.debug("从[%d]个基金中筛出[%d]个,跨[%d]天,叠加偏度:\n%r", len(files), counter,
                 len(result), result.skew())
Example #7
    def load_data(self, args):
        data = data_utils.load_fund_data(args.code)
        if data is None:
            raise ValueError("数据不存在,代码:" + args.code)

        index_data = data_utils.load_index_data_by_name('上证指数')  # SSE Composite Index
        index_rate = data_utils.calculate_rate(index_data, 'close')

        self.load_info(args.code)

        return data, index_data, index_rate
Example #8
def test_shapiro_test():
    """
    https://zhuanlan.zhihu.com/p/26539771
    https://www.jianshu.com/p/e202069489a6
    测试是不是符合正太分布
    """
    data = data_utils.load_fund_data('519778').iloc[0:50]
    W, p_value = stats.shapiro(data[const.COL_DAILY_RATE])
    if p_value < 0.05:
        logger.debug("W=%.2f, p=%.2f, reject the null hypothesis (normality): the data is not normally distributed", W, p_value)
    else:
        logger.debug("W=%.2f, p=%.2f, fail to reject the null hypothesis (normality): the data is likely normally distributed", W, p_value)
Example #9
def main(args):
    data = data_utils.load_fund_data(args.code)

    if data is None: return

    data = data.loc[args.start:args.end]
    start_time = time.time()

    invest_data = filter_invest_by(data, args.period, args.day)
    # print(invest_data.info())
    # print(invest_data.describe())
    # print(invest_data)

    price_of_last_day = data[COL_ACCUMULATIVE_NET].iloc[-1]  # take the scalar, not a one-row Series, so it formats cleanly below

    # print(invest_data.info())
    logger.debug("最后一天[%s]的价格为:%.2f", invest_data.index[-1], price_of_last_day)
    profit_percentage = invest(invest_data, price_of_last_day)
    logger.info("代码[%s] 按[%s]定投 %d 次, [%s] -> [%s] 定投收益率: %.3f%%, 耗时: %.2f",
                args.code, PERIOD_NAMES[args.period], len(invest_data),
                args.start, args.end, profit_percentage * 100 - 100,
                time.time() - start_time)
Example #10
def main(code, threshold):
    data = data_utils.load_fund_data(code)
    data = data[[const.COL_ACCUMULATIVE_NET]]

    # data_mean = resample('1W', how='mean').fillna(0)

    exp_smooth_data = exponential_smoothing(data, alpha=0.1)
    show_plot(x_data=exp_smooth_data.index, y_data=exp_smooth_data, color='y')

    data_diff1 = exp_smooth_data.diff(1)
    data_diff2 = exp_smooth_data.diff(2)  # data_diff1.diff(1)
    show_plot(x_data=data.index, y_data=data, color='b')
    show_plot(x_data=data_diff1.index, y_data=data_diff1, color='r')
    show_plot(x_data=data_diff2.index, y_data=data_diff2, color='g')

    up = data_diff2[data_diff2['value'] > threshold]
    down = data_diff2[data_diff2['value'] < -threshold]

    ax1 = plt.gca()
    ax1.scatter(data.loc[up.index].index, data.loc[up.index], color='g')
    ax1.scatter(data.loc[down.index].index, data.loc[down.index], color='r')

    plt.show()
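
exponential_smoothing is not defined in this example. One plausible implementation, shown only as a sketch (the repository's version may differ), is simple exponential smoothing via pandas' ewm:

import pandas as pd

def exponential_smoothing(data: pd.DataFrame, alpha: float = 0.1) -> pd.DataFrame:
    # Hypothetical implementation. Simple exponential smoothing:
    # s_t = alpha * x_t + (1 - alpha) * s_{t-1}.
    # With adjust=False, pandas' ewm implements exactly this recursion.
    return data.ewm(alpha=alpha, adjust=False).mean()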
Example #11
def main(args):
    codes = args.codes.split(",")
    fund_list = [data_utils.load_fund(code) for code in codes]
    fund_data_list = [data_utils.load_fund_data(code) for code in codes]
    calculate(fund_data_list, fund_list)
Example #12
def main(fund_list, start=None):
    funds = [data_utils.load_fund_data(f) for f in fund_list]
    calculate(funds, start)