예제 #1
0
파일: UnitTest.py 프로젝트: dxcv/fQuant
def fileCompare(src_fullpath, tar_fullpath):
    """Compare two CSV files cell by cell and report whether they match.

    Both files must exist; otherwise a message is printed and SystemExit
    is raised. Returns True when the shapes and every element match,
    otherwise False (each mismatch is reported as it is found).
    """
    # Check file existence before attempting any comparison.
    if not u.hasFile(src_fullpath):
        print('Source File %s Does Not Exist' % src_fullpath)
        raise SystemExit
    if not u.hasFile(tar_fullpath):
        print('Target File %s Does Not Exist' % tar_fullpath)
        raise SystemExit

    # Load both data files.
    src = u.read_csv(src_fullpath)
    tar = u.read_csv(tar_fullpath)

    matched = True
    print('File Compare Start: %s vs %s' % (src_fullpath, tar_fullpath))
    if len(src) != len(tar):
        matched = False
        print('Row Number Un-matched')
    elif len(src.columns) != len(tar.columns):
        matched = False
        print('Col Number Un-matched')
    else:
        # Same shape: compare every cell and report each mismatch.
        for row in range(len(src)):
            for col in range(len(src.columns)):
                if src.iloc[row, col] != tar.iloc[row, col]:
                    matched = False
                    print('Element(%s,%s) Un-matched' % (row, col))
    print('File Compare End: %s' % ('Matched' if matched else 'Un-Matched'))

    return matched
예제 #2
0
def analyzePriceFollow(target_date, stock_id, is_index, threshold):
    """Report the price-follow trend of a stock/index as of target_date.

    Loads the timing CSV produced for the given threshold, finds the last
    timing row dated on or before target_date, and prints whether the trend
    changed on that exact date, is unchanged since an earlier date, or has
    no timing data yet.
    """
    file_postfix = 'Timing_%s_%s' % (u.stockFileName(stock_id,
                                                     is_index), threshold)
    timing = u.read_csv(c.path_dict['strategy'] + file_postfix + '.csv',
                        encoding='gbk')
    timing_number = len(timing)

    # Find the matched timing date and trend: remember the last row whose
    # date is on/before target_date and stop at the first later one
    # (assumes the file is sorted ascending by date).
    timing_index = -1
    for i in range(timing_number):
        # .loc replaces the DataFrame.ix accessor removed in pandas 1.0.
        date = dt.datetime.strptime(timing.loc[i, 'date'], '%Y-%m-%d').date()
        if date <= target_date:
            timing_index = i
        else:
            break

    # Report results
    if timing_index != -1:
        date = dt.datetime.strptime(timing.loc[timing_index, 'date'],
                                    '%Y-%m-%d').date()
        trend = timing.loc[timing_index, 'trend']
        if date == target_date:  # Given target_date is Timing Date
            print('Date', target_date, ': Trend of',
                  u.stockFileName(stock_id, is_index), 'Goes', trend)
        else:
            print('Date', target_date, ': Trend of',
                  u.stockFileName(stock_id, is_index),
                  'Does Not Change, Still', trend)
    else:
        print('Date', target_date, ': Trend of',
              u.stockFileName(stock_id, is_index),
              'Not Available, No Timing Data')
예제 #3
0
def extractRollingBeta(postfix):
    """Extract rolling beta columns from a coefficient file and compute
    their per-row average.

    Reads the coefficient CSV identified by *postfix*, copies every column
    starting with 'beta' into a new frame, stores the per-row mean of those
    columns in the 'beta' column, and saves the result as '<postfix>_Beta'.

    Returns False when the coefficient file is missing or empty; otherwise
    returns None after writing the output file.
    """
    # Load rolling coefficient data.
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % postfix
    coef = u.read_csv(fullpath)
    if u.isNoneOrEmpty(coef):
        print('Require Coefficient File: %s!' % fullpath)
        return False

    # Extract rolling beta columns.
    row_number = len(coef)
    beta = u.createDataFrame(row_number, ['date', 'beta'])
    beta['date'] = coef['date']
    for column in coef.columns:
        if column.startswith('beta'):
            beta[column] = coef[column]

    # Average the per-window beta columns; the first two slots are the
    # 'date' and aggregate 'beta' columns and are skipped.
    beta_number = len(beta.columns) - 2
    for i in range(row_number):
        beta_sum = 0.0
        beta_count = 0
        for j in range(beta_number):
            # .iloc/.loc replace the DataFrame.ix accessor removed in
            # pandas 1.0.
            b = beta.iloc[i, j + 2]
            if not np.isnan(b):
                beta_sum += b
                beta_count += 1
        if beta_count > 0:
            beta.loc[i, 'beta'] = beta_sum / float(beta_count)

    beta.set_index('date', inplace=True)
    postfix = '_'.join([postfix, 'Beta'])
    u.to_csv(beta, c.path_dict['strategy'], c.file_dict['strategy'] % postfix)
예제 #4
0
파일: Common.py 프로젝트: dxcv/fQuant
def loadAllIndex():
    """Load the list of all index codes from the local cache file.

    Returns
    -------
    pandas.Series of zero-padded 6-character index codes on success;
    None (after printing a diagnostic message) when the cache file is
    missing or empty.
    """
    # Build the cached file path: '<strategy dir>/..._Common_AllIndex...'.
    file_postfix = '_'.join(['Common', 'AllIndex'])
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % file_postfix
    allindex = u.read_csv(fullpath)
    if u.isNoneOrEmpty(allindex):
        print('Failed to Load File: %s!' % fullpath)
        return None

    # Normalize codes to zero-padded 6-character strings.
    allindex['code'] = allindex['code'].map(lambda code: str(code).zfill(6))
    return allindex['code']
예제 #5
0
파일: GetReference.py 프로젝트: dxcv/fQuant
def getRZRQDetailsSH():
    """Download per-date RZRQ (margin trading) stock details for the SH
    market, save one CSV per date, then save the aggregated details file.

    Iterates the dates listed in the SH market-level RZRQ file, fetches the
    detail data for each date, and accumulates everything into a single
    frame that is finally sorted by date and written to 'Details_SH'.
    """
    # Download RZRQ stock data of SH market, one request per listed date.
    rzrq_sh_details = pd.DataFrame()
    rzrq_sh = u.read_csv(c.fullpath_dict['rzrq'] % 'Market_SH')
    date_number = len(rzrq_sh)
    for i in range(date_number):
        # .loc replaces the DataFrame.ix accessor removed in pandas 1.0.
        date = rzrq_sh.loc[i, 'date']
        rzrq = get_rzrq_sh_details(date, date)
        print(rzrq.head(10))
        if i == 0:
            rzrq_sh_details = pd.DataFrame.copy(rzrq)
        else:
            rzrq_sh_details = pd.concat([rzrq_sh_details, rzrq])
        print(rzrq_sh_details.head(10))
        # Persist the single-date details (indexed by date) as its own CSV.
        rzrq.set_index('date', inplace=True)
        if not u.isNoneOrEmpty(rzrq):
            u.to_csv(rzrq, c.path_dict['rzrq'],
                     c.file_dict['rzrq'] % ('Details_SH_%s' % date))

    # Process the accumulated RZRQ stock data of SH market.
    rzrq_sh_details.set_index('date', inplace=True)
    rzrq_sh_details.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(rzrq_sh_details.head(10))

    # Save the aggregate to a CSV file.
    if not u.isNoneOrEmpty(rzrq_sh_details):
        u.to_csv(rzrq_sh_details, c.path_dict['rzrq'],
                 c.file_dict['rzrq'] % 'Details_SH')
예제 #6
0
파일: Common.py 프로젝트: dxcv/fQuant
def loadSamplePriceAllIndex(benchmark_id, period):
    """Load the sampled close prices of all indexes.

    Parameters
    ----------
    benchmark_id : string, benchmark index code e.g. '000300'
        (implicitly determines the date range of the sample file).
    period : string, sampling period e.g. 'M'; 'D', 'W' and 'M' are
        supported.

    Returns
    -------
    pandas.DataFrame with the sampled close prices on success; None
    (after printing a diagnostic message) when the file is missing/empty.
    """
    # Check whether the pre-computed AllPrice file exists and is non-empty.
    file_postfix = '_'.join(
        ['Common', 'AllPrice', benchmark_id, period, 'AllIndex'])
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % file_postfix
    allprice = u.read_csv(fullpath)
    if u.isNoneOrEmpty(allprice):
        print('Failed to Load File: %s!' % fullpath)
        return None

    return allprice
예제 #7
0
def loadHPE(stock_id, period, ratio):
    """Load the historical P/E ('PE') or E/P (any other ratio value) data
    file for one stock and sampling period.
    """
    # 'hpe' files hold P/E data; 'hep' files hold the inverse E/P data.
    data_key = 'hpe' if ratio == 'PE' else 'hep'
    data_path = c.path_dict[data_key] % period
    data_file = c.file_dict[data_key] % (period, stock_id)
    return u.read_csv(data_path + data_file)
예제 #8
0
파일: GetReference.py 프로젝트: dxcv/fQuant
def mergeRZRQMarket():
    """Merge SH and SZ market-level RZRQ data into combined totals.

    Inner-joins the two market files on 'date', sums each shared metric's
    '_sh' and '_sz' columns into a combined column, and saves the result
    as 'Market_Total'.
    """
    rzrq_sh = u.read_csv(c.fullpath_dict['rzrq'] % 'Market_SH')
    rzrq_sz = u.read_csv(c.fullpath_dict['rzrq'] % 'Market_SZ')
    rzrq = pd.merge(rzrq_sh, rzrq_sz, how='inner', on='date')
    # Combine data from both markets. Vectorized column addition replaces
    # the per-cell loop that used the DataFrame.ix accessor removed in
    # pandas 1.0.
    rzrq_columns = ['rzye', 'rzmre', 'rqyl', 'rqylje', 'rqmcl', 'rzrqye']
    for column in rzrq_columns:
        rzrq[column] = rzrq[column + '_sh'] + rzrq[column + '_sz']
    rzrq.set_index('date', inplace=True)
    rzrq.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(rzrq.head(10))
    if not u.isNoneOrEmpty(rzrq):
        u.to_csv(rzrq, c.path_dict['rzrq'], c.file_dict['rzrq'] % 'Market_Total')
예제 #9
0
def loadStockBasics():
    """Load the stock basics table from its CSV file.

    Returns the DataFrame, or None (after printing a message) when the
    data file does not exist.
    """
    fullpath = c.fullpath_dict['basics']
    if u.hasFile(fullpath):
        return u.read_csv(fullpath)
    print('Require Stock Basics: %s!' % fullpath)
    return None
예제 #10
0
def loadFinanceSummary(stock_id):
    """Load the finance summary CSV of the given stock.

    Returns the DataFrame, or None (after printing a message) when the
    data file does not exist.
    """
    fullpath = c.fullpath_dict['finsum'] % stock_id
    if u.hasFile(fullpath):
        return u.read_csv(fullpath)
    print('Require Finance Summary of %s!' % fullpath)
    return None
예제 #11
0
def loadCoefficient(postfix, completeness_threshold):
    """Load the coefficient file filtered at the given completeness level.

    Returns the DataFrame, or None (after printing a message) when the
    file is missing or empty.
    """
    file_postfix = '_'.join([postfix, completeness_threshold])
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % file_postfix
    allcoef = u.read_csv(fullpath)
    if not u.isNoneOrEmpty(allcoef):
        return allcoef
    print('Require Coefficient File: %s!' % fullpath)
    return None
예제 #12
0
def plot_index(index_name, benchmark_name):
    """Plot an index against its benchmark in three stacked sub-figures
    (ratio curves, index close price, benchmark close price), show the
    figure, and save it to disk tagged with today's date.
    """
    # Load the index ratio data file.
    df = u.read_csv(c.path_dict['index'] + c.file_dict['index_r'] % index_name)

    fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True)
    fig.set_size_inches(32, 18)

    # Shared label font.
    font = {
        'family': 'serif',
        'color': 'black',
        'weight': 'normal',
        'size': 18,
    }

    # One entry per sub-figure: (axes, title, y-label, columns to plot).
    panels = [
        (ax1, '%s vs. %s' % (index_name, benchmark_name), 'Ratio',
         ['ratio', 'b_ratio']),
        (ax2, 'Index %s' % index_name, 'Close Price', ['index']),
        (ax3, 'Index %s' % benchmark_name, 'Close Price', ['b_index']),
    ]
    for axis, title, ylabel, columns in panels:
        axis.set_title(title, fontdict=font)
        axis.set_xlabel('', fontdict=font)
        axis.set_ylabel(ylabel, fontdict=font)
        for column in columns:
            df.plot(x='date', y=column, ax=axis)
        axis.grid(True)

    fig.autofmt_xdate()
    fig.tight_layout()
    plt.setp(plt.gca().get_xticklabels(), rotation=30)
    plt.show()

    # Save the figure under the configured index-figure directory.
    fig_key = 'fig_index'
    fig_file = c.file_dict[fig_key] % (index_name + '_' +
                                       u.dateToStr(u.today()))
    u.saveFigure(fig, c.path_dict[fig_key], fig_file)
예제 #13
0
파일: GetTrading.py 프로젝트: dxcv/fQuant
def loadDailyHFQ(stock_id, is_index):
    """Load the daily LSHQ quote CSV for the given stock or index.

    Returns the DataFrame, or None (after printing a message) when the
    data file does not exist.
    """
    fullpath = c.fullpath_dict['lshq'] % u.stockFileName(stock_id, is_index)
    if u.hasFile(fullpath):
        return u.read_csv(fullpath)
    print('Require LSHQ of %s!' % u.stockFileName(stock_id, is_index))
    return None
예제 #14
0
파일: Index.py 프로젝트: dxcv/fQuant
def load_component(index_name):
    """Load the component list CSV of the given index.

    Returns the DataFrame, or None (after printing a message) when the
    data file does not exist.
    """
    fullpath = c.path_dict['index'] + c.file_dict['index_c'] % index_name
    if u.hasFile(fullpath):
        return u.read_csv(fullpath)
    print('Require Index Component of %s!' % index_name)
    return None
예제 #15
0
파일: GetReference.py 프로젝트: dxcv/fQuant
def mergeRZRQMarketSZ(files_number):
    """Merge the numbered SZ market RZRQ part-files into one CSV.

    Parameters
    ----------
    files_number : int, number of part files named 'Market_SZ_0',
        'Market_SZ_1', ...
    """
    # Collect all parts first and concatenate once: repeated pairwise
    # pd.concat inside the loop copies already-merged rows every
    # iteration (O(n^2) overall).
    frames = []
    for index in range(files_number):
        frames.append(u.read_csv(c.fullpath_dict['rzrq'] % ('Market_SZ_%s' % index)))
    rzrq_sz = pd.concat(frames) if frames else pd.DataFrame()
    rzrq_sz.set_index('date', inplace=True)
    rzrq_sz.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(rzrq_sz.head(10))
    if not u.isNoneOrEmpty(rzrq_sz):
        u.to_csv(rzrq_sz, c.path_dict['rzrq'], c.file_dict['rzrq'] % 'Market_SZ')
예제 #16
0
def plot_index_series(index_names, series_name, benchmark_name):
    """Plot the ratio curves of several indexes against a benchmark on one
    axes, show the figure, and save it to disk tagged with today's date.
    """
    # Load the series ratio data file.
    df = u.read_csv(c.path_dict['index'] + c.file_dict['index_r'] % series_name)

    fig = plt.figure(figsize=(32, 18), dpi=72, facecolor="white")
    axes = plt.subplot(111)
    axes.cla()  # Clear Axes

    # Shared label font.
    font = {
        'family': 'serif',
        'color': 'black',
        'weight': 'normal',
        'size': 18,
    }

    plt.title('%s vs. %s' % (series_name, benchmark_name), fontdict=font)
    axes.set_xlabel('', fontdict=font)
    axes.set_ylabel('Ratio', fontdict=font)

    # Benchmark ratio as a thick dashed grey reference line.
    df.plot(x='date',
            y='ratio_benchmark',
            ax=axes,
            color='grey',
            lw=2.0,
            ls='--')
    # One curve per index in the series.
    for index_name in index_names:
        df.plot(x='date', y='ratio_' + index_name, ax=axes)

    axes.grid(True)
    fig.autofmt_xdate()
    fig.tight_layout()
    plt.setp(plt.gca().get_xticklabels(), rotation=30)
    plt.show()

    # Save the figure under the configured index-figure directory.
    fig_key = 'fig_index'
    fig_file = c.file_dict[fig_key] % (series_name + '_' +
                                       u.dateToStr(u.today()))
    u.saveFigure(fig, c.path_dict[fig_key], fig_file)
예제 #17
0
파일: GetReference.py 프로젝트: dxcv/fQuant
def extractRZRQDetails(market='SH'):
    """Split a market-wide RZRQ details file into one CSV per stock.

    Parameters
    ----------
    market : string, 'SH' or 'SZ'; selects the 'Details_<market>' source
        file and the per-stock output file names.
    """
    rzrq_details = u.read_csv(c.fullpath_dict['rzrq'] % ('Details_%s' % market))
    stocks = pd.DataFrame({'code': rzrq_details['code']})
    stocks.drop_duplicates(inplace=True)
    stocks.set_index('code', inplace=True)
    stocks_number = len(stocks)
    print('RZRQ Stock Number:', stocks_number)
    for i in range(stocks_number):
        stock_id = stocks.index[i]
        # BUGFIX: .copy() so the in-place edits below operate on an
        # independent frame instead of a view of rzrq_details
        # (chained-assignment / SettingWithCopy problem).
        rzrq = rzrq_details[rzrq_details['code'] == stock_id].copy()
        if not u.isNoneOrEmpty(rzrq):
            rzrq.set_index('date', inplace=True)
            rzrq.sort_index(ascending=True, inplace=True)
            rzrq['code'] = rzrq['code'].map(lambda x: str(x).zfill(6))
            # Handle missing columns per market.
            if market == 'SH':
                # SH files lack 'rzrqye'; derive it as rzye + rqylje.
                rzrq['rzrqye'] = rzrq['rzye'] + rzrq['rqylje']
            # TODO(review): SZ files lack 'rzche'/'rqchl'; their sources
            # were never filled in (see original commented-out lines).
            u.to_csv(rzrq, c.path_dict['rzrq'],
                     c.file_dict['rzrq'] % ('Details_%s_%06d' % (market, stock_id)))
예제 #18
0
def filterCoefficient(postfix, completeness_threshold):
    """Keep only rows whose data completeness meets the threshold and save
    the filtered coefficient table as '<postfix>_<threshold>'.

    Returns True on success, False when the source file is missing/empty.
    """
    # Load the unfiltered coefficient file.
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % postfix
    allcoef = u.read_csv(fullpath)
    if u.isNoneOrEmpty(allcoef):
        print('Require Coefficient AllCoef File: %s!' % fullpath)
        return False

    # Completeness is stored as a percentage string like '97.50%';
    # strip the sign and compare numerically against the threshold.
    threshold = float(completeness_threshold.replace('%', ''))
    stripped = allcoef['completeness'].map(lambda v: v.replace('%', ''))
    allcoef['completeness'] = stripped.astype(float)
    allcoef = allcoef[allcoef.completeness >= threshold]
    # Restore the '%.2f%' percentage formatting before saving.
    allcoef['completeness'] = allcoef['completeness'].map(
        lambda v: ('%.2f' % (v)) + '%')
    allcoef.set_index('code', inplace=True)

    # Save the filtered table to a CSV file.
    file_postfix = '_'.join([postfix, completeness_threshold])
    u.to_csv(allcoef, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)

    return True
예제 #19
0
def mergePriceFollow(stock_list, is_index, threshold_list):
    """Merge per-threshold PriceFollow results for each stock and build a
    one-row-per-stock statistics table of the latest weighted predictions.

    For every stock: loads one result file per threshold, combines the
    per-threshold 'trend'/'trend_price'/'predict'/'confirm' columns into a
    single frame (suffix '_<threshold>'), computes neighbor-weighted
    predict/trend columns for every interior threshold, saves the merged
    frame, and records the last row's weighted values in the statistics
    table, which is saved last.

    Raises SystemExit when either input list is empty.
    """
    stock_number = len(stock_list)
    if stock_number < 1:
        print('Stock Number:', stock_number)
        raise SystemExit

    threshold_number = len(threshold_list)
    if threshold_number < 1:
        print('Threshold Number:', threshold_number)
        raise SystemExit

    # Init price-follow statistics for all indexes: one wpredict/wtrend
    # column pair per interior threshold.
    stats_columns = ['date', 'index']
    for i in range(1, threshold_number - 1):
        stats_columns.append('wpredict_%s' % threshold_list[i])
        stats_columns.append('wtrend_%s' % threshold_list[i])
    stats = u.createDataFrame(stock_number, stats_columns)

    for s in range(stock_number):
        stock_id = stock_list[s]
        # Load results generated for each threshold.
        dfs = []
        for i in range(threshold_number):
            threshold = threshold_list[i]
            file_postfix = 'PriceFollow_%s_%s' % (u.stockFileName(
                stock_id, is_index), threshold)
            fullpath = c.path_dict[
                'strategy'] + c.file_dict['strategy'] % file_postfix
            dfs.append(u.read_csv(fullpath))

        # Compose final results: shared columns from the first frame plus
        # one suffixed column per threshold for each per-threshold metric
        # (single nested loop replaces four duplicated loops).
        drop_columns = [
            'trend', 'trend_high', 'trend_low', 'trend_ref', 'trend_price',
            'predict', 'confirm'
        ]
        df = dfs[0].drop(drop_columns, axis=1)
        for column in ['trend', 'trend_price', 'predict', 'confirm']:
            for i in range(threshold_number):
                threshold = threshold_list[i]
                df[column + '_%s' % threshold] = dfs[i][column]

        # Weighted predict columns: weight each interior threshold's
        # predict by the prev/curr/next thresholds, normalized by their sum.
        cutoff = 0.0  # Optimized cutoff for weighted predict
        row_number = len(df)
        for i in range(1, threshold_number - 1):
            t_prev = threshold_list[i - 1]
            t_curr = threshold_list[i]
            t_next = threshold_list[i + 1]
            t_total = t_prev + t_curr + t_next
            column_postfix = '_%s' % t_curr
            df['wpredict' + column_postfix] = np.nan
            df['wtrend' + column_postfix] = np.nan
            for j in range(1, row_number):
                # .loc replaces the DataFrame.ix accessor removed in
                # pandas 1.0.
                wpredict = 0.0
                for t in (t_prev, t_curr, t_next):
                    wpredict += t * df.loc[j, 'predict' + '_%s' % t]
                wpredict /= t_total
                df.loc[j, 'wpredict' + column_postfix] = wpredict
                df.loc[j, 'wtrend' + column_postfix] = (
                    'Up' if wpredict >= cutoff else 'Down')

        # Fill one statistics row with this stock's last-date values.
        last_index = len(df) - 1
        stats.loc[s, 'date'] = df.loc[last_index, 'date']
        stats.loc[s, 'index'] = stock_id
        for i in range(1, threshold_number - 1):
            column_postfix = '_%s' % threshold_list[i]
            stats.loc[s, 'wpredict' + column_postfix] = df.loc[
                last_index, 'wpredict' + column_postfix]
            stats.loc[s, 'wtrend' + column_postfix] = df.loc[
                last_index, 'wtrend' + column_postfix]

        # Format columns and save the per-stock merged results.
        df.set_index('date', inplace=True)
        file_postfix = 'PriceFollow_%s_All' % u.stockFileName(
            stock_id, is_index)
        u.to_csv(df, c.path_dict['strategy'],
                 c.file_dict['strategy'] % file_postfix)

    # Format columns and save the aggregate statistics.
    stats.set_index('date', inplace=True)
    file_postfix = 'PriceFollow_Statistics'
    u.to_csv(stats, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)
예제 #20
0
def loadCXG():
    """Load the CXG data file and return it as a DataFrame."""
    return u.read_csv(c.fullpath_dict['cxg'])
예제 #21
0
def loadStockList(cutoff_date):
    """Load the stock list CSV generated for the given cutoff date."""
    return u.read_csv(c.fullpath_dict['stock_list'] % cutoff_date)
예제 #22
0
파일: HistoricalPE.py 프로젝트: dxcv/fQuant
def calc_hpe(stock_id, period, ratio):
    """Calculate the historical P/E (or E/P) ratio per sampling period.

    Assumes the per-period QFQ (forward-adjusted) price data and the
    finance summary data have already been downloaded and stored as CSV.

    Parameters
    ----------
    stock_id : string, stock code e.g. '600036'
    period : string, sampling period, one of 'W', 'M', 'Q'
    ratio : string, 'PE' for price/earnings, 'EP' for earnings/price

    Returns
    -------
    DataFrame indexed by period end date with columns:
        high/close/low   per-period prices
        eps              period-end EPS (may have gaps)
        eps_filled       EPS with gaps filled from neighboring quarters
        eps_rolling      annualized EPS derived from eps_filled
        pe_high/pe_close/pe_low (or ep_*) ratios from the period prices

    Raises SystemExit on invalid arguments or missing input files.
    """

    # Check input parameters.
    if not isinstance(stock_id, str) or not isinstance(period, str):
        print('Incorrect type of one or more input parameters!')
        raise SystemExit

    # Check period.
    period_types = ['W', 'M', 'Q']
    if not period in period_types:
        print('Un-supported period type - should be one of:', period_types)
        raise SystemExit

    # Check ratio.
    ratio_types = ['PE', 'EP']
    if not ratio in ratio_types:
        print('Un-supported ratio type - should be one of:', ratio_types)
        raise SystemExit

    # Ensure the stock QFQ data file is available.
    qfq_path = c.path_dict['qfq'] % period
    qfq_file = c.file_dict['qfq'] % (period, stock_id)
    qfq_fullpath = qfq_path + qfq_file
    if not u.hasFile(qfq_fullpath):
        print('Require stock QFQ file:', (qfq_fullpath))
        raise SystemExit

    # Ensure the stock finance summary data file is available.
    fs_fullpath = c.fullpath_dict['finsum'] % stock_id
    if not u.hasFile(fs_fullpath):
        print('Require stock finance summary file:', (fs_fullpath))
        raise SystemExit

    #
    # Load QFQ Data
    #

    qfq = u.read_csv(qfq_fullpath)
    qfq.set_index('date', inplace=True)
    qfq.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(qfq.head(10))

    # Check empty QFQ data.
    qfq_number = len(qfq)
    if qfq_number == 0:
        print('Stock QFQ data length is 0!')
        raise SystemExit

    # Handle stop-trading periods by filling with the previous period data.
    # Leading stop-trading periods (no previous data) are left untouched.
    # Assume: qfq data has been sorted ascendingly by date.
    for i in range(qfq_number):
        if i > 0 and np.isnan(qfq.iloc[i]['close']):
            if gs.is_debug:
                print('close = ', qfq.iloc[i]['close'])
            if np.isnan(qfq.iloc[i - 1]['close']):  # Ignore leading stop-trading periods
                continue
            # BUGFIX: the original per-column chained assignment
            # 'qfq.iloc[i][column] = ...' writes into a temporary Series
            # and can be silently lost; assign the whole row instead.
            qfq.iloc[i] = qfq.iloc[i - 1]

    #
    # Load Finance Summary Data
    #

    fs = u.read_csv(fs_fullpath)
    fs.set_index('date', inplace=True)
    fs.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(fs.head(10))

    # Check empty finance summary data.
    fs_number = len(fs)
    if fs_number == 0:
        print('Stock finance summary data length is 0!')
        raise SystemExit

    #
    # Generate Rolling EPS for Each Quarter
    #

    eps_index = []
    date_start = u.dateFromStr(qfq.index[0])  # First element
    date_end = u.dateFromStr(qfq.index[-1])  # Last element
    year_start = date_start.year
    year_end = date_end.year
    for year in range(year_start, year_end + 1):
        for quarter in range(1, 5):
            eps_index.append(u.quarterDateStr(year, quarter))
    if gs.is_debug:
        print(eps_index)

    eps_columns = ['eps', 'eps_filled', 'eps_rolling']
    eps_index_number = len(eps_index)

    # Init all elements to NaN (np.full replaces the former
    # randn-then-overwrite initialization).
    eps = pd.DataFrame(np.full((eps_index_number, len(eps_columns)), np.nan),
                       index=eps_index,
                       columns=eps_columns)

    # Inherit EPS from the finance summary where a report exists; other
    # slots stay NaN from the initialization above.
    for i in range(eps_index_number):
        index = eps.index[i]
        if index in fs.index:  # Has EPS data
            # BUGFIX: .loc assignment replaces the lost chained assignment
            # 'eps.iloc[i][column] = ...'.
            eps.loc[index, 'eps'] = fs.loc[index, 'eps']

    # Fill the missing EPS data from the nearest reported quarter of the
    # same year, scaled linearly by quarter number (assumes cumulative
    # year-to-date EPS — consistent with the rolling ratios below).
    for year in range(year_start, year_end + 1):
        index_q1 = u.quarterDateStr(year, 1)
        index_q2 = u.quarterDateStr(year, 2)
        index_q3 = u.quarterDateStr(year, 3)
        index_q4 = u.quarterDateStr(year, 4)
        eps_q1 = eps.loc[index_q1, 'eps']
        eps_q2 = eps.loc[index_q2, 'eps']
        eps_q3 = eps.loc[index_q3, 'eps']
        eps_q4 = eps.loc[index_q4, 'eps']
        if gs.is_debug:
            print('eps_q1 =', eps_q1, 'eps_q2 =', eps_q2, 'eps_q3 =', eps_q3,
                  'eps_q4 =', eps_q4)

        eps_q1_filled = eps_q1
        eps_q2_filled = eps_q2
        eps_q3_filled = eps_q3
        eps_q4_filled = eps_q4

        if (np.isnan(eps_q1)):
            if (not np.isnan(eps_q2)):
                eps_q1_filled = eps_q2 * 0.5
            elif (not np.isnan(eps_q3)):
                eps_q1_filled = eps_q3 * 0.3333333333333333
            elif (not np.isnan(eps_q4)):
                eps_q1_filled = eps_q4 * 0.25
        if (np.isnan(eps_q2)):
            if (not np.isnan(eps_q1)):
                eps_q2_filled = eps_q1 * 2.0
            elif (not np.isnan(eps_q3)):
                eps_q2_filled = eps_q3 * 0.6666666666666667
            elif (not np.isnan(eps_q4)):
                eps_q2_filled = eps_q4 * 0.5
        if (np.isnan(eps_q3)):
            if (not np.isnan(eps_q2)):
                eps_q3_filled = eps_q2 * 1.5
            elif (not np.isnan(eps_q1)):
                eps_q3_filled = eps_q1 * 3.0
            elif (not np.isnan(eps_q4)):
                eps_q3_filled = eps_q4 * 0.75
        if (np.isnan(eps_q4)):
            if (not np.isnan(eps_q3)):
                eps_q4_filled = eps_q3 * 1.333333333333333
            elif (not np.isnan(eps_q2)):
                eps_q4_filled = eps_q2 * 2.0
            elif (not np.isnan(eps_q1)):
                eps_q4_filled = eps_q1 * 4.0
        if gs.is_debug:
            print('eps_q1_filled =', eps_q1_filled, 'eps_q2_filled =',
                  eps_q2_filled, 'eps_q3_filled =', eps_q3_filled,
                  'eps_q4_filled =', eps_q4_filled)

        eps.loc[index_q1, 'eps_filled'] = eps_q1_filled
        eps.loc[index_q2, 'eps_filled'] = eps_q2_filled
        eps.loc[index_q3, 'eps_filled'] = eps_q3_filled
        eps.loc[index_q4, 'eps_filled'] = eps_q4_filled

    # Calculate rolling (annualized) EPS: scale the cumulative quarter
    # figure up to a full-year estimate.
    rolling_ratio = [4.0, 2.0, 1.333333333333333, 1.0]
    for year in range(year_start, year_end + 1):
        for quarter in range(1, 5):
            index = u.quarterDateStr(year, quarter)
            eps_filled = eps.loc[index, 'eps_filled']
            eps.loc[index,
                    'eps_rolling'] = eps_filled * rolling_ratio[quarter - 1]

    if gs.is_debug:
        print(eps.head(10))

    #
    # Calculate HPE based on given period
    #

    # Drop un-used columns.
    hpe = qfq.drop(['open', 'volume', 'amount'], axis=1)

    # Add the EPS and ratio columns; the ratio-column prefix depends on
    # the requested ratio type.
    prefix = 'pe' if ratio == 'PE' else 'ep'
    for column in ['eps', 'eps_filled', 'eps_rolling',
                   prefix + '_high', prefix + '_close', prefix + '_low']:
        hpe[column] = np.nan

    # Map each sample date to its quarter and inherit that quarter's EPS.
    hpe_number = len(hpe)
    for i in range(hpe_number):
        index = hpe.index[i]  # 'YYYY-mm-dd'
        index_date = u.dateFromStr(index)  # datetime.date(YYYY-mm-dd)
        index_quarter = u.quarterDateStr(
            index_date.year, u.quarterOfDate(index_date))  # 'YYYY-mm-dd'
        for column in ['eps', 'eps_filled', 'eps_rolling']:
            hpe.loc[index, column] = eps.loc[index_quarter, column]

    if ratio == 'PE':
        # Calculate historical P/E ratio.
        price = {'pe_close': 'close', 'pe_high': 'high', 'pe_low': 'low'}
        for i in range(hpe_number):
            index = hpe.index[i]  # 'YYYY-mm-dd'
            eps_rolling = hpe.iloc[i]['eps_rolling']
            for column in ['pe_close', 'pe_high', 'pe_low']:
                hpe.loc[index,
                        column] = hpe.loc[index, price[column]] / eps_rolling
    else:
        # Calculate historical E/P ratio.
        price = {'ep_close': 'close', 'ep_high': 'high', 'ep_low': 'low'}
        for i in range(hpe_number):
            index = hpe.index[i]  # 'YYYY-mm-dd'
            eps_rolling = hpe.iloc[i]['eps_rolling']
            for column in ['ep_close', 'ep_high', 'ep_low']:
                hpe.loc[index,
                        column] = eps_rolling / hpe.loc[index, price[column]]

    # Round every column to two decimals (via string formatting, as before).
    for column in hpe.columns:
        hpe[column] = hpe[column].map(lambda x: '%.2f' % x)
        hpe[column] = hpe[column].astype(float)

    return hpe
예제 #23
0
파일: HistoricalPE.py 프로젝트: dxcv/fQuant
def calc_hpe_quarterly(stock_id, year_start, year_end):
    """Calculate the historical P/E ratio quarter by quarter.

    Assumes the quarterly forward-adjusted price data and the quarterly
    EPS (finance summary) data have already been fetched and stored as CSV.

    Parameters
    ----------
    stock_id : string, stock code e.g. '600036'
    year_start : int, first year of the analysis e.g. 2005
    year_end : int, last year of the analysis e.g. 2016

    Returns
    -------
    DataFrame indexed by quarter end date with columns:
        close/high/low   quarterly prices
        eps              quarter-end EPS (may have gaps)
        eps_filled       EPS with gaps filled from neighboring quarters
        eps_rolling      annualized EPS derived from eps_filled
        pe_close/pe_high/pe_low ratios from the quarterly prices

    Raises SystemExit on invalid arguments or mismatched data length.
    """

    # Check input parameters.
    if not isinstance(stock_id, str) \
        or not isinstance(year_start, int) or not isinstance(year_end, int):
        print('Incorrect type of one or more input parameters!')
        raise SystemExit

    if not (year_start <= year_end):
        print('Start year should be no later than end year!')
        raise SystemExit

    # Fetch stock data; its length must cover exactly four quarters per
    # analysis year.
    stock_data = u.read_csv(c.fullpath_dict['qfq_q'] % stock_id)
    stock_data_number = len(stock_data)
    if stock_data_number != (year_end - year_start + 1) * 4:
        # BUGFIX: corrected typo 'tock data' in the error message.
        print(
            'The duration of stock data does not match the duration of analysis!'
        )
        raise SystemExit

    stock_data.set_index('date', inplace=True)
    stock_data.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(stock_data.head(10))

    # Handle stop-trading quarters by filling with the previous quarter's
    # data; leading stop-trading quarters (no previous data) are left as-is.
    # Assume: stock data has been sorted ascendingly by date.
    for i in range(stock_data_number):
        if i > 0 and np.isnan(stock_data.iloc[i]['close']):
            if gs.is_debug:
                print('close = ', stock_data.iloc[i]['close'])
            if np.isnan(stock_data.iloc[i - 1]['close']):  # Ignore leading stop-trading quarters
                continue
            # BUGFIX: the original per-column chained assignment
            # 'stock_data.iloc[i][column] = ...' writes into a temporary
            # Series and can be silently lost; assign the whole row.
            stock_data.iloc[i] = stock_data.iloc[i - 1]

    # Fetch report (EPS) data.
    report_data = u.read_csv(c.fullpath_dict['finsum'] % stock_id)
    report_data.set_index('date', inplace=True)
    report_data.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(report_data.head(10))

    # Join stock data and report data (stock data is the reference index).
    hpe_columns = [
        'close', 'high', 'low', 'eps', 'eps_filled', 'eps_rolling', 'pe_close',
        'pe_high', 'pe_low'
    ]
    hpe_index_number = stock_data_number

    # Init all elements to NaN (np.full replaces the former
    # randn-then-overwrite initialization).
    hpe = pd.DataFrame(np.full((hpe_index_number, len(hpe_columns)), np.nan),
                       index=stock_data.index,
                       columns=hpe_columns)

    # Inherit close/high/low from stock data, and eps from report data
    # where a report exists; other slots stay NaN from the init above.
    for i in range(hpe_index_number):
        index = hpe.index[i]
        for column in ['close', 'high', 'low']:
            # BUGFIX: .loc assignment replaces the lost chained assignment
            # 'hpe.iloc[i][column] = ...'.
            hpe.loc[index, column] = stock_data.iloc[i][column]
        if index in report_data.index:  # Has EPS data
            hpe.loc[index, 'eps'] = report_data.loc[index, 'eps']

    # Fill the missing EPS data from the nearest reported quarter of the
    # same year, scaled linearly by quarter number (assumes cumulative
    # year-to-date EPS — consistent with the rolling ratios below).
    for year in range(year_start, year_end + 1):
        index_q1 = u.quarterDateStr(year, 1)
        index_q2 = u.quarterDateStr(year, 2)
        index_q3 = u.quarterDateStr(year, 3)
        index_q4 = u.quarterDateStr(year, 4)
        eps_q1 = hpe.loc[index_q1, 'eps']
        eps_q2 = hpe.loc[index_q2, 'eps']
        eps_q3 = hpe.loc[index_q3, 'eps']
        eps_q4 = hpe.loc[index_q4, 'eps']
        if gs.is_debug:
            print('eps_q1 =', eps_q1, 'eps_q2 =', eps_q2, 'eps_q3 =', eps_q3,
                  'eps_q4 =', eps_q4)

        eps_q1_filled = eps_q1
        eps_q2_filled = eps_q2
        eps_q3_filled = eps_q3
        eps_q4_filled = eps_q4

        if (np.isnan(eps_q1)):
            if (not np.isnan(eps_q2)):
                eps_q1_filled = eps_q2 * 0.5
            elif (not np.isnan(eps_q3)):
                eps_q1_filled = eps_q3 * 0.3333333333333333
            elif (not np.isnan(eps_q4)):
                eps_q1_filled = eps_q4 * 0.25
        if (np.isnan(eps_q2)):
            if (not np.isnan(eps_q1)):
                eps_q2_filled = eps_q1 * 2.0
            elif (not np.isnan(eps_q3)):
                eps_q2_filled = eps_q3 * 0.6666666666666667
            elif (not np.isnan(eps_q4)):
                eps_q2_filled = eps_q4 * 0.5
        if (np.isnan(eps_q3)):
            if (not np.isnan(eps_q2)):
                eps_q3_filled = eps_q2 * 1.5
            elif (not np.isnan(eps_q1)):
                eps_q3_filled = eps_q1 * 3.0
            elif (not np.isnan(eps_q4)):
                eps_q3_filled = eps_q4 * 0.75
        if (np.isnan(eps_q4)):
            if (not np.isnan(eps_q3)):
                eps_q4_filled = eps_q3 * 1.333333333333333
            elif (not np.isnan(eps_q2)):
                eps_q4_filled = eps_q2 * 2.0
            elif (not np.isnan(eps_q1)):
                eps_q4_filled = eps_q1 * 4.0
        if gs.is_debug:
            print('eps_q1_filled =', eps_q1_filled, 'eps_q2_filled =',
                  eps_q2_filled, 'eps_q3_filled =', eps_q3_filled,
                  'eps_q4_filled =', eps_q4_filled)

        hpe.loc[index_q1, 'eps_filled'] = eps_q1_filled
        hpe.loc[index_q2, 'eps_filled'] = eps_q2_filled
        hpe.loc[index_q3, 'eps_filled'] = eps_q3_filled
        hpe.loc[index_q4, 'eps_filled'] = eps_q4_filled

    # Calculate rolling (annualized) EPS: scale the cumulative quarter
    # figure up to a full-year estimate.
    rolling_ratio = [4.0, 2.0, 1.333333333333333, 1.0]
    for year in range(year_start, year_end + 1):
        for quarter in range(1, 5):
            index = u.quarterDateStr(year, quarter)
            eps_filled = hpe.loc[index, 'eps_filled']
            hpe.loc[index,
                    'eps_rolling'] = eps_filled * rolling_ratio[quarter - 1]

    # Calculate historical P/E ratio from each quarterly price.
    price = {'pe_close': 'close', 'pe_high': 'high', 'pe_low': 'low'}
    for i in range(hpe_index_number):
        index = hpe.index[i]  # 'YYYY-mm-dd'
        eps_rolling = hpe.iloc[i]['eps_rolling']
        for column in ['pe_close', 'pe_high', 'pe_low']:
            hpe.loc[index,
                    column] = hpe.loc[index, price[column]] / eps_rolling

    # Round every column to two decimals (via string formatting, as before).
    for column in hpe_columns:
        hpe[column] = hpe[column].map(lambda x: '%.2f' % x)
        hpe[column] = hpe[column].astype(float)

    return hpe
예제 #24
0
def loadIndustrySina():
    """Load the Sina industry classification table from its cached CSV file."""
    return u.read_csv(c.fullpath_dict['indu_sina'])
예제 #25
0
파일: Coefficient.py 프로젝트: dxcv/fQuant
#        strategyCoefficient(benchmark_id, date_start, date_end, period, ratio_method, loadAllIndex(), True, 'AllIndex')

# Analyze Strategy Results
# NOTE(review): benchmark_id, date_start, date_end, period, ratio_method,
# completeness_threshold and top_number appear to be module-level settings
# defined earlier in this file — confirm against the full module.
analyze_strategy = False  # flip to True to run the coefficient analysis step
target_name = 'AllIndex'
#target_name = 'AllStock'
# Common file-name stem shared by all coefficient result files, e.g.
# 'Coefficient_<start>_<end>_<period>_<method>_<target>_vs_<benchmark>'.
common_postfix = '_'.join(['Coefficient', date_start, date_end, period, ratio_method, target_name, 'vs', benchmark_id])
if analyze_strategy:
    analyzeCoefficient(common_postfix, completeness_threshold, top_number)

# Plot Strategy Results
plot_strategy = False  # flip to True to load and plot the statistics files
if plot_strategy:
    path = c.path_dict['strategy']
    # Load merged price tables for all stocks and all indexes.
    file = c.file_dict['strategy'] % '_'.join(['Common', 'AllPrice', benchmark_id, period, 'AllStock'])
    price_allstock = u.read_csv(path+file)
    file = c.file_dict['strategy'] % '_'.join(['Common', 'AllPrice', benchmark_id, period, 'AllIndex'])
    price_allindex = u.read_csv(path+file)

    # Generate Statistics List
    # One entry per (classification, coefficient) pair, e.g. 'PositiveBeta'.
    statistics = []
    for coefficient in ['Correlation', 'Beta', 'Alpha']:
        for classification in ['Positive', 'Zero', 'Negative']:
            statistics.append(classification+coefficient)
    print(statistics)

    # Plot Statistics List
    for stats in statistics:
        # Plot statistics
        # NOTE(review): the loaded data is unused in this visible chunk —
        # presumably the plotting call follows; verify in the full file.
        file = c.file_dict['strategy'] % '_'.join([common_postfix, completeness_threshold, stats])
        data = u.read_csv(path+file)
예제 #26
0
def loadSuspended():
    """Load the list of suspended stocks from its cached CSV file."""
    return u.read_csv(c.fullpath_dict['suspended'])
예제 #27
0
def optimizePriceFollow(stock_list, is_index, threshold_list, method):
    """Find the best price-follow decision cutoff per stock and threshold.

    For each stock, the per-threshold 'predict' columns of three neighboring
    thresholds are combined into a weighted prediction, then ``segments - 1``
    cutoff candidates inside the open interval (1, -1) are scanned; the cutoff
    with the smallest mean absolute delta against the 'confirm' column wins.
    Per-cutoff deltas are saved to a CSV file per (stock, threshold) pair and
    the optimization summary is printed.

    Parameters
    ----------
    stock_list : non-empty list of stock ids to process.
    is_index : passed to u.stockFileName() to pick index vs. stock naming.
    threshold_list : ordered thresholds; each interior element is optimized
        together with its two neighbors.
    method : 'Threshold Weighted' (weight by threshold value),
        'Equally Weighted' (simple average), anything else = use the center
        threshold's predict unmodified.

    Raises
    ------
    SystemExit
        If stock_list or threshold_list is empty.
    """
    stock_number = len(stock_list)
    if stock_number < 1:
        print('Stock Number:', stock_number)
        raise SystemExit

    threshold_number = len(threshold_list)
    if threshold_number < 1:
        print('Threshold Number:', threshold_number)
        raise SystemExit

    for stock_id in stock_list:
        # Load Results from Different Threshold
        file_postfix = 'PriceFollow_%s_All' % u.stockFileName(
            stock_id, is_index)
        df = u.read_csv(c.path_dict['strategy'] +
                        c.file_dict['strategy'] % file_postfix)
        row_number = len(df)
        # FIX: DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0;
        # replaced throughout with label-based .loc. df comes from read_csv,
        # so its index is a RangeIndex and integer labels equal positions.
        for i in range(1, threshold_number - 1):
            t_prev = threshold_list[i - 1]
            t_curr = threshold_list[i]
            t_next = threshold_list[i + 1]
            t_total = t_prev + t_curr + t_next
            column_postfix = '_%s' % t_curr
            # Slice Interested Columns: three predicts plus one confirm.
            select_columns = []
            for t in [t_prev, t_curr, t_next]:
                select_columns.append('predict' + '_%s' % t)
            select_columns.append('confirm' + column_postfix)
            df2 = pd.DataFrame.copy(df.loc[:, select_columns])
            # Calculate Weighted Predict (row 0 is intentionally skipped, as
            # in the rest of this module).
            df2['weighted_predict' + column_postfix] = np.nan
            for j in range(1, row_number):
                # Method 1: Threshold Weighted Predict
                if method == 'Threshold Weighted':
                    predict = 0.0
                    for t in [t_prev, t_curr, t_next]:
                        predict = predict + t * df2.loc[j,
                                                        'predict' + '_%s' % t]
                    predict = predict / t_total
                # Method 2: Equally Weighted Predict
                elif method == 'Equally Weighted':
                    predict = 0.0
                    for t in [t_prev, t_curr, t_next]:
                        predict = predict + 1.0 * df2.loc[j,
                                                          'predict' + '_%s' % t]
                    predict = predict / 3.0
                # Method 3: Single Predict
                else:
                    predict = df2.loc[j, 'predict' + column_postfix]
                df2.loc[j, 'weighted_predict' + column_postfix] = predict

            # Optimize for Given Segments within Range [1, -1] to Find Best Cutoff
            print('Optimization Starts for Threshold:', t_curr)
            segments = 10
            delta_mean = []
            delta_stddev = []
            for j in range(1, row_number):
                weighted_predict = df2.loc[j,
                                           'weighted_predict' + column_postfix]
                confirm = df2.loc[j, 'confirm' + column_postfix]
                for k in range(1, segments):
                    # cutoff interpolates linearly from +1 (k=0) to -1 (k=segments)
                    ratio = float(k) / float(segments)
                    cutoff = 1.0 * (1.0 - ratio) + (-1.0) * ratio
                    predict_cutoff = 1 if weighted_predict > cutoff else -1
                    delta_cutoff = predict_cutoff - confirm if not np.isnan(
                        confirm) else np.nan
                    df2.loc[j, 'delta' + '_%.2f' % cutoff] = delta_cutoff
            # Save to CSV File
            file_postfix = 'PriceFollow_%s_Cutoff_%s' % (u.stockFileName(
                stock_id, is_index), t_curr)
            u.to_csv(df2, c.path_dict['strategy'],
                     c.file_dict['strategy'] % file_postfix)
            # Gather Mean of Delta_Cutoffs (describe() ignores NaN rows)
            for k in range(1, segments):
                ratio = float(k) / float(segments)
                cutoff = 1.0 * (1.0 - ratio) + (-1.0) * ratio
                describe = df2['delta' + '_%.2f' % cutoff].describe()
                delta_mean.append(np.abs(describe['mean']))
                delta_stddev.append(describe['std'])
            # Find Best Cutoff: smallest mean absolute delta wins; ties go to
            # the first (largest) cutoff because list.index returns the first hit.
            delta_mean_min = min(delta_mean)
            delta_mean_index = delta_mean.index(delta_mean_min)
            print('Delta Mean:', delta_mean)
            print('Delta Stddev:', delta_stddev)
            print('Delta Mean Min:', delta_mean_min)
            print('Delta Mean Index:', delta_mean_index)
            ratio = float(delta_mean_index + 1) / float(segments)
            best_cutoff = 1.0 * (1.0 - ratio) + (-1.0) * ratio
            print('Best Cutoff:', best_cutoff)
            print('Optimization Ends for Threshold:', t_curr)
예제 #28
0
def loadConceptSina():
    """Load the Sina concept classification table from its cached CSV file."""
    return u.read_csv(c.fullpath_dict['conc_sina'])
예제 #29
0
def loadQFQ(stock_id, period):
    """Load forward-adjusted (QFQ) price data for one stock and period.

    Builds the file location from the project's path/file templates and
    returns the parsed CSV as a DataFrame.
    """
    full_path = (c.path_dict['qfq'] % period
                 + c.file_dict['qfq'] % (period, stock_id))
    return u.read_csv(full_path)
예제 #30
0
def loadTerminated():
    """Load the list of terminated (delisted) stocks from its cached CSV file."""
    return u.read_csv(c.fullpath_dict['terminated'])