Example 1
def create_factor_analysis(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return

    ref_date = advanceDateByCalendar('china.sse', ref_date, '-2b').strftime('%Y-%m-%d')

    factor_name = kwargs['factor_name']
    logger.info("updating '{0}' on {1}".format(factor_name, ref_date))

    # small universe, risk_neutral
    return_table = common_500_analysis(factor_name, ref_date, use_only_index_components=True, risk_neutral=True)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance', factor_name, 'risk_neutral')

    # small universe, top_100
    return_table = common_500_analysis(factor_name, ref_date, use_only_index_components=True, risk_neutral=False)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance', factor_name, 'top_100')

    # big universe, risk_neutral
    return_table = common_500_analysis(factor_name, ref_date, use_only_index_components=False, risk_neutral=True)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance_big_universe', factor_name, 'risk_neutral')

    # big universe, top_100
    return_table = common_500_analysis(factor_name, ref_date, use_only_index_components=False, risk_neutral=False)
    if return_table is not None:
        upload(ref_date, return_table, destination, 'performance_big_universe', factor_name, 'top_100')
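
The four calls above differ only in the two boolean flags and in the destination table and tag. A minimal refactor sketch that drives the variants from a table instead of repeating the call sites; it assumes the same common_500_analysis, upload, and destination objects as the example:

def run_all_variants(factor_name, ref_date, destination):
    # (use_only_index_components, risk_neutral, destination table, tag)
    variants = [
        (True,  True,  'performance',              'risk_neutral'),
        (True,  False, 'performance',              'top_100'),
        (False, True,  'performance_big_universe', 'risk_neutral'),
        (False, False, 'performance_big_universe', 'top_100'),
    ]
    for in_index, neutral, table, tag in variants:
        return_table = common_500_analysis(factor_name, ref_date,
                                           use_only_index_components=in_index,
                                           risk_neutral=neutral)
        if return_table is not None:
            upload(ref_date, return_table, destination, table, factor_name, tag)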
Example 2
def exchange_suspend_info(ref_date, force_update=False):
    start_date = ref_date

    if not force_update:
        start_date = (find_latest_date() + dt.timedelta(days=1))

    end_date = ref_date
    date_range = pd.date_range(start_date, end_date)

    datas = []
    for date in date_range:
        if isBizDay('china.sse', date):
            datas.append(suspend_info(date.strftime('%Y-%m-%d')))
            spyder_logger.info('Scraping finished for date {0}'.format(date))

    if not datas:
        spyder_logger.info('No data is available for {0}'.format(ref_date))
        return
        
    total_table = pd.concat(datas)
    total_table.drop_duplicates(['停(复)牌时间', '证券代码'], inplace=True)

    if not total_table.empty:
        insert_table(total_table,
                     ['effectiveDate',
                      'instrumentID',
                      'instrumentName',
                      'status',
                      'reason',
                      'stopTime'],
                     'suspend_info',
                     exchange_db_settings)
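
Unless force_update is set, the scrape window starts the day after the latest stored date, so only missing days are re-fetched. A self-contained illustration of that windowing, where latest_stored_date is a made-up stand-in for find_latest_date():

import datetime as dt
import pandas as pd

latest_stored_date = dt.date(2017, 6, 1)  # hypothetical stand-in for find_latest_date()
ref_date = dt.date(2017, 6, 8)

start_date = latest_stored_date + dt.timedelta(days=1)
for date in pd.date_range(start_date, ref_date):
    # each calendar day still to be scraped; business-day filtering would go here
    print(date.date())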
Example 3
def update_return_data_300(ds, **kwargs):
    ref_date = kwargs['next_execution_date']

    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    start_date = advanceDateByCalendar('china.sse', ref_date, '-30b')

    for date in bizDatesList('china.sse', start_date, ref_date):

        date = date.strftime('%Y-%m-%d')

        conn1 = create_ms_engine('PortfolioManagements300')
        df = fetch_date('StockReturns', date, conn1)

        conn2 = create_my_engine()

        delete_data('return_300', date, conn2)
        insert_data('return_300', df, conn2)

        conn3 = create_my_engine2()
        delete_data('return_300', date, conn3)
        insert_data('return_300', df, conn3)
    return 0
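
The delete_data/insert_data helpers are not shown, but the delete-then-insert pairing is what makes each run idempotent: re-running a date replaces that date's rows instead of appending duplicates. A sketch of that pattern under assumed names; the replace_for_date helper and the Date column are illustrative, not the original helpers:

import sqlalchemy as sa

def replace_for_date(engine, table, date, df):
    # Hypothetical equivalent of delete_data + insert_data: one transaction
    # deletes the date's old rows, then appends the freshly fetched frame.
    with engine.begin() as conn:
        conn.execute(sa.text("DELETE FROM {0} WHERE Date = :d".format(table)),
                     {"d": date})
        df.to_sql(table, conn, if_exists='append', index=False)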
Example 4
def check_holiday(this_date):
    flag = isBizDay('china.sse', this_date)

    if not flag:
        alpha_logger.info(
            'Job will be omitted as {0} is a holiday'.format(this_date))

    return flag
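
check_holiday returns a boolean, which matches the contract of Airflow's ShortCircuitOperator: downstream tasks are skipped when the callable returns False. A hedged wiring sketch, assuming an Airflow 2 import path; the DAG shell is illustrative, not part of the original:

from datetime import datetime
from airflow import DAG
from airflow.operators.python import ShortCircuitOperator

# Illustrative DAG shell only; dag id, schedule, and start_date are made up.
with DAG('holiday_gated_job',
         start_date=datetime(2017, 1, 1),
         schedule_interval='@daily') as dag:
    holiday_gate = ShortCircuitOperator(
        task_id='holiday_gate',
        python_callable=check_holiday,
        # assumes isBizDay accepts the templated date string Airflow renders
        op_args=['{{ ds }}'],
    )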
Example 5
def get_nffund_idx_etf_component(date, index):
    date = dt.datetime.strptime(date, '%Y%m%d')
    if not isBizDay('China.SSE', date):
        date = advanceDateByCalendar('China.SSE', date, '-1b')
    pre_trading_date = advanceDateByCalendar('China.SSE', date,
                                             '-1b').strftime('%Y%m%d')
    if index == 'zz500':
        date = date.strftime('%Y%m%d')
        url = "http://www.nffund.com/etf/bulletin/ETF500/510500{date}.txt".format(
            date=date)
        html_text = requests.get(url).content.decode('gbk').split(
            'TAGTAG\r')[1]
        res = []
        col_name = ['Code', 'drop', 'Volume', 'drop', 'drop', 'drop', 'drop']
        for line in html_text.split('\r'):
            res.append(line.replace(' ', '').replace('\n', '').split('|'))
        res = pd.DataFrame(res, columns=col_name)
        res = res.drop('drop', axis=1).iloc[:500]
    elif index == 'hs300':
        url = "http://www.huatai-pb.com/etf-web/etf/index?fundcode=510300&beginDate={date}".format(
            date=date.strftime('%Y-%m-%d'))
        html_text = requests.get(url).content.decode('utf8')
        soup = BeautifulSoup(html_text, "lxml")
        res = []
        for item in soup.find_all('tr', {'align': 'center'})[1:]:
            sub_item = item.find_all('td')
            res.append([sub_item[0].text, sub_item[2].text])
        col_name = ['Code', 'Volume']
        res = pd.DataFrame(res, columns=col_name)
    elif index == 'sz50':
        url = "http://fund.chinaamc.com/product/fundShengoushuhuiqingdan.do"
        html_text = requests.post(url, data={'querryDate': date.strftime('%Y-%m-%d'), 'fundcode': '510050'})\
                    .content.decode('utf8')
        soup = BeautifulSoup(html_text, "lxml")
        res = []
        for item in soup.find_all('tr', '')[17:]:
            sub_item = item.find_all('td')
            res.append([sub_item[0].text, sub_item[2].text])
        col_name = ['Code', 'Volume']
        res = pd.DataFrame(res, columns=col_name)
    else:
        raise KeyError('Do not have source for index %s yet...' % index)

    # convert string code to int code
    res['Code'] = res['Code'].apply(int)

    # fetch eod close price
    engine = create_engine(
        'mssql+pymssql://sa:[email protected]/MultiFactor?charset=utf8')
    sql = 'select [Code], [Close] as PreClose from TradingInfo1 where Date = %s' % pre_trading_date
    close_data = pd.read_sql(sql, engine)
    res = res.merge(close_data, on='Code', how='left')

    # calculate weight
    res['weight'] = res['PreClose'].apply(float) * res['Volume'].apply(float)
    res['weight'] = res['weight'] / res['weight'].sum()
    res = res[['Code', 'weight']]
    return res
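
The weighting step at the end is just previous close times creation-basket volume, normalized to sum to one. A tiny numeric illustration with made-up rows:

import pandas as pd

res = pd.DataFrame({'Code': [1, 2], 'Volume': [100.0, 200.0],
                    'PreClose': [10.0, 5.0]})
res['weight'] = res['PreClose'] * res['Volume']   # notional held per name
res['weight'] /= res['weight'].sum()              # normalize to sum to one
print(res[['Code', 'weight']])                    # both names get 0.5 here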
Example 6
def update_factor_performance_big_universe_top_100(ds, **kwargs):
    ref_date = kwargs['next_execution_date']
    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = advanceDateByCalendar('china.sse', ref_date, '-2b')
    ref_date = ref_date.strftime('%Y-%m-%d')
    previous_date = advanceDateByCalendar('china.sse', ref_date, '-1b')

    this_day_pos, total_data = create_ond_day_pos(ref_date,
                                                  source_db,
                                                  big_universe=True,
                                                  risk_neutral=False)
    last_day_pos, _ = create_ond_day_pos(previous_date,
                                         source_db,
                                         big_universe=True,
                                         risk_neutral=False)

    return_table = settlement(ref_date,
                              this_day_pos,
                              total_data['bm'].values,
                              total_data['D1LogReturn'].values,
                              type='top_100')

    pos_diff_dict = {}

    for name in this_day_pos.columns.difference(['industry']):
        for ind in this_day_pos.industry.unique():
            pos_series = this_day_pos.loc[this_day_pos.industry == ind, name]
            if name in last_day_pos:
                last_series = last_day_pos.loc[last_day_pos.industry == ind,
                                               name]
                pos_diff = pos_series.sub(last_series, fill_value=0)
            else:
                pos_diff = pos_series
            pos_diff_dict[(name, ind)] = pos_diff.abs().sum()

        pos_series = this_day_pos[name]
        if name in last_day_pos:
            last_series = last_day_pos[name]
            pos_diff = pos_series.sub(last_series, fill_value=0)
        else:
            pos_diff = pos_series
        pos_diff_dict[(name, 'total')] = pos_diff.abs().sum()

    pos_diff_series = pd.Series(pos_diff_dict, name='turn_over')
    pos_diff_series.index.names = ['portfolio', 'industry']
    pos_diff_series = pos_diff_series.reset_index()

    return_table = pd.merge(return_table,
                            pos_diff_series,
                            on=['portfolio', 'industry'])
    return_table['source'] = 'tiny'
    return_table['universe'] = 'zz500_expand'
    upload(ref_date, return_table, destination_db, 'performance')
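
The turnover figure accumulated into pos_diff_dict is the sum of absolute position changes, with fill_value=0 treating names missing on either day as zero positions. A self-contained illustration:

import pandas as pd

today = pd.Series({'A': 0.6, 'B': 0.4})
yesterday = pd.Series({'A': 0.5, 'C': 0.5})
# 'B' is new today, 'C' was fully sold; both count as moves from/to zero.
turnover = today.sub(yesterday, fill_value=0).abs().sum()
print(turnover)  # |0.6-0.5| + |0.4-0| + |0-0.5| = 1.0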
def update_risk_factor_300(ds, **kwargs):
    ref_date = kwargs['next_execution_date']

    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = ref_date.strftime('%Y-%m-%d')

    conn1 = create_ms_engine('PortfolioManagements300')
    df = fetch_date('RiskFactor', ref_date, conn1)

    conn2 = create_my_engine()
    delete_data('risk_factor_300', ref_date, conn2)
    insert_data('risk_factor_300', df, conn2)

    conn3 = create_my_engine2()
    delete_data('risk_factor_300', ref_date, conn3)
    insert_data('risk_factor_300', df, conn3)
    return 0
def update_portfolio_long_top(ds, **kwargs):
    ref_date = kwargs['next_execution_date']

    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = ref_date.strftime('%Y-%m-%d')

    conn1 = create_ms_engine('FactorPerformance')
    df = fetch_date('Portfolio_LongTop_500', ref_date, conn1)

    conn2 = create_my_engine()

    delete_data('portfolio_longtop', ref_date, conn2)
    insert_data('portfolio_longtop', df, conn2)

    conn3 = create_my_engine2()
    delete_data('portfolio_longtop', ref_date, conn3)
    insert_data('portfolio_longtop', df, conn3)
    return 0
def update_trade_data(ds, **kwargs):
    ref_date = kwargs['next_execution_date']

    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = ref_date.strftime('%Y-%m-%d')

    conn1 = create_ms_engine('MultiFactor')
    df = fetch_date('TradingInfo1', ref_date, conn1)

    conn2 = create_my_engine()

    delete_data('trade_data', ref_date, conn2)
    insert_data('trade_data', df, conn2)

    conn3 = create_my_engine2()
    delete_data('trade_data', ref_date, conn3)
    insert_data('trade_data', df, conn3)
    return 0
def update_factor_indicator(ds, **kwargs):
    ref_date = kwargs['next_execution_date']

    if not isBizDay('china.sse', ref_date):
        logger.info("{0} is not a business day".format(ref_date))
        return 0

    ref_date = advanceDateByCalendar('china.sse', ref_date, '-2b')
    ref_date = ref_date.strftime('%Y-%m-%d')

    conn1 = create_ms_engine('FactorPerformance')
    df = fetch_date('FactorIndicator_500', ref_date, conn1)

    conn2 = create_my_engine()

    delete_data('factor_indicator', ref_date, conn2)
    insert_data('factor_indicator', df, conn2)

    conn3 = create_my_engine2()
    delete_data('factor_indicator', ref_date, conn3)
    insert_data('factor_indicator', df, conn3)
    return 0
def update_daily_portfolio(ds, **kwargs):
    execution_date = kwargs['next_execution_date']

    if not isBizDay('china.sse', execution_date):
        logger.info("{0} is not a business day".format(execution_date))
        return 0

    prev_date = advanceDateByCalendar('china.sse', execution_date, '-1b')

    logger.info("factor data is loading for {0}".format(prev_date))
    logger.info("Current running date is {0}".format(execution_date))

    common_factors = ['EPSAfterNonRecurring', 'DivP']
    prod_factors = ['CFinc1', 'BDTO', 'RVOL']
    uqer_factors = ['CoppockCurve', 'EPS']

    factor_weights = np.array([-1.0, 2.0])
    factor_weights = factor_weights / factor_weights.sum()

    engine = sqlalchemy.create_engine('mysql+mysqldb://sa:[email protected]/multifactor?charset=utf8')
    engine2 = sqlalchemy.create_engine(
        'mysql+pymysql://sa:[email protected]:3306/multifactor?charset=utf8')

    common_factors_df = pd.read_sql("select Code, 申万一级行业, {0} from factor_data where Date = '{1}'"
                                    .format(','.join(common_factors), prev_date), engine)

    prod_factors_df = pd.read_sql("select Code, {0} from prod_500 where Date = '{1}'"
                                  .format(','.join(prod_factors), prev_date), engine)

    uqer_factor_df = pd.read_sql(
        "select Code, {0} from factor_uqer where Date = '{1}'".format(','.join(uqer_factors), prev_date), engine2)

    risk_factor_df = pd.read_sql("select Code, {0} from risk_factor_500 where Date = '{1}'"
                                 .format(','.join(risk_factors_500), prev_date), engine)

    index_components_df = get_etf_index_weight.get_nffund_idx_etf_component(prev_date.strftime('%Y%m%d'), index='zz500')
    index_industry_weights = get_etf_index_weight.get_sw_industry_weight(index_components_df)
    index_components_df.rename(columns={'weight': 'benchmark'}, inplace=True)

    total_data = pd.merge(common_factors_df, uqer_factor_df, on=['Code'])
    total_data = pd.merge(total_data, risk_factor_df, on=['Code'])
    total_data = pd.merge(total_data, index_components_df, on=['Code'])
    total_data = total_data[total_data['benchmark'] != 0]

    null_flags = np.any(np.isnan(total_data[uqer_factors]), axis=1)
    total_data.fillna(0, inplace=True)

    total_factors = uqer_factors
    risk_factors_names = risk_factors_500 + ['Market']
    total_data['Market'] = 1.

    all_factors = total_data[total_factors]
    risk_factors = total_data[risk_factors_names]

    factor_processed = neutralize(risk_factors.values,
                                  standardize(winsorize_normal(all_factors.values)))

    normed_factor = pd.DataFrame(factor_processed, columns=total_factors, index=[prev_date] * len(factor_processed))

    er = normed_factor @ factor_weights

    # portfolio construction

    bm = total_data['benchmark'].values
    lbound = np.zeros(len(total_data))
    ubound = 0.01 + bm
    risk_exposure = total_data[risk_factors_names].values

    ubound[null_flags] = 0.

    if len(bm) != 500:

        total_weight = index_industry_weights['weight'].sum()
        filtered = index_industry_weights[index_industry_weights.industry.isin(risk_factors_500)]

        ind_weights = filtered['weight'].values

        risk_lbound = np.concatenate([ind_weights / total_weight,
                                      [bm @ total_data['Size'].values / total_weight],
                                      [1.]], axis=0)
        risk_ubound = np.concatenate([ind_weights / total_weight,
                                      [bm @ total_data['Size'].values / total_weight],
                                      [1.]], axis=0)
    else:
        risk_lbound = bm @ risk_exposure
        risk_ubound = bm @ risk_exposure

    # get black list 1
    engine = sqlalchemy.create_engine('mssql+pymssql://sa:[email protected]/WindDB')
    black_list = pd.read_sql("select S_INFO_WINDCODE, S_INFO_LISTDATE, sum(S_SHARE_RATIO) as s_ratio "
                             "from ASHARECOMPRESTRICTED "
                             "where S_INFO_LISTDATE BETWEEN '{0}' and '{1}' "
                             "GROUP BY S_INFO_WINDCODE, S_INFO_LISTDATE ORDER BY s_ratio DESC;"
                             .format((execution_date - dt.timedelta(days=7)).strftime('%Y%m%d'),
                                     (execution_date + dt.timedelta(days=14)).strftime('%Y%m%d')), engine)

    black_list = black_list[black_list['s_ratio'] >= 3.]
    black_list.S_INFO_WINDCODE = black_list.S_INFO_WINDCODE.str.split('.').apply(lambda x: int(x[0]))

    mask_array = total_data.Code.isin(black_list.S_INFO_WINDCODE)
    ubound[mask_array.values] = 0.

    # get black list 2
    black_list2 = pd.read_sql("select S_INFO_WINDCODE, AVG(S_WQ_AMOUNT) as avg_amount from ASHAREWEEKLYYIELD "
                              "where TRADE_DT < {1} and TRADE_DT >= {0} GROUP BY S_INFO_WINDCODE;"
                              .format((execution_date - dt.timedelta(days=30)).strftime('%Y%m%d'),
                                      execution_date.strftime('%Y%m%d')), engine)
    black_list2 = black_list2[black_list2['avg_amount'] <= 15000.]
    black_list2.S_INFO_WINDCODE = black_list2.S_INFO_WINDCODE.str.split('.').apply(lambda x: int(x[0]))

    mask_array2 = total_data.Code.isin(black_list2.S_INFO_WINDCODE)
    ubound[mask_array2.values] = 0.

    # get black list 3
    black_list3 = pd.read_sql("SELECT S_INFO_WINDCODE, S_DQ_SUSPENDDATE FROM ASHARETRADINGSUSPENSION AS a "
                              "WHERE a.S_DQ_SUSPENDDATE = (SELECT top 1 S_DQ_SUSPENDDATE FROM ASHARETRADINGSUSPENSION AS b "
                              "WHERE a.S_INFO_WINDCODE=b.S_INFO_WINDCODE and cast(floor(cast(b.OPDATE as float)) as datetime) <= '{0}' ORDER BY b.S_DQ_SUSPENDDATE DESC) "
                              "AND a.S_INFO_WINDCODE IN (SELECT S_INFO_WINDCODE FROM ASHAREDESCRIPTION AS c "
                              "WHERE c.S_INFO_DELISTDATE IS NULL) AND (a.S_DQ_SUSPENDDATE>='{1}' OR (a.S_DQ_RESUMPDATE IS NULL AND a.S_DQ_SUSPENDTYPE=444003000))"
                              .format(execution_date, execution_date.strftime('%Y%m%d')),
                              engine)
    black_list3.S_INFO_WINDCODE = black_list3.S_INFO_WINDCODE.str.split('.').apply(lambda x: int(x[0]))
    mask_array3 = total_data.Code.isin(black_list3.S_INFO_WINDCODE)
    ubound[mask_array3.values] = 0.

    # manual black list
    try:
        bk_list = pd.read_csv('~/mnt/sharespace/personal/licheng/portfolio/zz500_black_list/{0}.csv'
                              .format(prev_date.strftime('%Y-%m-%d')),
                              encoding='gbk',
                              names=['code'])
        logger.info('Manual black list exists for the date: {0}'.format(prev_date.strftime('%Y-%m-%d')))
        for code in bk_list['code']:
            ubound[total_data.Code == int(code)] = 0.
    except FileNotFoundError:
        logger.info('No manual black list exists for the date: {0}'.format(prev_date.strftime('%Y-%m-%d')))

    weights = build_portfolio(er,
                              builder='linear',
                              risk_exposure=risk_exposure,
                              lbound=lbound,
                              ubound=ubound,
                              risk_target=(risk_lbound, risk_ubound),
                              solver='GLPK')

    portfolio = pd.DataFrame({'weight': weights,
                              'industry': total_data['申万一级行业'].values,
                              'zz500': total_data['benchmark'].values,
                              'er': er}, index=total_data.Code)

    client = pymongo.MongoClient('mongodb://10.63.6.176:27017')
    db = client.multifactor
    portfolio_collection = db.portfolio

    detail_info = {}
    for code, w, bm_w, ind, r in zip(total_data.Code.values, weights, total_data['benchmark'].values,
                                     total_data['申万一级行业'].values, er):
        detail_info[str(code)] = {
            'weight': w,
            'industry': ind,
            'zz500': bm_w,
            'er': r
        }

    portfolio_dict = {'Date': prev_date,
                      'portfolio': detail_info}

    portfolio_collection.delete_many({'Date': prev_date})
    portfolio_collection.insert_one(portfolio_dict)

    portfolio.to_csv('~/mnt/sharespace/personal/licheng/portfolio/zz500/{0}.csv'.format(prev_date.strftime('%Y-%m-%d')),
                     encoding='gbk')

    return 0
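
In the else branch above, setting risk_lbound == risk_ubound == bm @ risk_exposure pins the optimized portfolio's factor exposures exactly to the benchmark's. A tiny numeric illustration of the exposure target being computed (all numbers made up):

import numpy as np

bm = np.array([0.5, 0.3, 0.2])            # benchmark weights over 3 assets
risk_exposure = np.array([[1.0,  0.2],
                          [0.0,  1.1],
                          [1.0, -0.4]])   # assets x risk factors
# With equal lower and upper bounds, any feasible weights w must satisfy
# w @ risk_exposure == target, i.e. match the benchmark exposure exactly.
target = bm @ risk_exposure
print(target)  # [0.7, 0.35]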