def add_factor_to_163(security_item):
    """
    Copy the 'factor' column of the sina hfq kdata into the 163 bfq kdata file.

    Nothing is written when the 163 file already has a 'factor' column with
    no missing values.

    Parameters
    ----------
    security_item : mapping
        the security item; its 'code' entry is used in the log message and
        the item itself is passed to get_kdata_path

    Returns
    -------
    None
        the 163 kdata csv is rewritten in place
    """
    path_163 = get_kdata_path(security_item, source='163', fuquan='bfq')
    df_163 = pd_read_csv(path_163)

    # every row already has a factor -> nothing to merge
    if 'factor' in df_163.columns and not df_163['factor'].isna().any():
        # format eagerly: the message uses '{}' placeholders, which the
        # %-style argument merging of logging would not fill in
        logger.info("{} 163 factor is ok".format(security_item['code']))
        return

    path_sina = get_kdata_path(security_item, source='sina', fuquan='hfq')
    df_sina = pd_read_csv(path_sina)

    # keep only the first row per index label so the index-aligned
    # assignment below has unique labels on the sina side
    df_sina = df_sina[~df_sina.index.duplicated(keep='first')]
    df_163['factor'] = df_sina['factor']
    df_163.to_csv(path_163, index=False)
Beispiel #2
0
def get_event(security_item,
              event_type='finance_forecast',
              start_date=None,
              end_date=None,
              index='timestamp'):
    """
    Load the stored events of a security, optionally limited to a date range.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item, its id or its code

    event_type : str
        {'finance_forecast','finance_report'}

    start_date : Timestamp str or Timestamp
        start of the range, forwarded to df_for_date_range

    end_date : Timestamp str or Timestamp
        end of the range, forwarded to df_for_date_range

    index : str
        forwarded to pd_read_csv as its ``index`` argument,
        default : 'timestamp'

    Returns
    -------
    DataFrame
        the events read from the event file, or an empty DataFrame when
        that file does not exist

    """
    security_item = to_security_item(security_item)
    event_path = get_event_path(security_item, event_type)

    # no event file for this security / event type
    if not os.path.exists(event_path):
        return pd.DataFrame()

    events = pd_utils.pd_read_csv(event_path, index=index, generate_id=True)
    return df_for_date_range(events, start_date=start_date, end_date=end_date)
Beispiel #3
0
def get_kdata(security_item,
              exchange=None,
              the_date=None,
              start_date=None,
              end_date=None,
              fuquan='bfq',
              source=None,
              level='day',
              generate_id=False):
    """
    get kdata.

    Parameters
    ----------
    security_item : SecurityItem or str
        the security item,id or code

    exchange : str
        the exchange,set this for cryptocurrency

    the_date : TimeStamp str or TimeStamp
        get the kdata for the exact date
    start_date : TimeStamp str or TimeStamp
        start date
    end_date : TimeStamp str or TimeStamp
        end date
    fuquan : str
        {"qfq","hfq","bfq"},default:"bfq"
    source : str
        the data source,{'163','sina','exchange'},just used for internal merge
    level : str or int
        the kdata level,{1,5,15,30,60,'day','week','month'},default : 'day'
        (accepted for interface compatibility; not referenced in this
        function body)
    generate_id : bool
        forwarded to pd_utils.pd_read_csv, default : False

    Returns
    -------
    DataFrame
        the kdata; for 163 stock data that has a 'factor' column the
        hfqClose/hfqOpen/hfqHigh/hfqLow columns are added, and the
        qfqClose/qfqOpen/qfqHigh/qfqLow columns as well when a latest
        factor is available.
        An empty DataFrame is returned when the kdata file does not exist.
    None
        when ``the_date`` is given but is not in the index of the kdata

    """

    # there are too many cryptocurrency exchanges, so the exchange must be
    # specified
    security_item = to_security_item(security_item, exchange)

    source = adjust_source(security_item, source)

    # 163 data is already merged and carries the adjustment factor; it is all
    # stored under the 'bfq' directory, so read from that single place and
    # convert as needed
    path_fuquan = 'bfq' if source == '163' else fuquan
    the_path = files_contract.get_kdata_path(security_item,
                                             source=source,
                                             fuquan=path_fuquan)

    if not os.path.isfile(the_path):
        return pd.DataFrame()

    df = pd_utils.pd_read_csv(the_path, generate_id=generate_id)

    # price adjustment only applies to 163 stock data carrying a factor
    # column; row filtering below does not change the set of columns
    needs_adjust = (source == '163'
                    and security_item['type'] == 'stock'
                    and 'factor' in df.columns)

    # the latest factor must come from the full data set, i.e. before any
    # date filtering is applied
    latest_factor = None
    if needs_adjust:
        known_factors = df[df['factor'].notna()]
        if known_factors.shape[0] > 0:
            latest_factor = known_factors['factor'].iat[-1]

    if the_date:
        if the_date not in df.index:
            return None
        df = df.loc[the_date:the_date, :]
    elif start_date or end_date:
        df = df_for_date_range(df,
                               start_date=start_date,
                               end_date=end_date)

    if needs_adjust:
        # backward-adjusted (hfq) prices never change
        df['hfqClose'] = df.close * df.factor
        df['hfqOpen'] = df.open * df.factor
        df['hfqHigh'] = df.high * df.factor
        df['hfqLow'] = df.low * df.factor

        # forward-adjusted (qfq) prices are computed back from the latest
        # factor; the current price stays unchanged
        if latest_factor:
            df['qfqClose'] = df.hfqClose / latest_factor
            df['qfqOpen'] = df.hfqOpen / latest_factor
            df['qfqHigh'] = df.hfqHigh / latest_factor
            df['qfqLow'] = df.hfqLow / latest_factor
        else:
            # no exception is being handled here, so log a plain error
            # instead of logger.exception (which would append an empty
            # traceback)
            logger.error("missing latest factor for {}".format(
                security_item['id']))

    return df