Beispiel #1
0
class RemoteDataService(DataService):
    """
    RemoteDataService is a concrete class using data from remote server's database.

    Every request goes through a ``DataApi`` connection kept in
    ``self.data_api``. The connection is created by ``init_from_config`` and
    is ``None`` before that.

    Notes
    -----
    Query helpers generally return a ``(result, msg)`` pair as produced by the
    underlying ``DataApi`` call. Several convenience methods compare ``msg``
    against ``'0,'`` and print it when it differs.

    """
    __metaclass__ = Singleton

    # TODO no validity check for input parameters

    def __init__(self):
        DataService.__init__(self)

        # DataApi connection; stays None until init_from_config() creates it.
        self.data_api = None

        # Field name used when ordering financial statement queries.
        self.REPORT_DATE_FIELD_NAME = 'report_date'
        # NOTE: self.calendar is deliberately not created here (see
        # get_trade_date_range_OLD).

    def __del__(self):
        # data_api is None when init_from_config() was never called, and the
        # attribute may be missing altogether if __init__ did not finish, so
        # only close a connection that actually exists.
        data_api = getattr(self, 'data_api', None)
        if data_api is not None:
            data_api.close()

    def init_from_config(self, props):
        """
        Create the DataApi connection and log in.

        Parameters
        ----------
        props : dict or None
            Configuration. Keys read here are "remote.data.address",
            "remote.data.username", "remote.data.password" and "timeout"
            (default 60). None is treated as an empty dict.

        Raises
        ------
        ValueError
            If a connection has to be created but address, username or
            password is missing or empty.

        Notes
        -----
        Nothing is done when the current connection is already logged in, or
        when a connection object exists and `props` is empty. An existing
        connection that is not logged in is closed before reconnecting.
        A failed login is only reported by printing; the new DataApi object
        is stored in either case.

        """
        # do not initialize and login again
        if self.data_api is not None and self.data_api._loggined:
            return

        if props is None:
            props = dict()

        if self.data_api is not None:
            if not props:
                return
            self.data_api.close()

        def get_from_list_of_dict(dict_list, key, default=None):
            # First non-None value stored under `key`, else `default`.
            for dic in dict_list:
                value = dic.get(key, None)
                if value is not None:
                    return value
            return default

        props_default = dict(
        )  # jutil.read_json(jutil.join_relative_path('etc/data_config.json'))
        dic_list = [props, props_default]

        address = get_from_list_of_dict(dic_list, "remote.data.address", "")
        username = get_from_list_of_dict(dic_list, "remote.data.username", "")
        password = get_from_list_of_dict(dic_list, "remote.data.password", "")
        # The lookups above fall back to "", never None, so a missing
        # credential shows up as an empty value.
        if not address or not username or not password:
            raise ValueError("no address, username or password available!")
        time_out = get_from_list_of_dict(dic_list, "timeout", 60)

        self.data_api = DataApi(address, use_jrpc=False)
        self.data_api.set_timeout(timeout=time_out)
        print("\nDataApi login: {}@{}".format(username, address))
        r, msg = self.data_api.login(username=username, password=password)
        if not r:
            print("    login failed: msg = '{}'\n".format(msg))
        else:
            print("    login success \n")

        # self.calendar = Calendar(self)

    # -----------------------------------------------------------------------------------
    # Basic APIs
    def daily(self, symbol, start_date, end_date, fields="", adjust_mode=None):
        """
        Query daily data through ``data_api.daily`` and drop duplicated rows.

        Parameters
        ----------
        symbol : str
        start_date : int
        end_date : int
        fields : str, optional
        adjust_mode : optional
            Forwarded unchanged to ``data_api.daily``.

        Returns
        -------
        df : pd.DataFrame
            Result with duplicated rows removed.
        err_msg : str

        """
        df, err_msg = self.data_api.daily(symbol=symbol,
                                          start_date=start_date,
                                          end_date=end_date,
                                          fields=fields,
                                          adjust_mode=adjust_mode,
                                          data_format="")
        # trade_status performance warning
        # TODO there will be duplicate entries when on stocks' IPO day
        # NOTE(review): presumably the API hands back None instead of a frame
        # on failure - confirm; the guard lets err_msg reach the caller.
        if df is not None:
            df = df.drop_duplicates()
        return df, err_msg

    def bar(self,
            symbol,
            start_time=200000,
            end_time=160000,
            trade_date=None,
            freq='1M',
            fields=""):
        """
        Query bar data through ``data_api.bar``.

        Parameters
        ----------
        symbol : str
        start_time : int, optional
            presumably HHMMSS - TODO confirm
        end_time : int, optional
        trade_date : int, optional
        freq : str, optional
            Bar frequency forwarded to ``data_api.bar``, default '1M'.
        fields : str, optional

        Returns
        -------
        df : pd.DataFrame
        msg : str

        """
        df, msg = self.data_api.bar(symbol=symbol,
                                    fields=fields,
                                    start_time=start_time,
                                    end_time=end_time,
                                    trade_date=trade_date,
                                    freq=freq,  # was hard-coded to '1M'
                                    data_format="")
        return df, msg

    def query(self, view, filter="", fields="", **kwargs):
        """
        Get various reference data.

        Parameters
        ----------
        view : str
            data source.
        fields : str
            Separated by ','
        filter : str
            filter expressions.
        kwargs
            Extra keyword arguments forwarded to ``data_api.query``
            (e.g. ``orderby``). ``data_format`` defaults to "" when not given.

        Returns
        -------
        df : pd.DataFrame
        msg : str
            error code and error message, joined by ','

        Examples
        --------
        res3, msg3 = ds.query("lb.secDailyIndicator",
                              fields="price_level,high_52w_adj,low_52w_adj",
                              filter="start_date=20170907&end_date=20170907",
                              orderby="trade_date",
                              data_format='pandas')
            view does not change. fields can be any field predefined in reference data api.

        """
        # Let a caller-supplied data_format win; passing it next to the
        # hard-coded default used to raise TypeError (duplicate keyword).
        kwargs.setdefault('data_format', "")
        df, msg = self.data_api.query(view,
                                      fields=fields,
                                      filter=filter,
                                      **kwargs)
        return df, msg

    # -----------------------------------------------------------------------------------
    # Convenient Functions
    def get_trade_date_range_OLD(self, start_date, end_date):
        """
        Deprecated variant of get_trade_date_range.

        Uses ``self.calendar`` when such an attribute has been set; since this
        class does not create it, falls back to ``get_trade_date_range``.

        """
        calendar = getattr(self, 'calendar', None)
        if calendar is not None:
            return calendar.get_trade_date_range(start_date, end_date)
        return self.get_trade_date_range(start_date, end_date)

    @staticmethod
    def _dic2url(d):
        """
        Convert a dict to str like 'k1=v1&k2=v2'

        Parameters
        ----------
        d : dict
            Keys must be str; values are converted with str().

        Returns
        -------
        str

        """
        pairs = ['='.join([key, str(value)]) for key, value in d.items()]
        return '&'.join(pairs)

    def query_lb_fin_stat(self,
                          type_,
                          symbol,
                          start_date,
                          end_date,
                          fields="",
                          drop_dup_cols=None):
        """
        Helper function to query financial statement views more conveniently.

        Parameters
        ----------
        type_ : {'income', 'balance_sheet', 'cash_flow', 'fin_indicator'}
        symbol : str
            separated by ','
        start_date : int
            Announcement date in results will be no earlier than start_date
        end_date : int
            Announcement date in results will be no later than end_date
        fields : str, optional
            separated by ',', default ""
        drop_dup_cols : list or tuple
            When given, sort by these columns and drop rows duplicated on
            them, keeping the first.

        Returns
        -------
        df : pd.DataFrame
            index date, columns fields
        msg : str

        Raises
        ------
        NotImplementedError
            If `type_` is not one of the supported values.

        """
        view_map = {
            'income': 'lb.income',
            'cash_flow': 'lb.cashFlow',
            'balance_sheet': 'lb.balanceSheet',
            'fin_indicator': 'lb.finIndicator'
        }
        view_name = view_map.get(type_, None)
        if view_name is None:
            # '{}' rather than '{:s}' so a non-str type_ still gets this error
            raise NotImplementedError("type_ = {}".format(type_))

        dic_argument = {
            'symbol': symbol,
            'start_date': start_date,
            'end_date': end_date,
            # 'update_flag': '0'
        }
        if view_name != 'lb.finIndicator':
            # we do not use single quarter since there are zeros
            #   408001000: joint
            #   408002000: joint (single quarter)
            dic_argument.update({'report_type': '408001000'})

        filter_argument = self._dic2url(
            dic_argument)  # 0 means first time, not update

        # NOTE(review): every other query in this class passes `orderby`;
        # `order_by` here may be silently ignored by the server - confirm.
        res, msg = self.query(view_name,
                              fields=fields,
                              filter=filter_argument,
                              order_by=self.REPORT_DATE_FIELD_NAME)

        if res is None:
            # nothing to post-process; let the caller inspect msg
            return res, msg

        # change data type (best effort: keep the raw frame if casting fails)
        try:
            cols = [
                col for col in ('ann_date', 'report_date')
                if col in res.columns
            ]
            res = res.astype(dtype={col: int for col in cols})
        except (AttributeError, TypeError, ValueError):
            pass

        if drop_dup_cols is not None:
            res = res.sort_values(by=drop_dup_cols, axis=0)
            res = res.drop_duplicates(subset=drop_dup_cols, keep='first')

        return res, msg

    def query_lb_dailyindicator(self, symbol, start_date, end_date, fields=""):
        """
        Helper function to call data_api.query with 'lb.secDailyIndicator' more conveniently.

        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        fields : str, optional
            separated by ',', default ""

        Returns
        -------
        df : pd.DataFrame
            index date, columns fields
        msg : str

        """
        filter_argument = self._dic2url({
            'symbol': symbol,
            'start_date': start_date,
            'end_date': end_date
        })

        return self.query("lb.secDailyIndicator",
                          fields=fields,
                          filter=filter_argument,
                          orderby="trade_date")

    def get_index_weights(self, index, trade_date):
        """
        Return the weights of an index's components for one date.

        Parameters
        ----------
        index : str
        trade_date : int

        Returns
        -------
        pd.DataFrame
            Indexed by symbol; 'weight' is the queried value divided by 100
            (presumably percent -> fraction), 'trade_date' is int.

        """
        # '000300.SH' is queried under the code '399300.SZ'
        if index == '000300.SH':
            index = '399300.SZ'

        filter_argument = self._dic2url({
            'index_code': index,
            'trade_date': trade_date
        })

        df_io, msg = self.query("lb.indexWeight",
                                fields="",
                                filter=filter_argument)
        if msg != '0,':
            print(msg)
        df_io = df_io.set_index('symbol')
        df_io = df_io.astype({'weight': float, 'trade_date': int})
        df_io.loc[:, 'weight'] = df_io['weight'] / 100.
        return df_io

    def get_index_weights_daily(self, index, start_date, end_date):
        """
        Return daily index weights between start_date and end_date.

        Weights are queried once per month step starting from the first trade
        date in range and forward-filled onto every trade date.

        Parameters
        ----------
        index : str
        start_date : int
        end_date : int

        Returns
        -------
        res : pd.DataFrame
            Index is trade_date, columns are symbols. Empty when there is no
            trade date in the requested range.

        """
        # TODO: temporary api
        trade_dates = self.get_trade_date_range(start_date, end_date)
        if len(trade_dates) == 0:
            # no trade date in range: nothing to query or align
            return pd.DataFrame(index=trade_dates)
        # assumes trade_dates is sorted ascending - TODO confirm
        start_date, end_date = trade_dates[0], trade_dates[-1]
        td = start_date

        dic = dict()
        symbols_set = set()
        while td <= end_date:
            df = self.get_index_weights(index, td)
            # update_date = df['trade_date'].iat[0]
            # if update_date >= start_date and update_date <= end_date:
            symbols_set.update(df.index)
            dic[td] = df['weight']

            td = jutil.get_next_period_day(td, 'month', 1)
        merge = pd.concat(dic, axis=1).T
        merge = merge.fillna(0.0)  # for those which are not components
        res = pd.DataFrame(index=trade_dates,
                           columns=sorted(symbols_set),
                           data=np.nan)
        res.update(merge)
        res = res.ffill()
        res = res.loc[start_date:end_date]
        return res

    def _get_index_comp(self, index, start_date, end_date):
        """
        Query raw index constituent records from the 'lb.indexCons' view.

        Parameters
        ----------
        index : str
            separated by ','
        start_date : int
        end_date : int

        Returns
        -------
        df_io : pd.DataFrame
        msg : str

        """
        filter_argument = self._dic2url({
            'index_code': index,
            'start_date': start_date,
            'end_date': end_date
        })

        df_io, msg = self.query("lb.indexCons",
                                fields="",
                                filter=filter_argument,
                                orderby="symbol")
        return df_io, msg

    def get_index_comp(self, index, start_date, end_date):
        """
        Return list of symbols that have been in index during start_date and end_date.

        Parameters
        ----------
        index : str
            separated by ','
        start_date : int
        end_date : int

        Returns
        -------
        list
            Unique symbols, sorted (as produced by np.unique).

        """
        df_io, msg = self._get_index_comp(index, start_date, end_date)
        if msg != '0,':
            print(msg)
        return list(np.unique(df_io.loc[:, 'symbol']))

    def get_index_comp_df(self, index, start_date, end_date):
        """
        Get index components on each day during start_date and end_date.

        Parameters
        ----------
        index : str
            separated by ','
        start_date : int
        end_date : int

        Returns
        -------
        res : pd.DataFrame
            index dates, columns all securities that have ever been components,
            values are 0 (not in) or 1 (in)

        """
        df_io, msg = self._get_index_comp(index, start_date, end_date)
        if msg != '0,':
            print(msg)

        try:
            text_types = (str, unicode)  # Python 2: accept unicode as well
        except NameError:
            text_types = (str,)

        def str2int(s):
            # Empty string means "no date"; map it to a far-future sentinel.
            if isinstance(s, text_types):
                return int(s) if s else 99999999
            # np.float was only an alias of the builtin float (and is gone
            # from current NumPy), so `float` alone covers the same values.
            elif isinstance(s, (int, np.integer, float)):
                return s
            else:
                raise NotImplementedError("type s = {}".format(type(s)))

        df_io.loc[:, 'in_date'] = df_io.loc[:, 'in_date'].apply(str2int)
        df_io.loc[:, 'out_date'] = df_io.loc[:, 'out_date'].apply(str2int)

        # df_io.set_index('symbol', inplace=True)
        dates = self.get_trade_date_range(start_date=start_date,
                                          end_date=end_date)

        dic = dict()
        gp = df_io.groupby(by='symbol')
        for sec, df in gp:
            mask = np.zeros_like(dates, dtype=int)
            for idx, row in df.iterrows():
                # NOTE(review): both bounds are exclusive, so in_date and
                # out_date themselves are marked 0 - confirm this is intended.
                bool_index = np.logical_and(dates > row['in_date'],
                                            dates < row['out_date'])
                mask[bool_index] = 1
            dic[sec] = mask

        res = pd.DataFrame(index=dates, data=dic)

        return res

    def get_industry_daily(self,
                           symbol,
                           start_date,
                           end_date,
                           type_='SW',
                           level=1):
        """
        Get the industry code of each symbol on each trade date.

        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        type_ : {'SW', 'ZZ'}
        level : {1, 2, 3, 4}
            Which level of the industry classification to use.

        Returns
        -------
        res : pd.DataFrame
            index dates, columns symbols
            values are industry code (as str)

        """
        df_raw = self.get_industry_raw(symbol, type_=type_, level=level)

        dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
        dic_sec = {
            sec: df.sort_values(by='in_date', axis=0).reset_index()
            for sec, df in dic_sec.items()
        }

        df_ann_tmp = pd.concat(
            {sec: df.loc[:, 'in_date']
             for sec, df in dic_sec.items()},
            axis=1)
        code_col = 'industry{:d}_code'.format(level)
        df_value_tmp = pd.concat(
            {sec: df.loc[:, code_col]
             for sec, df in dic_sec.items()},
            axis=1)

        # Full (row position x requested symbol) frames, NaN where a symbol
        # has no record.
        idx = np.unique(
            np.concatenate([df.index.values for df in dic_sec.values()]))
        symbol_arr = np.sort(symbol.split(','))
        df_ann = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        df_ann.loc[df_ann_tmp.index, df_ann_tmp.columns] = df_ann_tmp
        df_value = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        df_value.loc[df_value_tmp.index, df_value_tmp.columns] = df_value_tmp

        dates_arr = self.get_trade_date_range(start_date, end_date)
        df_industry = align.align(df_value, df_ann, dates_arr)

        # TODO before industry classification is available, we assume they belong to their first group.
        df_industry = df_industry.bfill()
        df_industry = df_industry.astype(str)

        return df_industry

    def get_industry_raw(self, symbol, type_='ZZ', level=1):
        """
        Get raw industry classification records of securities from
        ShenWanZhiShu or ZhongZhengZhiShu.

        Parameters
        ----------
        symbol : str
            separated by ','
        type_ : {'SW', 'ZZ'}
        level : {1, 2, 3, 4}
            Use which level of industry index classification.

        Returns
        -------
        df : pd.DataFrame
            Duplicated rows removed, 'in_date' cast to int.

        Raises
        ------
        ValueError
            If `type_` or `level` is not one of the accepted values.

        """
        if type_ == 'SW':
            src = u'申万研究所'.encode('utf-8')
            if level not in [1, 2, 3, 4]:
                raise ValueError("For [SW], level must be one of {1, 2, 3, 4}")
        elif type_ == 'ZZ':
            src = u'中证指数有限公司'.encode('utf-8')
            if level not in [1, 2, 3, 4]:
                # message now matches the levels this check really accepts
                raise ValueError("For [ZZ], level must be one of {1, 2, 3, 4}")
        else:
            raise ValueError("type_ must be one of SW of ZZ")

        filter_argument = self._dic2url({
            'symbol': symbol,
            'industry_src': src
        })
        fields_list = [
            'symbol', 'industry{:d}_code'.format(level),
            'industry{:d}_name'.format(level)
        ]

        df_raw, msg = self.query("lb.secIndustry",
                                 fields=','.join(fields_list),
                                 filter=filter_argument,
                                 orderby="symbol")
        if msg != '0,':
            print(msg)

        # NOTE(review): 'in_date' is not in fields_list; presumably the
        # server returns it regardless - confirm.
        df_raw = df_raw.astype(dtype={
            'in_date': int,
            # 'out_date': int
        })
        return df_raw.drop_duplicates()

    def get_adj_factor_daily(self, symbol, start_date, end_date, div=False):
        """
        Get the adjust factor of each symbol on each trade date.

        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        div : bool
            False for normal adjust factor, True for the ratio to the
            previous row (first row filled with 1.0).

        Returns
        -------
        res : pd.DataFrame
            index dates, columns symbols
            values are adjust factors. The index spans the earliest to the
            latest trade_date present in the raw query result.

        """
        df_raw = self.get_adj_factor_raw(symbol,
                                         start_date=start_date,
                                         end_date=end_date)

        dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
        dic_sec = {
            sec: df.set_index('trade_date').loc[:, 'adjust_factor']
            for sec, df in dic_sec.items()
        }

        # TODO: duplicate codes with dataview.py: line 512
        res = pd.concat(dic_sec, axis=1)  # TODO: fillna ?

        idx = np.unique(
            np.concatenate([df.index.values for df in dic_sec.values()]))
        symbol_arr = np.sort(symbol.split(','))
        res_final = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        res_final.loc[res.index, res.columns] = res

        # align to every trade date
        raw_dates = df_raw.loc[:, 'trade_date']
        s, e = raw_dates.min(), raw_dates.max()
        dates_arr = self.get_trade_date_range(s, e)
        if len(dates_arr) != len(res_final.index):
            res_final = res_final.reindex(dates_arr)

            res_final = res_final.ffill().bfill()

        if div:
            res_final = res_final.div(res_final.shift(1, axis=0)).fillna(1.0)

        # res = res.loc[start_date: end_date, :]

        return res_final

    def get_adj_factor_raw(self, symbol, start_date=None, end_date=None):
        """
        Query adjust factor for symbols.

        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int, optional
            None is sent as an empty value.
        end_date : int, optional
            None is sent as an empty value.

        Returns
        -------
        df : pd.DataFrame
            Columns symbol (str), trade_date (int), adjust_factor (float),
            duplicated rows removed.

        """
        if start_date is None:
            start_date = ""
        if end_date is None:
            end_date = ""

        filter_argument = self._dic2url({
            'symbol': symbol,
            'start_date': start_date,
            'end_date': end_date
        })
        fields_list = ['symbol', 'trade_date', 'adjust_factor']

        df_raw, msg = self.query("lb.secAdjFactor",
                                 fields=','.join(fields_list),
                                 filter=filter_argument,
                                 orderby="symbol")
        if msg != '0,':
            print(msg)
        df_raw = df_raw.astype(dtype={
            'symbol': str,
            'trade_date': int,
            'adjust_factor': float
        })
        return df_raw.drop_duplicates()

    def query_inst_info(self, symbol, inst_type="", fields=""):
        """
        Query instrument information from the 'jz.instrumentInfo' view.

        Parameters
        ----------
        symbol : str
            separated by ','
        inst_type : str, optional
            separated by ','; "" selects "1,2,3,4,5,101,102,103,104".
        fields : str, optional
            separated by ',', default ""

        Returns
        -------
        pd.DataFrame
            Indexed by symbol. Of symbol, list_date, delist_date and
            inst_type, those present are cast to str / int.

        """
        if inst_type == "":
            inst_type = "1,2,3,4,5,101,102,103,104"

        filter_argument = self._dic2url({
            'symbol': symbol,
            'inst_type': inst_type
        })

        df_raw, msg = self.query("jz.instrumentInfo",
                                 fields=fields,
                                 filter=filter_argument,
                                 orderby="symbol")
        if msg != '0,':
            print(msg)

        dtype_map = {
            'symbol': str,
            'list_date': int,
            'delist_date': int,
            'inst_type': int
        }
        # only cast the columns that were actually returned
        cols = set(df_raw.columns)
        dtype_map = {k: v for k, v in dtype_map.items() if k in cols}

        df_raw = df_raw.astype(dtype=dtype_map)

        res = df_raw.set_index('symbol')
        return res

    # -----------------------------------------------------------------------------------
    # subscribe for real time trading
    def subscribe(self, symbols):
        """
        Subscribe to market data, with mkt_data_callback as the callback.

        Parameters
        ----------
        symbols : str
            Separated by ,

        """
        self.data_api.subscribe(symbols, func=self.mkt_data_callback)

    def mkt_data_callback(self, key, quote):
        """
        Wrap a quote in a MARKET_DATA Event and put it on ``self.ctx.instance``.

        Parameters
        ----------
        key
            Not used.
        quote
            Stored in the event as ``e.dic['quote']``.

        """
        # NOTE(review): self.ctx is not set in this class; presumably
        # provided by DataService or the surrounding framework - confirm.
        e = Event(EVENT_TYPE.MARKET_DATA)
        # print quote
        e.dic = {'quote': quote}
        self.ctx.instance.put(e)

    # ---------------------------------------------------------------------
    # Calendar

    def get_trade_date_range(self, start_date, end_date):
        """
        Get array of trade dates within given range.
        Return zero size array if no trade dates within range.

        Parameters
        ----------
        start_date : int
            presumably YYYYmmdd - TODO confirm
        end_date : int

        Returns
        -------
        trade_dates_arr : np.ndarray
            dtype = int

        """
        filter_argument = self._dic2url({
            'start_date': start_date,
            'end_date': end_date
        })

        df_raw, msg = self.data_api.query("jz.secTradeCal",
                                          fields="trade_date",
                                          filter=filter_argument,
                                          orderby="")
        # NOTE(review): presumably None comes back instead of a frame when
        # the query fails - confirm; treated like "no trade dates".
        if df_raw is None or df_raw.empty:
            return np.array([], dtype=int)

        trade_dates_arr = df_raw['trade_date'].values.astype(int)
        return trade_dates_arr

    def get_last_trade_date(self, date):
        """
        Get the latest trade date strictly before `date`.

        Only the two weeks before `date` are searched.

        Parameters
        ----------
        date : int

        Returns
        -------
        res : int

        Raises
        ------
        IndexError
            If no trade date is found in the searched window.

        """
        dt = jutil.convert_int_to_datetime(date)
        delta = pd.Timedelta(weeks=2)
        dt_old = dt - delta
        date_old = jutil.convert_datetime_to_int(dt_old)

        dates = self.get_trade_date_range(date_old, date)
        mask = dates < date
        res = dates[mask][-1]

        return res

    def is_trade_date(self, date):
        """
        Check whether date is a trade date.

        Parameters
        ----------
        date : int

        Returns
        -------
        bool

        """
        dates = self.get_trade_date_range(date, date)
        return len(dates) > 0

    def get_next_trade_date(self, date):
        """
        Get the earliest trade date strictly after `date`.

        Only the two weeks after `date` are searched.

        Parameters
        ----------
        date : int

        Returns
        -------
        res : int

        Raises
        ------
        IndexError
            If no trade date is found in the searched window.

        """
        dt = jutil.convert_int_to_datetime(date)
        delta = pd.Timedelta(weeks=2)
        dt_new = dt + delta
        date_new = jutil.convert_datetime_to_int(dt_new)

        dates = self.get_trade_date_range(date, date_new)
        mask = dates > date
        res = dates[mask][0]

        return res
Beispiel #2
0
    def init_from_config(self, props):
        """
        
        Parameters
        ----------
        props : dict
            Configurations used for initialization.

        Example
        -------
        {"remote.data.address": "tcp://*****:*****@{}".format(username, address))
        INDENT = ' ' * 4

        if self.data_api is None:
            if (address == "") or (username == "") or (password == ""):
                raise InitializeError(
                    "no address, username or password available!")
        elif self.data_api._loggined:
            if ((address == self._address) and (time_out == self._timeout)
                    and (username == self._username)
                    and (password == self._password)):
                print(INDENT +
                      "Already login as {:s}, skip init_from_config".format(
                          username))
                return  # do not login with the same props again
            else:
                self.data_api.close()
                self.data_api = None

        self._address = address
        self._username = username
        self._password = password
        self._timeout = time_out

        data_api = DataApi(self._address, use_jrpc=False)
        data_api.set_timeout(timeout=self._timeout)
        r, err_msg = data_api.login(username=self._username,
                                    password=self._password)
        if not r:
            print(INDENT + "login failed: err_msg = '{}'\n".format(err_msg))
        else:
            self.data_api = data_api
            print(INDENT + "login success \n")
Beispiel #3
0
    def init_from_config(self, props):
        """
        
        Parameters
        ----------
        props : dict
            Configurations used for initialization.

        Example
        -------
        {"remote.data.address": "tcp://*****:*****@{}".format(username, address))
        INDENT = ' ' * 4
        
        if self.data_api_loginned:
            if (address == "") or (username == "") or (password == ""):
                raise InitializeError("no address, username or password available!")
            elif ((address == self._address) and (time_out == self._timeout)
                and (username == self._username) and (password == self._password)):
                print(INDENT + "Already login as {:s}, skip init_from_config".format(username))
                return '0,'  # do not login with the same props again
            else:
                self.data_api.close()
                self.data_api = None

        self._address = address
        self._username = username
        self._password = password
        self._timeout = time_out
        
        data_api = DataApi(self._address, use_jrpc=False)
        data_api.set_timeout(timeout=self._timeout)
        r, err_msg = data_api.login(username=self._username, password=self._password)
        if not r:
            print(INDENT + "login failed: err_msg = '{}'\n".format(err_msg))
        else:
            self.data_api = data_api
            print(INDENT + "login success \n")
        
        return err_msg