def _align_bivariate(self, df1, df2, force_align=False):
    """
    Prepare two operands so that they can be combined with each other.

    The operands are returned untouched unless both are DataFrames and
    both ``self.ann_dts`` and ``self.trade_dts`` are set. Otherwise the
    operand with fewer rows is passed through ``align`` together with
    ``self.ann_dts`` and ``self.trade_dts``. When the row counts are equal,
    both operands are aligned only if `force_align` is true.

    Parameters
    ----------
    df1 : pd.DataFrame or other
    df2 : pd.DataFrame or other
    force_align : bool
        Align both operands even when they have the same number of rows.

    Returns
    -------
    tuple
        (df1, df2), each either the original object or the result of
        ``align``.

    """
    # Guard clauses: anything that is not a pair of DataFrames is left alone.
    both_frames = isinstance(df1, pd.DataFrame) and isinstance(df2, pd.DataFrame)
    if not both_frames:
        return (df1, df2)
    # Alignment needs both date tables.
    if self.ann_dts is None or self.trade_dts is None:
        return (df1, df2)

    n_rows1, n_rows2 = len(df1.index), len(df2.index)
    if n_rows1 > n_rows2:
        # df2 is the shorter operand -- presumably the one that is not yet
        # on the trade-date index (TODO confirm against callers).
        df2 = align(df2, self.ann_dts, self.trade_dts)
    elif n_rows1 < n_rows2:
        df1 = align(df1, self.ann_dts, self.trade_dts)
    elif force_align:
        # Equal row counts give no hint which one needs aligning; do both.
        df1 = align(df1, self.ann_dts, self.trade_dts)
        df2 = align(df2, self.ann_dts, self.trade_dts)
    return (df1, df2)
Example #2
0
def test_remote_data_service_industry():
    """
    Smoke-test ``align`` on raw industry data from the remote data service.

    Fetches the components of index '000300.SH', downloads their raw 'ZZ'
    industry records, and runs ``align`` in two ways: once on wide frames
    holding every security as a column, and once per security for ten
    securities. The concatenated per-security result is printed. Nothing
    is asserted; the test only checks that the calls do not raise.
    """
    from quantos.data.align import align
    from quantos.data.dataview import DataView
    import pandas as pd

    ds = RemoteDataService()
    arr = ds.get_index_comp(index='000300.SH', start_date=20130101, end_date=20170505)
    df = ds.get_industry_raw(symbol=','.join(arr), type_='ZZ')
    df = df.astype(dtype={'in_date': int})

    # One frame per security. The loop variable is named df_sec so that it
    # cannot clobber `df` (list-comprehension variables leak on Python 2).
    dic_sec = DataView._group_df_to_dict(df, by='symbol')
    dic_sec = {sec: df_sec.reset_index() for sec, df_sec in dic_sec.items()}

    # Wide frames: one column per security.
    df_ann = pd.concat(
        [df_sec.loc[:, 'in_date'].rename(sec) for sec, df_sec in dic_sec.items()],
        axis=1)
    df_value = pd.concat(
        [df_sec.loc[:, 'industry1_code'].rename(sec) for sec, df_sec in dic_sec.items()],
        axis=1)

    dates_arr = ds.get_trade_date(20140101, 20170505)
    # The result of the wide alignment is not inspected; the call is kept
    # only to exercise `align` on multi-column input.
    align(df_value, df_ann, dates_arr)
    # print('') emits one empty line on both Python 2 and Python 3.
    print('')

    def align_single_df(df_one_sec):
        """Align one security's industry codes using its own in_date column."""
        df_value = df_one_sec.loc[:, ['industry1_code']]
        df_ann = df_one_sec.loc[:, ['in_date']]
        return align(df_value, df_ann, dates_arr)

    # Only ten securities are aligned one by one (presumably to keep the
    # test quick). list() is required because dict.items() cannot be sliced
    # on Python 3.
    res_list = [align_single_df(df_sec) for _, df_sec in list(dic_sec.items())[:10]]
    res = pd.concat(res_list, axis=1)
    print(res)
 def _align_univariate(self, df1):
     """
     Align a single operand when its row count differs from the trade dates.

     `df1` is returned as-is when it is not a DataFrame, when either
     ``self.ann_dts`` or ``self.trade_dts`` is unset, or when it already
     has exactly ``len(self.trade_dts)`` rows. In every other case the
     result of ``align(df1, self.ann_dts, self.trade_dts)`` is returned.

     Parameters
     ----------
     df1 : pd.DataFrame or other

     Returns
     -------
     pd.DataFrame or other
         The original object or the aligned frame.

     """
     if not isinstance(df1, pd.DataFrame):
         return df1
     if self.ann_dts is None or self.trade_dts is None:
         return df1
     # Same number of rows as the trade dates: treated as already aligned.
     if len(df1.index) == len(self.trade_dts):
         return df1
     return align(df1, self.ann_dts, self.trade_dts)
Example #4
0
    def get_industry_daily(self, symbol, start_date, end_date, type_='SW'):
        """
        Get the industry code of each symbol on each trade date between
        start_date and end_date.

        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        type_ : {'SW', 'ZZ'}
            Passed through to ``get_industry_raw``.

        Returns
        -------
        res : pd.DataFrame
            index dates, columns symbols
            values are industry code, cast to str

        """
        df_raw = self.get_industry_raw(symbol, type_=type_)

        # One frame per security, ordered by 'in_date' and re-indexed from 0
        # so the per-security series share row positions when concatenated.
        dic_sec = self._group_df_to_dict(df_raw, by='symbol')
        dic_sec = {
            sec: df_sec.sort_values(by='in_date', axis=0).reset_index()
            for sec, df_sec in dic_sec.items()
        }

        # Fix the column order once so both wide frames are guaranteed to
        # list the securities in the same order.
        secs = list(dic_sec)
        df_ann = pd.concat(
            [dic_sec[sec].loc[:, 'in_date'].rename(sec) for sec in secs],
            axis=1)
        df_value = pd.concat(
            [dic_sec[sec].loc[:, 'industry1_code'].rename(sec) for sec in secs],
            axis=1)

        dates_arr = self.get_trade_date(start_date, end_date)
        df_industry = align.align(df_value, df_ann, dates_arr)

        # TODO before industry classification is available, we assume they belong to their first group.
        # .bfill() is the non-deprecated spelling of fillna(method='bfill').
        df_industry = df_industry.bfill()
        df_industry = df_industry.astype(str)

        return df_industry
Example #5
0
 def align_single_df(df_one_sec):
     """
     Run ``align`` for one security's industry records.

     The 'industry1_code' and 'in_date' columns of `df_one_sec` are each
     taken as a one-column DataFrame and handed to ``align`` together with
     ``dates_arr``, which must be defined in an enclosing scope.

     Parameters
     ----------
     df_one_sec : pd.DataFrame
         Must contain the columns 'industry1_code' and 'in_date'.

     Returns
     -------
     The value returned by ``align``.

     """
     # List selectors keep both pieces as DataFrames rather than Series.
     codes = df_one_sec.loc[:, ['industry1_code']]
     in_dates = df_one_sec.loc[:, ['in_date']]
     return align(codes, in_dates, dates_arr)