Python group_df_to_dictの例、jaqs.util.group_df_to_dict Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_dataservice.py プロジェクト: smartgang/JAQS

def test_remote_data_service_industry():
    """Smoke-test raw industry retrieval and alignment onto trade dates.

    Fetches the members of index 000300.SH, pulls their raw 'ZZ' industry
    records, then runs ``align`` once on wide (one column per symbol) frames
    and once per symbol for the first ten symbols.  The test makes no
    assertions; it passes as long as nothing raises.
    """
    from jaqs.data.align import align
    import pandas as pd

    members = ds.get_index_comp(index='000300.SH', start_date=20130101, end_date=20170505)
    df = ds.get_industry_raw(symbol=','.join(members), type_='ZZ')

    from jaqs.data import DataView
    grouped = jutil.group_df_to_dict(df, by='symbol')
    dic_sec = {}
    for sec, df_sec in grouped.items():
        dic_sec[sec] = df_sec.reset_index()

    def wide_frame(column):
        # One column per symbol, each named after its symbol.
        per_symbol = [df_sec.loc[:, column].rename(sec)
                      for sec, df_sec in dic_sec.items()]
        return pd.concat(per_symbol, axis=1)

    df_ann = wide_frame('in_date')
    df_value = wide_frame('industry1_code')

    dates_arr = ds.get_trade_date_range(20140101, 20170505)
    res = align(df_value, df_ann, dates_arr)

    def align_single_df(df_one_sec):
        # Same alignment, but for a single symbol's records.
        return align(df_one_sec.loc[:, ['industry1_code']],
                     df_one_sec.loc[:, ['in_date']],
                     dates_arr)

    # Only the first ten symbols are aligned individually.
    first_ten = list(dic_sec.values())[:10]
    res = pd.concat([align_single_df(df_sec) for df_sec in first_ten], axis=1)

コード例 #2

0

ファイルを表示

    def gen_report(self, source_dir, template_fn, out_folder='.', selected=None):
        """
        Generate HTML report of the trade analysis and save returns as CSV.

        Besides rendering 'report.html' through ``Report``, this writes
        ``self.returns`` to 'returns.csv' inside `out_folder` and merges the
        report data into ``self.report_dic``.

        Parameters
        ----------
        source_dir : str
            path of directory where HTML template and css files are stored.
        template_fn : str
            File name of HTML template.
        out_folder : str
            Output folder of report.
        selected : list of str or None
            List of symbols whose detailed PnL curve and position will be plotted.
            # TODO: this parameter should not belong to function

        """
        dic = dict()
        dic['html_title'] = "Alpha Strategy Backtest Result"
        dic['selected_securities'] = selected
        # we do not want to show username / password in report
        dic['props'] = {k: v for k, v in self.configs.items()
                        if ('username' not in k and 'password' not in k)}
        dic['metrics'] = self.metrics
        dic['position_change'] = self.position_change
        dic['account'] = self.account
        # one daily DataFrame per symbol
        dic['df_daily'] = jutil.group_df_to_dict(self.daily, by='symbol')
        
        self.report_dic.update(dic)
        
        self.returns.to_csv(os.path.join(out_folder, 'returns.csv'))
    
        r = Report(self.report_dic, source_dir=source_dir, template_fn=template_fn, out_folder=out_folder)
        
        r.generate_html()
        r.output_html('report.html')

コード例 #3

0

ファイルを表示

    def get_industry_daily(self,
                           symbol,
                           start_date,
                           end_date,
                           type_='SW',
                           level=1):
        """
        Get the industry code of each symbol on each trade date between start_date and end_date.
        
        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        type_ : {'SW', 'ZZ'}
        level : int
            Selects which 'industry{level}_code' column of the raw data is used.

        Returns
        -------
        res : pd.DataFrame
            index dates, columns symbols
            values are industry code (converted to str)

        """
        df_raw = self.get_industry_raw(symbol, type_=type_, level=level)

        dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
        # .items() works on both Python 2 and 3; .viewitems() is Python 2 only.
        dic_sec = {
            sec: df.sort_values(by='in_date', axis=0).reset_index()
            for sec, df in dic_sec.items()
        }

        df_ann_tmp = pd.concat(
            {sec: df.loc[:, 'in_date']
             for sec, df in dic_sec.items()},
            axis=1)
        df_value_tmp = pd.concat(
            {
                sec: df.loc[:, 'industry{:d}_code'.format(level)]
                for sec, df in dic_sec.items()
            },
            axis=1)

        # Build frames covering every requested symbol, so symbols without
        # any raw record still get an (all-NaN) column.
        idx = np.unique(
            np.concatenate([df.index.values for df in dic_sec.values()]))
        symbol_arr = np.sort(symbol.split(','))
        df_ann = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        df_ann.loc[df_ann_tmp.index, df_ann_tmp.columns] = df_ann_tmp
        df_value = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        df_value.loc[df_value_tmp.index, df_value_tmp.columns] = df_value_tmp

        dates_arr = self.get_trade_date_range(start_date, end_date)
        df_industry = align.align(df_value, df_ann, dates_arr)

        # TODO before industry classification is available, we assume they belong to their first group.
        # .bfill() is the non-deprecated spelling of fillna(method='bfill').
        df_industry = df_industry.bfill()
        df_industry = df_industry.astype(str)

        return df_industry

コード例 #4

0

ファイルを表示

    def query_adj_factor_daily(self, symbol, start_date, end_date, div=False):
        """
        Get adjust factors of the given symbols on each trade date.

        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        div : bool
            False for normal adjust factor, True for diff.

        Returns
        -------
        res : pd.DataFrame
            index dates, columns symbols
            values are adjust factors; if div is True, each value is divided
            by the previous row's value and NaN results are replaced by 1.0.

        """
        _flt = 'symbol=%s&start_date=%s&end_date=%s' % (symbol, start_date, end_date)

        # noinspection PyBroadException
        try:
            df_raw, msg = self.query('lb.secAdjFactor', _flt, '')
        except Exception:
            # Deliberate best-effort fallback to the secondary query.
            # NOTE(review): `div` is not forwarded to the fallback -- confirm this is intended.
            print('query adjust_factor from Stock_D')
            return self.query_adj_factor_daily_2(symbol, start_date, end_date)

        dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
        dic_sec = {sec: df.set_index('trade_date').loc[:, 'adjust_factor']
                   for sec, df in dic_sec.items()}

        # TODO: duplicate codes with dataview.py: line 512
        res = pd.concat(dic_sec, axis=1)  # TODO: fillna ?

        # Build a frame covering every requested symbol, so symbols without
        # any raw record still get an (all-NaN) column.
        idx = np.unique(np.concatenate([df.index.values for df in dic_sec.values()]))
        symbol_arr = np.sort(symbol.split(','))
        res_final = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        res_final.loc[res.index, res.columns] = res

        # align to every trade date
        s, e = df_raw.loc[:, 'trade_date'].min(), df_raw.loc[:, 'trade_date'].max()
        dates_arr = self.query_trade_dates(s, e)
        if not len(dates_arr) == len(res_final.index):
            res_final = res_final.reindex(dates_arr)

            # .ffill()/.bfill() are the non-deprecated spelling of fillna(method=...).
            res_final = res_final.ffill().bfill()

        if div:
            res_final = res_final.div(res_final.shift(1, axis=0)).fillna(1.0)

        # res = res.loc[start_date: end_date, :]
        res_final.index = res_final.index.astype(int)

        return res_final

コード例 #5

0

ファイルを表示

ファイル: analyze.py プロジェクト: JTJ17/JAQS

    def gen_report(self,
                   source_dir,
                   template_fn,
                   out_folder='.',
                   selected=None):
        """
        Render the trade analysis as 'report.html'.

        The report data is merged into ``self.report_dic`` before rendering.

        Parameters
        ----------
        source_dir : str
            Directory holding the HTML template and css files.
        template_fn : str
            File name of the HTML template.
        out_folder : str
            Folder the report is written to.
        selected : list of str or None
            Symbols whose detailed PnL curve and position will be plotted.
            # TODO: this parameter should not belong to function

        """
        # Credentials must not be shown in the report.
        visible_props = {}
        for key, value in self.configs.items():
            if 'username' in key or 'password' in key:
                continue
            visible_props[key] = value

        self.report_dic.update({
            'html_title': "Alpha Strategy Backtest Result",
            'selected_securities': selected,
            'props': visible_props,
            'performance_metrics_report': self.performance_metrics_report,
            'risk_metrics_report': self.risk_metrics_report,
            'position_change': self.position_change,
            'account': self.account,
            'df_daily': jutil.group_df_to_dict(self.daily, by='symbol'),
            'daily_position': None,  # self.daily_position
            'rebalance_positions': self.rebalance_positions,
        })

        report = Report(self.report_dic,
                        source_dir=source_dir,
                        template_fn=template_fn,
                        out_folder=out_folder)
        report.generate_html()
        report.output_html('report.html')

コード例 #6

0

ファイルを表示

ファイル: dataservice.py プロジェクト: sukeyisme/JAQS

    def query_adj_factor_daily(self, symbol, start_date, end_date, div=False):
        """
        Get adjust factors of the given symbols on each trade date.
        
        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        div : bool
            False for normal adjust factor, True for diff.

        Returns
        -------
        res : pd.DataFrame
            index dates, columns symbols
            values are adjust factors; if div is True, each value is divided
            by the previous row's value and NaN results are replaced by 1.0.

        """
        df_raw = self.query_adj_factor_raw(symbol, start_date=start_date, end_date=end_date)
    
        dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
        dic_sec = {sec: df.set_index('trade_date').loc[:, 'adjust_factor']
                   for sec, df in dic_sec.items()}
        
        # TODO: duplicate codes with dataview.py: line 512
        res = pd.concat(dic_sec, axis=1)  # TODO: fillna ?
        
        # Build a frame covering every requested symbol, so symbols without
        # any raw record still get an (all-NaN) column.
        idx = np.unique(np.concatenate([df.index.values for df in dic_sec.values()]))
        symbol_arr = np.sort(symbol.split(','))
        res_final = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        res_final.loc[res.index, res.columns] = res

        # align to every trade date
        s, e = df_raw.loc[:, 'trade_date'].min(), df_raw.loc[:, 'trade_date'].max()
        dates_arr = self.query_trade_dates(s, e)
        if not len(dates_arr) == len(res_final.index):
            res_final = res_final.reindex(dates_arr)
            
            # .ffill()/.bfill() are the non-deprecated spelling of fillna(method=...).
            res_final = res_final.ffill().bfill()

        if div:
            res_final = res_final.div(res_final.shift(1, axis=0)).fillna(1.0)
            
        # res = res.loc[start_date: end_date, :]

        return res_final

コード例 #7

0

ファイルを表示

ファイル: dataservice.py プロジェクト: sukeyisme/JAQS

    def query_industry_daily(self, symbol, start_date, end_date, type_='SW', level=1):
        """
        Get the industry code of each symbol on each trade date between start_date and end_date.
        
        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        type_ : {'SW', 'ZZ'}
        level : int
            Selects which 'industry{level}_code' column of the raw data is used.

        Returns
        -------
        res : pd.DataFrame
            index dates, columns symbols
            values are industry code (converted to str)

        """
        df_raw = self.query_industry_raw(symbol, type_=type_, level=level)
        
        dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
        dic_sec = {sec: df.sort_values(by='in_date', axis=0).reset_index()
                   for sec, df in dic_sec.items()}

        df_ann_tmp = pd.concat({sec: df.loc[:, 'in_date'] for sec, df in dic_sec.items()}, axis=1)
        df_value_tmp = pd.concat({sec: df.loc[:, 'industry{:d}_code'.format(level)]
                                  for sec, df in dic_sec.items()},
                                 axis=1)
        
        # Build frames covering every requested symbol, so symbols without
        # any raw record still get an (all-NaN) column.
        idx = np.unique(np.concatenate([df.index.values for df in dic_sec.values()]))
        symbol_arr = np.sort(symbol.split(','))
        df_ann = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        df_ann.loc[df_ann_tmp.index, df_ann_tmp.columns] = df_ann_tmp
        df_value = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        df_value.loc[df_value_tmp.index, df_value_tmp.columns] = df_value_tmp

        dates_arr = self.query_trade_dates(start_date, end_date)
        df_industry = align.align(df_value, df_ann, dates_arr)
        
        # TODO before industry classification is available, we assume they belong to their first group.
        # .bfill() is the non-deprecated spelling of fillna(method='bfill').
        df_industry = df_industry.bfill()
        df_industry = df_industry.astype(str)
        
        return df_industry

コード例 #8

0

ファイルを表示

ファイル: test_util.py プロジェクト: ruzwdy/JAQS

def test_pdutil():
    """Check ``jutil.fillinf``, ``jutil.to_quantile`` and ``jutil.group_df_to_dict``.

    A random 4x20 frame is seeded with three NaN and two infinite cells;
    ``fillinf`` is expected to yield five nulls in total, and grouping by a
    two-valued column is expected to produce exactly the keys 'a' and 'b'.
    """
    def null_count(frame):
        return frame.isnull().sum().sum()

    df = pd.DataFrame(np.random.rand(4, 20))
    for row, col in ((1, 2), (3, 4), (1, 4)):
        df.iloc[row, col] = np.nan
    assert null_count(df) == 3

    # Infinite values are not counted as null by isnull().
    df.iloc[2, 11] = np.inf
    df.iloc[2, 12] = -np.inf
    assert null_count(df) == 3

    df2 = jutil.fillinf(df)
    assert null_count(df2) == 5

    # Only exercised for errors; the result is not inspected.
    res_q = jutil.to_quantile(df, 5, axis=1)

    df3 = df.copy()
    df3['group'] = ['a', 'a', 'b', 'a']

    dic = jutil.group_df_to_dict(df3, by='group')
    assert set(dic.keys()) == {'a', 'b'}

コード例 #9

0

ファイルを表示

ファイル: test_dataservice.py プロジェクト: sukeyisme/JAQS

def test_remote_data_service_industry():
    """Smoke-test raw industry queries and alignment onto trade dates.

    Queries the members of index 000300.SH, fetches their raw industry
    records for both 'SW' and 'ZZ', tolerates ``ValueError`` from two calls
    with bad arguments, then runs ``align`` on wide frames and on the first
    ten single-symbol frames.  The test makes no assertions.
    """
    from jaqs.data.align import align
    import pandas as pd

    members = ds.query_index_member(index='000300.SH', start_date=20130101, end_date=20170505)
    df = ds.query_industry_raw(symbol=','.join(members), type_='SW')
    # Only the 'ZZ' result is used below.
    df = ds.query_industry_raw(symbol=','.join(members), type_='ZZ')

    # errors: a ValueError from either bad call is swallowed
    for bad_kwargs in ({'type_': 'ZZ', 'level': 5}, {'type_': 'blabla'}):
        try:
            ds.query_industry_raw(symbol=','.join(members), **bad_kwargs)
        except ValueError:
            pass

    from jaqs.data import DataView
    grouped = jutil.group_df_to_dict(df, by='symbol')
    dic_sec = {}
    for sec, df_sec in grouped.items():
        dic_sec[sec] = df_sec.reset_index()

    # One column per symbol, each named after its symbol.
    ann_columns = [df_sec.loc[:, 'in_date'].rename(sec)
                   for sec, df_sec in dic_sec.items()]
    df_ann = pd.concat(ann_columns, axis=1)
    value_columns = [df_sec.loc[:, 'industry1_code'].rename(sec)
                     for sec, df_sec in dic_sec.items()]
    df_value = pd.concat(value_columns, axis=1)

    dates_arr = ds.query_trade_dates(20140101, 20170505)
    res = align(df_value, df_ann, dates_arr)

    def align_single_df(df_one_sec):
        # Same alignment, but for a single symbol's records.
        single_value = df_one_sec.loc[:, ['industry1_code']]
        single_ann = df_one_sec.loc[:, ['in_date']]
        return align(single_value, single_ann, dates_arr)

    # Only the first ten symbols are aligned individually.
    res_list = []
    for df_sec in list(dic_sec.values())[:10]:
        res_list.append(align_single_df(df_sec))
    res = pd.concat(res_list, axis=1)

コード例 #10

0

ファイルを表示

ファイル: analyze.py プロジェクト: sukeyisme/JAQS

    def gen_report(self, source_dir, template_fn, out_folder='.', selected=None):
        """
        Render the trade analysis as 'report.html'.

        The report data is merged into ``self.report_dic`` before rendering.

        Parameters
        ----------
        source_dir : str
            Directory holding the HTML template and css files.
        template_fn : str
            File name of the HTML template.
        out_folder : str
            Folder the report is written to.
        selected : list of str or None
            Symbols whose detailed PnL curve and position will be plotted.
            # TODO: this parameter should not belong to function

        """
        def is_credential(name):
            return 'username' in name or 'password' in name

        dic = dict()
        dic['html_title'] = "Alpha Strategy Backtest Result"
        dic['selected_securities'] = selected
        # username / password entries are kept out of the report
        dic['props'] = dict((k, v) for k, v in self.configs.items()
                            if not is_credential(k))
        dic['performance_metrics'] = self.performance_metrics
        dic['risk_metrics'] = self.risk_metrics
        dic['position_change'] = self.position_change
        dic['account'] = self.account
        dic['df_daily'] = jutil.group_df_to_dict(self.daily, by='symbol')
        dic['daily_position'] = None  # self.daily_position
        dic['rebalance_positions'] = self.rebalance_positions

        self.report_dic.update(dic)

        report = Report(
            self.report_dic,
            source_dir=source_dir,
            template_fn=template_fn,
            out_folder=out_folder,
        )
        report.generate_html()
        report.output_html('report.html')