コード例 #1
0
ファイル: dump_bin.py プロジェクト: newlyedward/qlib
    def _dump_bin(self, file_or_data: [Path, pd.DataFrame],
                  calendar_list: List[pd.Timestamp]):
        """Dump one instrument's data into its own features directory.

        Parameters
        ----------
        file_or_data : Path or pd.DataFrame
            Either a source file, which is loaded through
            ``self._get_source_data``, or an already loaded frame whose
            first row supplies the symbol.
        calendar_list : List[pd.Timestamp]
            Calendar forwarded to ``self._data_to_bin``; when it is empty a
            warning is logged and nothing is written.

        Raises
        ------
        ValueError
            If ``file_or_data`` is neither a ``Path`` nor a ``pd.DataFrame``.
        """
        if not calendar_list:
            logger.warning("calendar_list is empty")
            return

        if isinstance(file_or_data, pd.DataFrame):
            frame = file_or_data
            if frame.empty:
                # An empty frame carries no symbol to read; skip silently.
                return
            first_symbol = frame.iloc[0][self.symbol_field_name]
            code = fname_to_code(str(first_symbol).lower())
        elif isinstance(file_or_data, Path):
            code = self.get_symbol_from_file(file_or_data)
            frame = self._get_source_data(file_or_data)
        else:
            raise ValueError(f"not support {type(file_or_data)}")

        if frame is None or frame.empty:
            logger.warning(f"{code} data is None or empty")
            return

        # Duplicate dates would make the later reindex raise, so drop them.
        unique_rows = frame.drop_duplicates(self.date_field_name)

        # Each instrument gets a lower-cased directory under the features dir.
        dir_name = code_to_fname(code).lower()
        target_dir = self._features_dir.joinpath(dir_name)
        target_dir.mkdir(parents=True, exist_ok=True)
        self._data_to_bin(unique_rows, calendar_list, target_dir)
コード例 #2
0
    def save_instrument(self, symbol, df: pd.DataFrame):
        """Merge ``df`` into the CSV stored for ``symbol`` and rewrite the file.

        The target file is ``self.save_dir / f"{symbol}.csv"`` after the
        symbol has gone through ``self.normalize_symbol`` and
        ``code_to_fname``.

        * No existing file: ``df`` is de-duplicated and written.
        * Existing file, merge adds no rows: returns without writing.
        * Existing file, merge ends up shorter than the old data: an error
          is logged, the current file is renamed with a ``.bak.csv`` suffix
          and the de-duplicated old data is written back.
        * Existing file, merge adds rows: the new rows are appended to the
          old data and the result is written.

        Whenever something is written it is indexed by ``['date', 'period']``
        and sorted by that index first.

        Parameters
        ----------
        symbol
            Instrument identifier; normalized before being used as file name.
        df : pd.DataFrame
            New records. The code reads its ``date`` column and indexes the
            result by ``date`` and ``period``. Its index is overwritten in
            place when a file already exists.
        """
        # NOTE(review): the empty-input guard below is commented out, so a
        # None or empty ``df`` is not rejected at this point.
        # if df is None or df.empty:
        #     logger.warning(f"{symbol} is empty")
        #     return
        symbol = self.normalize_symbol(symbol)
        symbol = code_to_fname(symbol)
        instrument_path = self.save_dir.joinpath(f"{symbol}.csv")

        if instrument_path.exists():
            _old_df = pd.read_csv(instrument_path)
            # Renumber the incoming rows 0..len(df)-1 (assigned on the
            # passed-in frame itself) so they can be looked up by label below.
            df.index = range(len(df))
            # Keep only the updated data.
            df_merge = pd.concat([_old_df.copy(), df.copy()], sort=False)
            df_merge = df_merge.round(6)
            # Rows are compared on every column of ``df`` except 'date'.
            duplicate_cols = df.columns.drop('date')
            df_merge.drop_duplicates(subset=duplicate_cols,
                                     ignore_index=False,
                                     inplace=True)

            if len(df_merge) == len(_old_df):
                # Nothing new survived de-duplication; leave the file as is.
                # logger.info(f'{symbol} is not updated!')
                return
            elif len(df_merge) < len(_old_df):
                # Fewer rows than the old file held: report it, keep the
                # current file as a backup and rewrite the de-duplicated
                # old data.
                logger.error(f'please check {symbol} data!')
                instrument_path.replace(
                    instrument_path.with_suffix('.bak.csv'))
                df = _old_df.drop_duplicates()
            else:
                # The data needs updating. When it is updated several times
                # on the same day, date and period may repeat, and data
                # updated on the same day may itself change. When feature
                # values are read the latest data is taken in principle, so
                # this does not affect how the data is used.
                # Update the data before making decisions; if the data is not
                # being used, updating at a fixed time each day is enough,
                # e.g. 6 pm every day.
                logger.info(f'update {symbol} new recorder ......')
                # Index labels of the merged rows that sit after the first
                # len(_old_df) positions.
                # NOTE(review): presumably these are exactly the surviving
                # new rows, which relies on every old row being kept by the
                # de-duplication above - confirm.
                updated_index = df_merge.index[len(_old_df):]
                updated_df = df.reindex(updated_index)
                # If the publication date of the original record does not
                # already appear in the data, prefer the publication date
                # from the original data; a date that already occurs in the
                # old file is replaced with ``self.updated_date``.
                # NOTE(review): ``_old_df.date.to_list()`` is rebuilt for
                # every row that is checked.
                updated_df.date = updated_df.date.apply(
                    lambda x: self.updated_date
                    if x in _old_df.date.to_list() else x)
                df = pd.concat([_old_df, updated_df])

        else:
            # First time this instrument is saved.
            logger.info(f'update new stock {symbol} ......')
            df = df.drop_duplicates()

        df = df.set_index(['date', 'period']).sort_index()
        df.to_csv(instrument_path)
コード例 #3
0
ファイル: base.py プロジェクト: caozhengquan/qlib
    def save_instrument(self, symbol, df: pd.DataFrame):
        """save stock data to file

        The rows of ``df`` are appended to ``<save_dir>/<symbol>.csv`` when
        that file already exists; otherwise the file is created. Nothing is
        written when ``df`` is ``None`` or empty.

        Parameters
        ----------
        symbol: str
            stock code
        df : pd.DataFrame
            df.columns must contain "symbol" and "datetime"; the "symbol"
            column is overwritten in place with the normalized file name.
        """
        if df is None or df.empty:
            logger.warning(f"{symbol} is empty")
            return

        symbol = self.normalize_symbol(symbol)
        symbol = code_to_fname(symbol)
        stock_path = self.save_dir.joinpath(f"{symbol}.csv")
        df["symbol"] = symbol
        if stock_path.exists():
            _old_df = pd.read_csv(stock_path)
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # equivalent that works on both old and new pandas versions.
            df = pd.concat([_old_df, df], sort=False)
        df.to_csv(stock_path, index=False)
コード例 #4
0
ファイル: collector.py プロジェクト: chrismmuir/qlib
 def normalize_symbol(self, symbol):
     """Return ``code_to_fname(symbol)`` converted to upper case."""
     fname = code_to_fname(symbol)
     return fname.upper()