コード例 #1
ファイル: finamdl.py プロジェクト: Darthholi/finamdl
def download(contracts,
             market,
             timeframe,
             destdir,
             delay,
             startdate,
             enddate,
             skiperr,
             skip_existing,
             update_existing,
             sanitize: bool = True,
             daysdelta: int = 20):
    """Download Finam quote history for a set of contracts into parquet files.

    Either ``contracts`` (an iterable of contract codes) or ``market`` (a
    ``Market`` enum name) must be given; otherwise ``click.BadParameter``
    is raised.  When ``market`` is given, the full contract listing for that
    market is additionally dumped to ``<destdir>/<market>.csv``.

    The ``startdate``..``enddate`` span is split into windows of
    ``daysdelta`` days and each window is fetched separately, sleeping
    ``delay`` seconds between requests.  Per contract, data is written to
    ``<destdir>/<code>-<timeframe>.parquet``; an existing file is either
    skipped (``skip_existing``) or extended in place with only the rows
    newer than its last index entry (``update_existing``).  Finam errors
    abort the run unless ``skiperr`` is set, in which case they are logged
    and the offending window/contract is skipped.
    """
    exporter = Exporter()

    # At least one way of selecting contracts must be provided.
    if not any((contracts, market)):
        raise click.BadParameter('Neither contracts nor market is specified')

    market_filter = dict()
    if market:
        # Whole-market mode: look up every contract on the market and dump
        # the listing next to the data files as <market>.csv.
        market_filter.update(market=Market[market])
        contracts_df = exporter.lookup(**market_filter)
        contracts_df = contracts_df.reset_index()
        destpath = os.path.join(destdir, '{}.{}'.format(market, "csv"))
        contracts_df.to_csv(destpath)
    else:
        # Explicit-codes mode: resolve each requested code individually.
        contracts_list = []
        for contract_code in contracts:
            try:
                contracts_list.append(
                    exporter.lookup(code=contract_code, **market_filter))
            except FinamObjectNotFoundError:
                logger.info('unknown contract "{}"'.format(contract_code))
                if not skiperr:
                    sys.exit(1)
        contracts_df = pd.concat(contracts_list)

    # Split the overall period into daysdelta-sized chunks; presumably the
    # Finam endpoint limits how much one request may return.
    date_ranges = list(
        make_date_ranges(startdate, enddate,
                         datetime.timedelta(days=daysdelta)))
    if not contracts:
        contracts_to_dl = sorted(contracts_df['code'].to_list())
    else:
        contracts_to_dl = sorted(contracts)

    t = tqdm(contracts_to_dl, position=0)  # outer progress bar: contracts
    for contract_code in t:
        this_contract = contracts_df[contracts_df['code'] ==
                                     contract_code].iloc[0]
        destpath = os.path.join(
            destdir, '{}-{}.{}'.format(this_contract.code, timeframe,
                                       "parquet"))
        data_orig = None       # previously stored frame (update mode only)
        last_datetime = None   # last timestamp already present on disk
        if os.path.exists(destpath):
            if skip_existing:
                continue
            if update_existing:
                # Resume from the last stored row instead of re-downloading
                # the whole history.
                data_orig = pd.read_parquet(destpath)
                last_datetime = data_orig.index[-1]

        # logger.info(u'Downloading contract {}'.format(contract))
        if t:
            t.set_description(f" {contract_code} {this_contract['name']}")
            t.refresh()  # to show immediately the update

        all_data = []
        for date_range in tqdm(date_ranges, position=1):
            if last_datetime is not None:
                # Skip windows that end before the data we already hold;
                # clip the window overlapping the stored tail so the fetch
                # starts at last_datetime.
                if date_range[1] < last_datetime:
                    continue
                if date_range[1] - last_datetime <= pd.Timedelta(
                        days=daysdelta):
                    date_range = (last_datetime, date_range[1])
            try:
                data = exporter.download(this_contract['id'],
                                         start_date=date_range[0],
                                         end_date=date_range[1],
                                         timeframe=Timeframe[timeframe],
                                         market=Market(
                                             this_contract['market']))
                # if len(all_data) > 0 and len(data) <=0:
                #    raise ValueError("returned nothing")
                """
                if len(all_data) > 0 and (datepoint_to_date(data.iloc[0]) - datepoint_to_date(all_data[-1].iloc[-1]) > timedeltas_by_setting[Timeframe[timeframe]]):
                    raise ValueError("returned a hole")
                """
                if len(data) > 0:
                    all_data.append(data)
                time.sleep(delay)  # throttle requests between windows
            except FinamExportError as e:
                if skiperr:
                    logger.error(repr(e))
                    continue
                else:
                    raise
        if len(all_data) > 0:
            data = pd.concat(all_data)
            if sanitize:
                data = sanitize_df(data)

            if data_orig is not None and last_datetime is not None:
                # Append only rows strictly newer than what is stored, then
                # refuse to overwrite the file if timestamps still collide.
                data = data[data.index > last_datetime]
                data = pd.concat([data_orig, data])
                if len(data.index) > 0 and data.index.duplicated().any():
                    print(f"{contract_code} has duplicates?")
                    continue

            if sanitize:
                data.to_parquet(destpath)
            else:
                # NOTE(review): unsanitized frames are written without the
                # index — presumably it is not meaningful then; confirm.
                data.to_parquet(destpath, index=False)

            if delay > 0:
                logger.info('Sleeping for {} second(s)'.format(delay))
                time.sleep(delay)
コード例 #2
ファイル: finam.py プロジェクト: Yamp/wows
class FinamDataDownloader:
    """Downloads financial data from Finam and stores it as parquet files.

    Human-readable market/timeframe names are mapped to the finam-export
    ``Market``/``Timeframe`` enums via the two bidirectional dicts below.
    Files are laid out as ``<data_dir>/<market>/<timeframe>/<ticker>.parquet``.
    """

    # Bidirectional mapping: Market enum member <-> human-readable name.
    market_to_str = bidict({
        Market.COMMODITIES: "commodities",
        Market.BONDS: "bonds",
        Market.CURRENCIES_WORLD: "currencies_world",
        Market.CURRENCIES: "currencies",
        Market.ETF: "etf",
        Market.ETF_MOEX: "etf_moex",
        Market.FUTURES: "futures",
        Market.FUTURES_ARCHIVE: "futures_archive",
        Market.FUTURES_USA: "futures_usa",
        Market.INDEXES: "indexes",
        Market.SHARES: "shares",
        Market.USA: "usa",
        Market.SPB: "spb",
    })

    # Bidirectional mapping: Timeframe enum member <-> human-readable name.
    tf_to_str = bidict({
        Timeframe.TICKS: "ticks",
        Timeframe.MINUTES1: "minutes1",
        Timeframe.MINUTES5: "minutes5",
        Timeframe.MINUTES10: "minutes10",
        Timeframe.MINUTES15: "minutes15",
        Timeframe.MINUTES30: "minutes30",
        Timeframe.HOURLY: "hourly",
        Timeframe.DAILY: "daily",
        Timeframe.WEEKLY: "weekly",
        Timeframe.MONTHLY: "monthly",
    })

    def __init__(
            self,
            data_dir=Path(__file__).parent.parent.parent / 'data',
            start_date=datetime.date(2001, 1, 1),
            overwrite: bool = False,
    ):
        """Configure the downloader.

        :param data_dir: root directory for the downloaded parquet files.
        :param start_date: earliest date requested from Finam.
        :param overwrite: re-download assets even if the file already exists.
        """
        # caller-supplied configuration
        self.start_date = start_date
        self.data_dir: Path = data_dir
        self.overwrite: bool = overwrite

        # runtime helper objects
        self.exporter = Exporter()

    def download_all(
        self,
        market: str,
        timeframe: str,
    ):
        """Download every instrument on *market* at *timeframe*.

        Both arguments are human-readable names (e.g. ``"usa"``,
        ``"daily"``) and are mapped to enum members via the class bidicts.
        """
        market = self.market_to_str.inverse[market]
        timeframe = self.tf_to_str.inverse[timeframe]

        df = self.find_all(market=market)

        for i, row in df.iterrows():
            # i is the 0-based position (index was reset in find_all), so
            # this logs the progress counter and completed fraction.
            logger.info(
                f'Скачиваем {row["code"]} {i}/{len(df)} == {int(i)/len(df)}')
            self.download_asset(id=row['id'],
                                ticker=row['code'],
                                timeframe=timeframe,
                                market=market)

    def find_all(self, market: Market) -> pd.DataFrame:
        """Look up all instruments of *market*, sorted by ticker code."""
        return self.exporter.lookup(
            name='',
            market=market,
            name_comparator=LookupComparator.STARTSWITH,
        ).sort_values('code').reset_index()

    def download_asset(
        self,
        id: int,
        ticker: str,
        market: Market,
        timeframe: Timeframe,
    ) -> None:
        """Download one asset, skipping it if already on disk.

        Best-effort: any failure is logged and swallowed so a long batch
        run is not aborted by a single bad instrument.
        """
        logger.info(
            f'Скачиваем данные в папку {self.data_dir.resolve().absolute()}')

        try:
            fname = self.get_filename(ticker=ticker,
                                      market=market,
                                      timeframe=timeframe)
            logger.info(f'Проверяем {fname.resolve().absolute()}')

            if self.overwrite or not fname.exists():
                df = self.exporter.download(
                    id_=id,
                    start_date=self.start_date,
                    end_date=None,
                    market=market,
                    timeframe=timeframe,
                    delay=1,
                    max_in_progress_retries=10,
                )
                self.save_asset(df,
                                market=market,
                                timeframe=timeframe,
                                ticker=ticker)
            else:
                logger.info(f'{ticker} Пропускаем, так как уже скачан.')

        except Exception as e:  # noqa  -- deliberate best-effort batch mode
            logger.info(f'{ticker} Ошибка скачивания. {e}')
            # raise e

    def save_asset(
        self,
        df: pd.DataFrame,
        market: Market,
        timeframe: Timeframe,
        ticker: str,
    ):
        """Write *df* to its parquet file; log (not raise) on failure."""
        try:
            path = self.get_filename(ticker=ticker,
                                     market=market,
                                     timeframe=timeframe)
            df.to_parquet(path, compression='brotli')
        except Exception as e:  # noqa  -- deliberate best-effort batch mode
            logger.info(f'{ticker} Ошибка сохранения. {e}')
            # raise e

    def get_filename(
        self,
        ticker: str,
        market: Market,
        timeframe: Timeframe,
    ) -> Path:
        """Return the target parquet path, creating parent directories."""
        m = self.get_verbose_market(market)
        t = self.get_verbose_timeframe(timeframe)

        dir_path = self.data_dir / m / t
        dir_path.mkdir(parents=True, exist_ok=True)

        return dir_path / f"{ticker}.parquet"

    def get_verbose_market(self, market: Market) -> str:
        """Return the human-readable market name."""
        return self.market_to_str[market]

    def get_verbose_timeframe(self, tf: Timeframe) -> str:
        """Return the human-readable timeframe name."""
        return self.tf_to_str[tf]