コード例 #1
0
ファイル: download.py プロジェクト: vritme/finam-export
def main():
    """Print the most recent USD/RUB and Brent rows available from the exporter.

    For each instrument a banner is printed, the instrument is looked up,
    the lookup is asserted to match exactly one row, and the last row of
    the downloaded data is printed.
    """
    exporter = Exporter()

    # (banner, market, lookup criteria) for every instrument to show.
    queries = (
        ('*** Current Russian ruble exchange rates ***',
         Market.CURRENCIES,
         {'name': 'USDRUB_TOD'}),
        ('*** Current Brent Oil price ***',
         Market.COMMODITIES,
         {'name': 'Brent', 'name_comparator': LookupComparator.EQUALS}),
    )

    for banner, market, criteria in queries:
        print(banner)
        found = exporter.lookup(market=market, **criteria)
        assert len(found) == 1
        # The lookup result is indexed by instrument id.
        quotes = exporter.download(found.index[0], market=market)
        print(quotes.tail(1))
コード例 #2
0
def download_usa(
        id: int,
        code: str,
) -> None:
    """Download 1-minute data for one USA-market instrument into a parquet file.

    The data is requested from 2001-01-01 with no end date and written to
    ``{code}_1MIN.parquet`` (brotli-compressed) in the current directory.
    Failures are best-effort: they are logged with a traceback and the
    function returns without raising.

    Args:
        id: Instrument id passed to ``Exporter.download``.
        code: Instrument code, used for log messages and the file name.
    """
    exporter = Exporter()
    logger.info(f'Скачиваем {code} {id}')

    try:
        data = exporter.download(
            id,
            market=Market.USA,
            start_date=datetime.date(2001, 1, 1),
            end_date=None,
            timeframe=Timeframe.MINUTES1,
            delay=3,
            max_in_progress_retries=10,
        )
    except Exception:
        # Keep going on failure, but record the cause instead of dropping it.
        logger.exception(f'{code} Ошибка скачивания.')
        return

    try:
        data.to_parquet(f'{code}_1MIN.parquet', compression='brotli')
    except Exception:
        logger.exception(f'{code} Ошибка сохранения.')
コード例 #3
0
 def test_blank(self, start_date, end_date, timeframe):
     """Assert a download for the given range has no rows but candle columns."""
     expected_columns = [
         '<DATE>', '<TIME>', '<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<VOL>',
     ]
     frame = Exporter().download(
         SBER.id,
         Market.SHARES,
         start_date=start_date,
         end_date=end_date,
         timeframe=timeframe,
     )
     assert len(frame) == 0
     assert frame.columns.tolist() == expected_columns
コード例 #4
0
 def test_ticks_blank(self, start_date, end_date):
     """Assert a tick download for the given range has no rows but tick columns."""
     expected_columns = [
         '<TICKER>', '<PER>', '<DATE>', '<TIME>', '<LAST>', '<VOL>',
     ]
     frame = Exporter().download(
         SBER.id,
         Market.SHARES,
         start_date=start_date,
         end_date=end_date,
         timeframe=Timeframe.TICKS,
     )
     assert len(frame) == 0
     assert frame.columns.tolist() == expected_columns
コード例 #5
0
 def test_basic(self, start_date, end_date, timeframe):
     """Assert a candle download is non-empty, date-bounded and well-shaped."""
     frame = Exporter().download(
         SBER.id,
         Market.SHARES,
         start_date=start_date,
         end_date=end_date,
         timeframe=timeframe,
     )
     assert len(frame) > 0

     # '<DATE>' is compared as a YYYYMMDD integer against the requested range.
     dates = frame['<DATE>']
     assert dates.min() >= int(start_date.strftime('%Y%m%d'))
     assert dates.max() <= int(end_date.strftime('%Y%m%d'))

     expected_columns = [
         '<DATE>', '<TIME>', '<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<VOL>',
     ]
     assert frame.columns.tolist() == expected_columns
コード例 #6
0
 def test_ticks(self, start_date, end_date):
     """Assert a tick download is large enough, date-bounded and well-shaped."""
     frame = Exporter().download(
         SBER.id,
         Market.SHARES,
         start_date=start_date,
         end_date=end_date,
         timeframe=Timeframe.TICKS,
     )
     # Require more rows than there are seconds in SHARES_SESSION_MINUTES.
     assert len(frame) > SHARES_SESSION_MINUTES * 60

     # '<DATE>' is compared as a YYYYMMDD integer against the requested range.
     dates = frame['<DATE>']
     assert dates.min() >= int(start_date.strftime('%Y%m%d'))
     assert dates.max() <= int(end_date.strftime('%Y%m%d'))

     expected_columns = [
         '<TICKER>', '<PER>', '<DATE>', '<TIME>', '<LAST>', '<VOL>',
     ]
     assert frame.columns.tolist() == expected_columns
コード例 #7
0
def main(contracts, market, timeframe, destdir, lineterm, delay, startdate,
         enddate, skiperr, ext):
    """Download the requested contracts and write each one to a CSV file.

    Args:
        contracts: Contract codes to download. When empty and ``market`` is
            given, every code found in that market is downloaded.
        market: Name of a ``Market`` member (looked up as ``Market[market]``),
            or a falsy value for no market filter.
        timeframe: Name of a ``Timeframe`` member; also used in file names.
        destdir: Directory the output files are written to.
        lineterm: Line terminator passed to ``DataFrame.to_csv``.
        delay: Seconds to sleep after each written file (skipped when <= 0).
        startdate: Start date passed to ``Exporter.download``.
        enddate: End date passed to ``Exporter.download``.
        skiperr: When true, a ``FinamExportError`` during download is logged
            and the contract is skipped; otherwise it is re-raised.
        ext: Extension of the output files (without the dot).

    Raises:
        click.BadParameter: If neither ``contracts`` nor ``market`` is given.
        SystemExit: With status 1 when a contract code cannot be found.
        FinamExportError: On a download failure when ``skiperr`` is false.
    """
    exporter = Exporter()

    if not any((contracts, market)):
        raise click.BadParameter('Neither contracts nor market is specified')

    market_filter = dict()
    if market:
        market_filter.update(market=Market[market])
        if not contracts:
            contracts = exporter.lookup(**market_filter)['code'].tolist()

    for contract_code in contracts:
        logger.info('Handling {}'.format(contract_code))
        try:
            # Kept in its own name so the sequence being iterated is not
            # shadowed by the lookup result.
            matches = exporter.lookup(code=contract_code, **market_filter)
        except FinamObjectNotFoundError:
            logger.error('unknown contract "{}"'.format(contract_code))
            sys.exit(1)
        # reset_index() exposes the id as a regular field of the row.
        contract = matches.reset_index().iloc[0]

        logger.info('Downloading contract {}'.format(contract))
        try:
            data = exporter.download(contract.id,
                                     start_date=startdate,
                                     end_date=enddate,
                                     timeframe=Timeframe[timeframe],
                                     market=Market(contract.market))
        except FinamExportError as e:
            if skiperr:
                logger.error(repr(e))
                continue
            raise

        destpath = os.path.join(
            destdir, '{}-{}.{}'.format(contract.code, timeframe, ext))

        # NOTE(review): pandas 1.5 deprecated `line_terminator` in favour of
        # `lineterminator`; confirm the pandas version this runs against.
        data.to_csv(destpath, index=False, line_terminator=lineterm)
        if delay > 0:
            logger.info('Sleeping for {} second(s)'.format(delay))
            time.sleep(delay)
コード例 #8
0
ファイル: finamdl.py プロジェクト: Darthholi/finamdl
def download(contracts,
             market,
             timeframe,
             destdir,
             delay,
             startdate,
             enddate,
             skiperr,
             skip_existing,
             update_existing,
             sanitize=True,
             daysdelta=20):
    """Download contracts in date-range chunks and store each as a parquet file.

    Args:
        contracts: Contract codes to download. When empty and ``market`` is
            given, every code listed for that market is downloaded.
        market: Name of a ``Market`` member (looked up as ``Market[market]``),
            or a falsy value. When given, the market's contract listing is
            also written to ``<destdir>/<market>.csv``.
        timeframe: Name of a ``Timeframe`` member; also used in file names.
        destdir: Directory the output files are written to.
        delay: Seconds to sleep after every chunk request and, when > 0,
            after every written file.
        startdate: Start of the overall period to download.
        enddate: End of the overall period to download.
        skiperr: When true, unknown contracts and ``FinamExportError``
            failures are logged and skipped instead of aborting.
        skip_existing: Skip contracts whose parquet file already exists.
        update_existing: Load an existing parquet file and append only the
            rows newer than its last index value.
        sanitize: Pass the combined data through ``sanitize_df`` and keep
            the index when writing; otherwise write without the index.
        daysdelta: Length in days of each download chunk.

    Raises:
        click.BadParameter: If neither ``contracts`` nor ``market`` is given.
        SystemExit: With status 1 for an unknown contract when ``skiperr``
            is false.
        FinamExportError: On a download failure when ``skiperr`` is false.
    """
    exporter = Exporter()

    if not any((contracts, market)):
        raise click.BadParameter('Neither contracts nor market is specified')

    market_filter = dict()
    if market:
        market_filter.update(market=Market[market])
        contracts_df = exporter.lookup(**market_filter)
        contracts_df = contracts_df.reset_index()
        destpath = os.path.join(destdir, '{}.{}'.format(market, "csv"))
        contracts_df.to_csv(destpath)
    else:
        contracts_list = []
        for contract_code in contracts:
            try:
                contracts_list.append(
                    exporter.lookup(code=contract_code, **market_filter))
            except FinamObjectNotFoundError:
                logger.info('unknown contract "{}"'.format(contract_code))
                if not skiperr:
                    sys.exit(1)
        if not contracts_list:
            # Every lookup failed and was skipped; pd.concat([]) would raise.
            return
        # reset_index() mirrors the market branch so that 'id' is readable
        # as a column further down.
        contracts_df = pd.concat(contracts_list).reset_index()

    date_ranges = list(
        make_date_ranges(startdate, enddate,
                         datetime.timedelta(days=daysdelta)))
    if not contracts:
        contracts_to_dl = sorted(contracts_df['code'].to_list())
    else:
        # Only keep codes that are actually present in the listing; selecting
        # a missing code below would otherwise fail with IndexError.
        known_codes = set(contracts_df['code'])
        contracts_to_dl = []
        for contract_code in sorted(contracts):
            if contract_code in known_codes:
                contracts_to_dl.append(contract_code)
            elif market:
                # Without a market, misses were already reported by the
                # per-code lookups above.
                logger.info('unknown contract "{}"'.format(contract_code))
                if not skiperr:
                    sys.exit(1)

    t = tqdm(contracts_to_dl, position=0)
    for contract_code in t:
        this_contract = contracts_df[contracts_df['code'] ==
                                     contract_code].iloc[0]
        destpath = os.path.join(
            destdir, '{}-{}.{}'.format(this_contract.code, timeframe,
                                       "parquet"))
        data_orig = None
        last_datetime = None
        if os.path.exists(destpath):
            if skip_existing:
                continue
            if update_existing:
                data_orig = pd.read_parquet(destpath)
                # An empty existing file has no last timestamp to resume
                # from; it is then simply overwritten.
                if len(data_orig.index) > 0:
                    last_datetime = data_orig.index[-1]

        if t:
            t.set_description(f" {contract_code} {this_contract['name']}")
            t.refresh()  # to show immediately the update

        all_data = []
        for date_range in tqdm(date_ranges, position=1):
            if last_datetime is not None:
                # Chunks entirely covered by the existing file are skipped.
                if date_range[1] < last_datetime:
                    continue
                # The chunk containing the resume point starts from it.
                if date_range[1] - last_datetime <= pd.Timedelta(
                        days=daysdelta):
                    date_range = (last_datetime, date_range[1])
            try:
                data = exporter.download(this_contract['id'],
                                         start_date=date_range[0],
                                         end_date=date_range[1],
                                         timeframe=Timeframe[timeframe],
                                         market=Market(
                                             this_contract['market']))
                # Empty-result and gap checks between consecutive chunks are
                # currently disabled.
                if len(data) > 0:
                    all_data.append(data)
                time.sleep(delay)
            except FinamExportError as e:
                if skiperr:
                    logger.error(repr(e))
                    continue
                raise

        if len(all_data) > 0:
            data = pd.concat(all_data)
            if sanitize:
                data = sanitize_df(data)

            if data_orig is not None and last_datetime is not None:
                # Append only rows newer than what is already stored.
                data = data[data.index > last_datetime]
                data = pd.concat([data_orig, data])
                if len(data.index) > 0 and data.index.duplicated().any():
                    print(f"{contract_code} has duplicates?")
                    continue

            if sanitize:
                data.to_parquet(destpath)
            else:
                data.to_parquet(destpath, index=False)

            if delay > 0:
                logger.info('Sleeping for {} second(s)'.format(delay))
                time.sleep(delay)
コード例 #9
0
ファイル: finam.py プロジェクト: Yamp/wows
class FinamDataDownloader:
    """Downloads financial data from Finam and stores it as parquet files."""

    # Two-way mapping between Market members and their directory/CLI names.
    market_to_str = bidict({
        Market.COMMODITIES: "commodities",
        Market.BONDS: "bonds",
        Market.CURRENCIES_WORLD: "currencies_world",
        Market.CURRENCIES: "currencies",
        Market.ETF: "etf",
        Market.ETF_MOEX: "etf_moex",
        Market.FUTURES: "futures",
        Market.FUTURES_ARCHIVE: "futures_archive",
        Market.FUTURES_USA: "futures_usa",
        Market.INDEXES: "indexes",
        Market.SHARES: "shares",
        Market.USA: "usa",
        Market.SPB: "spb",
    })

    # Two-way mapping between Timeframe members and their directory/CLI names.
    tf_to_str = bidict({
        Timeframe.TICKS: "ticks",
        Timeframe.MINUTES1: "minutes1",
        Timeframe.MINUTES5: "minutes5",
        Timeframe.MINUTES10: "minutes10",
        Timeframe.MINUTES15: "minutes15",
        Timeframe.MINUTES30: "minutes30",
        Timeframe.HOURLY: "hourly",
        Timeframe.DAILY: "daily",
        Timeframe.WEEKLY: "weekly",
        Timeframe.MONTHLY: "monthly",
    })

    def __init__(
            self,
            data_dir: Path = Path(__file__).parent.parent.parent / 'data',
            start_date: datetime.date = datetime.date(2001, 1, 1),
            overwrite: bool = False,
    ):
        """Create a downloader.

        Args:
            data_dir: Root directory for the downloaded files.
            start_date: First date requested for every asset.
            overwrite: Re-download assets whose file already exists.
        """
        # parameters
        self.start_date = start_date
        self.data_dir: Path = data_dir
        self.overwrite: bool = overwrite

        # runtime objects
        self.exporter = Exporter()

    def download_all(
        self,
        market: str,
        timeframe: str,
    ):
        """Download every instrument of one market at one timeframe.

        Args:
            market: Market name; one of the string values of
                ``market_to_str``.
            timeframe: Timeframe name; one of the string values of
                ``tf_to_str``.
        """
        market_member = self.market_to_str.inverse[market]
        timeframe_member = self.tf_to_str.inverse[timeframe]

        df = self.find_all(market=market_member)

        # find_all() resets the index, so ``i`` is the row position.
        for i, row in df.iterrows():
            logger.info(
                f'Скачиваем {row["code"]} {i}/{len(df)} == {int(i)/len(df)}')
            self.download_asset(id=row['id'],
                                ticker=row['code'],
                                timeframe=timeframe_member,
                                market=market_member)

    def find_all(self, market: Market) -> pd.DataFrame:
        """Return every instrument of ``market``, sorted by code.

        The lookup matches names starting with the empty string, i.e. it
        does not filter by name. The index is reset so the previous index
        becomes a regular column.
        """
        return self.exporter.lookup(
            name='',
            market=market,
            name_comparator=LookupComparator.STARTSWITH,
        ).sort_values('code').reset_index()

    def download_asset(
        self,
        id: int,
        ticker: str,
        market: Market,
        timeframe: Timeframe,
    ) -> None:
        """Download one asset and save it, unless it is already on disk.

        Best-effort: any exception is logged with its traceback and
        swallowed so that a batch run can continue.

        Args:
            id: Instrument id passed to ``Exporter.download``.
            ticker: Instrument code, used as the file name stem.
            market: Market the instrument belongs to.
            timeframe: Timeframe to download.
        """
        logger.info(
            f'Скачиваем данные в папку {self.data_dir.resolve().absolute()}')

        try:
            fname = self.get_filename(ticker=ticker,
                                      market=market,
                                      timeframe=timeframe)
            logger.info(f'Проверяем {fname.resolve().absolute()}')

            if self.overwrite or not fname.exists():
                df = self.exporter.download(
                    id_=id,
                    start_date=self.start_date,
                    end_date=None,
                    market=market,
                    timeframe=timeframe,
                    delay=1,
                    max_in_progress_retries=10,
                )
                self.save_asset(df,
                                market=market,
                                timeframe=timeframe,
                                ticker=ticker)
            else:
                logger.info(f'{ticker} Пропускаем, так как уже скачан.')

        except Exception as e:  # noqa
            # Deliberately not re-raised; logged with the traceback so the
            # failure is not lost.
            logger.exception(f'{ticker} Ошибка скачивания. {e}')

    def save_asset(
        self,
        df: pd.DataFrame,
        market: Market,
        timeframe: Timeframe,
        ticker: str,
    ):
        """Write ``df`` to the asset's parquet file (brotli-compressed).

        Best-effort: any exception is logged with its traceback and
        swallowed.
        """
        try:
            path = self.get_filename(ticker=ticker,
                                     market=market,
                                     timeframe=timeframe)
            df.to_parquet(path, compression='brotli')
        except Exception as e:  # noqa
            logger.exception(f'{ticker} Ошибка сохранения. {e}')

    def get_filename(
        self,
        ticker: str,
        market: Market,
        timeframe: Timeframe,
    ) -> Path:
        """Return ``<data_dir>/<market>/<timeframe>/<ticker>.parquet``.

        The containing directory is created if it does not exist yet.
        """
        m = self.get_verbose_market(market)
        t = self.get_verbose_timeframe(timeframe)

        dir_path = self.data_dir / m / t
        dir_path.mkdir(parents=True, exist_ok=True)

        return dir_path / f"{ticker}.parquet"

    def get_verbose_market(self, market: Market) -> str:
        """Return the string name of a market."""
        return self.market_to_str[market]

    def get_verbose_timeframe(self, tf: Timeframe) -> str:
        """Return the string name of a timeframe."""
        return self.tf_to_str[tf]