Ejemplo n.º 1
0
def validate_data_range(
    data: pd.DataFrame,
    start_date: date,
    end_date: date,
    abs_tol: Optional[int] = 0,
    rel_tol: Optional[float] = 0,
):
    """Check that the dates in ``data`` agree with the expected trading days.

    The index of ``data`` is reduced to its set of calendar dates and compared
    with the days returned by ``generate_trading_days`` for the requested
    range.

    Args:
        data: Frame whose index supports ``to_pydatetime()``.
        start_date: Start of the expected range.
        end_date: End of the expected range.
        abs_tol: Maximum number of missing days tolerated. A falsy value
            (``0`` or ``None``) disables this check.
        rel_tol: Maximum tolerated fraction of missing days, relative to the
            number of expected days. A falsy value disables this check.

    Raises:
        ValueError: If ``data`` contains a date that is not an expected
            trading day, or if more days are missing than an enabled
            tolerance allows.
    """
    # NOTE(review): with the default tolerances (both 0) missing dates are
    # never checked, only out-of-range ones -- confirm this is intended.
    target_dates = set(
        generate_trading_days(start_date=start_date, end_date=end_date)
    )
    # The set already de-duplicates, so no separate uniqueness pass is needed.
    received_dates = {dt.date() for dt in data.index.to_pydatetime()}

    unexpected_dates = received_dates - target_dates
    if unexpected_dates:
        raise ValueError(
            f"Data has out-of-range dates: {unexpected_dates}."
        )

    missing_dates = target_dates - received_dates

    if abs_tol and len(missing_dates) > abs_tol:
        raise ValueError(
            f"Data has {len(missing_dates)} missing dates,"
            f" but abs_tol is {abs_tol}."
        )

    # With no expected days nothing can be missing; skipping the check also
    # avoids dividing by zero.
    if rel_tol and target_dates:
        ratio = len(missing_dates) / len(target_dates)
        if ratio > rel_tol:
            raise ValueError(
                f"Data is missing {ratio} of the requested dates, but"
                f" rel_tol is {rel_tol}."
            )
Ejemplo n.º 2
0
def test_retrieving_intermittently_cached_intraday(tmpdir, provider):
    """Retrieve two partial windows, then the full range, and validate it.

    Daily bars for "SPY" are requested for two narrow slices of the recent
    trading days before the whole range is requested; only the result of the
    last request is validated against the full range.
    """
    hist_retriever = HistoricalRetriever(
        provider=provider, hist_data_dir=tmpdir
    )

    data = pd.DataFrame()
    trading_days = generate_trading_days(
        start_date=date.today() - timedelta(days=10),
        end_date=date.today() - timedelta(days=1),
    )
    # Two narrow slices first, then everything, so the final request spans
    # days that were and were not requested before.
    windows = (trading_days[1:2], trading_days[-3:-2], trading_days)

    for window in windows:
        first_day, last_day = window[0], window[-1]
        data = hist_retriever.retrieve_bar_data(
            symbol="SPY",
            start_date=first_day,
            end_date=last_day,
            bar_size=timedelta(days=1),
        )

    validate_data_range(
        data=data, start_date=trading_days[0], end_date=trading_days[-1]
    )
Ejemplo n.º 3
0
def test_retrieving_intermittently_cached_intraday(tmpdir, provider):
    """Retrieve two partial windows, then the full range, and validate it.

    Daily bars for an "SPY" stock contract are requested for two narrow
    slices of the recent trading days before the whole range is requested.
    The test ends silently if the retriever raises ``NotImplementedError``.
    """
    hist_retriever = HistoricalRetriever(
        provider=provider, hist_data_dir=tmpdir
    )

    data = pd.DataFrame()
    trading_days = generate_trading_days(
        start_date=date.today() - timedelta(days=10),
        end_date=date.today() - timedelta(days=1),
    )
    # Two narrow slices first, then everything, so the final request spans
    # days that were and were not requested before.
    windows = (trading_days[1:2], trading_days[-3:-2], trading_days)

    for window in windows:
        first_day, last_day = window[0], window[-1]
        spy = StockContract(symbol="SPY")

        try:
            data = hist_retriever.retrieve_bar_data(
                contract=spy,
                start_date=first_day,
                end_date=last_day,
                bar_size=timedelta(days=1),
            )
        except NotImplementedError:  # TODO: fix
            return

    validate_data_range(
        data=data, start_date=trading_days[0], end_date=trading_days[-1]
    )
Ejemplo n.º 4
0
def test_retrieving_intermittently_cached_trades(tmpdir, provider):
    """Retrieve trades for the first day, the last day, then the full range.

    Trades for an "SPY" stock contract are requested over a fixed range in
    July 2020; only the result of the last request is validated. The test
    ends silently if the retriever raises ``NotImplementedError``.
    """
    hist_retriever = HistoricalRetriever(
        provider=provider, hist_data_dir=tmpdir
    )

    data = pd.DataFrame()
    trading_days = generate_trading_days(
        start_date=date(2020, 7, 21), end_date=date(2020, 7, 23),
    )
    # First day only, last day only, then the whole range.
    windows = (trading_days[:1], trading_days[-1:], trading_days)

    for window in windows:
        first_day, last_day = window[0], window[-1]
        spy = StockContract(symbol="SPY")

        try:
            data = hist_retriever.retrieve_trades_data(
                contract=spy, start_date=first_day, end_date=last_day,
            )
        except NotImplementedError:
            return

    validate_data_range(
        data=data, start_date=trading_days[0], end_date=trading_days[-1]
    )
Ejemplo n.º 5
0
    def download_bars_data(
        self,
        contract: AContract,
        start_date: date,
        end_date: date,
        bar_size: timedelta,
        rth: bool,
        **kwargs,
    ) -> pd.DataFrame:
        """Download historical "TRADES" bars for ``contract``.

        A single historical-data request is issued that ends at ``end_date``
        and spans as many days as there are trading days between
        ``start_date`` and ``end_date``.

        Args:
            contract: Contract to download; converted with
                ``_to_ib_contract``.
            start_date: Start of the requested range.
            end_date: End of the requested range.
            bar_size: Bar size; converted with ``_to_ib_bar_size``.
            rth: Forwarded as the request's ``useRTH`` flag.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The result of ``_format_data`` on the received bars, or an empty
            ``DataFrame`` when nothing was received.
        """
        ib_contract = self._to_ib_contract(contract=contract)
        bar_size_str = self._to_ib_bar_size(bar_size=bar_size)
        dates = generate_trading_days(start_date=start_date, end_date=end_date)

        # A zero-day duration is not a meaningful request; there is nothing
        # to download.
        if len(dates) == 0:
            return pd.DataFrame()

        bar_data = self._ib_conn.reqHistoricalData(
            contract=ib_contract,
            endDateTime=end_date,
            durationStr=f"{len(dates)} D",
            barSizeSetting=bar_size_str,
            whatToShow="TRADES",
            # Honor the caller's choice instead of always requesting
            # outside-hours data.
            useRTH=rth,
        )
        data = util.df(objs=bar_data)

        if data is None or len(data) == 0:
            return pd.DataFrame()

        return self._format_data(data=data)
Ejemplo n.º 6
0
    def _get_missing_date_ranges(
        data: pd.DataFrame, start_date: date, end_date: date,
    ) -> List[List[date]]:
        """Group the trading days that ``data`` does not cover into runs.

        Args:
            data: Frame whose index exposes ``.date`` when it is non-empty;
                may be empty.
            start_date: Start of the requested range.
            end_date: End of the requested range.

        Returns:
            A list of runs of consecutive trading days (in the order returned
            by ``generate_trading_days``) for which ``data`` has no rows.
            The list is empty when every trading day is covered.
        """
        dates = generate_trading_days(start_date=start_date, end_date=end_date)

        # An empty frame may not have a datetime index, so do not touch it.
        covered = set(data.index.date) if len(data) != 0 else set()

        date_ranges = []
        current_run = []
        for date_ in dates:
            if date_ in covered:
                # A covered day closes the run of missing days before it.
                if current_run:
                    date_ranges.append(current_run)
                    current_run = []
            else:
                current_run.append(date_)

        # Keep the trailing run; it must not be lost when the data contains
        # dates outside the requested trading days.
        if current_run:
            date_ranges.append(current_run)

        return date_ranges
Ejemplo n.º 7
0
    def download_bars_data(
        self,
        contract: AContract,
        start_date: date,
        end_date: date,
        bar_size: timedelta,
        rth: bool,
        **kwargs,
    ):
        """Download bar data for ``contract``, one request per trading day.

        Args:
            contract: Contract whose ``symbol`` is requested.
            start_date: Start of the requested range.
            end_date: End of the requested range.
            bar_size: Bar size passed to the connection; also selects the
                daily or intraday formatter.
            rth: Accepted but not forwarded by this method.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The formatted bars, or an empty ``DataFrame`` when no rows were
            downloaded.
        """
        # TODO: test rth
        dates = generate_trading_days(start_date=start_date, end_date=end_date)
        # Assumes download_stock_data returns a DataFrame -- TODO confirm.
        day_frames = [
            self._conn.download_stock_data(
                symbol=contract.symbol,
                request_date=date_,
                bar_size=bar_size,
            )
            for date_ in dates
        ]

        # Concatenate once: DataFrame.append no longer exists in pandas 2.x
        # and re-copied the accumulated frame on every iteration.
        if day_frames:
            data = pd.concat(day_frames, ignore_index=True)
        else:
            data = pd.DataFrame()

        if len(data) != 0:
            if is_daily(bar_size):
                data = self._format_daily_data(data=data)
            else:
                data = self._format_intraday_data(data=data)

        return data
Ejemplo n.º 8
0
def hist_file_names(
    start_date: date,
    end_date: date,
    bar_size: timedelta,
):
    """Return the CSV file names holding data for the given range.

    Daily bar sizes map to the single file ``daily.csv``; any other bar size
    maps to one ``<date>.csv`` file per trading day in the range, with the
    date rendered using ``DATE_FORMAT``.
    """
    if is_daily(bar_size=bar_size):
        return ["daily.csv"]

    trading_days = generate_trading_days(
        start_date=start_date, end_date=end_date
    )
    return [day.strftime(DATE_FORMAT) + ".csv" for day in trading_days]
Ejemplo n.º 9
0
    def download_data(
        self,
        symbol: str,
        start_date: date,
        end_date: date,
        bar_size: timedelta,
        **kwargs,
    ):
        """Download daily or 1-minute data for ``symbol`` day by day.

        One HTTP GET is issued per trading day in the range against the
        ``/stock/<symbol>/batch`` endpoint, and the per-day results are
        combined into one frame.

        Args:
            symbol: Ticker symbol; lower-cased for the URL.
            start_date: Start of the requested range.
            end_date: End of the requested range.
            bar_size: Either a daily bar size or exactly one minute.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The formatted data, or an empty ``DataFrame`` when no rows were
            downloaded.

        Raises:
            ValueError: If ``bar_size`` is neither daily nor one minute.
        """
        params = {"token": self._api_token}

        if is_daily(bar_size=bar_size):
            request_type = "chart"
            params["chartByDay"] = True
            params["range"] = "date"
        elif bar_size == timedelta(minutes=1):
            request_type = "intraday-prices"
            params["range"] = "1d"
        else:
            raise ValueError(
                f"{type(self)} can only download historical data or"
                f" 1-minute bars. Got a bar size of {bar_size}."
            )

        params["types"] = [request_type]
        url = f"{self._base_url}/stock/{symbol.lower()}/batch"

        day_frames = []
        dates = generate_trading_days(start_date=start_date, end_date=end_date)
        for date_ in dates:
            params["exactDate"] = date_.strftime(self._REQ_DATE_FORMAT)
            # NOTE(review): the HTTP status is not checked before parsing, so
            # an error response surfaces as a JSON or key error.
            r = requests.get(url=url, params=params)
            json_data = json.loads(r.text)
            day_frames.append(pd.DataFrame(data=json_data[request_type]))

        # Concatenate once: DataFrame.append no longer exists in pandas 2.x
        # and re-copied the accumulated frame on every iteration.
        if day_frames:
            data = pd.concat(day_frames, ignore_index=True)
        else:
            data = pd.DataFrame()

        if len(data) != 0:
            if is_daily(bar_size):
                data = self._format_daily_data(data=data)
            else:
                data = self._format_intraday_data(data=data)

        return data
Ejemplo n.º 10
0
    def download_trades_data(
        self,
        contract: AContract,
        start_date: date,
        end_date: date,
        rth: bool,
        **kwargs,
    ) -> pd.DataFrame:
        """Download trades for ``contract``, one request per trading day.

        Args:
            contract: Contract whose ``symbol`` is requested.
            start_date: Start of the requested range.
            end_date: End of the requested range.
            rth: Forwarded unchanged to the connection's
                ``download_trades_data``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The result of ``_format_trades_data`` on the combined per-day
            frames (it is also called when nothing was downloaded).
        """
        dates = generate_trading_days(start_date=start_date, end_date=end_date)
        # Assumes the connection returns a DataFrame per day -- TODO confirm.
        day_frames = [
            self._conn.download_trades_data(
                symbol=contract.symbol, request_date=date_, rth=rth
            )
            for date_ in dates
        ]

        # Concatenate once: DataFrame.append no longer exists in pandas 2.x
        # and re-copied the accumulated frame on every iteration.
        if day_frames:
            data = pd.concat(day_frames, ignore_index=True)
        else:
            data = pd.DataFrame()

        return self._format_trades_data(data=data)
Ejemplo n.º 11
0
    prepare_dataset_intra_single_day(data_dir=data_dir, case=case)
    broker = get_sme_sim_broker_intra_single_day(data_dir=data_dir)

    broker.run_sim(cache_only=True)

    assert broker.acc_cash == expected_acc_cash
    assert broker.get_position("TEST") == expected_pos


def prepare_dataset_daily(data_dir: Path, case: int):
    """Create the ``TEST`` data directory and build a flat daily OHLCV frame.

    The frame is indexed by the trading days generated for February and March
    2020, with every price column set to 100 and the volume set to 1000. It
    is then passed to ``apply_case`` together with ``case``.

    Args:
        data_dir: Directory under which the ``TEST`` sub-directory is created.
        case: Scenario identifier forwarded to ``apply_case``.
    """
    f_dir = data_dir / "TEST"
    # No exist_ok: this raises if the directory already exists.
    os.makedirs(str(f_dir))
    # NOTE(review): f_path is not used in the visible part of this function;
    # presumably the frame is written to it further down -- confirm.
    f_path = f_dir / "daily.csv"

    dates = generate_trading_days(
        start_date=datetime(2020, 2, 1),
        end_date=datetime(2020, 3, 31),
    )
    index = pd.DatetimeIndex(dates, name="datetime")
    # Constant values give a known baseline for apply_case to work on
    # (presumably it modifies the frame in place -- verify).
    data = pd.DataFrame(
        data={
            "open": np.full(len(index), 100),
            "high": np.full(len(index), 100),
            "low": np.full(len(index), 100),
            "close": np.full(len(index), 100),
            "volume": np.full(len(index), 1000),
        },
        index=index,
    )

    apply_case(data=data, case=case)
Ejemplo n.º 12
0
    retriever = HistoricalRetriever(provider=provider, hist_data_dir=tmpdir)
    contract = StockContract(symbol="SPY")

    try:
        data = retriever.retrieve_trades_data(
            contract=contract, start_date=start_date, end_date=end_date,
        )
    except NotImplementedError:
        return

    validate_data_range(data=data, start_date=start_date, end_date=end_date)


@pytest.mark.skipif(
    len(generate_trading_days(start_date=date.today(), end_date=date.today()))
    == 0,
    reason="Today is not a trading day.",
)
@pytest.mark.parametrize("provider", [provider for provider in HIST_PROVIDERS])
def test_retrieve_non_cached_trades_data_today_partial(tmpdir, provider):
    end_date = date.today()
    start_date = end_date - timedelta(days=1)

    retriever = HistoricalRetriever(provider=provider, hist_data_dir=tmpdir)
    contract = StockContract(symbol="SPY")

    try:
        data = retriever.retrieve_trades_data(
            contract=contract,
            start_date=start_date,