Example 1
    def forecast(self) -> Forecast:
        """Прогноз годовой доходности."""
        loader = data_loader.DescribedDataLoader(
            self._tickers, self._end, self._phenotype["data"], data_params.ForecastParams,
        )

        model = self.get_model(loader, False)

        m_list = []
        s_list = []
        with torch.no_grad():
            model.eval()
            for batch in loader:
                m, s = model(batch)
                m_list.append(m)
                s_list.append(s)
        m_forecast = torch.cat(m_list, dim=0).numpy().flatten()
        s_forecast = torch.cat(s_list, dim=0).numpy().flatten()

        forecast_days = self._phenotype["data"]["forecast_days"]
        history_days = self._phenotype["data"]["history_days"]

        year_mul = YEAR_IN_TRADING_DAYS / forecast_days
        m_forecast = pd.Series(m_forecast, index=list(self._tickers)).mul(year_mul)
        s_forecast = pd.Series(s_forecast, index=list(self._tickers)).mul(year_mul ** 0.5)

        return Forecast(
            tickers=self._tickers,
            date=self._end,
            history_days=history_days,
            forecast_days=forecast_days,
            mean=m_forecast,
            std=s_forecast,
        )
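
The annualization above assumes independent increments: over a horizon of forecast_days the mean return scales linearly with time and the standard deviation with its square root, hence the year_mul and year_mul ** 0.5 factors. A minimal sketch of the same conversion, with illustrative values standing in for the project constants:

import pandas as pd

YEAR_IN_TRADING_DAYS = 252  # assumed value of the project-wide constant
forecast_days = 21          # hypothetical forecast horizon

# Horizon-level model outputs for two tickers.
mean = pd.Series([0.01, 0.02], index=["MTSS", "BANE"])
std = pd.Series([0.05, 0.07], index=["MTSS", "BANE"])

year_mul = YEAR_IN_TRADING_DAYS / forecast_days
annual_mean = mean.mul(year_mul)       # mean grows linearly with time
annual_std = std.mul(year_mul ** 0.5)  # std grows with the square root of time
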
Example 2
def make_data_loader():
    return data_loader.DescribedDataLoader(
        ("MTSS", "BANE"),
        pd.Timestamp("2020-03-20"),
        DATA_PARAMS,
        data_params.TrainParams,
    )
Example 3
    def _validate(self, model: nn.Module) -> NoReturn:
        """Валидация модели."""
        loader = data_loader.DescribedDataLoader(
            self._tickers, self._end, self._phenotype["data"], data_params.ValParams
        )
        if len(loader.dataset) // len(self._tickers) == 0:
            print("~~> Valid: skipped...")
            return

        loss_fn = normal_llh

        llh_sum = 0.0
        weight_sum = 0.0

        print(f"Val size - {len(loader.dataset)}")
        with torch.no_grad():
            model.eval()
            bar = tqdm.tqdm(loader, file=sys.stdout, desc="~~> Valid")
            for batch in bar:
                output = model(batch)
                loss, weight = loss_fn(output, batch)
                llh_sum += -loss.item()
                weight_sum += weight

                bar.set_postfix_str(f"{llh_sum / weight_sum:.5f}")
Example 4
    def _train_model(self) -> nn.Module:
        """Тренировка модели."""
        phenotype = self._phenotype

        loader = data_loader.DescribedDataLoader(
            self._tickers, self._end, phenotype["data"], data_params.TrainParams
        )

        model = self._make_untrained_model(loader)
        optimizer = optim.AdamW(model.parameters(), **phenotype["optimizer"])

        steps_per_epoch = len(loader)
        scheduler_params = dict(phenotype["scheduler"])
        epochs = scheduler_params.pop("epochs")
        total_steps = 1 + int(steps_per_epoch * epochs)
        scheduler_params["total_steps"] = total_steps
        scheduler = lr_scheduler.OneCycleLR(optimizer, **scheduler_params)

        print(f"Epochs - {epochs:.2f}")
        print(f"Train size - {len(loader.dataset)}")

        len_deque = int(total_steps ** 0.5)
        llh_sum = 0.0
        llh_deque = collections.deque([0], maxlen=len_deque)
        weight_sum = 0.0
        weight_deque = collections.deque([0], maxlen=len_deque)
        loss_fn = normal_llh

        loader = itertools.repeat(loader)
        loader = itertools.chain.from_iterable(loader)
        loader = itertools.islice(loader, total_steps)

        model.train()
        bar = tqdm.tqdm(loader, file=sys.stdout, total=total_steps, desc="~~> Train")
        for batch in bar:
            optimizer.zero_grad()
            output = model(batch)

            loss, weight = loss_fn(output, batch)

            llh_sum += -loss.item() - llh_deque[0]
            llh_deque.append(-loss.item())

            weight_sum += weight - weight_deque[0]
            weight_deque.append(weight)

            loss.backward()
            optimizer.step()
            scheduler.step()

            llh = llh_sum / weight_sum
            bar.set_postfix_str(f"{llh:.5f}")

            # This form of the condition also catches NaN values
            if not (llh > LOW_LLH):
                raise GradientsError(llh)

        self._validate(model)

        return model
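
The pair of deques above maintains a moving average of the log-likelihood over the last len_deque steps in O(1) per step: the leftmost element, which the next append will evict once the deque is full, is subtracted from the running sum first. A stripped-down sketch of the same trick:

import collections

window = 3
values = [1.0, 2.0, 3.0, 4.0, 5.0]

running_sum = 0.0
last = collections.deque([0.0], maxlen=window)  # seeded with 0 so early subtractions are no-ops
for v in values:
    running_sum += v - last[0]  # last[0] is evicted by the append once the deque is full
    last.append(v)
    # running_sum now holds the sum of the most recent `window` values
print(running_sum)  # 3.0 + 4.0 + 5.0 = 12.0
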
Example 5
    def _eval_llh(self) -> tuple[float, float]:
        """Вычисляет логарифм правдоподобия.

        Прогнозы пересчитываются в дневное выражение для сопоставимости и вычисляется логарифм
        правдоподобия. Модель загружается при наличии сохраненных весов или обучается с нуля.
        """
        loader = data_loader.DescribedDataLoader(
            self._tickers,
            self._end,
            self._phenotype["data"],
            data_params.TestParams,
        )

        n_tickers = len(self._tickers)
        days, rez = divmod(len(loader.dataset), n_tickers)
        if rez:
            history = int(self._phenotype["data"]["history_days"])

            raise TooLongHistoryError(f"History is too long - {history}")

        model = self.prepare_model(loader)
        model.to(DEVICE)
        loss_fn = log_normal_llh_mix

        llh_sum = 0
        weight_sum = 0
        all_means = []
        all_vars = []
        all_labels = []

        llh_adj = np.log(data_params.FORECAST_DAYS) / 2
        with torch.no_grad():
            model.eval()
            bars = tqdm.tqdm(loader, file=sys.stdout, desc="~~> Test")
            for batch in bars:
                loss, mean, var = loss_fn(model, batch)
                llh_sum -= loss.item()
                weight_sum += mean.shape[0]
                all_means.append(mean)
                all_vars.append(var)
                all_labels.append(batch["Label"])

                bars.set_postfix_str(f"{llh_sum / weight_sum + llh_adj:.5f}")

        all_means = torch.cat(all_means).cpu().numpy().flatten()
        all_vars = torch.cat(all_vars).cpu().numpy().flatten()
        all_labels = torch.cat(all_labels).cpu().numpy().flatten()
        llh = llh_sum / weight_sum + llh_adj

        ir = _opt_port(
            all_means,
            all_vars,
            all_labels,
            self._tickers,
            self._end,
            self._phenotype,
        )

        return llh, ir
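
The llh_adj term puts the FORECAST_DAYS-horizon log-likelihood into daily terms: if the standard deviation of a normal grows with the square root of the horizon T, its log-density at comparable points shrinks by log(T) / 2, so adding log(T) / 2 back makes scores across horizons comparable. A small numeric check of that identity (the helper below is illustrative, not part of the project):

import numpy as np

T = 21        # hypothetical forecast horizon in days
sigma = 0.01  # daily standard deviation

def normal_logpdf(x, mean, std):
    return -np.log(std) - 0.5 * np.log(2 * np.pi) - (x - mean) ** 2 / (2 * std ** 2)

# Log-density of a T-day return at its mean vs. a daily return at its mean.
llh_t = normal_logpdf(0.0, 0.0, sigma * T ** 0.5)
llh_1 = normal_logpdf(0.0, 0.0, sigma)

# The gap is exactly log(T) / 2 -- the adjustment added to the score above.
assert np.isclose(llh_1 - llh_t, np.log(T) / 2)
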
Example 6
    def _eval_llh(self) -> float:
        """Вычисляет логарифм правдоподобия.

        Прогнозы пересчитываются в дневное выражение для сопоставимости и вычисляется логарифм
        правдоподобия. Модель загружается при наличии сохраненных весов или обучается с нуля.
        """
        loader = data_loader.DescribedDataLoader(
            self._tickers,
            self._end,
            self._phenotype["data"],
            data_params.TestParams,
        )

        n_tickers = len(self._tickers)
        days, rez = divmod(len(loader.dataset), n_tickers)
        if rez:
            raise TooLongHistoryError

        model = self.prepare_model(loader)
        model.to(DEVICE)
        loss_fn = log_normal_llh

        llh_sum = 0
        weight_sum = 0
        llh_all = []

        print(f"Тестовых дней: {days}")
        print(f"Тестовых примеров: {len(loader.dataset)}")
        with torch.no_grad():
            model.eval()
            bars = tqdm.tqdm(loader, file=sys.stdout, desc="~~> Test")
            for batch in bars:
                mean, std = model(batch)
                loss, weight, llh = loss_fn((mean, std), batch)
                llh_sum -= loss.item()
                weight_sum += weight
                llh_all.append(llh)

                bars.set_postfix_str(f"{llh_sum / weight_sum:.5f}")

        llh_all = torch.cat(llh_all)
        # Standard error of the mean log-likelihood.
        print(
            f"STD: {llh_all.std(unbiased=True).item() / len(llh_all) ** 0.5:.4f}"
        )

        return llh_sum / weight_sum
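
The value printed as STD above is the standard error of the mean log-likelihood (sample standard deviation divided by the square root of the sample size), so the reported llh is only trustworthy to about two such units. A sketch of turning it into a rough confidence band (the data here is synthetic):

import torch

# Per-example log-likelihoods collected during a test pass (synthetic stand-in).
llh_all = torch.randn(10_000) * 0.3 + 1.2

mean_llh = llh_all.mean().item()
se = llh_all.std(unbiased=True).item() / len(llh_all) ** 0.5
print(f"LLH: {mean_llh:.5f} +/- {2 * se:.4f}")  # rough 95% band on the mean
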
Example 7
    def forecast(self) -> Forecast:
        """Прогноз годовой доходности."""
        loader = data_loader.DescribedDataLoader(
            self._tickers,
            self._end,
            self._phenotype["data"],
            data_params.ForecastParams,
        )

        model = self.prepare_model(loader)
        model.to(DEVICE)

        means = []
        stds = []
        with torch.no_grad():
            model.eval()
            for batch in loader:
                dist = model.dist(batch)

                means.append(dist.mean - torch.tensor(1.0))  # from gross (1 + r) to net return
                stds.append(dist.variance**0.5)

        means = torch.cat(means, dim=0).cpu().numpy().flatten()
        stds = torch.cat(stds, dim=0).cpu().numpy().flatten()

        means = pd.Series(means, index=list(self._tickers))
        means = means.mul(YEAR_IN_TRADING_DAYS / data_params.FORECAST_DAYS)

        stds = pd.Series(stds, index=list(self._tickers))
        stds = stds.mul(
            (YEAR_IN_TRADING_DAYS / data_params.FORECAST_DAYS)**0.5)

        return Forecast(
            tickers=self._tickers,
            date=self._end,
            history_days=self._phenotype["data"]["history_days"],
            mean=means,
            std=stds,
            risk_aversion=self._phenotype["utility"]["risk_aversion"],
            error_tolerance=self._phenotype["utility"]["error_tolerance"],
        )
Example 8
def make_data_loader():
    return data_loader.DescribedDataLoader(TICKERS, DATE, PARAMS,
                                           data_params.ForecastParams)
Example 9
    def _train_model(self) -> nn.Module:
        """Тренировка модели."""
        phenotype = self._phenotype

        try:
            loader = data_loader.DescribedDataLoader(
                self._tickers,
                self._end,
                phenotype["data"],
                data_params.TrainParams,
            )
        except ValueError:
            history = int(self._phenotype["data"]["history_days"])

            raise TooLongHistoryError(f"Слишком большая длина истории: {history}")

        if len(loader.features_description) == 1:
            raise DegeneratedModelError("Отсутствуют активные признаки в генотипе")

        model = self._make_untrained_model(loader)
        model.to(DEVICE)
        optimizer = optim.AdamW(model.parameters(), **phenotype["optimizer"])

        steps_per_epoch = len(loader)
        scheduler_params = dict(phenotype["scheduler"])
        epochs = scheduler_params.pop("epochs")
        total_steps = 1 + int(steps_per_epoch * epochs)
        scheduler_params["total_steps"] = total_steps
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer, **scheduler_params)

        LOGGER.info(f"Epochs - {epochs:.2f} / Train size - {len(loader.dataset)}")
        modules = sum(1 for _ in model.modules())
        model_params = sum(tensor.numel() for tensor in model.parameters())
        LOGGER.info(f"Количество слоев / параметров - {modules} / {model_params}")

        # 4 bytes per float32 parameter, scaled by the batch size and expressed in GiB.
        batch_size = (model_params * 4) * self._phenotype["data"]["batch_size"] / (2**10) ** 3
        if batch_size > MAX_BATCH_SIZE:
            raise TooLargeModelError(f"Batch size {batch_size:.0f} > {MAX_BATCH_SIZE}Gb")

        llh_sum = 0
        llh_deque = collections.deque([0], maxlen=steps_per_epoch)
        weight_sum = 0
        weight_deque = collections.deque([0], maxlen=steps_per_epoch)
        loss_fn = log_normal_llh_mix

        loader = itertools.repeat(loader)
        loader = itertools.chain.from_iterable(loader)
        loader = itertools.islice(loader, total_steps)

        model.train()
        bars = tqdm.tqdm(loader, file=sys.stdout, total=total_steps, desc="~~> Train")
        llh_min = None
        llh_adj = np.log(data_params.FORECAST_DAYS) / 2
        for batch in bars:
            optimizer.zero_grad()

            loss, means, _ = loss_fn(model, batch)

            llh_sum += -loss.item() - llh_deque[0]
            llh_deque.append(-loss.item())

            weight_sum += means.shape[0] - weight_deque[0]
            weight_deque.append(means.shape[0])

            loss.backward()
            optimizer.step()
            scheduler.step()

            llh = llh_sum / weight_sum + llh_adj
            bars.set_postfix_str(f"{llh:.5f}")

            if llh_min is None:
                llh_min = llh - LLH_DRAW_DOWN

            progress = bars.format_dict
            # Projected total training time: elapsed time scaled to the full step budget.
            total_time = progress["total"] / (1 + progress["n"]) * progress["elapsed"]
            if total_time > DAY_IN_SECONDS:
                raise DegeneratedModelError(f"Training time is too long: {total_time:.0f} > {DAY_IN_SECONDS}")

            # This form of the condition also catches NaN values
            if not (llh > llh_min):
                raise GradientsError(f"LLH снизилось - начальное: {llh_min + LLH_DRAW_DOWN:0.5f}")

        return model
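
The three itertools calls in the middle of the method turn the epoch-based loader into a flat stream of exactly total_steps batches: repeat yields the loader endlessly, chain.from_iterable flattens it into consecutive batches (re-iterating the loader at every epoch boundary), and islice cuts the stream off at the step budget. The same pattern on a toy iterable:

import itertools

batches = ["b0", "b1", "b2"]  # stands in for a DataLoader with 3 batches per epoch
total_steps = 7

stream = itertools.repeat(batches)              # epoch after epoch, forever...
stream = itertools.chain.from_iterable(stream)  # ...flattened into single batches...
stream = itertools.islice(stream, total_steps)  # ...truncated to the step budget

print(list(stream))  # ['b0', 'b1', 'b2', 'b0', 'b1', 'b2', 'b0']
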
Example 10
    def _eval_llh(self) -> float:
        """Вычисляет логарифм правдоподобия.

        Прогнозы пересчитываются в дневное выражение для сопоставимости и вычисляется логарифм
        правдоподобия. Модель загружается при наличии сохраненных весов или обучается с нуля.
        """
        loader = data_loader.DescribedDataLoader(
            self._tickers, self._end, self._phenotype["data"], data_params.TestParams
        )

        n_tickers = len(self._tickers)
        days, rez = divmod(len(loader.dataset), n_tickers)
        if rez:
            raise TooLongHistoryError

        model = self.get_model(loader)

        forecast_days = torch.tensor(self._phenotype["data"]["forecast_days"], dtype=torch.float)

        loss_fn = normal_llh

        llh_sum = 0.0
        weight_sum = 0.0
        m_all = []
        s_all = []
        r_all = []

        print(f"Тестовых дней: {days}")
        print(f"Тестовых примеров: {len(loader.dataset)}")
        with torch.no_grad():
            model.eval()
            bar = tqdm.tqdm(loader, file=sys.stdout, desc="~~> Test")
            for batch in bar:
                m, s = model(batch)
                m_all.append(m)
                s_all.append(s)
                r_all.append(batch["Label"])
                loss, weight = loss_fn((m / forecast_days, s / forecast_days ** 0.5), batch)
                llh_sum -= loss.item()
                weight_sum += weight

                bar.set_postfix_str(f"{llh_sum / weight_sum:.5f}")

        m_all = torch.cat(m_all).flatten().numpy()
        s_all = torch.cat(s_all).flatten().numpy()
        r_all = torch.cat(r_all).flatten().numpy()

        port = []
        simple = []

        # Start from an equal-weight portfolio.
        w = np.full(n_tickers, 1)
        w = w / w.sum()

        for day in range(days):
            m = m_all[day::days]
            s = s_all[day::days]
            r = r_all[day::days]

            mp = (m * w).sum()
            sp_2 = ((s * w) ** 2).sum()
            b = (s ** 2 * w) / sp_2
            grad = (m - mp) - (b - 1) * sp_2
            buy = np.argmax(grad)

            grad[w == 0] = np.inf
            sell = np.argmin(grad)

            sell_q = min(0.01, w[sell])

            w[buy] = w[buy] + sell_q
            w[sell] = w[sell] - sell_q

            port.append((r * w).sum())
            simple.append(r.mean())
            w = w * (1 + r)
            w = w / w.sum()

        port = np.array(port)
        simple = np.array(simple)

        print(f"Количество акций в портфеле: {1 / (w * w).sum():.1f}")
        print(f"Port: {port.mean() * 252:.2%} - {port.std() * 252 ** 0.5:.2%}")
        print(f"Simple: {simple.mean() * 252:.2%} - {simple.std() * 252 ** 0.5:.2%}")
        print(
            f"Diff: {(port.mean() - 0.5 * port.std() ** 2) * 252:.2%} - "
            f"{(simple.mean() - 0.5 * simple.std() ** 2) * 252:.2%} = "
            f"{((port.mean() - simple.mean()) - 0.5 * (port.std() ** 2 - simple.std() ** 2)) * 252:.2%}"
        )

        return llh_sum / weight_sum
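
The backtest loop above greedily rebalances an initially equal-weight portfolio one day at a time: grad approximates each ticker's marginal contribution to a mean-variance objective (tickers are treated as uncorrelated, so portfolio variance is the sum of squared weighted stds), one percent of weight moves from the worst currently held position to the best candidate, and weights then drift with realized returns. A condensed sketch of a single rebalancing step under those assumptions:

import numpy as np

def rebalance_step(m, s, w, step=0.01):
    """One greedy step; m and s are per-ticker forecast mean and std, w sums to 1."""
    mp = (m * w).sum()                # portfolio mean
    sp_2 = ((s * w) ** 2).sum()       # portfolio variance for uncorrelated tickers
    b = (s ** 2 * w) / sp_2           # each ticker's "beta" to the portfolio
    grad = (m - mp) - (b - 1) * sp_2  # marginal mean-variance improvement per ticker

    buy = np.argmax(grad)
    grad = grad.copy()
    grad[w == 0] = np.inf             # positions with zero weight cannot be sold
    sell = np.argmin(grad)

    quantity = min(step, w[sell])     # move at most `step` of total weight
    w = w.copy()
    w[buy] += quantity
    w[sell] -= quantity
    return w

w = rebalance_step(
    m=np.array([0.02, 0.01, 0.03]),
    s=np.array([0.05, 0.04, 0.08]),
    w=np.full(3, 1 / 3),
)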