Example #1
    def predict(self, sampler: Sampler, **kwargs) -> np.ndarray:
        # set a fresh env
        self.rl_model.set_env(self.env)
        samples, _ = sampler.nr_of_source_events
        envs = self.rl_model.get_env().envs[:1]
        rh = []

        # a `None` reward signals the start of a new episode: open a new
        # sub-list, otherwise append the reward to the current episode
        obs = [
            env.set_data_generator(
                sampler, lambda reward: rh[-1].append(reward)
                if reward is not None else rh.append([]), **self.kwargs)
            for env in envs
        ]

        prediction = []
        done = False

        for i in range(samples):
            if not done:
                action, state = self.rl_model.predict(obs)
                obs, reward, done, info = zip(
                    *[env.step(action) for env in envs])
                prediction.append(info[0]["interpreted_action"])

                for env in envs:
                    call_callable_dynamic_args(env.render, **kwargs)

                done = any(done)
            else:
                prediction.append(np.nan)

        return np.array(prediction)
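Every example in this collection routes calls through `call_callable_dynamic_args`. The helper itself is never shown; the following is a minimal sketch of its presumed behavior, inferred from the call sites above and below (it apparently tolerates `None` targets, maps over lists of callbacks, and forwards only the keyword arguments the target actually declares):

# Minimal sketch (assumption, inferred from usage; not the library's actual code):
import inspect

def call_callable_dynamic_args(func, *args, **kwargs):
    if func is None:
        return None
    if isinstance(func, (list, tuple)):
        # some call sites (e.g. Example #8) pass a list of callbacks
        return [call_callable_dynamic_args(f, *args, **kwargs) for f in func]

    params = inspect.signature(func).parameters
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
        # the target accepts **kwargs itself, so forward everything
        return func(*args, **kwargs)

    # otherwise drop any keyword argument the target does not declare
    accepted = {k: v for k, v in kwargs.items() if k in params}
    return func(*args, **accepted)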
Example #2
    def _fetch_time_series(self, symbol, **kwargs) -> pd.DataFrame:
        args = symbol.get_provider_args() if isinstance(symbol,
                                                        Symbol) else [symbol]

        if isinstance(args, (tuple, list)):
            return call_callable_dynamic_args(self.provider_map[type(symbol)],
                                              *args, **kwargs)
        else:
            return call_callable_dynamic_args(self.provider_map[type(symbol)],
                                              **args, **kwargs)
Example #3
def fetch_timeseries(providers: Dict[Callable[[Any], pd.DataFrame], List[str]],
                     start_date: str = None,
                     force_lower_case: bool = False,
                     multi_index: bool = None,
                     ffill: bool = False,
                     **kwargs):
    symbol_type = (list, tuple, set)
    expected_frames = sum(
        len(s) if isinstance(s, symbol_type) else 1
        for s in providers.values())
    df = None

    if multi_index is None and expected_frames > 1:
        multi_index = True

    for provider, symbols in providers.items():
        # make sure provider is an actual provider -> a callable
        if not callable(provider):
            provider = PROVIDER_MAP[provider]

        # make sure the symbols are iterable -> wrap single symbols into a list
        if not isinstance(symbols, symbol_type):
            symbols = [symbols]

        # fetch all symbols of all providers (later we could do this in parallel)
        for symbol in symbols:
            _df = call_callable_dynamic_args(provider,
                                             symbol,
                                             multi_index=multi_index,
                                             **kwargs)

            if _df is None:
                continue

            if multi_index:
                if not isinstance(_df.columns, pd.MultiIndex):
                    _df = add_multi_index(_df, symbol, True)

                if force_lower_case:
                    _df.columns = pd.MultiIndex.from_tuples([
                        (h.lower(), c.lower())
                        for h, c in _df.columns.to_list()
                    ])
            else:
                if isinstance(_df.columns, pd.MultiIndex):
                    _df.columns = [t[-1] for t in _df.columns.to_list()]

                if force_lower_case:
                    _df.columns = [c.lower() for c in _df.columns.to_list()]

            if df is None:
                df = _df
            else:
                df = inner_join(df,
                                _df,
                                force_multi_index=multi_index,
                                ffill=ffill)

    return df if start_date is None else df[start_date:]
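A hypothetical invocation of `fetch_timeseries` (the provider and symbols below are stand-ins for illustration, not part of the library):

import pandas as pd

def fake_provider(symbol, **kwargs) -> pd.DataFrame:
    # stand-in provider: a real one would download quotes for `symbol`
    idx = pd.date_range("2020-01-01", periods=3)
    return pd.DataFrame({"Close": [1.0, 2.0, 3.0]}, index=idx)

# two symbols -> expected_frames > 1 -> a (symbol, field) MultiIndex is used
df = fetch_timeseries({fake_provider: ["AAPL", "MSFT"]}, force_lower_case=True)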
Example #4
    def predict(self, features: pd.DataFrame, targets: pd.DataFrame = None, latent: pd.DataFrame = None, samples=1, **kwargs) -> Typing.PatchedDataFrame:
        pred = call_callable_dynamic_args(self.model, features, targets=targets, **self.kwargs)

        if isinstance(pred, pd.DataFrame):
            pred.columns = self._labels_columns
            return pred
        else:
            return to_pandas(pred, features.index, self._labels_columns)
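`to_pandas` is a library helper not shown in these examples; a plausible minimal version (assumed, the real implementation likely handles nested arrays as well) that aligns a raw prediction array with the feature index and label columns:

import numpy as np
import pandas as pd

def to_pandas(values, index, columns) -> pd.DataFrame:
    # reshape a raw prediction array to (rows, columns) and attach the index
    arr = np.asarray(values).reshape(len(index), -1)
    return pd.DataFrame(arr, index=index, columns=columns)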
Example #5
    def predict(self,
                model: MlModel,
                tail: int = None,
                samples: int = 1,
                forecast_provider: Callable[[Typing.PatchedDataFrame],
                                            Forecast] = None,
                **kwargs) -> Union[Typing.PatchedDataFrame, Forecast]:
        min_required_samples = model.features_and_labels.min_required_samples
        df = self.df

        if tail is not None:
            if min_required_samples is not None:
                # just use the tail for feature engineering
                df = df[-(abs(tail) + (min_required_samples - 1)):]
            else:
                _log.warning(
                    "could not determine the minimum required data from the model"
                )

        kwargs = merge_kwargs(model.features_and_labels.kwargs, model.kwargs,
                              kwargs)
        typemap_pred = {
            SubModelFeature:
            lambda df, model, **kwargs: model.predict(df, **kwargs),
            **self._type_mapping
        }
        frames: FeaturesWithTargets = model.features_and_labels(
            df, extract_features, type_map=typemap_pred, **kwargs)

        predictions = call_callable_dynamic_args(model.predict,
                                                 features=frames.features,
                                                 targets=frames.targets,
                                                 latent=frames.latent,
                                                 samples=samples,
                                                 df=df,
                                                 **kwargs)

        fc_provider = forecast_provider or model.forecast_provider
        res_df = assemble_result_frame(predictions, frames.targets, None, None,
                                       None, frames.features)

        return res_df if fc_provider is None else call_callable_dynamic_args(
            fc_provider, res_df, **kwargs)
Example #6
    def score_classification(df):
        y_true = df[LABEL_COLUMN_NAME]._.values
        y_pred = df[PREDICTION_COLUMN_NAME]._.values
        sample_weights = df[
            SAMPLE_WEIGHTS_COLUMN_NAME] if SAMPLE_WEIGHTS_COLUMN_NAME in df else None
        scores = defaultdict(lambda: [])

        y_true_class = np.argmax(
            y_true,
            axis=1) if y_true.ndim > 1 and y_true.shape[1] > 1 else y_true
        y_pred_class = np.argmax(
            y_pred, axis=1
        ) if y_pred.ndim > 1 and y_pred.shape[1] > 1 else y_pred > 0.5

        for scorer in class_scores.keys():
            try:
                score = call_callable_dynamic_args(
                    metrics.__dict__[scorer],
                    y_true_class,
                    y_pred_class,
                    sample_weight=sample_weights)
                if scorer == 'log_loss' and y_pred.ndim > 1:
                    # normalize multi-class log-loss by log(nr of classes) so
                    # that a uniform random prediction scores exactly 1.0
                    score /= np.log(y_pred.shape[1])

                scores[scorer].append(score)
            except Exception as e:
                _log.warning(f"{scorer} failed: {str(e)[:160]}")
                scores[scorer].append(np.nan)

        for scorer in losses.keys():
            try:
                score = call_callable_dynamic_args(
                    metrics.__dict__[scorer],
                    y_true_class,
                    y_pred,
                    sample_weight=sample_weights)
                scores[scorer].append(score)
            except Exception as e:
                _log.warning(f"{scorer} failed: {str(e)[:160]}")
                scores[scorer].append(np.nan)

        return pd.DataFrame(scores)
Example #7
    def fit(self, sampler: Sampler, **kwargs) -> float:
        # provide the data generator for every environment
        envs = self.rl_model.get_env().envs
        self.reward_history = []

        for env in envs:
            rh = []
            self.reward_history.append(rh)
            # a `None` reward signals the start of a new episode: open a new
            # sub-list, otherwise append the reward to the current episode
            env.set_data_generator(
                sampler, lambda reward: rh[-1].append(reward)
                if reward is not None else rh.append([]), **self.kwargs)

        call_callable_dynamic_args(self.rl_model.learn, **self.kwargs,
                                   **kwargs)

        # collect statistics of all environments
        latest_rewards = np.array([
            rh[-1] for erh in self.reward_history for rh in erh if len(rh) > 1
        ])
        return latest_rewards.mean()
Example #8
    def _record_loss(self, epoch, fold, fold_epoch, train_data: XYWeight, test_data: List[XYWeight], verbose, callbacks, loss_history_key=None):
        train_loss = self.calculate_loss(fold, train_data.x, train_data.y, train_data.weight)
        self._history["train", loss_history_key or fold][(epoch, fold_epoch)] = train_loss

        if len(test_data) > 0:
            test_loss = np.array([self.calculate_loss(fold, x, y, w) for x, y, w in test_data if len(x) > 0]).mean()
        else:
            test_loss = np.nan
        self._history["test", loss_history_key or fold][(epoch, fold_epoch)] = test_loss

        self.after_fold_epoch(epoch, fold, fold_epoch, train_loss, test_loss)
        if verbose > 0:
            print(f"epoch: {epoch}, train loss: {train_loss}, test loss: {test_loss}")

        call_callable_dynamic_args(
            callbacks,
            epoch=epoch, fold=fold, fold_epoch=fold_epoch, loss=train_loss, val_loss=test_loss,
            y_train=train_data.y, y_test=[td.y for td in test_data],
            y_hat_train=LazyInit(lambda: self.predict(train_data.x)),
            y_hat_test=[LazyInit(lambda: self.predict(td.x)) for td in test_data]
        )
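`LazyInit` lets the callbacks above receive predictions without paying for them unless they are actually used; a minimal memoizing wrapper (assumed, the real implementation is not shown here):

class LazyInit:
    def __init__(self, supplier):
        self._supplier = supplier   # deferred computation
        self._value = None
        self._evaluated = False

    def __call__(self):
        # compute on first access, then cache the result
        if not self._evaluated:
            self._value = self._supplier()
            self._evaluated = True
        return self._value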
Example #9
    def __init__(self,
                 reinforcement_model_provider: Callable[[Any], RLModel],
                 features_and_labels: FeaturesAndLabels,
                 summary_provider: Callable[[Typing.PatchedDataFrame],
                                            Summary] = Summary,
                 **kwargs):
        super().__init__(features_and_labels, summary_provider, **kwargs)
        self.reinforcement_model_provider = reinforcement_model_provider
        self.rl_model = call_callable_dynamic_args(
            reinforcement_model_provider, **self.kwargs, **kwargs)
        self.reward_history = None
        self.env = self.rl_model.get_env()
Example #10
    def split(
            self, x, *args,
            **kwargs) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]:
        if isinstance(x, pd.MultiIndex):
            grp_offset = 0
            for group in unique_level_rows(x):
                grp_args = [
                    v.loc[group] if isinstance(v, PandasObject) else v
                    for v in args
                ]
                grp_kwargs = {
                    k: v.loc[group] if isinstance(v, PandasObject) else v
                    for k, v in kwargs.items()
                }
                for train, test in call_callable_dynamic_args(
                        self.delegated, x[x.get_loc(group)], *grp_args,
                        **grp_kwargs):
                    yield train + grp_offset, test + grp_offset

                grp_offset += len(x[x.get_loc(group)])
        else:
            # `split` is a generator, so delegate with `yield from`
            # (a plain `return` would silently yield nothing here)
            yield from call_callable_dynamic_args(self.delegated, x, *args,
                                                  **kwargs)
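A toy demonstration of the offset logic: the delegate's splits are positional within each group, so the wrapper shifts them by the running offset to address the concatenated MultiIndex frame. The class name and constructor below are assumptions based on the `PartitionedOnRowMultiIndexCV` reference in Example #16:

import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_product([["A", "B"], range(3)])

def halves(x):
    # hypothetical delegate: one split per group, first half vs. second half
    n = len(x)
    yield np.arange(n // 2), np.arange(n // 2, n)

cv = PartitionedOnRowMultiIndexCV(halves)  # constructor signature assumed
for train, test in cv.split(idx):
    print(train, test)  # [0] [1 2] for group "A", then [3] [4 5] for group "B"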
Example #11
    def score_regression(df):
        y_true = df[LABEL_COLUMN_NAME]._.values
        y_pred = df[PREDICTION_COLUMN_NAME]._.values
        sample_weights = df[
            SAMPLE_WEIGHTS_COLUMN_NAME] if SAMPLE_WEIGHTS_COLUMN_NAME in df else None
        scores = defaultdict(lambda: [])

        for scorer in ALL:
            try:
                score = call_callable_dynamic_args(
                    rm.__dict__[scorer],
                    y_true,
                    y_pred,
                    sample_weight=sample_weights)
                scores[scorer].append(score)
            except Exception as e:
                _log.warning(f"{scorer} failed: {str(e)[:160]}")
                scores[scorer].append(np.nan)

        return pd.DataFrame(scores)
Example #12
    def __init__(self,
                 encode_layers: List[int],
                 decode_layers: List[int],
                 features_and_labels: FeaturesAndLabels,
                 summary_provider: Callable[[Typing.PatchedDataFrame],
                                            Summary] = Summary,
                 **kwargs):
        super().__init__(skit_model=call_callable_dynamic_args(
            MLPRegressor, **{
                "hidden_layer_sizes": [*encode_layers, *decode_layers],
                **kwargs
            }),
                         features_and_labels=features_and_labels,
                         summary_provider=summary_provider,
                         **kwargs)

        # Implementation analogous to this blog post: https://i-systems.github.io/teaching/ML/iNotes/15_Autoencoder.html
        self.encoder_layers = encode_layers
        self.decoder_layers = decode_layers
        self.layers = [*encode_layers, *decode_layers]
Example #13
    def backtest(self,
                 model: MlModel,
                 summary_provider: Callable[[Typing.PatchedDataFrame],
                                            Summary] = None,
                 tail: int = None,
                 **kwargs) -> Summary:

        min_required_samples = model.features_and_labels.min_required_samples
        df = self.df

        if tail is not None:
            if min_required_samples is not None:
                # just use the tail for feature engineering
                df = df[-(abs(tail) + (min_required_samples - 1)):]
            else:
                _log.warning(
                    "could not determine the minimum required data from the model"
                )

        kwargs = merge_kwargs(model.features_and_labels.kwargs, model.kwargs,
                              kwargs)
        typemap_pred = {
            SubModelFeature:
            lambda df, model, **kwargs: model.predict(df, **kwargs),
            **self._type_mapping
        }
        frames: FeaturesWithLabels = model.features_and_labels(
            df,
            extract_feature_labels_weights,
            type_map=typemap_pred,
            **kwargs)

        predictions = model.predict(frames.features, **kwargs)
        df_backtest = assemble_result_frame(predictions, frames.targets,
                                            frames.labels, frames.gross_loss,
                                            frames.sample_weights,
                                            frames.features)

        return call_callable_dynamic_args(
            summary_provider or model.summary_provider, df_backtest, model,
            **kwargs)
Example #14
    def _decode(self, latent_features: pd.DataFrame, samples,
                **kwargs) -> Typing.PatchedDataFrame:
        skm = self.sk_model
        if not hasattr(skm, 'coefs_'):
            raise ValueError("Model needs to be 'fit' first!")

        decoder = call_callable_dynamic_args(
            MLPRegressor, **{
                "hidden_layer_sizes": self.decoder_layers,
                **self.kwargs
            })
        decoder.coefs_ = skm.coefs_[len(self.encoder_layers):].copy()
        decoder.intercepts_ = skm.intercepts_[len(self.encoder_layers):].copy()
        decoder.n_layers_ = len(decoder.coefs_) + 1
        decoder.n_outputs_ = self.layers[-1]
        decoder.out_activation_ = skm.out_activation_

        decoded = decoder.predict(
            _AbstractSkModel.reshape_rnn_as_ar(
                unpack_nested_arrays(latent_features,
                                     split_multi_index_rows=False)))
        return to_pandas(decoded, latent_features.index, self._feature_columns)
Example #15
    def fit_fold(self, fold_nr: int, x: np.ndarray, y: np.ndarray,
                 x_val: np.ndarray, y_val: np.ndarray,
                 sample_weight: np.ndarray, sample_weight_val: np.ndarray,
                 **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        # import specifics
        from torch.autograd import Variable
        import torch as t

        # TODO we should not re-initialize model, criterion and optimizer once we already have them
        #  TODO we might want to re-initialize the optimizer for a new fold with a changed learning rate

        is_verbose = kwargs.get("verbose", False)
        on_epoch_callbacks = kwargs.get("on_epoch", [])
        restore_best_weights = kwargs.get("restore_best_weights", False)
        num_epochs = kwargs.get("epochs", 100)
        batch_size = kwargs.get("batch_size", 128)
        use_cuda = kwargs.get("cuda", False)

        module = self.module
        criterion_provider = self.criterion_provider

        if use_cuda:
            criterion_provider = lambda: self.criterion_provider().cuda()
            module = module.cuda()

        module = module.train()
        criterion = criterion_provider()
        optimizer = self.optimizer_provider(module.parameters())
        best_model_wts = deepcopy(module.state_dict())
        best_loss = sys.float_info.max
        epoch_losses = []
        epoch_val_losses = []
        # pre-initialize so logging and the epoch callbacks below are safe
        # even when no validation data is provided
        loss, val_loss, y_hat, y_hat_val = np.nan, np.nan, None, None

        if hasattr(module, 'callback'):
            on_epoch_callbacks += [module.callback]

        if hasattr(criterion, 'callback'):
            on_epoch_callbacks += [criterion.callback]

        if is_verbose:
            print(
                f"fit fold {fold_nr} with {len(x)} samples in {math.ceil(len(x) / batch_size)} batches ... "
            )

        for epoch in range(num_epochs):
            for i in range(0, len(x), batch_size):
                nnx = Variable(t.from_numpy(x[i:i + batch_size])).float()
                nny = Variable(t.from_numpy(y[i:i + batch_size])).float()
                weights = Variable(t.from_numpy(sample_weight[i:i + batch_size])).float() \
                    if sample_weight is not None else t.ones(nny.shape[0])

                if use_cuda:
                    nnx, nny, weights = nnx.cuda(), nny.cuda(), weights.cuda()

                # ===================forward=====================
                output = module(nnx)
                loss = self._calc_weighted_loss(criterion, output, nny,
                                                weights)

                # ===================backward====================
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if is_verbose > 1:
                    print(f"{epoch}:{i}\t{loss}\t")

            # ===================log========================
            # add validation loss history
            if y_val is not None and len(y_val) > 0:
                with t.no_grad():
                    nnx = t.from_numpy(x).float()
                    nny = t.from_numpy(y).float()
                    weights = t.from_numpy(sample_weight).float() \
                        if sample_weight is not None else t.ones(nny.shape[0])
                    nnx_val = t.from_numpy(x_val).float()
                    nny_val = t.from_numpy(y_val).float()
                    weights_val = t.from_numpy(sample_weight_val).float() \
                        if sample_weight_val is not None else t.ones(nny_val.shape[0])

                    if use_cuda:
                        nnx, nny = nnx.cuda(), nny.cuda()
                        nnx_val, nny_val = nnx_val.cuda(), nny_val.cuda()
                        weights, weights_val = weights.cuda(), weights_val.cuda()

                    y_hat = module(nnx)
                    loss = self._calc_weighted_loss(criterion_provider(),
                                                    y_hat, nny,
                                                    weights).item()
                    epoch_losses.append(loss)

                    y_hat_val = module(nnx_val)
                    val_loss = self._calc_weighted_loss(
                        criterion_provider(), y_hat_val, nny_val,
                        weights_val).item()
                    epoch_val_losses.append(val_loss)

                    if val_loss < best_loss:
                        best_loss = val_loss
                        best_model_wts = deepcopy(module.state_dict())

            if is_verbose:
                print(f"{epoch}\t{loss}\t{val_loss}")

            # invoke on epoch end callbacks
            try:
                for callback in on_epoch_callbacks:
                    call_callable_dynamic_args(callback,
                                               fold=fold_nr,
                                               epoch=epoch,
                                               x=x,
                                               y=y,
                                               x_val=x_val,
                                               y_val=y_val,
                                               y_hat=y_hat,
                                               y_hat_val=y_hat_val,
                                               loss=loss,
                                               val_loss=val_loss,
                                               best_loss=best_loss)
            except StopIteration:
                break

        if restore_best_weights:
            module.load_state_dict(best_model_wts)

        return np.array(epoch_losses), np.array(epoch_val_losses)
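The epoch loop above establishes a small callback protocol: callbacks are invoked through `call_callable_dynamic_args` (so they only need to declare the arguments they care about) and may raise `StopIteration` to end training early. A hypothetical early-stopping callback built on that protocol:

def early_stopping(patience: int = 5):
    # hypothetical callback factory for the `on_epoch` protocol above
    state = {"best": float("inf"), "bad_epochs": 0}

    def callback(val_loss):
        if val_loss < state["best"]:
            state["best"], state["bad_epochs"] = val_loss, 0
        else:
            state["bad_epochs"] += 1
            if state["bad_epochs"] >= patience:
                raise StopIteration()  # caught by the fit loop -> break

    return callback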
Example #16
    def __init__(self,
                 frames: XYWeight,
                 splitter: Callable[[Any], Tuple[pd.Index, pd.Index]] = None,
                 filter: Callable[[Any], bool] = None,
                 cross_validation: Union['BaseCrossValidator',
                                         Callable[[Any],
                                                  Generator[Tuple[np.ndarray,
                                                                  np.ndarray],
                                                            None,
                                                            None]]] = None,
                 epochs: int = 1,
                 batch_size: int = None,
                 fold_epochs: int = 1,
                 on_start: Callable = None,
                 on_epoch: Callable = None,
                 on_batch: Callable = None,
                 on_fold: Callable = None,
                 on_fold_epoch: Callable = None,
                 after_epoch: Callable = None,
                 after_batch: Callable = None,
                 after_fold: Callable = None,
                 after_fold_epoch: Callable = None,
                 after_end: Callable = None,
                 **kwargs):
        self.common_index = intersection_of_index(*frames).sort_values()
        self.frames = XYWeight(
            *[loc_if_not_none(f, self.common_index) for f in frames])
        self.epochs = epochs
        self.batch_size = batch_size
        self.fold_epochs = fold_epochs
        self.splitter = splitter
        self.filter = filter

        # callbacks
        self.on_start = on_start
        self.on_epoch = on_epoch
        self.on_batch = on_batch
        self.on_fold = on_fold
        self.on_fold_epoch = on_fold_epoch
        self.after_epoch = after_epoch
        self.after_batch = after_batch
        self.after_fold = after_fold
        self.after_fold_epoch = after_fold_epoch
        self.after_end = after_end

        # split training and test data
        if self.splitter is not None:
            if isinstance(self.common_index, pd.MultiIndex):
                _log.warning(
                    "The data provided uses a `MultiIndex`; you may want to set the "
                    "`partition_row_multi_index` parameter in your splitter")

            self.train_idx, self.test_idx = call_callable_dynamic_args(
                self.splitter, self.common_index, **self.frames.to_dict())
        else:
            self.train_idx, self.test_idx = self.common_index, pd.Index([])

        if cross_validation is not None:
            if isinstance(self.common_index, pd.MultiIndex) and not isinstance(
                    cross_validation, PartitionedOnRowMultiIndexCV):
                # cross validators need to fold within each group of a multi index row index, a wrapper can be provided
                _log.warning(
                    "The Data provided uses a `MultiIndex` but the cross validation is not wrapped in "
                    "`PartitionedOnRowMultiIndexCV`")

            if epochs is None or epochs > 1:
                _log.warning(
                    f"using epochs > 1 together with cross folding may lead to different folds for each epoch! "
                    f"{cross_validation}")

            self.nr_folds = cross_validation.get_n_splits() if hasattr(
                cross_validation, "get_n_splits") else -1
            self.cross_validation = cross_validation.split if hasattr(
                cross_validation, "split") else cross_validation
        else:
            self.nr_folds = None
            self.cross_validation = None
Example #17
    def sample_for_training(self) -> Generator[FoldXYWeight, None, None]:
        cross_validation = self.cross_validation if self.cross_validation is not None else lambda x: [
            (None, None)
        ]

        # filter samples
        if self.filter is not None:
            train_idx = [
                idx for idx in self.train_idx if call_callable_dynamic_args(
                    self.filter, idx, **self.frames.to_dict(idx))
            ]
        else:
            train_idx = self.train_idx

        # update frame views
        train_frames = XYWeight(
            *[loc_if_not_none(f, train_idx) for f in self.frames])
        test_frames = XYWeight(
            *[loc_if_not_none(f, self.test_idx) for f in self.frames])

        # call for start ...
        call_callable_dynamic_args(
            self.on_start,
            epochs=self.epochs,
            batch_size=self.batch_size,
            fold_epochs=self.fold_epochs,
            features=exec_if_not_none(lambda x: x.columns.tolist(),
                                      self.frames.x),
            labels=exec_if_not_none(lambda y: y.columns.tolist(),
                                    self.frames.y),
            cross_validation=self.nr_folds is not None)

        # generate samples; `iter(int, 1)` is an endless iterator (a stream of
        # zeros) used when no epoch count is given
        for epoch in (range(self.epochs) if self.epochs is not None else iter(
                int, 1)):
            call_callable_dynamic_args(self.on_epoch, epoch=epoch)
            fold_iter = enumerate(
                call_callable_dynamic_args(cross_validation, train_idx,
                                           **train_frames.to_dict()))
            for fold, (cv_train_i, cv_test_i) in fold_iter:
                call_callable_dynamic_args(self.on_fold,
                                           epoch=epoch,
                                           fold=fold)

                # if we don't have any cross validation, the training and test sets stay unchanged
                cv_train_idx = train_idx if cv_train_i is None else train_idx[
                    cv_train_i]

                # build our test data sets
                if cv_test_i is not None:
                    if cv_test_i.ndim > 1:
                        cv_test_frames = [
                            XYWeight(*[
                                loc_if_not_none(f, train_idx[cv_test_i[:, i]])
                                for f in self.frames
                            ]) for i in range(cv_test_i.shape[1])
                        ]
                    else:
                        cv_test_frames = [
                            XYWeight(*[
                                loc_if_not_none(f, train_idx[cv_test_i])
                                for f in self.frames
                            ])
                        ]
                else:
                    if len(self.test_idx) <= 0:
                        cv_test_frames = []
                    else:
                        cv_test_frames = [
                            XYWeight(*[
                                loc_if_not_none(f, self.test_idx)
                                for f in self.frames
                            ])
                        ]

                for fold_epoch in range(self.fold_epochs):
                    call_callable_dynamic_args(self.on_fold_epoch,
                                               epoch=epoch,
                                               fold=fold,
                                               fold_epoch=fold_epoch)

                    # build our training data sets aka batches
                    cv_train_frames = XYWeight(*[
                        loc_if_not_none(f, cv_train_idx) for f in self.frames
                    ])

                    # theoretically we could already yield cv_train_frames, cv_test_frames
                    # but let's create the batches first and then yield it all together
                    nr_instances = len(cv_train_idx)
                    # `nice_i` shifts the start of the final batch back by one
                    # row so that it never contains just a single sample
                    nice_i = max(nr_instances - 2, 0)
                    bs = min(nr_instances, self.batch_size
                             ) if self.batch_size is not None else nr_instances

                    batch_iter = range(0, nr_instances, bs)
                    for i in batch_iter:
                        call_callable_dynamic_args(self.on_batch,
                                                   epoch=epoch,
                                                   fold=fold,
                                                   fold_epoch=fold_epoch,
                                                   batch=i)
                        yield FoldXYWeight(
                            epoch, fold, fold_epoch,
                            *(f.iloc[i if i < nice_i else i - 1:i +
                                     bs] if f is not None else None
                              for f in cv_train_frames))
                        call_callable_dynamic_args(self.after_batch,
                                                   epoch=epoch,
                                                   fold=fold,
                                                   fold_epoch=fold_epoch,
                                                   batch=i)

                    # end of fold epoch
                    try:
                        call_callable_dynamic_args(self.after_fold_epoch,
                                                   epoch=epoch,
                                                   fold=fold,
                                                   fold_epoch=fold_epoch,
                                                   train_data=cv_train_frames,
                                                   test_data=cv_test_frames)
                    except StopIteration as sie:
                        call_callable_dynamic_args(self.after_fold,
                                                   epoch=epoch,
                                                   fold=fold,
                                                   train_data=cv_train_frames,
                                                   test_data=cv_test_frames)

                        if str(sie).isnumeric() and int(str(sie)) == fold:
                            # we just want to stop this fold
                            break
                        else:
                            # we need to stop any further generation of samples and invoke the remaining callbacks
                            call_callable_dynamic_args(self.after_epoch,
                                                       epoch=epoch,
                                                       train_data=train_frames,
                                                       test_data=test_frames)
                            call_callable_dynamic_args(self.after_end)
                            return
                # end of fold
                call_callable_dynamic_args(self.after_fold,
                                           epoch=epoch,
                                           fold=fold,
                                           train_data=cv_train_frames,
                                           test_data=cv_test_frames)
            # end of epoch
            call_callable_dynamic_args(self.after_epoch,
                                       epoch=epoch,
                                       train_data=train_frames,
                                       test_data=test_frames)
        # end of generator
        call_callable_dynamic_args(self.after_end)
Example #18
    def extract(self, func: callable, *args, **kwargs):
        return tuple(call_callable_dynamic_args(func, f, *args, **kwargs) for f in self.frames)
Example #19
    def fit_fold(self, x: np.ndarray, y: np.ndarray, x_val: np.ndarray,
                 y_val: np.ndarray, sample_weight_train: np.ndarray,
                 sample_weight_test: np.ndarray, **kwargs) -> float:
        # import specifics
        from torch.autograd import Variable
        import torch as t

        on_epoch_callbacks = kwargs.get("on_epoch", [])
        restore_best_weights = kwargs.get("restore_best_weights", False)
        num_epochs = kwargs.get("epochs", 100)
        batch_size = kwargs.get("batch_size", 128)
        use_cuda = kwargs.get("cuda", False)

        module = (self.module.cuda() if use_cuda else self.module).train()
        criterion = self.criterion_provider()
        optimizer = self.optimizer_provider(module.parameters())
        best_model_wts = deepcopy(module.state_dict())
        best_loss = sys.float_info.max
        epoch_losses = []
        epoch_val_losses = []
        # pre-initialize so the epoch callbacks below are safe even when no
        # validation data is provided
        val_loss, y_hat, y_hat_val = np.nan, None, None

        for epoch in range(num_epochs):
            batch_loss = 0

            for i in range(0, len(x), batch_size):
                nnx = Variable(t.from_numpy(x[i:i + batch_size])).float()
                nny = Variable(t.from_numpy(y[i:i + batch_size])).float()
                # default to uniform weights matching the batch size
                weights = Variable(t.from_numpy(sample_weight_train[i:i+batch_size])).float() \
                    if sample_weight_train is not None else t.ones(nny.shape[0])

                if use_cuda:
                    nnx, nny, weights = nnx.cuda(), nny.cuda(), weights.cuda()

                # ===================forward=====================
                output = module(nnx)
                loss = (criterion(output, nny).sum() * weights).mean()

                # ===================backward====================
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                batch_loss += loss.item()

            # ===================log========================
            # add loss history
            epoch_losses.append(batch_loss)

            # add validation loss history
            if y_val is not None and len(y_val) > 0:
                with t.no_grad():
                    nnx = Variable(t.from_numpy(x)).float()
                    nny = Variable(t.from_numpy(y)).float()
                    nnx_val = Variable(t.from_numpy(x_val)).float()
                    nny_val = Variable(t.from_numpy(y_val)).float()

                    if use_cuda:
                        nnx, nny = nnx.cuda(), nny.cuda()
                        nnx_val, nny_val = nnx_val.cuda(), nny_val.cuda()

                    y_hat = module(nnx)
                    y_hat_val = module(nnx_val)
                    val_loss = self.criterion_provider()(y_hat_val,
                                                         nny_val).sum().item()
                    epoch_val_losses.append(val_loss)

                    if val_loss < best_loss:
                        best_loss = val_loss
                        best_model_wts = deepcopy(module.state_dict())

            # invoke on epoch end callbacks
            try:
                for callback in on_epoch_callbacks:
                    call_callable_dynamic_args(callback,
                                               epoch=epoch,
                                               x=x,
                                               y=y,
                                               x_val=x_val,
                                               y_val=y_val,
                                               y_hat=y_hat,
                                               y_hat_val=y_hat_val,
                                               loss=loss,
                                               val_loss=val_loss)
            except StopIteration:
                break

        if restore_best_weights:
            module.load_state_dict(best_model_wts)

        self.history["loss"] = np.array(epoch_losses)
        self.history["val_loss"] = np.array(epoch_val_losses)

        return self.history["loss"][-1] if len(epoch_losses) > 0 else 0