コード例 #1
0
    def test_intersection_of_tuples(self):
        """Check `intersection_of_index` when one argument is a `MultiFrameDecorator`.

        Three empty frames with the indices [1, 2, 3, 4], [2, 3, 4] and
        [1, 3, 4] are combined twice - once with the decorator as the last
        argument and once as the first - and both calls are expected to
        yield [3, 4].
        """
        full, without_one, without_two = (
            pd.DataFrame({}, index=labels)
            for labels in ([1, 2, 3, 4], [2, 3, 4], [1, 3, 4])
        )

        # the decorator wraps two frames; its position among the arguments varies
        decorator_last = intersection_of_index(
            full, MultiFrameDecorator([without_one, without_two], True))
        decorator_first = intersection_of_index(
            MultiFrameDecorator([full, without_one], True), without_two)

        for result in (decorator_last, decorator_first):
            self.assertListEqual([3, 4], result.tolist())
コード例 #2
0
def extract_feature_labels_weights(
    df: pd.DataFrame, features_and_labels, **kwargs
) -> Tuple[Tuple[pd.DataFrame, int], pd.DataFrame, pd.DataFrame, pd.Series,
           pd.Series]:
    """Extract features, labels and optional companions restricted to one index.

    Features and labels are looked up with ``get_pandas_object`` and their NaN
    rows are dropped. Targets, sample weights and gross loss are looked up the
    same way but cleaned through ``call_if_not_none(..., 'dropna')``, so they
    may end up as ``None`` (presumably when the selector is not configured -
    confirm against ``call_if_not_none``).

    :param df: source data frame
    :param features_and_labels: object providing the ``features``, ``labels``,
        ``targets``, ``sample_weights``, ``gross_loss`` and ``label_type``
        attributes
    :param kwargs: forwarded to every ``get_pandas_object`` call
    :return: ``((features, n), labels, targets, sample_weights, gross_loss)``
        where every frame is restricted to the common index and ``n`` is
        ``len(df) - len(features) + 1`` using the NaN-dropped features
    """
    def optional_without_nan(selector):
        # the looked-up object may be None, hence the guarded dropna call
        return call_if_not_none(
            get_pandas_object(df, selector, **kwargs), 'dropna')

    features = get_pandas_object(
        df, features_and_labels.features, **kwargs).dropna()
    labels = get_pandas_object(
        df, features_and_labels.labels, **kwargs).dropna()
    targets = optional_without_nan(features_and_labels.targets)
    sample_weights = optional_without_nan(features_and_labels.sample_weights)
    gross_loss = optional_without_nan(features_and_labels.gross_loss)

    shared_index = intersection_of_index(
        features, labels, targets, sample_weights, gross_loss)

    # the label cast is optional and only applied when a type is configured
    label_type = features_and_labels.label_type
    if label_type is not None:
        labels = labels.astype(label_type)

    selected_features = features.loc[shared_index]
    rows_lost_plus_one = len(df) - len(features) + 1

    return (
        (selected_features, rows_lost_plus_one),
        labels.loc[shared_index],
        loc_if_not_none(targets, shared_index),
        loc_if_not_none(sample_weights, shared_index),
        loc_if_not_none(gross_loss, shared_index),
    )
コード例 #3
0
    def test_intersection_of_index(self):
        """Check that three plain frames intersect to their shared labels.

        The frames carry the indices [1, 2, 3, 4], [2, 3, 4] and [1, 3, 4];
        the expected intersection is [3, 4].
        """
        frames = [
            pd.DataFrame({}, index=labels)
            for labels in ([1, 2, 3, 4], [2, 3, 4], [1, 3, 4])
        ]

        shared = intersection_of_index(*frames)

        self.assertListEqual([3, 4], shared.tolist())
コード例 #4
0
def extract_features(df: pd.DataFrame, features_and_labels,
                     **kwargs) -> Tuple[List, pd.DataFrame, pd.DataFrame]:
    """Extract the features and optional targets on their common index.

    :param df: source data frame
    :param features_and_labels: object providing the ``features``, ``targets``
        and ``label_columns`` attributes
    :param kwargs: forwarded to every ``get_pandas_object`` call
    :return: ``(label_columns, features, targets)`` with features and targets
        restricted to the common index; targets are passed through
        ``loc_if_not_none`` and so may be ``None``
    :raises ValueError: if no feature row is left after dropping NaN rows
    """
    features = get_pandas_object(
        df, features_and_labels.features, **kwargs).dropna()

    raw_targets = get_pandas_object(df, features_and_labels.targets, **kwargs)
    targets = call_if_not_none(raw_targets, 'dropna')

    shared_index = intersection_of_index(features, targets)

    # the check uses the NaN-dropped features, not the intersected ones
    if len(features) < 1:
        raise ValueError("not enough data!")

    selected_features = features.loc[shared_index]
    selected_targets = loc_if_not_none(targets, shared_index)
    return features_and_labels.label_columns, selected_features, selected_targets
コード例 #5
0
ファイル: normalizer.py プロジェクト: KIC/pandas-ml-quant
def ta_delta_hedged_price(df: Typing.PatchedDataFrame, benchmark):
    """Build a price path from returns measured relative to a benchmark.

    The benchmark is resolved with ``get_pandas_object`` and both objects are
    aligned on their common index. The result is
    ``exp(cumsum(ta_log_returns(df) - ta_log_returns(benchmark)))``.

    :param df: data to be hedged
    :param benchmark: anything ``get_pandas_object`` can resolve against
        ``df``; when it is not itself a pandas object and names a column of
        ``df``, that column is removed from ``df`` first
    :return: exponentiated cumulative sum of the return differences
    """
    benchmark_data = get_pandas_object(df, benchmark)
    shared_index = intersection_of_index(df, benchmark_data)

    df = df.loc[shared_index]
    benchmark_data = benchmark_data.loc[shared_index]

    # drop the benchmark column so it is not hedged against itself
    benchmark_is_own_column = (
        hasattr(df, "columns")
        and not isinstance(benchmark, Typing.AnyPandasObject)
        and benchmark in df.columns
    )
    if benchmark_is_own_column:
        df = df.drop(benchmark, axis=1)

    benchmark_returns = ta_log_returns(benchmark_data)
    if df.ndim > 1:
        # repeat the benchmark returns once per column of df
        as_column = benchmark_returns.values.reshape(-1, 1)
        benchmark_returns = np.repeat(as_column, df.shape[1], axis=1)

    hedged_returns = ta_log_returns(df) - benchmark_returns
    return np.exp(hedged_returns.cumsum())
コード例 #6
0
def extract_feature_labels_weights(df: Typing.PatchedDataFrame,
                                   features_and_labels,
                                   **kwargs) -> FeaturesWithLabels:
    """Extract features, labels and companions and align them on one index.

    Features, targets and latent come from ``extract_features``, labels from
    ``extract_labels``; sample weights and gross loss are looked up with
    ``get_pandas_object`` and cleaned via ``call_if_not_none(..., 'dropna')``.
    Frames whose maximum value is infinite get their infinite values replaced
    by NaN and the affected rows dropped (in place) before the common index
    is computed.

    :param df: source data frame
    :param features_and_labels: definition object handed to the extractors;
        its ``sample_weights`` and ``gross_loss`` attributes are read here
    :param kwargs: forwarded to the extractors and ``get_pandas_object``
    :return: a ``FeaturesWithLabels`` whose members are restricted to the
        common index
    """
    def optional_without_nan(selector):
        # the looked-up object may be None, hence the guarded dropna call
        return call_if_not_none(
            get_pandas_object(df, selector, **kwargs), 'dropna')

    features, targets, latent = extract_features(
        df, features_and_labels, **kwargs)
    labels = extract_labels(df, features_and_labels, **kwargs)
    sample_weights = optional_without_nan(features_and_labels.sample_weights)
    gross_loss = optional_without_nan(features_and_labels.gross_loss)

    # sanity check: look for infinite values in any of the data frames
    for frame in (features, labels, targets, sample_weights, gross_loss):
        if frame is None:
            continue

        # cells may hold nested arrays, so work on the un-nested values
        unnested = flatten_nested_list(frame._.values, np.max)
        largest = max(v.max() for v in unnested)

        if not (np.isscalar(largest) and np.isinf(largest)):
            continue

        rows_with_inf = frame[
            frame.apply(lambda r: np.isinf(r.values).any(), axis=1)]
        _log.warning(f"features containing infinit number\n{rows_with_inf}")

        # cleaned in place so the names bound above see the result
        frame.replace([np.inf, -np.inf], np.nan, inplace=True)
        frame.dropna(inplace=True)

    # now get the common index and return the filtered data frames
    shared_index = intersection_of_index(
        features, labels, targets, sample_weights, gross_loss)

    if isinstance(features, tuple):
        selected_features = tuple(f.loc[shared_index] for f in features)
    else:
        selected_features = features.loc[shared_index]

    features_with_samples = FeaturesWithRequiredSamples(
        selected_features,
        len(df) - len(features) + 1,
        len(features.columns),
    )

    return FeaturesWithLabels(
        features_with_samples,
        labels.loc[shared_index],
        loc_if_not_none(latent, shared_index),
        loc_if_not_none(targets, shared_index),
        loc_if_not_none(sample_weights, shared_index),
        loc_if_not_none(gross_loss, shared_index),
    )
コード例 #7
0
def extract_features(df: pd.DataFrame, features_and_labels,
                     **kwargs) -> Tuple[List, pd.DataFrame, pd.DataFrame]:
    """Extract one or several feature sets plus optional targets.

    When ``features_and_labels.features`` is a tuple, every element is looked
    up separately and the results are wrapped in a ``MultiFrameDecorator``
    (i.e. for multi input layered networks); otherwise a single object is
    looked up. NaN rows are dropped from every feature object.

    :param df: source data frame
    :param features_and_labels: object providing the ``features``, ``targets``
        and ``label_columns`` attributes
    :param kwargs: forwarded to every ``get_pandas_object`` call
    :return: ``(label_columns, features, targets)`` with features and targets
        restricted to the common index; targets are passed through
        ``loc_if_not_none`` and so may be ``None``
    :raises ValueError: if ``len(features)`` is zero after dropping NaN rows
    """
    def load_without_nan(selector):
        return get_pandas_object(df, selector, **kwargs).dropna()

    if not isinstance(features_and_labels.features, tuple):
        features = load_without_nan(features_and_labels.features)
    else:
        # allow multiple feature sets, one frame per tuple element
        features = MultiFrameDecorator(
            [load_without_nan(f) for f in features_and_labels.features],
            True)

    raw_targets = get_pandas_object(df, features_and_labels.targets, **kwargs)
    targets = call_if_not_none(raw_targets, 'dropna')

    shared_index = intersection_of_index(features, targets)

    # the check uses the NaN-dropped features, not the intersected ones
    if len(features) < 1:
        raise ValueError("not enough data!")

    selected_features = features.loc[shared_index]
    selected_targets = loc_if_not_none(targets, shared_index)
    return features_and_labels.label_columns, selected_features, selected_targets
コード例 #8
0
def extract_feature_labels_weights(
    df: Typing.PatchedDataFrame, features_and_labels, **kwargs
) -> Tuple[Tuple[pd.DataFrame, int], pd.DataFrame, pd.DataFrame, pd.Series,
           pd.Series]:
    """Extract features, labels and optional companions on one common index.

    Features and targets come from ``extract_features``; labels are looked up
    with ``get_pandas_object`` and their NaN rows dropped; sample weights and
    gross loss are cleaned via ``call_if_not_none(..., 'dropna')`` and may
    therefore be ``None``. Frames whose maximum is infinite get their infinite
    values replaced by NaN and the affected rows dropped (in place) before
    the common index is computed.

    :param df: source data frame
    :param features_and_labels: object providing the ``labels``,
        ``sample_weights``, ``gross_loss`` and ``label_type`` attributes (and
        whatever ``extract_features`` reads from it)
    :param kwargs: forwarded to ``extract_features`` and ``get_pandas_object``
    :return: ``((features, n), labels, targets, sample_weights, gross_loss)``
        restricted to the common index, where ``n`` is
        ``len(df) - len(features) + 1``; ``features`` is a tuple of frames
        when the extracted features are a tuple
    """
    _, features, targets = extract_features(df, features_and_labels, **kwargs)
    labels = get_pandas_object(df, features_and_labels.labels,
                               **kwargs).dropna()
    sample_weights = call_if_not_none(
        get_pandas_object(df, features_and_labels.sample_weights, **kwargs),
        'dropna')
    gross_loss = call_if_not_none(
        get_pandas_object(df, features_and_labels.gross_loss, **kwargs),
        'dropna')

    if features_and_labels.label_type is not None:
        labels = labels.astype(features_and_labels.label_type)

    # do some sanity check for any non numeric values in any of the data frames
    for frame in [features, labels, targets, sample_weights, gross_loss]:
        if frame is not None:
            # NOTE(review): only the maximum is inspected, so a frame whose
            # only infinite values are -inf would not be cleaned - confirm
            max_value = frame._.max()

            if np.isscalar(max_value) and np.isinf(max_value):
                # the "%s" placeholder is required: an extra logging argument
                # without one triggers a formatting error instead of a record
                _log.warning(
                    "features containing infinit number\n%s",
                    frame[frame.apply(lambda r: np.isinf(r.values).any(),
                                      axis=1)])
                # cleaned in place so the names bound above see the result
                frame.replace([np.inf, -np.inf], np.nan, inplace=True)
                frame.dropna(inplace=True)

    # now get the common index and return the filtered data frames
    common_index = intersection_of_index(features, labels, targets,
                                         sample_weights, gross_loss)

    if isinstance(features, tuple):
        selected_features = tuple(f.loc[common_index] for f in features)
    else:
        selected_features = features.loc[common_index]

    return ((selected_features, len(df) - len(features) + 1),
            labels.loc[common_index],
            loc_if_not_none(targets, common_index),
            loc_if_not_none(sample_weights, common_index),
            loc_if_not_none(gross_loss, common_index))
コード例 #9
0
def extract_feature_labels_weights(
    df: Typing.PatchedDataFrame, features_and_labels, **kwargs
) -> Tuple[Tuple[pd.DataFrame, int], pd.DataFrame, pd.DataFrame, pd.Series,
           pd.Series]:
    """Extract features, labels and optional companions on one common index.

    Features and labels are looked up with ``get_pandas_object`` and their NaN
    rows dropped; targets, sample weights and gross loss are cleaned via
    ``call_if_not_none(..., 'dropna')`` and may therefore be ``None``. Frames
    whose maximum is infinite get their infinite values replaced by NaN and
    the affected rows dropped (in place) before the common index is computed.

    :param df: source data frame
    :param features_and_labels: object providing the ``features``, ``labels``,
        ``targets``, ``sample_weights``, ``gross_loss`` and ``label_type``
        attributes
    :param kwargs: forwarded to every ``get_pandas_object`` call
    :return: ``((features, n), labels, targets, sample_weights, gross_loss)``
        restricted to the common index, where ``n`` is
        ``len(df) - len(features) + 1``
    """
    features = get_pandas_object(df, features_and_labels.features,
                                 **kwargs).dropna()
    labels = get_pandas_object(df, features_and_labels.labels,
                               **kwargs).dropna()
    targets = call_if_not_none(
        get_pandas_object(df, features_and_labels.targets, **kwargs), 'dropna')
    sample_weights = call_if_not_none(
        get_pandas_object(df, features_and_labels.sample_weights, **kwargs),
        'dropna')
    gross_loss = call_if_not_none(
        get_pandas_object(df, features_and_labels.gross_loss, **kwargs),
        'dropna')

    if features_and_labels.label_type is not None:
        labels = labels.astype(features_and_labels.label_type)

    # sanity check: look for infinite values in any of the data frames
    for frame in [features, labels, targets, sample_weights, gross_loss]:
        if frame is not None:
            # NOTE(review): only the maximum is inspected, so a frame whose
            # only infinite values are -inf would not be cleaned - confirm
            max_value = frame._.values.max()

            if np.isscalar(max_value) and np.isinf(max_value):
                # the "%s" placeholder is required: an extra logging argument
                # without one triggers a formatting error instead of a record
                _log.warning(
                    "features containing infinit number\n%s",
                    frame[frame.apply(lambda r: np.isinf(r.values).any(),
                                      axis=1)])
                # cleaned in place so the names bound above see the result
                frame.replace([np.inf, -np.inf], np.nan, inplace=True)
                frame.dropna(inplace=True)

    # restrict everything to the rows shared by all available frames
    common_index = intersection_of_index(features, labels, targets,
                                         sample_weights, gross_loss)

    return ((features.loc[common_index], len(df) - len(features) + 1),
            labels.loc[common_index], loc_if_not_none(targets, common_index),
            loc_if_not_none(sample_weights, common_index),
            loc_if_not_none(gross_loss, common_index))
コード例 #10
0
    def __init__(self,
                 frames: XYWeight,
                 splitter: Callable[[Any], Tuple[pd.Index, pd.Index]] = None,
                 filter: Callable[[Any], bool] = None,
                 cross_validation: Union['BaseCrossValidator',
                                         Callable[[Any],
                                                  Generator[Tuple[np.ndarray,
                                                                  np.ndarray],
                                                            None,
                                                            None]]] = None,
                 epochs: int = 1,
                 batch_size: int = None,
                 fold_epochs: int = 1,
                 on_start: Callable = None,
                 on_epoch: Callable = None,
                 on_batch: Callable = None,
                 on_fold: Callable = None,
                 on_fold_epoch: Callable = None,
                 after_epoch: Callable = None,
                 after_batch: Callable = None,
                 after_fold: Callable = None,
                 after_fold_epoch: Callable = None,
                 after_end: Callable = None,
                 **kwargs):
        """Align the frames, store the settings and prepare split and folds.

        :param frames: frames to sample from; they are restricted to the
            sorted intersection of their indices
        :param splitter: optional callable yielding ``(train_idx, test_idx)``;
            without it all rows are training rows and the test index is empty
        :param filter: stored as ``self.filter``
        :param cross_validation: optional cross validator; its ``split``
            attribute is used when present, otherwise the object itself
        :param epochs: stored as ``self.epochs``
        :param batch_size: stored as ``self.batch_size``
        :param fold_epochs: stored as ``self.fold_epochs``
        :param on_start: callback, stored under the same attribute name (as
            are all other ``on_*`` and ``after_*`` arguments)
        :param kwargs: accepted but not used in this constructor
        """
        # only rows present in every provided frame are kept, in sorted order
        shared_index = intersection_of_index(*frames).sort_values()
        self.common_index = shared_index
        self.frames = XYWeight(
            *(loc_if_not_none(frame, shared_index) for frame in frames))

        # plain settings
        self.epochs = epochs
        self.batch_size = batch_size
        self.fold_epochs = fold_epochs
        self.splitter = splitter
        self.filter = filter

        # callbacks
        self.on_start = on_start
        self.on_epoch = on_epoch
        self.on_batch = on_batch
        self.on_fold = on_fold
        self.on_fold_epoch = on_fold_epoch
        self.after_epoch = after_epoch
        self.after_batch = after_batch
        self.after_fold = after_fold
        self.after_fold_epoch = after_fold_epoch
        self.after_end = after_end

        has_multi_index = isinstance(self.common_index, pd.MultiIndex)

        # split training and test data
        if self.splitter is None:
            self.train_idx, self.test_idx = self.common_index, pd.Index([])
        else:
            if has_multi_index:
                _log.warning(
                    "The Data provided uses a `MultiIndex`, eventually you want to set the "
                    "`partition_row_multi_index` parameter in your splitter")

            self.train_idx, self.test_idx = call_callable_dynamic_args(
                self.splitter, self.common_index, **self.frames.to_dict())

        # without cross validation there is nothing left to prepare
        if cross_validation is None:
            self.nr_folds = None
            self.cross_validation = None
            return

        if has_multi_index and not isinstance(cross_validation,
                                              PartitionedOnRowMultiIndexCV):
            # cross validators need to fold within each group of a multi index row index, a wrapper can be provided
            _log.warning(
                "The Data provided uses a `MultiIndex` but the cross validation is not wrapped in "
                "`PartitionedOnRowMultiIndexCV`")

        if epochs is None or epochs > 1:
            _log.warning(
                f"using epochs > 1 together with cross folding may lead to different folds for each epoch!"
                f"{cross_validation}")

        # -1 marks a fold count that the cross validator does not expose
        if hasattr(cross_validation, "get_n_splits"):
            self.nr_folds = cross_validation.get_n_splits()
        else:
            self.nr_folds = -1

        if hasattr(cross_validation, "split"):
            self.cross_validation = cross_validation.split
        else:
            self.cross_validation = cross_validation
コード例 #11
0
 def __init__(self, frames: Tuple[Typing.PatchedDataFrame], use_index_intersection=False):
     """Store the frames and decide which index represents them.

     :param frames: the wrapped frames
     :param use_index_intersection: when true the index is the result of
         ``intersection_of_index`` over all frames, otherwise the index of
         the first frame is used
     """
     self._frames = frames

     if use_index_intersection:
         self._index = intersection_of_index(*frames)
     else:
         self._index = frames[0].index