Example #1
import numpy as np
from sklearn.datasets import make_regression
# dl, merge_dicts and redivide_data are assumed to come from the
# surrounding experiment library, as in the original module.


def random_regression_datasets(n_samples,
                               features=100,
                               outs=1,
                               informative=0.1,
                               partition_proportions=(0.5, 0.3),
                               rnd=None,
                               **mk_rgr_kwargs):
    rnd_state = dl.get_rand_state(rnd)
    # Keyword arguments are required here: recent scikit-learn versions
    # make everything after n_samples and n_features keyword-only.
    X, Y, w = make_regression(n_samples,
                              n_features=features,
                              n_informative=int(features * informative),
                              n_targets=outs,
                              random_state=rnd_state,
                              coef=True,
                              **mk_rgr_kwargs)
    if outs == 1:
        Y = np.reshape(Y, (n_samples, 1))

    print("range of Y", np.min(Y), np.max(Y))
    info = merge_dicts({
        "informative": informative,
        "random_seed": rnd,
        "w": w
    }, mk_rgr_kwargs)
    name = dl.em_utils.name_from_dict(info, "w")
    dt = dl.Dataset(X, Y, name=name, info=info)
    datasets = dl.Datasets.from_list(redivide_data([dt],
                                                   partition_proportions))
    print(
        "conditioning of X^T X",
        np.linalg.cond(datasets.train.data.T @ datasets.train.data),
    )
    return datasets
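For context, a standalone sketch of the same pattern using only numpy and scikit-learn; split_by_proportions is a hypothetical stand-in for redivide_data with the default (0.5, 0.3) proportions, not the library's actual implementation:

import numpy as np
from sklearn.datasets import make_regression

def split_by_proportions(X, Y, proportions=(0.5, 0.3)):
    # Cut rows into train/validation/test; the last share is implicit.
    n = X.shape[0]
    cuts = np.cumsum([int(p * n) for p in proportions])
    return [(X[a:b], Y[a:b]) for a, b in zip((0, *cuts), (*cuts, n))]

X, Y = make_regression(n_samples=200, n_features=100, n_informative=10,
                       n_targets=1, random_state=0)
Y = Y.reshape(-1, 1)  # match the (n_samples, 1) shape used above
train, valid, test = split_by_proportions(X, Y)
print([part[0].shape for part in (train, valid, test)])
# [(100, 100), (60, 100), (40, 100)]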
Example #2
import numpy as np
from sklearn.datasets import make_classification
# dl, merge_dicts, redivide_data and to_one_hot_enc are assumed to come
# from the surrounding experiment library, as in the original module.


def random_classification_datasets(n_samples,
                                   features=100,
                                   classes=2,
                                   informative=0.1,
                                   partition_proportions=(0.5, 0.3),
                                   rnd=None,
                                   one_hot=True,
                                   **mk_cls_kwargs):
    rnd_state = dl.get_rand_state(rnd)
    X, Y = make_classification(n_samples,
                               n_features=features,
                               n_classes=classes,
                               # use the requested fraction of informative
                               # features, as in the regression variant
                               n_informative=int(features * informative),
                               random_state=rnd_state,
                               **mk_cls_kwargs)
    if one_hot:
        Y = to_one_hot_enc(Y)

    print("range of Y", np.min(Y), np.max(Y))
    info = merge_dicts({
        "informative": informative,
        "random_seed": rnd
    }, mk_cls_kwargs)
    name = dl.em_utils.name_from_dict(info, "w")
    dt = dl.Dataset(X, Y, name=name, info=info)
    datasets = dl.Datasets.from_list(redivide_data([dt],
                                                   partition_proportions))
    print(
        "conditioning of X^T X",
        np.linalg.cond(datasets.train.data.T @ datasets.train.data),
    )
    return datasets
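A similarly self-contained sketch for the classification case; the np.eye indexing trick is assumed to be equivalent to what to_one_hot_enc does:

import numpy as np
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, n_features=100, n_classes=3,
                           n_informative=10, random_state=0)
Y = np.eye(3)[y]                      # one-hot encoding, shape (200, 3)
print(Y.shape, np.min(Y), np.max(Y))  # (200, 3) 0.0 1.0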
Example #3
    def stack(*datasets):
        """
        Assuming that the datasets have same structure, stacks data, targets and other info

        :param datasets:
        :return: stacked dataset
        """
        return Dataset(
            data=vstack([d.data for d in datasets]),
            target=stack_or_concat([d.target for d in datasets]),
            sample_info=np.concatenate([d.sample_info for d in datasets]),
            info={
                k: [d.info.get(k, None) for d in datasets]
                for k in merge_dicts(*[d.info for d in datasets])
            },
        )
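The info-merging step above can be illustrated in isolation; stack_info below is a hypothetical helper that mirrors the dict comprehension, collecting one value per dataset for every key seen in any of them:

def stack_info(*infos):
    # Every key from any dict maps to a list with one entry per dataset,
    # padded with None where a dataset lacks that key.
    keys = set().union(*infos)
    return {k: [info.get(k) for info in infos] for k in keys}

a = {"informative": 0.1, "random_seed": 0}
b = {"informative": 0.2, "w": [1.0]}
print(stack_info(a, b))
# e.g. {'informative': [0.1, 0.2], 'random_seed': [0, None], 'w': [None, [1.0]]}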
Example #4
        def _training_supplier(step=None):
            nonlocal other_feeds

            if step is None:  # the default would otherwise crash the comparison below
                step = 0

            if step >= self.T:
                if step % self.T == 0:
                    if self.epochs:
                        print(
                            "WARNING: End of the training scheme reached. "
                            "Generating another scheme.",
                            file=sys.stderr,
                        )
                    self.generate_visiting_scheme()
                step %= self.T

            if self.training_schedule is None:
                # print('visiting scheme not yet generated!')
                self.generate_visiting_scheme()

            # noinspection PyTypeChecker
            nb = self.training_schedule[
                step
                * self.batch_size : min(
                    (step + 1) * self.batch_size, len(self.training_schedule)
                )
            ]

            bx = self.dataset.data[nb, :]
            by = self.dataset.target[nb, :]

            # A previous implementation processed `lambda_feeds` here,
            # roughly as {k: v(nb) for k, v in lambda_feeds.items()};
            # it is no longer used.
            return merge_dicts(
                {x: bx, y: by}, *[maybe_call(of, step) for of in other_feeds]
            )
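The visiting-scheme bookkeeping above reduces to a small amount of logic; here is a standalone sketch (make_supplier and its names are hypothetical, not part of the library) that reshuffles a permutation once an epoch of T = ceil(n_samples / batch_size) steps is exhausted:

import numpy as np

def make_supplier(n_samples, batch_size, seed=0):
    rng = np.random.default_rng(seed)
    steps_per_epoch = int(np.ceil(n_samples / batch_size))  # plays the role of self.T
    schedule = rng.permutation(n_samples)                   # the visiting scheme

    def supplier(step):
        nonlocal schedule
        if step >= steps_per_epoch and step % steps_per_epoch == 0:
            schedule = rng.permutation(n_samples)           # generate another scheme
        step %= steps_per_epoch
        return schedule[step * batch_size:
                        min((step + 1) * batch_size, n_samples)]

    return supplier

supplier = make_supplier(n_samples=10, batch_size=4)
print([supplier(s).tolist() for s in range(3)])  # three batches cover all 10 rows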