def random_regression_datasets(n_samples, features=100, outs=1, informative=0.1,
                               partition_proportions=(0.5, 0.3), rnd=None,
                               **mk_rgr_kwargs):
    """Generates a random regression problem with `sklearn.datasets.make_regression`
    and wraps it into `Datasets` splits according to `partition_proportions`."""
    rnd_state = dl.get_rand_state(rnd)
    # keyword arguments: recent versions of scikit-learn make these keyword-only
    X, Y, w = make_regression(n_samples, features,
                              n_informative=int(features * informative),
                              n_targets=outs, random_state=rnd_state,
                              coef=True, **mk_rgr_kwargs)
    if outs == 1:
        # make_regression returns a flat vector for single-target problems
        Y = np.reshape(Y, (n_samples, 1))

    print("range of Y", np.min(Y), np.max(Y))
    info = merge_dicts({"informative": informative, "random_seed": rnd, "w": w},
                       mk_rgr_kwargs)
    name = dl.em_utils.name_from_dict(info, "w")
    dt = dl.Dataset(X, Y, name=name, info=info)
    datasets = dl.Datasets.from_list(redivide_data([dt], partition_proportions))
    print("conditioning of X^T X",
          np.linalg.cond(datasets.train.data.T @ datasets.train.data))
    return datasets
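# Usage sketch (not part of the original module): build a small regression
# problem with a fixed seed. The fraction of samples not covered by
# `partition_proportions` (here 20%) goes to the remaining split.
#
#     dts = random_regression_datasets(1000, features=20, outs=1, rnd=0)
#     print(dts.train.data.shape, dts.train.target.shape)  # -> (500, 20) (500, 1)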
def random_classification_datasets(n_samples, features=100, classes=2, informative=0.1,
                                   partition_proportions=(0.5, 0.3), rnd=None,
                                   one_hot=True, **mk_cls_kwargs):
    """Generates a random classification problem with `sklearn.datasets.make_classification`
    and wraps it into `Datasets` splits according to `partition_proportions`."""
    rnd_state = dl.get_rand_state(rnd)
    X, Y = make_classification(n_samples, features, n_classes=classes,
                               random_state=rnd_state, **mk_cls_kwargs)
    if one_hot:
        Y = to_one_hot_enc(Y)

    print("range of Y", np.min(Y), np.max(Y))
    # NOTE: `informative` is only recorded in `info` here; pass `n_informative`
    # through mk_cls_kwargs to actually control make_classification.
    info = merge_dicts({"informative": informative, "random_seed": rnd},
                       mk_cls_kwargs)
    name = dl.em_utils.name_from_dict(info, "w")
    dt = dl.Dataset(X, Y, name=name, info=info)
    datasets = dl.Datasets.from_list(redivide_data([dt], partition_proportions))
    print("conditioning of X^T X",
          np.linalg.cond(datasets.train.data.T @ datasets.train.data))
    return datasets
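# Usage sketch (not part of the original module): a 3-class problem with
# one-hot targets; `n_informative` reaches make_classification via
# mk_cls_kwargs, as noted above.
#
#     dts = random_classification_datasets(1000, features=20, classes=3,
#                                          rnd=0, n_informative=5)
#     print(dts.train.target.shape)  # -> (500, 3) with the default 50/30/20 split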
def stack(*datasets):
    """Stacks data, targets and sample info of the given datasets,
    assuming they all share the same structure.

    :param datasets: datasets to stack
    :return: a single stacked Dataset
    """
    return Dataset(
        data=vstack([d.data for d in datasets]),
        target=stack_or_concat([d.target for d in datasets]),
        sample_info=np.concatenate([d.sample_info for d in datasets]),
        info={k: [d.info.get(k, None) for d in datasets]
              for k in merge_dicts(*[d.info for d in datasets])})
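# Usage sketch (assumption: `Datasets` exposes train/validation as `Dataset`
# objects, as used above): merge two splits back into one Dataset, e.g. to
# retrain on train + validation after model selection.
#
#     full_train = stack(dts.train, dts.validation)
#     assert full_train.data.shape[0] == (dts.train.data.shape[0]
#                                         + dts.validation.data.shape[0])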
def _training_supplier(step=None):
    nonlocal other_feeds
    if step >= self.T:
        if step % self.T == 0:
            if self.epochs:
                print("WARNING: End of the training scheme reached. "
                      "Generating another scheme.", file=sys.stderr)
            self.generate_visiting_scheme()
        step %= self.T

    if self.training_schedule is None:
        # visiting scheme not yet generated
        self.generate_visiting_scheme()

    # indices of the examples in the current mini-batch
    # noinspection PyTypeChecker
    nb = self.training_schedule[step * self.batch_size:
                                min((step + 1) * self.batch_size,
                                    len(self.training_schedule))]
    bx = self.dataset.data[nb, :]
    by = self.dataset.target[nb, :]

    # A previous implementation also processed a `lambda_feeds` dict here
    # ({k: v(nb) for k, v in lambda_feeds.items()}); it is no longer used.
    return merge_dicts({x: bx, y: by},
                       *[maybe_call(of, step) for of in other_feeds])
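# For reference, a plausible sketch of the `maybe_call` helper used above
# (its definition is not shown here, so this is an assumption): it would let
# `other_feeds` mix static feed dictionaries with step-dependent callables.
#
#     def maybe_call(obj, *args):
#         return obj(*args) if callable(obj) else obj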