Ejemplo n.º 1
0
def test_filter_values_covered_by_single_interval(filter_values):
    """Check that a cover made of a single interval contains every entry of
    ``filter_values``."""
    # TODO: Extend to inputs with shape (n_samples, 1)
    single_interval_cover = OneDimensionalCover(n_intervals=1)
    masks = single_interval_cover.fit_transform(filter_values)
    # TODO: Generate filter_values with desired shape
    # A trailing axis is added so the values can be indexed by the mask.
    as_column = filter_values[:, None]
    assert_almost_equal(as_column[masks], filter_values)
Ejemplo n.º 2
0
def test_two_dimensional_tensor(pts):
    """Check that fitting a OneDimensionalCover on ``pts`` raises a
    ValueError, whereas fitting a CubicalCover on the same input does not."""
    one_dimensional_cover = OneDimensionalCover()
    with pytest.raises(ValueError):
        one_dimensional_cover.fit(pts)

    # Must complete without raising.
    cubical_cover = CubicalCover()
    cubical_cover.fit(pts)
Ejemplo n.º 3
0
def test_cubical_fit_transform_consistent_with_OneD(filter, kind, n_intervals,
                                                    overlap_fraction):
    """Check that CubicalCover and OneDimensionalCover, built with the same
    arguments, produce (almost) equal ``fit_transform`` outputs on ``filter``.
    """
    # Both covers receive exactly the same positional arguments.
    cover_args = (kind, n_intervals, overlap_fraction)
    one_dimensional_cover = OneDimensionalCover(*cover_args)
    cubical_cover = CubicalCover(*cover_args)

    masks_one_d = one_dimensional_cover.fit_transform(filter)
    masks_cubical = cubical_cover.fit_transform(filter)

    assert_almost_equal(masks_one_d, masks_cubical)
Ejemplo n.º 4
0
def test_fit_transform_limits_not_computed():
    """With ``kind='balanced'``, calling only ``fit_transform`` must leave the
    cover without fitted intervals: ``get_fitted_intervals`` is expected to
    raise NotFittedError unless ``fit`` was called explicitly."""
    values = np.arange(30)
    balanced_cover = OneDimensionalCover(
        n_intervals=10, kind='balanced', overlap_frac=0.3
        )
    balanced_cover.fit_transform(values)

    with pytest.raises(NotFittedError):
        balanced_cover.get_fitted_intervals()
Ejemplo n.º 5
0
def test_balanced_is_balanced(balanced_cover):
    """Check that a balanced cover puts every point in exactly one interval
    and that every interval holds ``nb_in_each_interval`` points."""
    points, nb_in_each_interval, nb_intervals = balanced_cover
    cover = OneDimensionalCover(
        kind='balanced', n_intervals=nb_intervals, overlap_frac=0.01
        )
    mask = cover.fit_transform(points)

    # Column sums count the points falling in each interval.
    points_per_interval = np.sum(mask, axis=0)
    assert all(count == nb_in_each_interval for count in points_per_interval)

    # Row sums count the intervals each point belongs to.
    intervals_per_point = np.sum(mask, axis=1)
    assert all(count == 1 for count in intervals_per_point)
Ejemplo n.º 6
0
def test_filter_values_covered_by_interval_union(filter_values, n_intervals):
    """Test that each value is in at least one interval (that is, the cover
    is a true cover).

    The values selected by each interval mask are merged with ``np.union1d``;
    every entry of ``filter_values`` must then be found in that union.
    """
    # TODO: Extend to inputs with shape (n_samples, 1)
    cover = OneDimensionalCover(n_intervals=n_intervals)
    interval_masks = cover.fit_transform(filter_values)
    # Each column of the mask selects the values belonging to one interval.
    intervals = [filter_values[column_mask]
                 for column_mask in interval_masks.T]
    intervals_union = reduce(np.union1d, intervals)
    # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
    covered = np.isin(filter_values, intervals_union)
    assert_almost_equal(filter_values[covered], filter_values)
Ejemplo n.º 7
0
    def _runMapper(self):
        """
        creates mapper graphs based on train data

        One mapper pipeline is built per component ``k`` in
        ``range(self.n_components)``; each is fitted on ``self.data`` to
        produce a graph. The results are stored in ``self.mapper_pipes``
        (a list of ``("PCA<k+1>", pipeline)`` tuples) and ``self.graphs``,
        and are pickled under ``TEMP_DATA``. Unless ``self.remake`` is set,
        previously pickled results are loaded instead of being recomputed.

        :return: None
        """
        log.debug("--->creating mappers...")
        graphs_path = TEMP_DATA + "%s_firstsimplegap_graphs" % self.label
        pipes_path = TEMP_DATA + "%s_mapper_pipes" % self.label

        # Reuse the cache only when BOTH pickles exist; a cache with a missing
        # pipes file is rebuilt instead of failing on open().
        cache_available = (os.path.exists(graphs_path)
                           and os.path.exists(pipes_path))
        if not self.remake and cache_available:
            # NOTE(review): pickle.load can execute arbitrary code; these
            # files must only ever come from a trusted TEMP_DATA location.
            with open(graphs_path, "rb") as graphs_file:
                self.graphs = pickle.load(graphs_file)
            with open(pipes_path, "rb") as pipes_file:
                self.mapper_pipes = pickle.load(pipes_file)
            return

        clusterer = FirstSimpleGap()
        self.mapper_pipes = []

        log.debug("------> creating projection components...")

        for k in range(self.n_components):
            log.debug("---------> on component {}/{}...".format(k + 1, self.n_components))
            # The filter function applies self.rep and then keeps column k.
            proj = Projection(columns=k)
            filter_func = Pipeline(steps=[('pca', self.rep), ('proj', proj)])
            filtered_data = filter_func.fit_transform(self.data)
            cover = OneDimensionalCover(n_intervals=self.n_intervals,
                                        overlap_frac=self.overlap_frac,
                                        kind='balanced')
            cover.fit(filtered_data)
            mapper_pipe = make_mapper_pipeline(
                scaler=None,
                filter_func=filter_func,
                cover=cover,
                clusterer=clusterer,
                verbose=(log.getEffectiveLevel() == logging.DEBUG),
                n_jobs=1)
            mapper_pipe.set_params(filter_func__proj__columns=k)
            self.mapper_pipes.append(("PCA%d" % (k + 1), mapper_pipe))

        log.debug("------> entering parallelization...")

        # Graphs are currently built sequentially, one pipeline at a time.
        self.graphs = [mapper_pipe.fit_transform(self.data)
                       for _, mapper_pipe in self.mapper_pipes]

        # Context managers guarantee the files are closed even if
        # pickling fails part-way through.
        with open(graphs_path, "wb") as graphs_file:
            pickle.dump(self.graphs, graphs_file)

        with open(pipes_path, "wb") as pipes_file:
            pickle.dump(self.mapper_pipes, pipes_file)
Ejemplo n.º 8
0
def test_equal_interval_length(filter_values, n_intervals, overlap_frac):
    """Check that, for a uniform cover, all fitted intervals except the first
    and the last have the same length once the lengths are multiplied by 10
    and floored (i.e. they agree up to an additive constant of 0.1)."""
    uniform_cover = OneDimensionalCover(
        kind="uniform", n_intervals=n_intervals, overlap_frac=overlap_frac
        )
    uniform_cover = uniform_cover.fit(filter_values)

    # The first and last fitted intervals are left out of the comparison.
    inner_intervals = uniform_cover.get_fitted_intervals()[1:-1]
    lower_limits, upper_limits = np.array(
        [tuple(limits) for limits in zip(*inner_intervals)]
        )

    # rounding precision
    decimals = 10
    lengths = upper_limits - lower_limits
    floored_lengths = np.floor(lengths * decimals).tolist()
    assert len(set(floored_lengths)) == 1
Ejemplo n.º 9
0
def test_one_dimensional_cover_shape(filter_values, n_intervals):
    """Check the shape of the masks returned by ``fit_transform``: one row per
    sample and at most ``n_intervals`` columns. If ``fit_transform`` raises a
    ValueError instead, check that it carries the "only one unique filter
    value" message and that the input really has a single unique value while
    more than one interval was requested."""
    # TODO: Extend to inputs with shape (n_samples, 1)
    cover = OneDimensionalCover(n_intervals=n_intervals)
    n_samples = len(filter_values)
    n_intervals = cover.n_intervals
    try:
        unique_interval_masks = cover.fit_transform(filter_values)
    except ValueError as ve:
        expected_message = (f"Only one unique filter value found, cannot "
                            f"fit {n_intervals} > 1 intervals.")
        assert ve.args[0] == expected_message
        assert (n_intervals > 1) and (len(np.unique(filter_values)) == 1)
    else:
        n_rows, n_columns = unique_interval_masks.shape[:2]
        assert n_samples == n_rows
        assert n_intervals >= n_columns
Ejemplo n.º 10
0
def test_fit_transform_against_fit_and_transform(
        pts, n_intervals, kind, overlap_frac
        ):
    """Check that ``fit_transform`` and ``fit`` followed by ``transform``
    give (almost) equal results on two identically configured covers."""
    cover_params = dict(n_intervals=n_intervals, kind=kind,
                        overlap_frac=overlap_frac)

    # One-step path.
    one_step_cover = OneDimensionalCover(**cover_params)
    x_fit_transf = one_step_cover.fit_transform(pts)

    # Two-step path on a fresh, identically configured cover.
    two_step_cover = OneDimensionalCover(**cover_params)
    fitted_cover = two_step_cover.fit(pts)
    x_fit_and_transf = fitted_cover.transform(pts)

    assert_almost_equal(x_fit_transf, x_fit_and_transf)
Ejemplo n.º 11
0
def test_contract_nodes():
    """Test that, on a pathological dataset, we generate a graph without edges
    when `contract_nodes` is set to True and with edges when it is set to
    False.

    The dataset is built from ``make_circles`` by removing, for every pair of
    consecutive cover intervals, the points whose filter value lies within
    ``gap`` of either end of the range spanned by the intersection of the
    two intervals.
    """
    X = make_circles(n_samples=2000)[0]

    filter_func = Projection()
    cover = OneDimensionalCover(n_intervals=5, overlap_frac=0.4)
    p = filter_func.fit_transform(X)
    m = cover.fit_transform(p)

    gap = 0.1
    idx_to_remove = []
    for i in range(m.shape[1] - 1):
        # Points lying in both interval i and interval i + 1
        inters = np.logical_and(m[:, i], m[:, i + 1])
        inters_idx = np.flatnonzero(inters)
        p_inters = p[inters_idx]
        min_p, max_p = np.min(p_inters), np.max(p_inters)
        # Drop a band of width ``gap`` at each end of the intersection range
        idx_to_remove += list(np.flatnonzero((min_p <= p)
                                             & (p <= min_p + gap)))
        idx_to_remove += list(np.flatnonzero((max_p - gap <= p)
                                             & (p <= max_p)))

    # A set gives O(1) membership tests; the list made this step quadratic.
    removed = set(idx_to_remove)
    X_f = X[[x for x in range(len(X)) if x not in removed]]

    clusterer = DBSCAN(eps=0.05)
    pipe = make_mapper_pipeline(filter_func=filter_func,
                                cover=cover,
                                clusterer=clusterer,
                                contract_nodes=True)
    graph = pipe.fit_transform(X_f)
    # With node contraction enabled the graph must have no edges
    assert not len(graph.es)

    pipe.set_params(contract_nodes=False)
    graph = pipe.fit_transform(X_f)
    # Without node contraction the graph must have at least one edge
    assert len(graph.es)