def test_random_projection_embedding_quality():
    data, _ = make_sparse_random_data(8, 5000, 15000)
    eps = 0.2

    original_distances = euclidean_distances(data, squared=True)
    original_distances = original_distances.ravel()
    non_identical = original_distances != 0.0

    # remove 0 distances to avoid division by 0
    original_distances = original_distances[non_identical]

    for RandomProjection in all_RandomProjection:
        rp = RandomProjection(n_components='auto', eps=eps, random_state=0)
        projected = rp.fit_transform(data)

        projected_distances = euclidean_distances(projected, squared=True)
        projected_distances = projected_distances.ravel()

        # remove 0 distances to avoid division by 0
        projected_distances = projected_distances[non_identical]

        distances_ratio = projected_distances / original_distances

        # check that the automatically tuned values for the density respect the
        # contract for eps: pairwise distances are preserved according to the
        # Johnson-Lindenstrauss lemma
        assert_less(distances_ratio.max(), 1 + eps)
        assert_less(1 - eps, distances_ratio.min())
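
For context, scikit-learn's n_components='auto' mode derives the embedding dimension from the Johnson-Lindenstrauss bound. A minimal sketch of that bound for the data above (8 samples, eps=0.2), using sklearn's johnson_lindenstrauss_min_dim:

from sklearn.random_projection import johnson_lindenstrauss_min_dim

# Smallest embedding dimension that preserves pairwise distances of
# 8 samples within a (1 +/- 0.2) factor, per the Johnson-Lindenstrauss lemma.
print(johnson_lindenstrauss_min_dim(n_samples=8, eps=0.2))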
Example #2
def do_stuff(dataset = None, metric = True, drtype = "mds", components = 2):
    data_for_mds = np.array(dataset)
    
    if drtype:
        if drtype == "mds":
            mds = manifold.MDS(n_components=components, n_init=10, max_iter=3000, dissimilarity="euclidean", n_jobs=1, metric=metric)
            mds_result = mds.fit(data_for_mds)
        elif drtype == "pca":
            pca = PCA(n_components=2)
            # Fit and project the raw data; fitting on the distance matrix
            # would mismatch the feature dimension at transform time.
            mds_result = pca.fit_transform(data_for_mds)
        elif drtype == "tsne":
            model = manifold.TSNE(n_components=2, random_state=0, learning_rate=1000, early_exaggeration=10.0)
            mds_result = model.fit_transform(data_for_mds)
    
    clusterings = {}
    for i in range(10, 1, -1):
        clustering = ac(n_clusters=i, memory=mkdtemp())
        clusterings[i] = clustering.fit(data_for_mds).labels_.tolist()
        
    clustering = ac(n_clusters=1, memory=mkdtemp())
    clustering.fit(data_for_mds)
    
    output = {
        "drInfo": None,
        "embedding": None,
        "clustering": {
            "tree": clustering.children_.tolist(),
            "labels": clusterings
        }
    }
    if drtype:
        median_distance = False
        stress1 = False
        raw_stress = False
        if drtype == "mds":
            raw_stress = mds_result.stress_
            disparities = euclidean_distances(data_for_mds)
            disparityHalfMatrix = np.triu(disparities)
            sumSquaredDisparities = np.sum(np.square(disparityHalfMatrix))
            stress1 = math.sqrt(mds_result.stress_ / sumSquaredDisparities)
            median_distance = np.median(euclidean_distances(mds_result.embedding_))
            embedding = mds_result.embedding_.tolist()
            print(mds_result.stress_)
        else:
            embedding = mds_result.tolist()
        output["drInfo"] = {
            "type": drtype,
            "metric": metric,
            "components": components,
            "stress1": stress1,
            "rawStress":raw_stress,
            "medianDistance": median_distance
        }
        output["embedding"] = embedding

    return output
def agregation(Entity_struc, Entity_Tex):
    # Iteratively fuse the structural and textual evidence matrices until they
    # agree. `x` is assumed to be a sympy Symbol and `solve` sympy's solver,
    # used to pick the weight scale so the two weights sum to one.
    Agregated_evidence = Entity_Tex
    while not (Entity_struc == Entity_Tex).all():
        w1_1 = x / (0.01 + euclidean_distances(Entity_struc, Entity_struc))
        w1_2 = x / (0.01 + euclidean_distances(Entity_struc, Entity_Tex))
        f = solve((w1_1 + w1_2) - 1, x)
        p1_1 = f / (0.01 + euclidean_distances(Entity_struc, Entity_struc))
        p1_2 = f / (0.01 + euclidean_distances(Entity_struc, Entity_Tex))
        Entity_struc = (p1_1 * Entity_struc) + (p1_2 * Entity_Tex)
        Entity_Tex = (p1_2 * Entity_struc) + (p1_1 * Entity_Tex)
        Agregated_evidence = Entity_Tex
    return Agregated_evidence
Example #4
def estimate_X_test():
    n = 50
    random_state = np.random.RandomState(42)
    X_true = random_state.rand(n, 3)
    dis = euclidean_distances(X_true)
    alpha, beta = -3., 1.

    counts = beta * dis ** alpha
    counts = np.triu(counts)
    counts[np.arange(len(counts)), np.arange(len(counts))] = 0
    counts = sparse.coo_matrix(counts)

    X = mds.estimate_X(counts, random_state=random_state)
    assert_array_almost_equal(dis,
                              euclidean_distances(X), 2)
def test_negative_binomial_gradient_sparse_dispersed():
    n = 10
    random_state = np.random.RandomState(42)
    X = random_state.rand(n, 3)
    dis = euclidean_distances(X)
    alpha, beta = -3, 1

    fdis = beta * dis**alpha
    fdis[np.isinf(fdis)] = 1
    dispersion = fdis + fdis ** 2
    p = fdis / (fdis + dispersion)

    counts = random_state.negative_binomial(dispersion, 1 - p)
    counts = np.triu(counts)
    counts[np.arange(len(counts)), np.arange(len(counts))] = 0
    counts = sparse.coo_matrix(counts, dtype=float)
    return True  # test disabled: everything below is unreachable
    # from minorswing import dispersion
    mean, variance = dispersion.compute_mean_variance(
        counts,
        np.array([counts.shape[0]]))
    mean, variance = mean[:-1], variance[:-1]
    d = dispersion.DispersionPolynomial()
    d.fit(mean, variance)

    gradient_sparse = negative_binomial_structure.negative_binomial_gradient(
        X, counts, dispersion=d)
def test_negative_binomial_obj_sparse_dispersion_biased():
    n = 10
    random_state = np.random.RandomState(42)
    X = random_state.rand(n, 3)
    dis = euclidean_distances(X)
    alpha, beta = -3, 1

    counts = beta * dis ** alpha

    return True  # test disabled: everything below is unreachable
    from minorswing import dispersion
    mean, variance = dispersion.compute_mean_variance(
        counts**2,
        np.array([counts.shape[0]]))
    mean, variance = mean[:-1], variance[:-1]
    d = dispersion.Dispersion()
    d.fit(mean, variance)

    counts = np.triu(counts)
    counts[np.arange(len(counts)), np.arange(len(counts))] = 0
    counts = sparse.coo_matrix(counts)

    obj = negative_binomial_structure.negative_binomial_obj(
        X, counts, dispersion=d, alpha=alpha, beta=beta)

    obj_ = negative_binomial_structure.negative_binomial_obj(
        random_state.rand(*X.shape),
        counts, dispersion=d, alpha=alpha, beta=beta)
    assert(obj < obj_)
def test_estimate_X_biased_dispersion():
    n = 50
    random_state = np.random.RandomState(42)
    X_true = random_state.rand(n, 3)
    dis = euclidean_distances(X_true)
    alpha, beta = -3, 1

    fdis = beta * dis ** alpha
    fdis[np.isinf(fdis)] = 1
    dispersion = fdis + fdis ** 2
    p = fdis / (fdis + dispersion)

    counts = random_state.negative_binomial(dispersion, 1 - p)
    counts = np.triu(counts)
    counts[np.arange(len(counts)), np.arange(len(counts))] = 0
    counts = sparse.coo_matrix(counts, dtype=float)

    lengths = np.array([counts.shape[0]])
    return True  # test disabled: everything below is unreachable
    from minorswing import dispersion
    mean, variance = dispersion.compute_mean_variance(counts, lengths)
    mean, variance = mean[:-1], variance[:-1]
    d = dispersion.DispersionPolynomial()
    d.fit(mean, variance)

    X = negative_binomial_structure.estimate_X(counts, alpha, beta,
                                               dispersion=d,
                                               random_state=random_state)
def test_estimate_X():
    n = 50
    random_state = np.random.RandomState(42)
    X_true = random_state.rand(n, 3)
    dis = euclidean_distances(X_true)
    alpha, beta = -3, 1

    counts = beta * dis ** alpha
    counts = np.triu(counts)
    counts[np.arange(len(counts)), np.arange(len(counts))] = 0
    counts = sparse.coo_matrix(counts)

    X = negative_binomial_structure.estimate_X(counts, alpha, beta,
                                               random_state=random_state)
    assert_array_almost_equal(dis,
                              euclidean_distances(X), 2)
Example #9
def plotMap(maparr, freq, nest, seqs, dbfile, map2d, outfile, plotm='T'):

    # multi-dimensional scaling
    similarities = euclidean_distances(np.matrix(maparr))
    mds = MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=np.random.RandomState(seed=3), dissimilarity="precomputed", n_jobs=1)
    pos = mds.fit(similarities).embedding_

    #plot attributes
    N = len(pos)
    #size = [20*n for n in freq]
    size = 8000
    color = np.array(range(N))
    
    if str(plotm) == 'T':
    
        #plot MDS
        fig, ax = plt.subplots(figsize=(10,10))
        warnings.filterwarnings("ignore")
        scatter = ax.scatter(np.array(pos[:,0]), np.array(pos[:,1]), c=color, s=size, alpha=0.3, cmap=plt.cm.viridis, marker='s')
        plt.xlabel('Dimension 1', fontsize=20, labelpad=20)
        plt.ylabel('Dimension 2', fontsize=20, labelpad=20)
        #plt.axis([xmin, xmax, ymin, ymax])
        plt.tick_params(labelsize=15, length=14, direction='out', pad=15, top=False, right=False)

        #save figures
        fig.savefig(outfile + '.png', bbox_inches='tight', format='png')
        fig.savefig(outfile + '.pdf', bbox_inches='tight', format='pdf')
        plt.close(fig)
        warnings.resetwarnings()
        
        #write csv file
        writePlotMDS(freq, nest, seqs, dbfile, pos, maparr, map2d, outfile)

    return pos
    def fit(self,X,y=None):
        """ Create affinity matrix from negative euclidean distances, then
        apply affinity propagation clustering.
        Parameters
        ----------
        X: array-like, shape (n_samples, n_features) or (n_samples, n_samples)
            Data matrix or, if affinity is ``precomputed``, matrix of
            similarities / affinities.
        """
        X = check_array(X, accept_sparse='csr')
        if self.affinity == "precomputed":
            self.affinity_matrix_ = X
        elif self.affinity == "euclidean":
            self.affinity_matrix_ = -euclidean_distances(X, squared=True)
        else:
            raise ValueError("Affinity must be 'precomputed' or "
                             "'euclidean'. Got %s instead"
                             % str(self.affinity))

        self.cluster_centers_indices_, self.labels_, self.n_iter_ = \
            affinity_propagation(
                self.affinity_matrix_, self.preference, max_iter=self.max_iter,
                convergence_iter=self.convergence_iter, damping=self.damping,
                copy=self.copy, verbose=self.verbose, return_n_iter=True)

        if self.affinity != "precomputed":
            self.cluster_centers_ = X[self.cluster_centers_indices_].copy()

        return self
Example #11
def eval_grad_f_X(X, user_data=None):
    """
    Evaluate the gradient of the function in X
    """
    global niter
    niter += 1
    if not niter % 10:
        X.dump('%d.sol.npy' % niter)

    if VERBOSE:
        print("Poisson exponential model: eval_grad_f_X")

    m, n, counts, alpha, beta, d = user_data
    X = X.reshape((m, n))
    dis = euclidean_distances(X)

    tmp = X.repeat(m, axis=0).reshape((m, m, n))
    dif = tmp - tmp.transpose(1, 0, 2)
    dis = dis.repeat(n).reshape((m, m, n))
    counts = counts.repeat(n).reshape((m, m, n))

    grad = - alpha * beta * dif / dis * (dis ** (alpha - 1)) + \
           counts * alpha * dif / (dis ** 2)
    grad[np.isnan(grad)] = 0
    return - grad.sum(1)
Example #12
    def _get_enemies_dists(data, target):
        """Get the distances to the nearest enemy of each instance in a
        training set.

        Args:
            data: Data values.
            target: Target values.

        Returns:
            Array with the distance to the nearest enemy of each instance.
        """

        # Enemies of each label ('label': [list of enemies])
        enemies = {}
        for label in np.unique(target):  # For every label
            indices = np.nonzero(label != target)[0]
            enemies[label] = data[indices].copy()

        # Compute the distance to the nearest enemy of each instance
        dists = np.zeros(len(data))
        for p in range(len(data)):
            enemies_dists = metrics.euclidean_distances(
                data[p].reshape(1, -1), enemies[target[p]])
            nearest_enemy_dist = enemies_dists.min()
            dists[p] = nearest_enemy_dist

        return dists
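
The per-instance loop above can be vectorized with a single distance matrix. A minimal sketch on toy data, masking same-label pairs with inf before taking the row-wise minimum:

import numpy as np
from sklearn.metrics import euclidean_distances

rng = np.random.RandomState(0)
data = rng.rand(8, 2)
target = np.array([0, 0, 0, 1, 1, 1, 2, 2])

# Full pairwise matrix; same-label pairs are masked so the row-wise
# minimum is the distance to the nearest enemy.
D = euclidean_distances(data)
D[target[:, None] == target[None, :]] = np.inf
nearest_enemy_dists = D.min(axis=1)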
Example #13
    def query(self, query):
        c = self.c
        m = self.m
        query = np.array(query)

        # descend phase
        max_depth = 0
        for i in range(len(self.trees)):
            bin_query = self._hash(query, self.hash_functions[i])
            k = self.trees[i].find_prefix_match(bin_query)
            if k > max_depth:
                max_depth = k

        # asynchronous ascend phase
        candidates = list()
        number_of_candidates = c * len(self.trees)
        while max_depth > 0 and (len(candidates) < number_of_candidates or len(set(candidates)) < m):
            for i in range(len(self.trees)):
                bin_query = self._hash(query, self.hash_functions[i])
                candidates.extend(self.trees[i].query(bin_query, max_depth))
            max_depth = max_depth - 1

        if len(candidates) == 0:
            candidates = range(len(self.xs))

        candidates = np.array(list(set(candidates)))
        if self.debug:
            print('md:', max_depth)
            print('c:', candidates)
        distances = euclidean_distances(query.reshape(1, -1), self.xs[candidates])
        return sorted(zip(distances[0], candidates))[:self.m]
Example #14
def betacv_simple(data, labels, size=3000, metric='euclidean'):
    n = labels.shape[0]
    n_slices = int(ceil(n / size))
    intra = 0
    inter = 0
    n_in = 0
    n_out = 0
    last = 0
    labels_unq = np.unique(labels)
    members = np.array([member_count(labels, i) for i in labels_unq])
    N_in = np.array([i*(i-1) for i in members])
    n_in = np.sum(N_in)
    N_out = np.array([i*(n-i) for i in members])
    n_out = np.sum(N_out)
    
    for i in range(n_slices):
        x = data[last:(last+size), :]
        distances = euclidean_distances(x, data)
        j_range = min(size, n-size*i)
        A = np.array([intra_cluster_distance(distances[j], labels, j+last)
                  for j in range(j_range)])
        B = np.array([inter_cluster_distance(distances[j], labels, j+last)
                  for j in range(j_range)])
        intra += np.sum(A)
        inter += np.sum(B)
        last += size

    betacv = (intra/n_in)/(inter/n_out)
    print('simple intra:', intra)
    print('simple inter:', inter)
    print('simple n_in :', n_in)
    print('simple n_out:', n_out)
    return betacv
Example #15
	def fit_transform(self, X, y=None, init=None):
		"""
		Fit the data from X, and returns the embedded coordinates
		Parameters
		----------
		X : array, shape=[n_samples, n_features], or [n_samples, n_samples] \
				if dissimilarity='precomputed'
			Input data.
		init : {None or ndarray, shape (n_samples, n_components)}, optional
			If None, randomly chooses the initial configuration;
			if ndarray, initialize the SMACOF algorithm with this array.
		"""
		X = check_array(X)
		if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
			warnings.warn("The MDS API has changed. ``fit`` now constructs an"
						  " dissimilarity matrix from data. To use a custom "
						  "dissimilarity matrix, set "
						  "``dissimilarity='precomputed'``.")

		if self.dissimilarity == "precomputed":
			self.dissimilarity_matrix_ = X
		elif self.dissimilarity == "euclidean":
			self.dissimilarity_matrix_ = euclidean_distances(X)
		else:
			raise ValueError("Proximity must be 'precomputed' or 'euclidean'."
							 " Got %s instead" % str(self.dissimilarity))

		self.embedding_, self.stress_, self.n_iter_, self.last_n_embeddings = smacof_dispatch(self.config, self.variant,
			self.dissimilarity_matrix_, metric=self.metric,
			n_components=self.n_components, init=init, n_init=self.n_init,
			n_jobs=self.n_jobs, max_iter=self.max_iter, verbose=self.verbose,
			eps=self.eps, random_state=self.random_state,
			return_n_iter=True)

		return self.embedding_, self.last_n_embeddings
def test_affinity_propagation_equal_mutual_similarities():
    X = np.array([[-1, 1], [1, -1]])
    S = -euclidean_distances(X, squared=True)

    # setting preference > similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=0)

    # expect every sample to become an exemplar
    assert_array_equal([0, 1], cluster_center_indices)
    assert_array_equal([0, 1], labels)

    # setting preference < similarity
    cluster_center_indices, labels = assert_warns_message(
        UserWarning, "mutually equal", affinity_propagation, S, preference=-10)

    # expect one cluster, with arbitrary (first) sample as exemplar
    assert_array_equal([0], cluster_center_indices)
    assert_array_equal([0, 0], labels)

    # setting different preferences
    cluster_center_indices, labels = assert_no_warnings(
        affinity_propagation, S, preference=[-20, -10])

    # expect one cluster, with highest-preference sample as exemplar
    assert_array_equal([1], cluster_center_indices)
    assert_array_equal([0, 0], labels)
Example #17
    def _cluster_variance(cls, num_points, clusters, centroids):
        s = 0
        denom = float(num_points - len(centroids))
        for cluster, centroid in zip(clusters, centroids):
            # reshape in case `centroid` is a single 1-D point
            distances = euclidean_distances(cluster, centroid.reshape(1, -1))
            s += (distances * distances).sum()
        return s / denom
Example #18
    def run(self):
        """Implement method from ISABase."""

        sel = np.zeros(len(self._x), bool)  # Mask of selected instances (none)
        aval = np.ones(len(self._x), bool)  # Mask of available instances (all)

        # Calculate distances to nearest enemies
        enemy_dists = self._get_enemies_dists(self._x, self._y)

        # For every unique label
        for l in np.unique(self._y):
            while True:
                # Get available instances with label `l`
                candidates = (aval & (self._y == l)).nonzero()[0]
                candidates_dists = enemy_dists[candidates]
                if len(candidates_dists) == 0:
                    break
                # Choose the candidate with the smallest distance to its enemy
                candidate = candidates[candidates_dists.argmin()]

                sel[candidate] = True  # Mark candidate as selected
                aval[candidate] = False  # Mark candidate as unavailable

                rest = candidates[candidates != candidate]  # rest of candidates
                # Work out the distances from `candidate` to `rest`
                rest_dists = metrics.euclidean_distances(
                    self._x[candidate].reshape(1, -1), self._x[rest])[0]
                # Pick instances closer to the candidate than the candidate's
                # nearest enemy
                picked_candidates = rest[rest_dists < enemy_dists[candidate]]
                # Mark picked candidates as unavailable
                aval[picked_candidates] = False  # Mark picked candidates

        self._sel = sel
Example #19
def histogram_colors_strict(lab_array, palette, plot_filename=None):
    """
    Return a palette histogram of colors in the image.

    Parameters
    ----------
    lab_array : (N,3) ndarray
        The L*a*b color of each of N pixels.
    palette : rayleigh.Palette
        Containing K colors.
    plot_filename : string, optional
        If given, save histogram to this filename.

    Returns
    -------
    color_hist : (K,) ndarray
    """
    # This is the fastest way that I've found.
    # >>> %%timeit -n 200 from sklearn.metrics import euclidean_distances
    # >>> euclidean_distances(palette, lab_array, squared=True)
    dist = euclidean_distances(palette.lab_array, lab_array, squared=True).T
    min_ind = np.argmin(dist, axis=1)
    num_colors = palette.lab_array.shape[0]
    num_pixels = lab_array.shape[0]
    color_hist = 1. * np.bincount(min_ind, minlength=num_colors) / num_pixels
    if plot_filename is not None:
        plot_histogram(color_hist, palette, plot_filename)
    return color_hist
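
The distance-then-argmin step is also what sklearn's pairwise_distances_argmin computes directly. A self-contained sketch of the same palette assignment on stand-in data:

import numpy as np
from sklearn.metrics import pairwise_distances_argmin

rng = np.random.RandomState(0)
lab_array = rng.rand(1000, 3)   # stand-in for N pixel colors
palette_lab = rng.rand(16, 3)   # stand-in for a K-color palette

# Nearest palette color per pixel, then a normalized histogram over the palette
min_ind = pairwise_distances_argmin(lab_array, palette_lab)
color_hist = np.bincount(min_ind, minlength=len(palette_lab)) / len(lab_array)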
Example #20
def wordMoverDistance(d1, d2):
    # d1, d2: lists of tokens
    # Rule out words not in the vocabulary
    d1 = " ".join([w for w in d1 if w in vocab_dict])
    d2 = " ".join([w for w in d2 if w in vocab_dict])
    #print d1
    #print d2
    vect = CountVectorizer().fit([d1,d2])
    feature_names = vect.get_feature_names()
    W_ = W[[vocab_dict[w] for w in feature_names]]  # word embedding matrix
    D_ = euclidean_distances(W_) # Distance Matrix
    D_ = D_.astype(np.double)
    #D_ /= D_.max()  # Normalize for comparison
    v_1, v_2 = vect.transform([d1, d2])
    v_1 = v_1.toarray().ravel()
    v_2 = v_2.toarray().ravel()
    ### EMD
    v_1 = v_1.astype(np.double)
    v_2 = v_2.astype(np.double)
    v_1 /= v_1.sum()
    v_2 /= v_2.sum()
    #print("d(doc_1, doc_2) = {:.2f}".format(emd(v_1, v_2, D_)))
    emd_d = emd(v_1, v_2, D_) ## WMD
    #print emd_d
    return emd_d
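
A hedged usage sketch, with toy stand-ins for the globals the function expects (vocab_dict mapping words to rows of an embedding matrix W; emd is assumed to come from the pyemd package):

import numpy as np
from pyemd import emd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import euclidean_distances

vocab = ["cat", "dog", "pet", "house"]
vocab_dict = {w: i for i, w in enumerate(vocab)}   # word -> row of W
W = np.random.RandomState(0).rand(len(vocab), 50)  # toy word embeddings

print(wordMoverDistance(["cat", "pet"], ["dog", "house"]))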
Example #21
def closest(pipeline, records, record, n=10):
    """Find the closest records from the given record.

    :param pipeline:
        A classification pipeline, as returned by ``train``.

    :param records:
        Records are expected as a list of dictionaries.

    :param record:
        Record is expected as a dictionary.

    :param n:
        The number of closest records to return.

    :return list:
        The ``n`` closest records.
    """
    transformer = pipeline.steps[0][1]

    X = transformer.transform(np.array(records, dtype=object))
    X_record = transformer.transform(np.array([record], dtype=object))
    top = np.argsort(euclidean_distances(X, X_record), axis=0).ravel()

    return [records[i] for i in top[:n]]
Example #22
def run_kmeans(inFile,  n_colors):
	china = cv2.imread(inFile)
	china = np.array(china, dtype=np.float64) / 255
	w, h, d = original_shape = tuple(china.shape)
	assert d == 3
	image_array = np.reshape(china, (w * h, d))
	
	print("\tFitting model on a small sub-sample of the data")
	t0 = time()
	image_array_sample = shuffle(image_array, random_state=0)[:1000]
	kmeans = KMeans(n_clusters=n_colors, random_state=0).fit(image_array_sample)
	print("\tdone in %0.3fs." % (time() - t0))
	
	# Get labels for all points
	print("\tPredicting color indices on the full image (k-means)")
	t0 = time()
	labels = kmeans.predict(image_array)
	print("\tdone in %0.3fs." % (time() - t0))
	
	codebook_random = shuffle(image_array, random_state=0)[:n_colors + 1]
	print("\tPredicting color indices on the full image (random)")
	t0 = time()
	dist = euclidean_distances(codebook_random, image_array, squared=True)
	labels_random = dist.argmin(axis=0)
	print("\tdone in %0.3fs." % (time() - t0))

	img_kmeans = recreate_image(kmeans.cluster_centers_, labels, w, h)
	img_random = recreate_image(codebook_random, labels_random, w, h)
	return china, img_kmeans, img_random
Example #23
def test_affinity_propagation():
    """Affinity Propagation algorithm
    """
    # Compute similarities
    S = -euclidean_distances(X, squared=True)
    preference = np.median(S) * 10
    # Compute Affinity Propagation
    cluster_centers_indices, labels = affinity_propagation(S,
            preference=preference)

    n_clusters_ = len(cluster_centers_indices)

    assert_equal(n_clusters, n_clusters_)

    af = AffinityPropagation(preference=preference, affinity="precomputed")
    labels_precomputed = af.fit(S).labels_

    af = AffinityPropagation(preference=preference)
    labels = af.fit(X).labels_

    assert_array_equal(labels, labels_precomputed)

    cluster_centers_indices = af.cluster_centers_indices_

    n_clusters_ = len(cluster_centers_indices)
    assert_equal(np.unique(labels).size, n_clusters_)
    assert_equal(n_clusters, n_clusters_)

    # Test also with no copy
    _, labels_no_copy = affinity_propagation(S, preference=preference,
            copy=False)
    assert_array_equal(labels, labels_no_copy)
def complete_linkage(X, connectivity=None, n_clusters=4):
    from sklearn.cluster.hierarchical import _hc_cut
    if connectivity is None:
        d = euclidean_distances(X)
    else:
        connectivity = connectivity.copy()
        # Remove the diagonal
        mask = connectivity.row != connectivity.col
        connectivity.row = connectivity.row[mask]
        connectivity.col = connectivity.col[mask]
        connectivity.data = connectivity.data[mask]
        d_ = X[connectivity.row]
        d_ -= X[connectivity.col]
        d_ **= 2
        d_ = d_.sum(axis=-1)
        # XXX: not necessary: complete_linkage is invariant by increasing
        # function
        d_ = np.sqrt(d_)
        d = connectivity
        d.data = d_
    L = nn_chain_core(d)
    a, b, height = np.array(L).T
    children = np.c_[a, b].astype(int)
    labels = _hc_cut(n_clusters=n_clusters, children=children,
                     n_leaves=len(X))
    return labels
Example #25
def euclidean_MDS(data):
    seed = np.random.RandomState(seed=3)
    similarities = euclidean_distances(data)
    mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=seed,
                   dissimilarity="precomputed", n_jobs=1)
    pos = mds.fit_transform(similarities)
    return pos
Example #26
def _poisson_exp_dense(X, counts, alpha, bias,
                       beta=None, use_empty_entries=False):
    m, n = X.shape
    d = euclidean_distances(X)
    if use_empty_entries:
        mask = np.invert(np.tri(m, dtype=bool))
    else:
        mask = np.invert(np.tri(m, dtype=bool)) & (counts != 0) & (d != 0)

    bias = bias.reshape(-1, 1)
    if beta is None:
        beta = counts[mask].sum() / (
            (d[mask] ** alpha) * (bias * bias.T)[mask]).sum()

    g = beta * d ** alpha
    g *= bias * bias.T
    g = g[mask]

    ll = counts[mask] * np.log(beta) + \
        alpha * counts[mask] * np.log(d[mask]) + \
        counts[mask] * np.log(bias * bias.T)[mask]
    ll -= g
    # We are trying to maximise, so we need the opposite of the log likelihood
    if np.isnan(ll.sum()):
        raise ValueError("Objective function is Not a Number")
    return - ll.sum()
Example #27
def distance_matrix_visualization():
    xx, yy = create_mesh_data(-1, 1.01, 0.2, -1, 1.01, 0.2)
    points = np.array([[x,y] for x,y in zip(xx.ravel(), yy.ravel())])

    distance_matrix_lin = get_distance_matrix(xx, yy, linear_kernel)
    distance_matrix_pol = get_distance_matrix(xx, yy, get_pol_kernel_closure(10.0))
    distance_matrix_rbf = get_distance_matrix(xx, yy, get_rbf_kernel_closure(10.0))
    distance_matrix_sig = get_distance_matrix(xx, yy, get_sigmoid_kernel_closure(0.1))

    distance_matrix_orig = euclidean_distances(points, points)

    plt.figure()
    plt.pcolor(distance_matrix_orig)
    plt.colorbar()

    plt.figure()
    plt.pcolor(distance_matrix_lin)
    plt.colorbar()

    plt.figure()
    plt.pcolor(distance_matrix_pol)
    plt.colorbar()

    plt.figure()
    plt.pcolor(distance_matrix_rbf)
    plt.colorbar()

    plt.figure()
    plt.pcolor(distance_matrix_sig)
    plt.colorbar()

    plt.show()
Example #28
    def __call__(self, track,  slice=None):

        # remove WHERE when table cleaned up to remove header rows
        statement = (
            "SELECT transcript_id, TPM, sample_id FROM %(table)s "
            "where transcript_id != 'Transcript'")

        # fetch data
        df = pd.DataFrame.from_dict(self.getAll(statement))

        df = df.pivot(index='transcript_id', columns='sample_id')['TPM']

        # calculate dissimilarities
        similarities = euclidean_distances(df.transpose())

        # run MDS
        mds = manifold.MDS(n_components=2, max_iter=3000,
                           eps=1e-9, dissimilarity="precomputed", n_jobs=1)
        mds = mds.fit(similarities)
        pos = pd.DataFrame(mds.embedding_)

        pos.columns = ["MD1", "MD2"]
        pos['sample'] = df.columns

        return pos
Example #29
def test_poisson_exp():
    random_state = np.random.RandomState(seed=42)
    n = 50
    X = random_state.rand(n, 3)
    counts = euclidean_distances(X)**(-3)
    counts[np.isinf(counts) | np.isnan(counts)] = 0
    eps = poisson_model.poisson_exp(X, counts, -2)
    assert eps < 1e-6
Example #30
def mds_positions(df, identifier, hash_map):
    euc = pd.DataFrame(euclidean_distances(df), index=df.index, columns=df.index)
    mds = manifold.MDS(dissimilarity='precomputed', max_iter=3000)
    posdf = pd.DataFrame(mds.fit(euc).embedding_, index=euc.index)
    clf = PCA(n_components=2)
    posdf = pd.DataFrame(clf.fit_transform(posdf), index=posdf.index)
    posdf[identifier] = [hash_map[abb] for abb in posdf.index]
    return posdf
Example #31
    def _cluster_variance(cls, num_points, clusters, centroids):
        s = 0
        num_dims = clusters[0][0].shape[0]
        denom = float(num_points - len(centroids)) * num_dims
        for cluster, centroid in zip(clusters, centroids):
            # reshape in case `centroid` is a single 1-D point
            distances = euclidean_distances(cluster, centroid.reshape(1, -1))
            s += (distances * distances).sum()
        return s / denom
Example #32
def discr_stat(X,
               Y,
               dissimilarity="euclidean",
               remove_isolates=True,
               return_rdfs=False):
    """
    Computes the discriminability statistic.
    Parameters
    ----------
    X : array, shape (n_samples, n_features) or (n_samples, n_samples)
    Input data. If dissimilarity=='precomputed', the input should be the dissimilarity
    matrix.
    Y : 1d-array, shape (n_samples)
    Input labels.
    dissimilarity : str, {"euclidean" (default), "precomputed"}
    Dissimilarity measure to use:
    - 'euclidean':
    Pairwise Euclidean distances between points in the dataset.
    - 'precomputed':
    Pre-computed dissimilarities.
    remove_isolates : bool, optional, default=True
    Whether to remove data that have single label.
    return_rdfs : bool, optional, default=False
    Whether to return rdf for all data points.
    Returns
    -------
    stat : float
    Discriminability statistic.
    rdfs : array, shape (n_samples, max{len(id)})
    Rdfs for each sample. Only returned if ``return_rdfs==True``.
    """
    check_X_y(X, Y, accept_sparse=True)
    uniques, counts = np.unique(Y, return_counts=True)
    if (counts != 1).sum() <= 1:
        msg = "You have passed a vector containing only a single unique sample id."
        raise ValueError(msg)
    if remove_isolates:
        idx = np.isin(Y, uniques[counts != 1])
        labels = Y[idx]
        if dissimilarity == "euclidean":
            X = X[idx]
        else:
            X = X[np.ix_(idx, idx)]
    else:
        labels = Y

    if dissimilarity == "euclidean":
        dissimilarities = euclidean_distances(X)
    else:
        dissimilarities = X

    rdfs = _discr_rdf(dissimilarities, labels)
    stat = np.nanmean(rdfs)

    if return_rdfs:
        return stat, rdfs
    else:
        return stat
Example #33
def visualize(reader, visualization_method, value_column, segment_column):
    labels, data = organize_data(reader, visualization_method, value_column,
                                 segment_column)

    if visualization_method == 'hc':
        link = linkage(data)
        dendrogram(link, leaf_label_func=lambda i: labels[i])
        plt.gcf()
        plt.show()

    if visualization_method == 'mds':
        n = len(labels)
        data -= data.mean()
        clf = PCA(n_components=2)
        data = clf.fit_transform(data)

        similarities = euclidean_distances(data)

        # Add noise to the similarities
        noise = np.random.rand(n, n)
        noise = noise + noise.T
        noise[np.arange(noise.shape[0]), np.arange(noise.shape[0])] = 0
        similarities += noise

        fig = plt.figure(1)
        ax = plt.axes([0., 0., 1., 1.])

        similarities = similarities.max() / similarities * 100
        similarities[np.isinf(similarities)] = 0

        plt.scatter(data[:, 0], data[:, 1], c='r', s=20)
        plt.legend(['Position'], loc='best')
        segments = [[data[i, :], data[j, :]] for i in range(len(data))
                    for j in range(len(data))]
        values = np.abs(similarities)
        lc = LineCollection(segments,
                            zorder=0,
                            cmap=plt.cm.hot_r,
                            norm=plt.Normalize(0, values.max()))
        lc.set_array(similarities.flatten())
        lc.set_linewidths(0.5 * np.ones(len(segments)))
        ax.add_collection(lc)

        for label, x, y in zip(labels, data[:, 0], data[:, 1]):
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(-20, 20),
                         textcoords='offset points',
                         ha='right',
                         va='bottom',
                         bbox=dict(boxstyle='round,pad=0.5',
                                   fc='yellow',
                                   alpha=0.5),
                         arrowprops=dict(arrowstyle='->',
                                         connectionstyle='arc3,rad=0'))

        plt.show()
Example #34
def test_pairwise_distances_radius_neighbors(
    n_features,
    translation,
    metric,
    strategy,
    n_samples=100,
    dtype=np.float64,
):
    rng = np.random.RandomState(0)
    spread = 1000
    radius = spread * np.log(n_features)
    X = translation + rng.rand(n_samples, n_features).astype(dtype) * spread
    Y = translation + rng.rand(n_samples, n_features).astype(dtype) * spread

    metric_kwargs = _get_metric_params_list(metric, n_features)[0]

    # Reference for argkmin results
    if metric == "euclidean":
        # Compare to scikit-learn GEMM optimized implementation
        dist_matrix = euclidean_distances(X, Y)
    else:
        dist_matrix = cdist(X, Y, metric=metric, **metric_kwargs)

    # Getting the neighbors for a given radius
    neigh_indices_ref = []
    neigh_distances_ref = []

    for row in dist_matrix:
        ind = np.arange(row.shape[0])[row <= radius]
        dist = row[ind]

        sort = np.argsort(dist)
        ind, dist = ind[sort], dist[sort]

        neigh_indices_ref.append(ind)
        neigh_distances_ref.append(dist)

    neigh_indices_ref = np.array(neigh_indices_ref)
    neigh_distances_ref = np.array(neigh_distances_ref)

    neigh_distances, neigh_indices = PairwiseDistancesRadiusNeighborhood.compute(
        X,
        Y,
        radius,
        metric=metric,
        metric_kwargs=metric_kwargs,
        return_distance=True,
        # So as to have more than a chunk, forcing parallelism.
        chunk_size=n_samples // 4,
        strategy=strategy,
        sort_results=True,
    )

    ASSERT_RESULT[PairwiseDistancesRadiusNeighborhood](neigh_distances,
                                                       neigh_distances_ref,
                                                       neigh_indices,
                                                       neigh_indices_ref)
Example #35
    def _select_targets(self):
        target_neighbors = np.empty((self.X_.shape[0], self.k), dtype=int)
        for label in self.labels_:
            inds, = np.nonzero(self.label_inds_ == label)
            dd = euclidean_distances(self.X_[inds], squared=True)
            np.fill_diagonal(dd, np.inf)
            nn = np.argsort(dd)[..., :self.k]
            target_neighbors[inds] = inds[nn]
        return target_neighbors
def squared_difference_mean(data1, data2):
    distance = euclidean_distances(data1, data2)
    num = distance.shape[0]
    error = 0
    for i in range(num):
        error += distance[i][i]**2
    error_mean = error / num

    return error_mean
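
Since the loop only reads the diagonal of the distance matrix, an equivalent vectorized form (a sketch on random data) is:

import numpy as np
from sklearn.metrics import euclidean_distances

rng = np.random.RandomState(0)
data1, data2 = rng.rand(5, 3), rng.rand(5, 3)

# mean of the squared distances between corresponding rows
error_mean = np.mean(np.diag(euclidean_distances(data1, data2)) ** 2)
# same result without building the full matrix
error_mean_direct = np.mean(np.sum((data1 - data2) ** 2, axis=1))
assert np.isclose(error_mean, error_mean_direct)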
Example #37
    def from_points(cls, points: np.ndarray):
        if points.ndim != 2:
            raise ValueError('"points" should have two dimensions.')
        if points.shape[0] < 3:
            raise ValueError('"points" should contain at least 3 points.')
        if points.shape[1] != 3:
            raise ValueError('"points" should have shape (N, 3) (x, y, z).')
        distance_matrix_nparray = euclidean_distances(points)
        return cls(distance_matrix_nparray)
Example #38
def get_representative_jobs(df, kmeans):
    cluster_centers = kmeans.cluster_centers_
    for cent in cluster_centers:
        print('\nCluster Representations')
        dist = euclidean_distances(cent.reshape(1, -1), tfidf)
        order = np.argsort(dist)
        for o in order[0][:5]:
            title = df['Job_Title'].iloc[o]
            print(title)
Example #39
def test_shuffle_equal(verbose):
    # for this data set there shouldn't be any equal distances,
    # and shuffle should make no difference
    X, _ = make_classification(random_state=12354)
    dist = euclidean_distances(X)
    skew_shuffle, skew_no_shuffle = \
        [Hubness(metric='precomputed', shuffle_equal=v, verbose=verbose)
         .fit(dist).score() for v in [True, False]]
    assert skew_no_shuffle == skew_shuffle
Example #40
    def _select_targets(X, y, k):
        target_neighbors = np.empty((X.shape[0], k), dtype=int)
        for label in np.unique(y):
            inds, = np.nonzero(y == label)
            dd = euclidean_distances(X[inds], squared=True)
            np.fill_diagonal(dd, np.inf)
            nn = np.argsort(dd)[..., :k]
            target_neighbors[inds] = inds[nn]
        return target_neighbors
Example #41
def neighbor_test():
    from sklearn.metrics import euclidean_distances
    A = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    D = euclidean_distances(A)
    nbrs = NearestNeighbors(n_neighbors=2, metric='precomputed').fit(D)
    distance, knn = nbrs.kneighbors()
    # The returned k-NN does not include the query point itself.
    print(distance)
    print(knn)
Example #42
def calculateDistance(mean_images):
    length = len(mean_images)
    distance = np.zeros((length, length))
    for i in range(length):
        for j in range(length):
            a = mean_images[i].reshape(1, -1)
            b = mean_images[j].reshape(1, -1)
            distance[i, j] = euclidean_distances(a, b)
    return np.square(distance)
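
The double loop collapses to a single call with squared=True; a sketch on stand-in mean images:

import numpy as np
from sklearn.metrics import euclidean_distances

mean_images = [np.random.RandomState(i).rand(28, 28) for i in range(10)]
flat = np.array([m.ravel() for m in mean_images])

# (10, 10) matrix of squared Euclidean distances in one call
distance_sq = euclidean_distances(flat, squared=True)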
Example #43
def showMDSAnalysis(X, q_class, n_components):
    similarities = euclidean_distances(X)
    print('similarities...')
    #     similarities = 1 - chi2_kernel(X, gamma=.5)
    print('mds...')
    mds = manifold.MDS(n_components=2,
                       max_iter=3000,
                       eps=1e-9,
                       dissimilarity="precomputed",
                       n_jobs=1)
    pos = mds.fit(similarities).embedding_

    #     print 'nmds...'
    #     nmds = manifold.MDS(n_components=2, metric=False, max_iter=3000, eps=1e-12,
    #                         dissimilarity="precomputed", n_jobs=1,
    #                         n_init=1)
    #     npos = nmds.fit_transform(similarities, init=pos)

    clf = PCA(n_components=2)

    X = clf.fit_transform(X)

    pos = clf.fit_transform(pos)

    #     npos = clf.fit_transform(npos)

    fig = plt.figure(1)
    ax = plt.axes([0., 0., 1., 1.])

    color = 'wmgbr'
    mark = 'ox+Ds'
    c = 0
    start = 0
    for n in q_class:
        end = start + n
        print(start, end)
        #         plt.scatter(X[i:i+50, 0], X[i:i+50, 1], c=color[c], marker = mark[c])
        plt.scatter(pos[start:end, 0],
                    pos[start:end, 1],
                    s=20,
                    c=color[c],
                    marker=mark[c])
        #         plt.scatter(npos[i:i+50, 0], npos[i:i+50, 1], s=20, c=color[c], marker = mark[c])
        #         plt.legend(('True position', 'MDS', 'NMDS'), loc='best')
        c += 1
        start = end

    plt.legend(('equation', 'photo', 'scheme', 'table', 'visualization'),
               loc='best')

    similarities = similarities.max() / similarities * 100
    similarities[np.isinf(similarities)] = 0

    plt.show()

    return pos
def dimension_reduction(df,
                        sample_limit,
                        category_feature,
                        method="MDS",
                        n_components=3,
                        n_jobs=2,
                        whiten=True):
    '''
    The function is used to conduct multidimensional scaling (or PCA).
    Inputs:
        df: dataframe
        sample_limit: restriction on the number of rows
        category_feature: feature used as the predefined label
        method: "MDS" for multidimensional scaling, "PCA" for principal component analysis
        n_components: the final number of dimensions
        n_jobs: parallel computing factor
        whiten: True to remove relative variance between components
    Returns: numpy array with n dimensions plus a label column, and an index for the labels
    '''
    if df.shape[0] > sample_limit:
        sub_df = df.sample(n=sample_limit).reset_index()
    else:
        sub_df = df.reset_index()

    used_columns = list(sub_df.columns)
    if category_feature:
        used_columns.remove(category_feature)
    sub_dfm = np.matrix(sub_df[used_columns])

    if method == "MDS":
        similarities = euclidean_distances(sub_dfm)
        mds = manifold.MDS(n_components=n_components,
                           max_iter=3000,
                           eps=1e-9,
                           dissimilarity='precomputed',
                           n_jobs=1)
        pos = mds.fit(similarities).embedding_

    else:
        pca = PCA(n_components=n_components, copy=True, whiten=whiten)
        pos = pca.fit_transform(sub_dfm)
        print(pca.explained_variance_ratio_)

    if category_feature:
        category_index = {}
        sub_df["label"] = 0
        for i, category in enumerate(
                sorted(list(sub_df[category_feature].unique()))):
            sub_df.loc[sub_df[category_feature] == category, "label"] = i
            category_index[category] = i

        new_pos = np.zeros((pos.shape[0], n_components + 1))
        new_pos[:, :-1] = pos
        new_pos[:, -1] = sub_df["label"]
        return new_pos, category_index

    # No category feature: return the embedding without a label column
    return pos, None
Example #45
    def predict(self, X):

        # Check is fit had been called
        check_is_fitted(self, ['X_', 'y_'])

        # Input validation
        X = check_array(X)

        closest = np.argmin(euclidean_distances(X, self.X_), axis=1)
        return self.y_[closest]
Example #46
    def calculate_distance(gdf, norm):
        xy = np.asarray(
            gdf[['x', 'y']] * 10000
        )  # pd.merge(gdf[geom_col].x, gdf[geom_col].y, left_index=True, right_index=True)
        spatial_distance = euclidean_distances(xy)
        norm_spatial_distance = preprocessing.normalize(spatial_distance,
                                                        norm=norm)
        t = np.asarray(gdf[['t']])
        temporal_distance = euclidean_distances(t)
        norm_temporal_distance = preprocessing.normalize(temporal_distance,
                                                         norm=norm)
        c = np.asarray(gdf['c'])
        vectorizer = TfidfVectorizer()
        c_vect = vectorizer.fit_transform(c)
        content_distance = np.absolute(cosine_distances(c_vect))
        norm_content_distance = preprocessing.normalize(content_distance,
                                                        norm=norm)
        distances = alpha * norm_spatial_distance + beta * norm_content_distance + gama * norm_temporal_distance
        return distances
Example #47
    def evaluate(x, z, hyp):
        if len(x.shape) == 1:
            x = x.reshape(1, -1)
        if len(z.shape) == 1:
            z = z.reshape(1, -1)

        ell = np.exp(hyp[0])
        sf2 = np.exp(2 * hyp[1])
        K = euclidean_distances(x / ell, z / ell, squared=True)  # (x-z)^T (x-z)
        return sf2 * np.exp(-K / 2)
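
This is the squared-exponential (RBF) kernel with lengthscale ell and signal variance sf2. A sketch checking it against sklearn's rbf_kernel, which computes exp(-gamma * ||x-z||^2), so gamma = 1 / (2 * ell**2):

import numpy as np
from sklearn.metrics.pairwise import euclidean_distances, rbf_kernel

rng = np.random.RandomState(0)
x, z = rng.rand(4, 3), rng.rand(5, 3)
ell, sf2 = 0.7, 1.3

K = sf2 * np.exp(-euclidean_distances(x / ell, z / ell, squared=True) / 2)
K_ref = sf2 * rbf_kernel(x, z, gamma=1.0 / (2 * ell ** 2))
assert np.allclose(K, K_ref)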
Example #48
def test_equal_similarities_and_preferences():
    # Unequal distances
    X = np.array([[0, 0], [1, 1], [-2, -2]])
    S = -euclidean_distances(X, squared=True)

    assert not _equal_similarities_and_preferences(S, np.array(0))
    assert not _equal_similarities_and_preferences(S, np.array([0, 0]))
    assert not _equal_similarities_and_preferences(S, np.array([0, 1]))

    # Equal distances
    X = np.array([[0, 0], [1, 1]])
    S = -euclidean_distances(X, squared=True)

    # Different preferences
    assert not _equal_similarities_and_preferences(S, np.array([0, 1]))

    # Same preferences
    assert _equal_similarities_and_preferences(S, np.array([0, 0]))
    assert _equal_similarities_and_preferences(S, np.array(0))
Example #49
    def predict(self, xtest):
        """Predict method"""
        # Check is fit had been called
        check_is_fitted(self, ['xtrain_', 'ytrain_'])

        # Input validation
        xtest = check_array(xtest)

        closest = np.argmin(euclidean_distances(xtest, self.xtrain_), axis=1)
        return self.ytrain_[closest]
Example #50
def getDisMatrixEuclidean(meanList):
    eucDisMat = []
    for i in range(len(meanList)):
        eucDisMat.append([])
        for j in range(len(meanList)):
            dis = euclidean_distances(meanList[i].reshape(1, -1),
                                      meanList[j].reshape(1, -1))[0, 0]
            eucDisMat[i].append(dis * dis)
    return eucDisMat
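
The nested loops reduce to one vectorized call; a sketch on stand-in mean vectors:

import numpy as np
from sklearn.metrics import euclidean_distances

meanList = [np.random.RandomState(i).rand(16) for i in range(5)]

# squared pairwise distances between all means in one call
eucDisMat = euclidean_distances(np.vstack(meanList), squared=True)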
Example #51
    def getvec(self, s1, s2):
        vect = CountVectorizer(token_pattern='(?u)\\b\\w+\\b').fit([s1, s2])
        v1, v2 = vect.transform([s1, s2])
        v1 = v1.toarray().ravel()
        v2 = v2.toarray().ravel()
        w = numpy.array([self.model[w] for w in vect.get_feature_names()])
        d = euclidean_distances(w)
        d = d.astype(numpy.double)
        d /= d.max()
        return v1, v2, d
Example #52
File: rmds.py, Project: lhlruc/ASHIC
def f(R, *params):
    thetaxm, thetaym, thetazm, thetaxp, thetayp, thetazp = R
    d, X, Y, distances = params
    Rm = rotation(thetaxm, thetaym, thetazm)
    Rp = rotation(thetaxp, thetayp, thetazp)
    Xr = Rm.dot(X.T).T
    Yr = Rp.dot(Y.T).T + np.tile([d, 0, 0], (Y.shape[0], 1))
    dis = euclidean_distances(Xr, Yr)
    obj = 1. / (distances**2) * ((dis - distances)**2)
    return obj[np.invert(np.isnan(obj) | np.isinf(obj))].sum()
Example #53
def eval_stress(X, user_data=None):
    """
    """
    if VERBOSE:
        print("Computing stress: eval_stress")
    m, n, distances, alpha, beta, d = user_data
    X = X.reshape((m, n))
    dis = euclidean_distances(X)
    stress = ((dis - distances)**2)[distances != 0].sum()
    return stress
    def get_derivative(self, X, Y, P, Q, P0, beta):
        Dy = euclidean_distances(Y)
        H = hessian_y_matrix_fast(Dy, P, Q, Y)
        J = derivative_X_matrix_fast(X, Y, Dy, beta, P0)
        self.H = H
        self.J = J
        Pxy = Jxy(H, J)
        self.P = Pxy

        return Pxy
Example #55
def cal_sim():
    for i in range(len(tasks)):
        for j in range(i, len(tasks)):
            print(
                tasks[i], tasks[j],
                cosine_similarity([domain_embedding[i], domain_embedding[j]]))
            print(
                tasks[i], tasks[j],
                euclidean_distances([domain_embedding[i],
                                     domain_embedding[j]]))
Example #56
def poisson_lambda(x, cdis, beta, alpha, bias=None):
    d = euclidean_distances(x)
    n = int(x.shape[0] / 2)
    # set inter distance to centroid distance
    d[:n, n:] = cdis
    d[n:, :n] = cdis
    if bias is None:
        bias = np.ones(d.shape[0], dtype=float)
    lambda_mat = (beta * d**alpha) * np.outer(bias, bias)
    return lambda_mat.astype(float)
def spanning_tree_length(X):
    """Compute the length of the euclidean MST of X.

    Parameters
    ----------
    X: ndarray, shape=[n_samples, n_features]
    """
    if X.shape[0] < 2:
        return 0
    return minimum_spanning_tree(euclidean_distances(X)).sum()
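
A quick usage sketch. Note that scipy's minimum_spanning_tree accepts the dense distance matrix and treats zeros off the diagonal as missing edges, which distinct random points avoid:

import numpy as np
from scipy.sparse.csgraph import minimum_spanning_tree
from sklearn.metrics import euclidean_distances

X = np.random.RandomState(0).rand(10, 2)
print(minimum_spanning_tree(euclidean_distances(X)).sum())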
Example #58
def calculate_costvalue(dists, red_dists):
    """Only for testing"""
    low_dists = euclidean_distances(red_dists)
    n_conf = dists.shape[0]
    costvalue = []
    for i in range(n_conf - 1):
        for j in range(i + 1, n_conf):
            costvalue.append(abs(dists[i][j] - low_dists[i][j]))
    costvalue = sum(costvalue) / len(costvalue)
    return costvalue
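
The same cost is the mean absolute difference over the strict upper triangle; a vectorized sketch:

import numpy as np
from sklearn.metrics import euclidean_distances

rng = np.random.RandomState(0)
dists = euclidean_distances(rng.rand(6, 5))       # high-dimensional distances
low_dists = euclidean_distances(rng.rand(6, 2))   # reduced-space distances

iu = np.triu_indices(dists.shape[0], k=1)
costvalue = np.abs(dists[iu] - low_dists[iu]).mean()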
Example #59
def find_closest(in_vector, proto_vectors):
    closest = None
    closest_distance = float('inf')
    for p_v in proto_vectors:
        distance = euclidean_distances(in_vector.reshape(1, 4),
                                       p_v.p_vector.reshape(1, 4))
        if distance < closest_distance:
            closest_distance = distance
            closest = p_v
    return closest
Example #60
    def predict(self, X):
        print('Predict', len(X))
        # Check if fit had been called
        check_is_fitted(self, ['p5p_', 'X_', 'y_'])
        # Input validation
        X = check_array(X)

        closest = np.argmin(euclidean_distances(X, self.X_), axis=1)
        # print(closest, self.p5p_, self.y_[0, 0])
        return self.y_[closest]