def gram(self, X, Xt=None):
    """Return the dense Matern Gram matrix evaluated over the datapoints.

    :param X: n-by-d data matrix
    :param Xt: optional t-by-d test matrix

    Returns:
    -------
    n-by-n Gram matrix over X (if Xt is not provided), or the
    t-by-n Gram matrix between Xt and X (if Xt is provided).
    """
    # TODO the test, and this function, should work for all matrix types.
    if X.shape[1] != self._d:
        raise ValueError("X must have vectors of dimension d")
    nu = self._nu
    l = self._l
    if Xt is None:
        D = euclidean(X, X)
    else:
        if Xt.shape[1] != self._d:
            raise ValueError("Xt must have vectors of dimension d")
        D = euclidean(X, Xt)
    Y = scipy.sqrt(2.0 * nu * D) / l
    K = 2.0 ** (1 - nu) / scipy.special.gamma(nu) * Y ** nu * scipy.special.kv(nu, Y)
    # Bug fix: where the distance is zero (e.g. the Gram diagonal when
    # Xt is None), Y ** nu * kv(nu, Y) evaluates to 0 * inf = nan.  The
    # Matern kernel's limit at zero distance is 1, so patch those entries.
    # NOTE(review): assumes `euclidean` returns an ndarray so boolean
    # indexing is valid -- confirm against its definition.
    K[Y == 0] = 1.0
    return scipy.real(K)
def gram(self, X, Xt=None):
    """Return the dense Gaussian (RBF) Gram matrix over the datapoints.

    :param X: n-by-d data matrix
    :param Xt: optional t-by-d test matrix

    Returns:
    -------
    n-by-n Gram matrix over X (if Xt is not provided), or the
    t-by-n Gram matrix between Xt and X (if Xt is provided).
    """
    # TODO the test, and this function, should work for all matrix types.
    if X.shape[1] != self._d:
        raise ValueError("X must have vectors of dimension d")
    if Xt is None:
        other = X
    else:
        if Xt.shape[1] != self._d:
            raise ValueError("Xt must have vectors of dimension d")
        other = Xt
    # NOTE(review): presumably `euclidean` yields *squared* distances, as
    # the RBF kernel is exp(-||x - y||^2 / (2 sigma^2)) -- confirm.
    sigma = self._sigma
    return numpy.exp(-euclidean(X, other) / (2 * sigma ** 2))
def __init__(self, ListOfObjects, algorithm=None, eps=800):
    """Compute the Davies-Bouldin index for a clustering of ListOfObjects.

    Parameters:
        ListOfObjects: objects to cluster; each clustered point exposes a
            ``Coords`` attribute (only the first two components are used).
        algorithm: pass "optics" to forward algorithm and eps to
            ``centroids``; any other value uses its default behaviour.
        eps: neighbourhood radius, only used when algorithm == "optics".

    The resulting index is stored in ``self.value`` (lower is better).
    """
    # First: the cluster centroids A_i.
    if algorithm == "optics":
        self.list_centroids, SetOfClusters = centroids(
            ListOfObjects, algorithm, eps)
    else:
        self.list_centroids, SetOfClusters = centroids(ListOfObjects)

    # Second: S_i, the internal dispersion (RMS distance to the centroid)
    # of each cluster.  NOTE(review): assumes cluster keys are 1..K so that
    # key - 1 indexes list_centroids -- confirm against `centroids`.
    self.S = []
    for key in SetOfClusters.keys():
        # Bug fix: the accumulator must restart for every cluster;
        # previously it was initialised once and carried the sums of all
        # earlier clusters into each subsequent S value.
        sum_acumulated = 0
        for vector in SetOfClusters[key]:
            v1 = vector.Coords
            v2 = self.list_centroids[key - 1]
            diff = np.array((v1[0] - v2[0], v1[1] - v2[1]))
            sum_acumulated += np.sum(np.power(diff, 2))
        self.S.append(
            np.sqrt(
                np.true_divide(sum_acumulated, len(SetOfClusters[key]))))

    # Third: M_ij, the distance between every pair of centroids
    # (only the upper triangle is filled; only i < j is read below).
    NumberOfClusters = len(self.list_centroids)
    self.M = np.zeros([NumberOfClusters, NumberOfClusters])
    for i in range(NumberOfClusters - 1):
        for j in range(i + 1, NumberOfClusters):
            self.M[i, j] = dist.euclidean(self.list_centroids[i],
                                          self.list_centroids[j])

    # Fourth: R_ij = (S_i + S_j) / M_ij, the ratio between the internal
    # dispersions and the centroid separation (stored symmetrically).
    self.R = np.zeros([NumberOfClusters, NumberOfClusters])
    for i in range(NumberOfClusters - 1):
        for j in range(i + 1, NumberOfClusters):
            self.R[i, j] = np.true_divide(self.S[i] + self.S[j],
                                          self.M[i, j])
            self.R[j, i] = self.R[i, j]

    # The Davies-Bouldin value: mean over clusters of the worst-case R.
    self.value = 0
    for i in range(NumberOfClusters):
        self.value += np.max(self.R[i, :])
    self.value = np.true_divide(self.value, NumberOfClusters)
def __init__(self, ListOfObjects, algorithm=None, eps=800):
    """Compute the Davies-Bouldin index for a clustering of ListOfObjects.

    Parameters:
        ListOfObjects: objects to cluster; each clustered point exposes a
            ``Coords`` attribute (only the first two components are used).
        algorithm: pass "optics" to forward algorithm and eps to
            ``centroids``; any other value uses its default behaviour.
        eps: neighbourhood radius, only used when algorithm == "optics".

    The resulting index is stored in ``self.value`` (lower is better).
    """
    # First: the cluster centroids A_i.
    if algorithm == "optics":
        self.list_centroids, SetOfClusters = centroids(
            ListOfObjects, algorithm, eps)
    else:
        self.list_centroids, SetOfClusters = centroids(ListOfObjects)

    # Second: S_i, the internal dispersion (RMS distance to the centroid)
    # of each cluster.  NOTE(review): assumes cluster keys are 1..K so that
    # key - 1 indexes list_centroids -- confirm against `centroids`.
    self.S = []
    for key in SetOfClusters.keys():
        # Bug fix: the accumulator must restart for every cluster;
        # previously it was initialised once and carried the sums of all
        # earlier clusters into each subsequent S value.
        sum_acumulated = 0
        for vector in SetOfClusters[key]:
            v1 = vector.Coords
            v2 = self.list_centroids[key - 1]
            diff = np.array((v1[0] - v2[0], v1[1] - v2[1]))
            sum_acumulated += np.sum(np.power(diff, 2))
        self.S.append(
            np.sqrt(
                np.true_divide(sum_acumulated, len(SetOfClusters[key]))))

    # Third: M_ij, the distance between every pair of centroids
    # (only the upper triangle is filled; only i < j is read below).
    NumberOfClusters = len(self.list_centroids)
    self.M = np.zeros([NumberOfClusters, NumberOfClusters])
    for i in range(NumberOfClusters - 1):
        for j in range(i + 1, NumberOfClusters):
            self.M[i, j] = dist.euclidean(self.list_centroids[i],
                                          self.list_centroids[j])

    # Fourth: R_ij = (S_i + S_j) / M_ij, the ratio between the internal
    # dispersions and the centroid separation (stored symmetrically).
    self.R = np.zeros([NumberOfClusters, NumberOfClusters])
    for i in range(NumberOfClusters - 1):
        for j in range(i + 1, NumberOfClusters):
            self.R[i, j] = np.true_divide(self.S[i] + self.S[j],
                                          self.M[i, j])
            self.R[j, i] = self.R[i, j]

    # The Davies-Bouldin value: mean over clusters of the worst-case R.
    self.value = 0
    for i in range(NumberOfClusters):
        self.value += np.max(self.R[i, :])
    self.value = np.true_divide(self.value, NumberOfClusters)
def __init__(self, sentence1, sentence2):
    """Compute a feature vector of distances between two sentences.

    Evaluates Hamming, Euclidean, and Manhattan distances plus a
    word2vec-style similarity, storing each individually and together
    as ``self.res``.
    """
    metric_fns = (hamming, euclidean, manhattan, similarity)
    self.ham, self.euc, self.man, self.w2v = (
        fn(sentence1, sentence2) for fn in metric_fns)
    self.res = [self.ham, self.euc, self.man, self.w2v]