예제 #1
0
def test_chi_square_kernel():
    """Exercise ``chi2_kernel`` and ``additive_chi2_kernel`` end to end.

    Checks agreement with the elementwise formula, a unit diagonal when data
    is compared with itself, dtype handling (float64 by default, float32
    preserved, integers converted), the ordering of similar versus dissimilar
    rows, and the ``ValueError`` raised for negative, shape-mismatched and
    sparse inputs.
    """
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((10, 4))
    K_add = additive_chi2_kernel(X, Y)
    gamma = 0.1
    K = chi2_kernel(X, Y, gamma=gamma)
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; compare against the concrete dtype instead.
    assert K.dtype == np.float64
    for i, x in enumerate(X):
        for j, y in enumerate(Y):
            chi2 = -np.sum((x - y)**2 / (x + y))
            chi2_exp = np.exp(gamma * chi2)
            assert_almost_equal(K_add[i, j], chi2)
            assert_almost_equal(K[i, j], chi2_exp)

    # check diagonal is ones for data with itself
    K = chi2_kernel(Y)
    assert_array_equal(np.diag(K), 1)
    # check off-diagonal is < 1 but > 0:
    assert np.all(K > 0)
    assert np.all(K - np.diag(np.diag(K)) < 1)
    # check that float32 is preserved
    X = rng.random_sample((5, 4)).astype(np.float32)
    Y = rng.random_sample((10, 4)).astype(np.float32)
    K = chi2_kernel(X, Y)
    assert K.dtype == np.float32

    # check integer type gets converted,
    # check that zeros are handled
    X = rng.random_sample((10, 4)).astype(np.int32)
    K = chi2_kernel(X, X)
    assert np.isfinite(K).all()
    assert K.dtype == np.float64

    # check that kernel of similar things is greater than dissimilar ones
    X = [[.3, .7], [1., 0]]
    Y = [[0, 1], [.9, .1]]
    K = chi2_kernel(X, Y)
    assert K[0, 0] > K[0, 1]
    assert K[1, 1] > K[1, 0]

    # test negative input
    with pytest.raises(ValueError):
        chi2_kernel([[0, -1]])
    with pytest.raises(ValueError):
        chi2_kernel([[0, -1]], [[-1, -1]])
    with pytest.raises(ValueError):
        chi2_kernel([[0, 1]], [[-1, -1]])

    # different n_features in X and Y
    with pytest.raises(ValueError):
        chi2_kernel([[0, 1]], [[.2, .2, .6]])

    # sparse matrices
    with pytest.raises(ValueError):
        chi2_kernel(csr_matrix(X), csr_matrix(Y))
    with pytest.raises(ValueError):
        additive_chi2_kernel(csr_matrix(X), csr_matrix(Y))
예제 #2
0
def tsne_embed(datafile, kernel, n_repeats, seed):
    """Reduce stored features to 50 dimensions and embed them with t-SNE.

    The results file name is ``datafile`` with ``'features'`` replaced by
    ``'tsne'``. One embedding is stashed for each perplexity in
    10, 20, ..., 60.

    :param datafile: path of the feature file read by ``load_representations``
    :param kernel: ``'linear'`` (plain PCA) or ``'chi2'`` (kernel PCA with an
        exponential chi-squared kernel)
    :param n_repeats: forwarded to ``ntsne.best_tsne``
    :param seed: accepted for interface compatibility; not used here
    :raises ValueError: if ``kernel`` is not one of the supported names
    """
    # datafile = './data/full/features/vgg16_block5_conv3-vlad-32.h5'
    resultsfile = datafile.replace('features', 'tsne')

    keys, features = load_representations(datafile)

    # NOTE(review): ``labels`` is never used after this loop; the loop is
    # kept because ``.one()`` still validates that every key maps to exactly
    # one Micrograph row.
    labels = []
    for key in keys:
        if '-' in key:
            # deal with cropped micrographs: key -> Micrograph.id-UL
            m_id, quadrant = key.split('-')
        else:
            m_id = key
        m = db.query(Micrograph).filter(Micrograph.micrograph_id == int(m_id)).one()
        labels.append(m.primary_microconstituent)
    labels = np.array(labels)

    if kernel == 'linear':
        x_pca = PCA(n_components=50).fit_transform(features)
    elif kernel == 'chi2':
        gamma = -1 / np.mean(additive_chi2_kernel(features))

        with warnings.catch_warnings():
            warnings.simplefilter("once", DeprecationWarning)
            # KernelPCA ignores its own ``gamma`` when ``kernel`` is a
            # callable; arguments for a callable go through ``kernel_params``.
            x_pca = KernelPCA(n_components=50, kernel=chi2_kernel,
                              kernel_params={'gamma': gamma}).fit_transform(features)
    else:
        raise ValueError(
            "unknown kernel {!r}; expected 'linear' or 'chi2'".format(kernel))

    perplexity = [10, 20, 30, 40, 50, 60]
    for p in perplexity:
        x_tsne = ntsne.best_tsne(x_pca, perplexity=p, theta=0.1, n_repeats=n_repeats)
        stash_tsne_embeddings(resultsfile, keys, x_tsne, p)
예제 #3
0
def tsne_embed(datafile, kernel, n_repeats, seed):
    """Reduce stored features to 50 dimensions and embed them with t-SNE.

    The results file name is ``datafile`` with ``'features'`` replaced by
    ``'tsne'``; its parent directory is created when missing. One embedding
    is stashed for each perplexity in 10, 20, ..., 60.

    :param datafile: path of the feature file read by ``load_representations``
    :param kernel: ``'linear'`` (plain PCA) or ``'chi2'`` (kernel PCA with an
        exponential chi-squared kernel)
    :param n_repeats: forwarded to ``ntsne.best_tsne``
    :param seed: accepted for interface compatibility; not used here
    :raises ValueError: if ``kernel`` is not one of the supported names
    """
    # datafile = './data/full/features/vgg16_block5_conv3-vlad-32.h5'

    # Make a directory for the t-sne results
    print('working')
    resultsfile = datafile.replace('features', 'tsne')
    results_dir = os.path.dirname(resultsfile)
    # A bare file name has no directory part and os.makedirs('') would fail.
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)

    # Load the features
    keys, features = load_representations(datafile)

    # Run t-SNE
    if kernel == 'linear':
        x_pca = PCA(n_components=50).fit_transform(features)
    elif kernel == 'chi2':
        gamma = -1 / np.mean(additive_chi2_kernel(features))

        with warnings.catch_warnings():
            warnings.simplefilter("once", DeprecationWarning)
            # KernelPCA ignores its own ``gamma`` when ``kernel`` is a
            # callable; arguments for a callable go through ``kernel_params``.
            x_pca = KernelPCA(n_components=50, kernel=chi2_kernel,
                              kernel_params={'gamma': gamma}
                              ).fit_transform(features)
    else:
        raise ValueError(
            "unknown kernel {!r}; expected 'linear' or 'chi2'".format(kernel))

    perplexity = [10, 20, 30, 40, 50, 60]
    for p in perplexity:
        x_tsne = ntsne.best_tsne(x_pca,
                                 perplexity=p,
                                 theta=0.1,
                                 n_repeats=n_repeats)
        stash_tsne_embeddings(resultsfile, keys, x_tsne, p)
예제 #4
0
def compute_euristic_sigma_chi2(dataset_full, slice_size=100):
    r"""
    Heuristically estimate a kernel width from the data, slice by slice.

    The data is flattened to a single column and cut into consecutive slices
    of ``slice_size`` rows. For each slice, the sum of
    ``additive_chi2_kernel(slice)`` is divided by the squared slice length;
    the mean of those per-slice values is returned.

    The formula given by the original author:
    $\sigma^2 = 1/n^2 * \sum_{i, j}^{n}||x_i - x_j||^2$

    Only complete slices are evaluated: when the number of rows is not a
    multiple of ``slice_size`` the trailing rows are ignored.

    NOTE(review): the values come straight from ``additive_chi2_kernel``
    without a sign change - confirm the sign callers expect.

    :param dataset_full: The dataset on which to look for the best sigma
    :param slice_size: number of rows per slice (capped at the dataset size)
    :return: mean of the per-slice estimates
    """
    dataset_full = np.reshape(dataset_full, (-1, 1))
    n_rows = dataset_full.shape[0]
    slice_size = min(slice_size, n_rows)

    results = []
    for i in range(n_rows // slice_size):
        dataset = dataset_full[i * slice_size:(i + 1) * slice_size]
        kernel_values = additive_chi2_kernel(dataset)
        # 1.0 keeps this a true division on Python 2 as well.
        results.append(1.0 / slice_size ** 2 * np.sum(kernel_values))
        logger.debug("Compute sigma chi2; current mean: {}".format(np.mean(results)))
    return np.mean(results)
예제 #5
0
    def _get_kernel_matrix(self, X1, X2):
        """Evaluate the configured kernel between ``X1`` and ``X2``.

        The kernel is selected by ``self._kernel``; ``self._gamma``,
        ``self._degree`` and ``self._coef0`` are forwarded to the kernels
        that take them.

        Returns the len(X1)-by-len(X2) kernel matrix, or ``None`` (after
        printing an error line) when the kernel name is not recognised.
        """
        name = self._kernel

        if name == 'rbf':
            return pairwise.rbf_kernel(X1, X2, gamma=self._gamma)
        if name == 'poly':
            return pairwise.polynomial_kernel(
                X1, X2,
                degree=self._degree, gamma=self._gamma, coef0=self._coef0)
        if name == 'linear':
            return pairwise.linear_kernel(X1, X2)
        if name == 'laplacian':
            return pairwise.laplacian_kernel(X1, X2, gamma=self._gamma)
        if name == 'chi2':
            return pairwise.chi2_kernel(X1, X2, gamma=self._gamma)
        if name == 'additive_chi2':
            return pairwise.additive_chi2_kernel(X1, X2)
        if name == 'sigmoid':
            return pairwise.sigmoid_kernel(
                X1, X2, gamma=self._gamma, coef0=self._coef0)

        print('[Error] Unknown kernel')
        return None
예제 #6
0
def approximate_gamma(sample_matrix):
    """Estimate a kernel width from the data instead of grid-searching it.

    Returns the mean, over all pairs of rows in ``sample_matrix``, of the
    negated ``additive_chi2_kernel`` value.
    """
    pairwise_distances = -additive_chi2_kernel(sample_matrix)
    return np.mean(pairwise_distances)
예제 #7
0
def approximate_gamma(sample_matrix):
    """Approximate the kernel width parameter directly from the samples.

    The negated ``additive_chi2_kernel`` of ``sample_matrix`` with itself is
    averaged over every pair of samples, which removes the need to tune the
    width through grid search.
    """
    negated_kernel = -additive_chi2_kernel(sample_matrix)
    mean_distance = np.mean(negated_kernel)
    return mean_distance
 def transform(self, X, Y):
     """Return the first row of the kernel selected by ``self.type``.

     ``self.gamma`` (and ``self.coef0`` for the sigmoid kernel) are passed
     positionally. For ``'AChi2'`` the additive chi-squared kernel is
     negated before its first row is taken. An unrecognised ``self.type``
     yields ``None``.
     """
     kind = self.type
     if kind == 'rbf':
         gram = rbf_kernel(X, Y, self.gamma)
     elif kind == 'Chi2':
         gram = chi2_kernel(X, Y, self.gamma)
     elif kind == 'AChi2':
         gram = -additive_chi2_kernel(X, Y)
     elif kind == 'laplacian':
         gram = laplacian_kernel(X, Y, self.gamma)
     elif kind == 'sigmoid':
         gram = sigmoid_kernel(X, Y, self.gamma, self.coef0)
     else:
         return None
     return gram[0]
def additive_chi2(v1, v2):
    """
    Additive chi-squared kernel value between two inputs.

     k(x, y) = -Sum [(x - y)^2 / (x + y)]

    :param v1: first input, validated by ``check_pairwise_vector``
    :param v2: second input, validated by ``check_pairwise_vector``
    :return: entry ``[0][0]`` of the resulting kernel matrix
    """
    v1, v2 = check_pairwise_vector(v1, v2)

    # Keep the leading axis and flatten everything behind it.
    rows_a = v1.reshape(v1.shape[0], -1)
    rows_b = v2.reshape(v2.shape[0], -1)

    kernel_matrix = additive_chi2_kernel(rows_a, rows_b)
    return kernel_matrix[0][0]
예제 #10
0
    def __init__(self, kernel_name='rbf', type='classification'):
        """Record the kernel name and task type and build the kernel table.

        ``kernel_dict`` maps a short name to a callable ``f(x, y=None)``
        that forwards only ``x`` and ``y`` to the matching pairwise kernel,
        so every kernel runs with its own default hyper-parameters.
        """
        self.kernel_name = kernel_name
        self.type = type

        # Lambdas keep the lookup of the kernel functions lazy.
        self.kernel_dict = dict(
            rbf=lambda x, y=None: rbf_kernel(x, y),
            linear=lambda x, y=None: linear_kernel(x, y),
            add_chi2=lambda x, y=None: additive_chi2_kernel(x, y),
            chi2=lambda x, y=None: chi2_kernel(x, y),
            poly=lambda x, y=None: polynomial_kernel(x, y),
            laplace=lambda x, y=None: laplacian_kernel(x, y),
        )
예제 #11
0
def test_chi_square_kernel():
    """Exercise ``chi2_kernel`` and ``additive_chi2_kernel`` end to end.

    Checks agreement with the elementwise formula, a unit diagonal when data
    is compared with itself, dtype handling (float64 by default, float32
    preserved, integers converted), the ordering of similar versus dissimilar
    rows, and the ``ValueError`` raised for negative, shape-mismatched and
    sparse inputs.
    """
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((10, 4))
    K_add = additive_chi2_kernel(X, Y)
    gamma = 0.1
    K = chi2_kernel(X, Y, gamma=gamma)
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; compare against the concrete dtype instead.
    assert_equal(K.dtype, np.float64)
    for i, x in enumerate(X):
        for j, y in enumerate(Y):
            chi2 = -np.sum((x - y) ** 2 / (x + y))
            chi2_exp = np.exp(gamma * chi2)
            assert_almost_equal(K_add[i, j], chi2)
            assert_almost_equal(K[i, j], chi2_exp)

    # check diagonal is ones for data with itself
    K = chi2_kernel(Y)
    assert_array_equal(np.diag(K), 1)
    # check off-diagonal is < 1 but > 0:
    assert np.all(K > 0)
    assert np.all(K - np.diag(np.diag(K)) < 1)
    # check that float32 is preserved
    X = rng.random_sample((5, 4)).astype(np.float32)
    Y = rng.random_sample((10, 4)).astype(np.float32)
    K = chi2_kernel(X, Y)
    assert_equal(K.dtype, np.float32)

    # check integer type gets converted,
    # check that zeros are handled
    X = rng.random_sample((10, 4)).astype(np.int32)
    K = chi2_kernel(X, X)
    assert np.isfinite(K).all()
    assert_equal(K.dtype, np.float64)

    # check that kernel of similar things is greater than dissimilar ones
    X = [[.3, .7], [1., 0]]
    Y = [[0, 1], [.9, .1]]
    K = chi2_kernel(X, Y)
    assert_greater(K[0, 0], K[0, 1])
    assert_greater(K[1, 1], K[1, 0])

    # test negative input
    assert_raises(ValueError, chi2_kernel, [[0, -1]])
    assert_raises(ValueError, chi2_kernel, [[0, -1]], [[-1, -1]])
    assert_raises(ValueError, chi2_kernel, [[0, 1]], [[-1, -1]])

    # different n_features in X and Y
    assert_raises(ValueError, chi2_kernel, [[0, 1]], [[.2, .2, .6]])

    # sparse matrices
    assert_raises(ValueError, chi2_kernel, csr_matrix(X), csr_matrix(Y))
    assert_raises(ValueError, additive_chi2_kernel,
                  csr_matrix(X), csr_matrix(Y))
예제 #12
0
def calculateMultipleKernel(x, y):
    """Return a randomly weighted sum of several kernels between ``x`` and ``y``.

    Both inputs are reshaped to a single row. Eight kernel families are
    evaluated three times each; every evaluation consumes one weight from
    ``theta`` followed by that kernel's extra positional arguments, in order.
    ``theta`` is a random permutation of 1..46, so the result differs
    between calls.
    """
    theta = random.sample(range(1, 47), 46)  # given a random theta for now

    # Treat each input as one sample: shape (1, n_values).
    x = np.array(x).reshape(1, -1)
    y = np.array(y).reshape(1, -1)

    # (kernel function, number of extra positional arguments taken from theta)
    kernel_specs = (
        (additive_chi2_kernel, 0),
        (chi2_kernel, 1),
        (cosine_similarity, 0),
        (linear_kernel, 0),
        (polynomial_kernel, 3),
        (rbf_kernel, 1),
        (laplacian_kernel, 1),
        (sigmoid_kernel, 1),
    )

    kernelResult = 0
    cursor = 0
    for kernel_fn, n_extra in kernel_specs:
        for _ in range(3):
            weight = theta[cursor]
            extra_args = theta[cursor + 1:cursor + 1 + n_extra]
            kernelResult += weight * kernel_fn(x, y, *extra_args)
            cursor += 1 + n_extra

    return kernelResult
예제 #13
0
def calculateMultipleKernel(x, y):
    """Combine eight kernel families into one randomly weighted sum.

    ``x`` and ``y`` are converted to single-row arrays. Each kernel family
    is evaluated three times; every evaluation draws its weight, and then
    any extra positional kernel arguments, from successive entries of
    ``theta`` (a random permutation of 1..46). The result therefore changes
    from call to call.
    """
    theta = random.sample(range(1, 47), 46)  # given a random theta for now

    # One sample per input, laid out as a single row.
    x = np.array(x).reshape(1, -1)
    y = np.array(y).reshape(1, -1)

    # Kernel families in evaluation order, with the number of additional
    # positional arguments each one reads from theta.
    families = [
        (additive_chi2_kernel, 0),
        (chi2_kernel, 1),
        (cosine_similarity, 0),
        (linear_kernel, 0),
        (polynomial_kernel, 3),
        (rbf_kernel, 1),
        (laplacian_kernel, 1),
        (sigmoid_kernel, 1),
    ]

    values = iter(theta)
    kernelResult = 0
    for kernel_fn, n_extra in families:
        for _ in range(3):
            weight = next(values)
            extras = [next(values) for _ in range(n_extra)]
            kernelResult += weight * kernel_fn(x, y, *extras)

    return kernelResult
예제 #14
0
def cv_loop_chi2(labels, X, cv, C=1, n_repeats=1):
    """Cross-validate an SVM that uses an exponential chi-squared kernel.

    For every split produced by ``cv.split(X, labels)`` (repeated
    ``n_repeats`` times) the kernel width is derived from the training fold,
    an ``SVC`` is fitted, and its accuracy on both folds is recorded. The
    validation mean and standard deviation are also printed.

    :param labels: target array, indexable by the split indices
    :param X: feature array, indexable by the split indices
    :param cv: splitter exposing ``split(X, labels)``
    :param C: SVM regularisation parameter
    :param n_repeats: how many times the whole splitting loop is run
    :return: ``(val_mean, val_std, train_mean, train_std)`` with sample
        standard deviations (``ddof=1``)
    """
    tscore, vscore = [], []
    for _ in range(n_repeats):
        for train, test in cv.split(X, labels):
            X_train, y_train = X[train], labels[train]

            # follow Zhang et al (2007) in setting gamma
            gamma = -1 / np.mean(additive_chi2_kernel(X_train))

            # SVC calls a callable kernel as kernel(A, B) and never forwards
            # its own ``gamma``, so the width is bound here explicitly.
            def kernel(A, B, gamma=gamma):
                return chi2_kernel(A, B, gamma=gamma)

            clf = SVC(kernel=kernel, C=C,
                      class_weight='balanced', decision_function_shape='ovr', cache_size=2048)

            clf.fit(X_train, y_train)
            tscore.append(clf.score(X_train, y_train))
            vscore.append(clf.score(X[test], labels[test]))

    val_mean, val_std = np.mean(vscore), np.std(vscore, ddof=1)
    print('{} +/- {}'.format(val_mean, val_std))
    return val_mean, val_std, np.mean(tscore), np.std(tscore, ddof=1)
예제 #15
0
 def nn_ind(self, color_hist, num):
     """
     Exact nearest neighbor search through exhaustive comparison.

     Distances from ``color_hist`` to ``self.hists_reduced`` are computed
     with the metric named by ``self.distance_metric`` ('manhattan',
     'euclidean' - squared - or 'chi_square'), flattened, and sorted.
     Returns the indices of the ``num`` smallest distances and those
     distances.
     """
     metric = self.distance_metric
     candidates = self.hists_reduced

     if metric == 'manhattan':
         dists = manhattan_distances(color_hist, candidates)
     elif metric == 'euclidean':
         dists = euclidean_distances(color_hist, candidates, squared=True)
     elif metric == 'chi_square':
         dists = -additive_chi2_kernel(color_hist, candidates)

     flat_dists = dists.flatten()
     order = np.argsort(flat_dists).flatten()
     nearest = order[:num]

     return nearest, flat_dists[nearest]
예제 #16
0
    def predict(self, samples):
        """Return the model's decision values for a single sample.

        ``samples`` is flattened into one row, mapped to its additive
        chi-squared kernel row against ``self.npsamples``, and passed to the
        model's ``decision_function``.

        :param samples: array-like holding the values of one sample
        :return: list with one confidence entry per decision-function row
        """
        # transform data
        sample_row = np.array(samples).reshape(1, -1)
        kernel_row = additive_chi2_kernel(sample_row, self.npsamples)

        # Only the decision values are returned; the earlier extra call to
        # ``predict`` produced a result that was never used.
        decision_values = self.__model.decision_function(kernel_row)

        # calculate confidences
        return [value * 1 for value in decision_values]
예제 #17
0
 def nn_ind(self, color_hist, num):
     """
     Exact nearest neighbor search through exhaustive comparison.

     Uses ``self.distance_metric`` to choose between Manhattan distance,
     squared Euclidean distance and the negated additive chi-squared kernel
     between ``color_hist`` and ``self.hists_reduced``.

     Returns a tuple ``(indices, distances)`` for the ``num`` closest
     entries, ordered by increasing distance.
     """
     if self.distance_metric == 'manhattan':
         dists = manhattan_distances(color_hist, self.hists_reduced)
     elif self.distance_metric == 'euclidean':
         dists = euclidean_distances(
             color_hist, self.hists_reduced, squared=True)
     elif self.distance_metric == 'chi_square':
         chi2_similarity = additive_chi2_kernel(color_hist, self.hists_reduced)
         dists = -chi2_similarity

     dists = dists.flatten()
     ranking = np.argsort(dists).flatten()
     top = ranking[:num]
     top_dists = dists[top]

     return top, top_dists
예제 #18
0
def nn(feat, feats, distance='euclidean', K=-1):
    """
    Exact nearest neighbor search through exhaustive comparison.

    :param feat: query feature(s), first argument of the pairwise function
    :param feats: candidate features, second argument of the pairwise function
    :param distance: 'manhattan', 'euclidean' (squared) or 'chi_square'
        (negated additive chi-squared kernel)
    :param K: number of neighbors to return; ``K <= 0`` or a ``K`` at least
        as large as the number of candidates returns all of them
    :return: ``(nn_ind, nn_dist)`` - indices and distances sorted by
        increasing distance
    :raises ValueError: if ``distance`` is not a supported name
    """
    if distance == 'manhattan':
        dists = metrics.manhattan_distances(feat, feats)
    elif distance == 'euclidean':
        dists = metrics.euclidean_distances(feat, feats, squared=True)
    elif distance == 'chi_square':
        dists = -metrics.additive_chi2_kernel(feat, feats)
    else:
        raise ValueError("unknown distance {!r}".format(distance))

    dists = dists.flatten()
    if 0 < K < dists.size:
        # np.argpartition places the K smallest entries first (unordered);
        # it replaces bn.argpartsort, which current Bottleneck releases no
        # longer provide. Only those K entries are then fully sorted.
        nn_ind = np.argpartition(dists, K - 1)[:K]
        nn_ind = nn_ind[np.argsort(dists[nn_ind])]
    else:
        nn_ind = np.argsort(dists)
    nn_dist = dists[nn_ind]

    return nn_ind, nn_dist
예제 #19
0
def get_kernel_matrix(X1, X2=None, kernel='rbf', gamma=1, degree=3, coef0=1):
    """Obtain the N1xN2 kernel matrix from N1xM and N2xM data matrices.

    ``kernel`` selects the pairwise kernel; ``gamma``, ``degree`` and
    ``coef0`` are forwarded to the kernels that accept them. An unknown
    kernel name prints an error line and returns ``None``.
    """
    if kernel == 'rbf':
        return pairwise.rbf_kernel(X1, X2, gamma=gamma)
    if kernel == 'poly':
        return pairwise.polynomial_kernel(
            X1, X2, degree=degree, gamma=gamma, coef0=coef0)
    if kernel == 'linear':
        return pairwise.linear_kernel(X1, X2)
    if kernel == 'laplacian':
        return pairwise.laplacian_kernel(X1, X2, gamma=gamma)
    if kernel == 'chi2':
        return pairwise.chi2_kernel(X1, X2, gamma=gamma)
    if kernel == 'additive_chi2':
        return pairwise.additive_chi2_kernel(X1, X2)
    if kernel == 'sigmoid':
        return pairwise.sigmoid_kernel(X1, X2, gamma=gamma, coef0=coef0)

    print('[Error] Unknown kernel')
    return None
예제 #20
0
    def train(self, samples, labels):
        """Fit an SVM on the additive chi-squared kernel of ``samples``.

        The samples are assembled into one array, stored on
        ``self.npsamples``, turned into a kernel matrix with
        ``additive_chi2_kernel`` and used to fit an ``SVC`` with a
        precomputed kernel. On any failure the exception is logged, the
        sample dtype and contents are printed, and the method waits for
        keyboard input; the exception is not re-raised.

        NOTE(review): this block uses Python 2 syntax (``1L``, ``print``
        statements, ``raw_input``).
        """
        # Samples whose first element has a leading dimension of 1 are
        # joined along that axis; otherwise they are stacked via np.array.
        if samples[0].shape[0] == 1L:
            npsamples = np.concatenate(samples)
        else:
            npsamples = np.array(samples)

        # make sure that npsamples have the correct shape
        npsamples = npsamples.reshape((-1, npsamples.shape[1]))

        # convert label list to numpy array
        # NOTE(review): nplabels is not used below - fit() receives the
        # original ``labels``; confirm which one was intended.
        nplabels = np.float32(labels)
        self.npsamples = npsamples
        try:
            # map to Chi^2 kernel
            k = additive_chi2_kernel(npsamples)
            self.__model = SVC(kernel="precomputed").fit(k, labels)
        # NOTE(review): the bare except also intercepts KeyboardInterrupt
        # and SystemExit.
        except:
            logging.exception("Could not train {0} samples.".format(
                len(labels)))
            print "\n\ndtype:", npsamples.dtype
            raw_input("Press any key to continue")
            print npsamples
예제 #21
0
def nn(feat, feats, distance='euclidean', K=-1):
    """
    Exact nearest neighbor search through exhaustive comparison.

    :param feat: query feature(s), first argument of the distance function
    :param feats: candidate features, second argument of the distance function
    :param distance: 'manhattan', 'euclidean' (squared), 'chi_square'
        (negated additive chi-squared kernel) or 'dot' (negated
        ``np.dot(feat, feats)``)
    :param K: number of neighbors to return; ``K <= 0`` or a ``K`` at least
        as large as the number of candidates returns all of them
    :return: ``(nn_ind, nn_dist)`` - indices and distances sorted by
        increasing distance
    :raises ValueError: if ``distance`` is not a supported name
    """
    if distance == 'manhattan':
        dists = metrics.manhattan_distances(feat, feats)
    elif distance == 'euclidean':
        dists = metrics.euclidean_distances(feat, feats, squared=True)
    elif distance == 'chi_square':
        dists = -metrics.additive_chi2_kernel(feat, feats)
    elif distance == 'dot':
        dists = -np.dot(feat, feats)
    else:
        raise ValueError("unknown distance {!r}".format(distance))

    dists = dists.flatten()
    if 0 < K < dists.size:
        # np.argpartition places the K smallest entries first (unordered);
        # it replaces bn.argpartsort, which current Bottleneck releases no
        # longer provide. Only those K entries are then fully sorted.
        nn_ind = np.argpartition(dists, K - 1)[:K]
        nn_ind = nn_ind[np.argsort(dists[nn_ind])]
    else:
        nn_ind = np.argsort(dists)
    nn_dist = dists[nn_ind]

    return nn_ind, nn_dist
예제 #22
0
    def _nn(self, image_id, feature, distance='cosine', K=-1):
        """
        Exact nearest neighbor search through exhaustive comparison.

        The query is the row of ``self.features[feature]`` at the position
        of ``image_id`` in ``self.index``; it is compared with every row of
        the same feature set (for 'projected', of
        ``self.features_proj[feature]``).

        Returns ``(nn_ind, nn_dist)`` ordered by increasing distance,
        truncated to ``K`` entries when ``K > 0``.
        """
        # S = self.S[feature]
        candidates = self.features[feature]
        query = candidates[self.index.index(image_id)]

        if distance == 'manhattan':
            dists = metrics.manhattan_distances(query, candidates)
        elif distance == 'euclidean':
            dists = metrics.euclidean_distances(query, candidates, squared=True)
        elif distance == 'chi_square':
            dists = -metrics.additive_chi2_kernel(query, candidates)
        elif distance == 'dot':
            dists = -np.dot(candidates, query)
        elif distance == 'cosine':
            candidate_norms = self.features_norm[feature]
            query_norm = np.linalg.norm(query, 2)
            dists = -np.dot(candidates, query) / candidate_norms / query_norm
        elif distance == 'projected':
            # Switch to the projected feature set for both sides.
            candidates = self.features_proj[feature]
            query = candidates[self.index.index(image_id)]
            dists = sklearn.utils.extmath.row_norms(candidates - query)

        flat_dists = dists.flatten()
        order = np.argsort(flat_dists)
        if K > 0:
            order = order.flatten()[:K]

        return order, flat_dists[order]
예제 #23
0
def tsne_embed(datafile, kernel, method, n_repeats, seed):
    """Reduce stored features to 50 dimensions, then embed them in 2-D.

    The results file name is ``datafile`` with ``'features'`` replaced by
    ``'embed'``. The 2-D embedding is computed with the estimator chosen by
    ``method`` and stashed under that name.

    :param datafile: path of the feature file read by ``load_representations``
    :param kernel: ``'linear'`` (plain PCA) or ``'chi2'`` (kernel PCA with an
        exponential chi-squared kernel)
    :param method: one of ``'PCA'``, ``'MDS'``, ``'LLE'``, ``'Isomap'``,
        ``'SpectralEmbedding'``; any other value raises ``KeyError``
    :param n_repeats: accepted for interface compatibility; not used here
    :param seed: accepted for interface compatibility; not used here
    :raises ValueError: if ``kernel`` is not one of the supported names
    """
    # datafile = './data/full/features/vgg16_block5_conv3-vlad-32.h5'
    resultsfile = datafile.replace('features', 'embed')

    keys, features = load_representations(datafile)

    # NOTE(review): ``labels`` is never used after this loop; the loop is
    # kept because ``.one()`` still validates that every key maps to exactly
    # one Micrograph row.
    labels = []
    for key in keys:
        if '-' in key:
            # deal with cropped micrographs: key -> Micrograph.id-UL
            m_id, quadrant = key.split('-')
        else:
            m_id = key
        m = db.query(Micrograph).filter(
            Micrograph.micrograph_id == int(m_id)).one()
        labels.append(m.primary_microconstituent)
    labels = np.array(labels)

    if kernel == 'linear':
        x_pca = PCA(n_components=50).fit_transform(features)
    elif kernel == 'chi2':
        gamma = -1 / np.mean(additive_chi2_kernel(features))

        with warnings.catch_warnings():
            warnings.simplefilter("once", DeprecationWarning)
            # KernelPCA ignores its own ``gamma`` when ``kernel`` is a
            # callable; arguments for a callable go through ``kernel_params``.
            x_pca = KernelPCA(n_components=50, kernel=chi2_kernel,
                              kernel_params={'gamma': gamma}
                              ).fit_transform(features)
    else:
        raise ValueError(
            "unknown kernel {!r}; expected 'linear' or 'chi2'".format(kernel))

    r = {
        'PCA': PCA(n_components=2),
        'MDS': MDS(n_components=2),
        'LLE': LocallyLinearEmbedding(n_components=2, n_neighbors=10),
        'Isomap': Isomap(n_components=2),
        'SpectralEmbedding': SpectralEmbedding(n_components=2)
    }

    x_embedding = r[method].fit_transform(x_pca)
    stash_embeddings(resultsfile, keys, x_embedding, method)
예제 #24
0
    def _nn(self, image_id, feature, distance='cosine', K=-1):
        """
        Exact nearest neighbor search through exhaustive comparison.

        Looks up the feature row belonging to ``image_id`` (via
        ``self.index``) and measures its distance to all rows of the chosen
        feature set using ``distance``: 'manhattan', 'euclidean' (squared),
        'chi_square', 'dot', 'cosine' or 'projected'.

        Returns the sorted neighbor indices and their distances; only the
        first ``K`` are kept when ``K`` is positive.
        """
        # S = self.S[feature]
        feats = self.features[feature]
        row = self.index.index(image_id)
        feat = feats[row]

        if distance == 'manhattan':
            dists = metrics.manhattan_distances(feat, feats)
        elif distance == 'euclidean':
            dists = metrics.euclidean_distances(feat, feats, squared=True)
        elif distance == 'chi_square':
            dists = -metrics.additive_chi2_kernel(feat, feats)
        elif distance == 'dot':
            dists = -np.dot(feats, feat)
        elif distance == 'cosine':
            similarity = np.dot(feats, feat)
            dists = (-similarity / self.features_norm[feature]
                     / np.linalg.norm(feat, 2))
        elif distance == 'projected':
            # Both the candidates and the query come from the projected set.
            feats = self.features_proj[feature]
            feat = feats[self.index.index(image_id)]
            dists = sklearn.utils.extmath.row_norms(feats - feat)

        dists = dists.flatten()
        ranking = np.argsort(dists)
        nn_ind = ranking.flatten()[:K] if K > 0 else ranking
        nn_dist = dists[nn_ind]

        return nn_ind, nn_dist
예제 #25
0
def tsne_embed(datafile, kernel, n_repeats, seed):
    """Reduce stored features to 50 dimensions and embed them with t-SNE.

    The results file name is ``datafile`` with ``'features'`` replaced by
    ``'tsne'``. One embedding is stashed for each perplexity in
    10, 20, ..., 60.

    :param datafile: path of the feature file read by ``load_representations``
    :param kernel: ``'linear'`` (plain PCA) or ``'chi2'`` (kernel PCA with an
        exponential chi-squared kernel)
    :param n_repeats: forwarded to ``ntsne.best_tsne``
    :param seed: accepted for interface compatibility; not used here
    :raises ValueError: if ``kernel`` is not one of the supported names
    """
    # datafile = './data/full/features/vgg16_block5_conv3-vlad-32.h5'
    print('working')
    resultsfile = datafile.replace('features', 'tsne')

    keys, features = load_representations(datafile)

    if kernel == 'linear':
        x_pca = PCA(n_components=50).fit_transform(features)
    elif kernel == 'chi2':
        gamma = -1 / np.mean(additive_chi2_kernel(features))

        with warnings.catch_warnings():
            warnings.simplefilter("once", DeprecationWarning)
            # KernelPCA ignores its own ``gamma`` when ``kernel`` is a
            # callable; arguments for a callable go through ``kernel_params``.
            x_pca = KernelPCA(n_components=50, kernel=chi2_kernel,
                              kernel_params={'gamma': gamma}
                              ).fit_transform(features)
    else:
        raise ValueError(
            "unknown kernel {!r}; expected 'linear' or 'chi2'".format(kernel))

    perplexity = [10, 20, 30, 40, 50, 60]
    for p in perplexity:
        x_tsne = ntsne.best_tsne(x_pca,
                                 perplexity=p,
                                 theta=0.1,
                                 n_repeats=n_repeats)
        stash_tsne_embeddings(resultsfile, keys, x_tsne, p)
예제 #26
0
def evaluate_metric(X_query,
                    y_query,
                    X_gallery,
                    y_gallery,
                    metric='euclidian',
                    parameters=None):
    """Rank gallery samples for every query; report rank accuracies and mAP.

    Samples are stored column-wise. Column ``j`` of ``X_query`` is compared
    with every column of ``X_gallery`` except the one at the same index
    ``j``; the number of columns visited on both sides is taken from
    ``X_query``. Gallery labels are sorted by increasing distance and scored
    by ``get_acc_score``. The per-rank accuracies and the mAP are printed.

    :param X_query: 2-D array of query samples (one per column)
    :param y_query: query labels (flattened before use)
    :param X_gallery: 2-D array of gallery samples (one per column)
    :param y_gallery: gallery labels (flattened before use)
    :param metric: distance name; note the spelling ``'euclidian'``
    :param parameters: extra argument for ``'minkowski'`` (order),
        ``'seuclidean'`` (variance vector) and ``'mahalanobis'`` (inverse
        covariance)
    :return: ``(rank_accuracies, mAP)``
    :raises NameError: if ``metric`` is not supported
    """
    rank_accuracies = []
    AP = []
    # Only the column count is needed; unpacking also requires a 2-D array.
    _, K = X_query.shape
    u = X_query.astype(np.float64)
    v = X_gallery.astype(np.float64)
    y_query = y_query.flatten()
    y_gallery = y_gallery.flatten()

    for query, y_q in zip(range(0, K), y_query):
        q_g_dists = []
        y_valid = []
        a = u[:, query]
        for gallery, y_g in zip(range(0, K), y_gallery):
            if query == gallery:
                continue
            b = v[:, gallery]
            if metric == 'euclidian':
                dist = distance.euclidean(a, b)
            elif metric == 'sqeuclidean':
                dist = distance.sqeuclidean(a, b)
            elif metric == 'seuclidean':
                # seuclidean requires the variance vector as third argument;
                # calling it with two arguments always raised TypeError.
                dist = distance.seuclidean(a, b, parameters)
            elif metric == 'minkowski':
                dist = distance.minkowski(a, b, parameters)
            elif metric == 'chebyshev':
                dist = distance.chebyshev(a, b)
            elif metric == 'chi2':
                dist = -pairwise.additive_chi2_kernel(
                    a.reshape(1, -1), b.reshape(1, -1))[0][0]
            elif metric == 'braycurtis':
                dist = distance.braycurtis(a, b)
            elif metric == 'canberra':
                dist = distance.canberra(a, b)
            elif metric == 'cosine':
                dist = distance.cosine(a, b)
            elif metric == 'correlation':
                dist = distance.correlation(a, b)
            elif metric == 'mahalanobis':
                dist = distance.mahalanobis(a, b, parameters)
            else:
                raise NameError('Specified metric not supported')
            q_g_dists.append(dist)
            y_valid.append(y_g)

        tot_label_occur = y_valid.count(y_q)

        q_g_dists = np.array(q_g_dists)
        y_valid = np.array(y_valid)

        _indexes = np.argsort(q_g_dists)

        # Sorted distances and labels
        q_g_dists, y_valid = q_g_dists[_indexes], y_valid[_indexes]

        AP_, rank_A = get_acc_score(y_valid, y_q, tot_label_occur)

        AP.append(AP_)

        rank_accuracies.append(rank_A)

    rank_accuracies = np.array(rank_accuracies)

    total = rank_accuracies.shape[0]
    rank_accuracies = rank_accuracies.sum(axis=0)
    rank_accuracies = np.divide(rank_accuracies, total)

    print('Accuracies by Rank:')
    n_ranks = rank_accuracies.shape[0]
    # Five ranks per output line; a shorter final line is allowed so that a
    # rank count that is not a multiple of five no longer raises IndexError.
    for start in range(0, n_ranks, 5):
        fields = []
        for rank in range(start, min(start + 5, n_ranks)):
            if fields:
                fields.append('\t')
            fields.extend(['Rank ', rank + 1,
                           ' = %.2f%%' % (rank_accuracies[rank] * 100)])
        print(*fields)

    AP = np.array(AP)

    mAP = AP.sum() / AP.shape[0]
    print('mAP = %.2f%%' % (mAP * 100))

    return rank_accuracies, mAP
예제 #27
0
        print 'decay coefficient = ' + str(decay_coefficient)

        # initialise kernel matrix
        K = np.zeros((n, n))

        # 100 iterations
        # calculate chi-sq distance between martrices raised to the kth power
        # weight with decay and accumulate for a weighted sum of chi-sq distances
        for k in range(1, 101):

            decay = decay_coefficient**k
            edge_data_exp = np.apply_along_axis(
                lambda x: update_matrix_row(x, k), 1, edge_data_norm)
            #K_update = decay * chi2_kernel(edge_data_exp, gamma=gamma)
            # print K_update[:5, :5]
            K_update = decay * additive_chi2_kernel(edge_data_exp)
            #print K_update[:5, :5]
            #print decay
            #print K_update[:5, :5]
            K = K + K_update

        # convert to Gaussian kernel
        print K[:5, :5]
        K = np.exp(gamma * K)
        print K[:5, :5]

        # attach the labels and save
        K = np.hstack((np.transpose(labels), K))
        np.savetxt(kernel_dir + 'K_rw_chisq_ ' + str(i) + '_' + str(j) +
                   '.csv',
                   K,
def evaluate_metric(X_query,
                    camId_query,
                    y_query,
                    X_gallery,
                    camId_gallery,
                    y_gallery,
                    metric='euclidian',
                    parameters=None):
    """Rank gallery samples for every query; report rank accuracies and mAP.

    Each query is compared with every gallery entry except those that share
    both its camera id and its label. Gallery labels are sorted by
    increasing distance and scored by ``get_acc_score``. The per-rank
    accuracies and the mAP are printed.

    :param X_query: iterable of query feature vectors
    :param camId_query: camera id of each query
    :param y_query: label of each query
    :param X_gallery: iterable of gallery feature vectors
    :param camId_gallery: camera id of each gallery entry
    :param y_gallery: label of each gallery entry
    :param metric: distance name; note the spelling ``'euclidian'``
    :param parameters: extra argument for ``'minkowski'`` (order),
        ``'seuclidean'`` (variance vector) and ``'mahalanobis'`` (inverse
        covariance)
    :return: ``(rank_accuracies, mAP)``
    :raises NameError: if ``metric`` is not supported
    """
    rank_accuracies = []
    AP = []

    for query, camId_q, y_q in zip(X_query, camId_query, y_query):
        q_g_dists = []
        y_valid = []
        for gallery, camId_g, y_g in zip(X_gallery, camId_gallery, y_gallery):
            # Skip gallery entries with the same camera and the same label.
            if camId_q == camId_g and y_q == y_g:
                continue
            if metric == 'euclidian':
                dist = distance.euclidean(query, gallery)
            elif metric == 'sqeuclidean':
                dist = distance.sqeuclidean(query, gallery)
            elif metric == 'seuclidean':
                # seuclidean requires the variance vector as third argument;
                # calling it with two arguments always raised TypeError.
                dist = distance.seuclidean(query, gallery, parameters)
            elif metric == 'minkowski':
                dist = distance.minkowski(query, gallery, parameters)
            elif metric == 'chebyshev':
                dist = distance.chebyshev(query, gallery)
            elif metric == 'chi2':
                dist = -pairwise.additive_chi2_kernel(
                    query.reshape(1, -1), gallery.reshape(1, -1))[0][0]
            elif metric == 'braycurtis':
                dist = distance.braycurtis(query, gallery)
            elif metric == 'canberra':
                dist = distance.canberra(query, gallery)
            elif metric == 'cosine':
                dist = distance.cosine(query, gallery)
            elif metric == 'correlation':
                dist = distance.correlation(query, gallery)
            elif metric == 'mahalanobis':
                dist = distance.mahalanobis(query, gallery, parameters)
            else:
                raise NameError('Specified metric not supported')
            q_g_dists.append(dist)
            y_valid.append(y_g)

        tot_label_occur = y_valid.count(y_q)

        q_g_dists = np.array(q_g_dists)
        y_valid = np.array(y_valid)

        _indexes = np.argsort(q_g_dists)

        # Sorted distances and labels
        q_g_dists, y_valid = q_g_dists[_indexes], y_valid[_indexes]

        AP_, rank_A = get_acc_score(y_valid, y_q, tot_label_occur)

        AP.append(AP_)

        rank_accuracies.append(rank_A)

    rank_accuracies = np.array(rank_accuracies)

    total = rank_accuracies.shape[0]
    rank_accuracies = rank_accuracies.sum(axis=0)
    rank_accuracies = np.divide(rank_accuracies, total)

    print('Accuracies by Rank:')
    n_ranks = rank_accuracies.shape[0]
    # Five ranks per output line; a shorter final line is allowed so that a
    # rank count that is not a multiple of five no longer raises IndexError.
    for start in range(0, n_ranks, 5):
        fields = []
        for rank in range(start, min(start + 5, n_ranks)):
            if fields:
                fields.append('\t')
            fields.extend(['Rank ', rank + 1,
                           ' = %.2f%%' % (rank_accuracies[rank] * 100)])
        print(*fields)

    AP = np.array(AP)

    mAP = AP.sum() / AP.shape[0]
    print('mAP = %.2f%%' % (mAP * 100))

    return rank_accuracies, mAP