Exemple #1
0
 def setUp(self):
     '''Build a random 10x12 tensor, normalize it, and take a k=3 SVD.'''
     random_matrix = numpy.random.random_sample((10, 12))
     self.tensor = DictTensor(ndim=2)
     self.tensor.update(nested_list_to_dict(random_matrix))
     self.normalized_tensor = self.tensor.normalized()
     self.svd = self.normalized_tensor.svd(k=3)
     # Keep the three factors as attributes for convenient access.
     results = self.svd
     self.u, self.svals, self.v = results.u, results.svals, results.v
Exemple #2
0
 def setUp(self):
     '''Load the 2D test matrix and compute both a regular and an
     incremental SVD of it (k=3).'''
     entries = nested_list_to_dict(svd_2d_test_matrix)
     self.tensor = DictTensor(ndim=2)
     # Note: this update actually puts 20 values in the tensor!
     self.tensor.update(entries)
     self.svd = self.tensor.svd(k=3)
     self.incremental = self.tensor.incremental_svd(k=3, niter=200)
     results = self.svd
     self.u, self.svals, self.v = results.u, results.svals, results.v
Exemple #3
0
 def setUp(self):
     '''Load the 2D test matrix and compute its SVD (k=3) using the
     row and column offsets.'''
     entries = nested_list_to_dict(svd_2d_test_matrix)
     self.tensor = DictTensor(ndim=2)
     # Note: this update actually puts 20 values in the tensor!
     self.tensor.update(entries)
     svd_options = dict(k=3,
                        offset_for_row=offset_for_row,
                        offset_for_col=offset_for_col)
     self.svd = self.tensor.svd(**svd_options)
     results = self.svd
     self.u, self.svals, self.v = results.u, results.svals, results.v
Exemple #4
0
    def testAdd(self):
        '''Adding two sparse tensors sums the shared entry (1, 0) and
        keeps the entries that exist in only one operand.'''
        left = DictTensor(ndim=2)
        right = DictTensor(ndim=2)
        for key, value in [((0, 0), 1), ((1, 1), 1), ((1, 0), 2)]:
            left[key] = value
        for key, value in [((2, 1), 4), ((1, 0), 5)]:
            right[key] = value

        total = left + right
        expected = [[1, None],
                    [7, 1],
                    [None, 4]]
        assertTensorEqual(total, expected)
Exemple #5
0
class SVD2DTest(unittest.TestCase):
    '''Checks the k=3 SVD of the small 2D test matrix (computed with
    row and column offsets) against hand-written expected factors.'''

    def setUp(self):
        entries = nested_list_to_dict(svd_2d_test_matrix)
        self.tensor = DictTensor(ndim=2)
        # Note: this update actually puts 20 values in the tensor!
        self.tensor.update(entries)
        self.svd = self.tensor.svd(
            k=3,
            offset_for_row=offset_for_row,
            offset_for_col=offset_for_col)
        results = self.svd
        self.u, self.svals, self.v = results.u, results.svals, results.v

    def test_decomposition(self):
        '''Factor shapes agree with the tensor, and the factors match
        the expected values (u and v are compared with abs=True).'''
        num_svals = len(self.svals)
        self.assertEqual(self.u.shape[0], self.tensor.shape[0])
        self.assertEqual(num_svals, self.u.shape[1])
        self.assertEqual(num_svals, self.v.shape[1])
        self.assertEqual(self.v.shape[0], self.tensor.shape[1])

        expected_u = [[0, 0, 1],
                      [0, -1, 0],
                      [0, 0, 0],
                      [-1, 0, 0]]
        assertTensorEqual(self.u, expected_u, abs=True)

        expected_v = [[0, 0, sqrt(.2)],
                      [-1, 0, 0],
                      [0, -1, 0],
                      [0, 0, 0],
                      [0, 0, sqrt(.8)]]
        assertTensorEqual(self.v, expected_v, abs=True)

        expected_svals = [4, 3, sqrt(5)]
        assertTensorEqual(self.svals, expected_svals)

    def test_reconstructed(self):
        '''The reconstruction matches the expected matrix, as do a row
        slice and a column slice of it.'''
        expected_full = [[1, 0, 0, 0, 2],
                         [0, 0, 3, 0, 0],
                         [0, 0, 0, 0, 0],
                         [0, 4, 0, 0, 0]]
        assertTensorEqual(self.svd.reconstructed, expected_full)
        assertTensorEqual(self.svd.reconstructed[1, :], [0, 0, 3, 0, 0])
        assertTensorEqual(self.svd.reconstructed[:, 2], [0, 3, 0, 0])

    def test_orthonormality(self):
        '''u.T * u and v.T * v both equal the 3x3 identity.'''
        identity = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
        assertTensorEqual(self.u.T * self.u, identity)
        assertTensorEqual(self.v.T * self.v, identity)
Exemple #6
0
 def setUp(self):
     '''Fill a 2D tensor from the SVD test matrix, then compute its
     plain SVD and its incremental SVD, both with k=3.'''
     tensor = DictTensor(ndim=2)
     # Note: this update actually puts 20 values in the tensor!
     tensor.update(nested_list_to_dict(svd_2d_test_matrix))
     self.tensor = tensor
     self.svd = tensor.svd(k=3)
     self.incremental = tensor.incremental_svd(k=3, niter=200)
     self.u = self.svd.u
     self.svals = self.svd.svals
     self.v = self.svd.v
Exemple #7
0
 def load(cls, filebase):
     '''Load a tensor from `filebase`.

     The underlying DictTensor is loaded first. If NormalizedView.load
     raises IOError, the bare tensor is used instead of a normalized
     wrapper. The result is handed to the parent class's load.
     '''
     base = DictTensor.load(filebase)
     try:
         wrapped = NormalizedView.load(filebase, base)
     except IOError:
         # Normalization data could not be read; use the raw tensor.
         wrapped = base
     return super(SparseLabeledTensor, cls).load(filebase, wrapped)
Exemple #8
0
    def test_transposed(self):
        '''Run the same testcase, but with the matrix transposed.

        Keys are (document, term) and the view is built with
        transposed=True.
        '''
        counts = DictTensor(ndim=2)
        # Consider a document containing 100 words wherein the word cow
        # appears 3 times (and 'moo' appears the other 97 times).
        first_doc, cow, moo = 0, 1, 2
        counts[first_doc, cow] = 3
        counts[first_doc, moo] = 97
        # The term frequency (TF) for cow is then 0.03 (3 / 100).
        # (The view can't be created earlier because it's read-only.)
        view = TfIdfView(counts, transposed=True)
        self.assertEqual(view.counts_for_document[first_doc], 100)
        self.assertAlmostEqual(view.tf(cow, first_doc), 0.03)

        # Now, assume we have 10 million documents and cow appears in one
        # thousand of these: document 0 plus the 999 documents just below
        # index 10,000,000.
        total_docs = 10000000
        for other_doc in xrange(total_docs - 1000 + 1, total_docs):
            counts[other_doc, cow] = 1

        # Then, the inverse document frequency is
        # ln(10 000 000 / 1 000) = 9.21.
        # (The view has to be rebuilt after adding the other docs.)
        view = TfIdfView(counts, transposed=True)
        self.assertEqual(view.num_documents, total_docs)
        self.assertEqual(view.num_docs_that_contain_term[cow], 1000)
        self.assertAlmostEqual(view.idf(cow), 9.21, 2)

        # The TF-IDF score is the product: 0.03 * 9.21 = 0.28.
        score = view[first_doc, cow]
        # The score must be a scalar, i.e. have no (or an empty) shape.
        self.assertEqual(len(getattr(score, 'shape', ())), 0)
        self.assertAlmostEqual(score, 0.28, 2)
Exemple #9
0
def weight_feature_vector(vec, weight_dct, default_weight=0.0):
    '''
    Scale every entry of a feature vector by a per-relation weight.

    vec: a one-dimensional feature vector (e.g., a slice of a
    reconstructed tensor).

    weight_dct: a mapping from (side, relation) tuples to weights,
    where side is 'left' or 'right'. The lookup key is the first two
    elements of each entry's label.

    default_weight: the weight used for entries whose (side, relation)
    is not in weight_dct.

    Returns a new labeled 1-D tensor sharing vec's label lists.
    Raises TypeError if vec does not have exactly one dimension.

    Example:
    >>> from csc.conceptnet4.analogyspace import conceptnet_2d_from_db
    >>> t = conceptnet_2d_from_db('en')
    >>> svd = t.svd()
    >>> baseball = svd.reconstructed['baseball',:]
    >>> weights = {}
    >>> weights['right', 'IsA'] = 1.0
    >>> weights['right', 'AtLocation'] = 0.8
    >>> weight_feature_vector(baseball, weights).top_items()
    '''
    if vec.ndim != 1:
        raise TypeError('Feature vectors can only have one dimension')

    weighted = LabeledView(DictTensor(ndim=1), label_lists=vec.label_lists())
    for key, value in vec.iteritems():
        feature = key[0]
        side_and_relation = feature[:2]
        weight = weight_dct.get(side_and_relation, default_weight)
        weighted[key] = value * weight
    return weighted
Exemple #10
0
    def svd(self, k=50, normalized=True):
        '''Run an SVD on this unfolding and return an SVD2DResults.

        The unfolding is first compacted into a 2D tensor; if
        `normalized` is true that tensor is normalized with mode=0
        before the SVD with `k` is taken. The u and v factors are then
        wrapped so that their labels line up with the original tensor.
        '''
        # A LabeledView maps the unfolded column keys to unique indices.
        compacted = LabeledView(DictTensor(2), [IdentitySet(0), OrderedSet()])
        self.compact_to(compacted)
        matrix = compacted.normalized(mode=0) if normalized else compacted
        result = matrix.svd(k)

        # Wrap the output so that the labeling all works out.
        source = self.tensor
        if hasattr(source, '_labels'):
            # Case for labeled view beneath
            # FIXME: try not to rely on private vars.
            # TODO: it would be nice to factor this in such a way that we
            #  didn't have to worry about the labeling case here.
            u = LabeledView(result.u, [source._labels[self.dim], None])
            column_sets = source.label_sets()
        else:
            u = result.u
            column_sets = [IdentitySet(size) for size in source.shape]
        v = LabeledView(
            result.v,
            [UnfoldedSet.from_unfolding(self.dim, column_sets), None])

        from csc.divisi.svd import SVD2DResults
        return SVD2DResults(u, v, result.svals)
Exemple #11
0
 def load(cls, filebase):
     '''Load from `filebase`, wrapping the stored DictTensor in a
     NormalizedView when one can be loaded.

     An IOError from NormalizedView.load is ignored; in that case the
     plain DictTensor is passed on to the parent class's load.
     '''
     plain = DictTensor.load(filebase)
     try:
         loaded = NormalizedView.load(filebase, plain)
     except IOError:
         loaded = plain
     parent = super(SparseLabeledTensor, cls)
     return parent.load(filebase, loaded)
Exemple #12
0
 def setUp(self):
     '''Create a random 10x12 tensor, its normalized form, and the
     k=3 SVD of the normalized form.'''
     tensor = DictTensor(ndim=2)
     tensor.update(
         nested_list_to_dict(numpy.random.random_sample((10, 12))))
     self.tensor = tensor
     self.normalized_tensor = tensor.normalized()
     self.svd = self.normalized_tensor.svd(k=3)
     self.u = self.svd.u
     self.svals = self.svd.svals
     self.v = self.svd.v
Exemple #13
0
 def __init__(self, *a, **kw):
     '''Initialize the view.

     With an `ndim` keyword, a fresh DictTensor of that many dimensions
     and one new OrderedSet per dimension are created and passed to
     LabeledView ahead of the other arguments. Without it, all
     arguments go to LabeledView unchanged.
     '''
     if 'ndim' not in kw:
         LabeledView.__init__(self, *a, **kw)
     else:
         ndim = kw.pop('ndim')
         storage = DictTensor(ndim)
         fresh_labels = [OrderedSet() for _ in xrange(ndim)]
         LabeledView.__init__(self, storage, fresh_labels, *a, **kw)
     self._slice_cache = {}
Exemple #14
0
class UnfoldedSparseTensorTest(unittest.TestCase):
    '''Unfolding and compacting a dense-filled 2x3x4 DictTensor whose
    entry at (x1, x2, x3) is x1*100 + x2*10 + x3.'''

    @staticmethod
    def _all_indices():
        # Every (x1, x2, x3) index of the 2x3x4 tensor, in nested-loop order.
        return [(x1, x2, x3)
                for x1 in range(2)
                for x2 in range(3)
                for x3 in range(4)]

    @staticmethod
    def _value(x1, x2, x3):
        # The value stored at a given index; the digits encode the position.
        return x1 * 100 + x2 * 10 + x3

    def setUp(self):
        self.raw = DictTensor(3)
        for index in self._all_indices():
            self.raw[index] = self._value(*index)

    def test_unfold0(self):
        unfolded = self.raw.unfolded(0)
        self.assertEqual(unfolded.shape, (2, 3 * 4))
        self.assertEqual(len(unfolded), 2 * 3 * 4)
        for x1, x2, x3 in self._all_indices():
            self.assertEqual(unfolded[x1, (x2, x3)], self._value(x1, x2, x3))

    def test_unfold1(self):
        unfolded = self.raw.unfolded(1)
        self.assertEqual(unfolded.shape, (3, 2 * 4))
        for x1, x2, x3 in self._all_indices():
            self.assertEqual(unfolded[x2, (x1, x3)], self._value(x1, x2, x3))

    def test_unfold2(self):
        unfolded = self.raw.unfolded(2)
        self.assertEqual(unfolded.shape, (4, 2 * 3))
        for x1, x2, x3 in self._all_indices():
            self.assertEqual(unfolded[x3, (x1, x2)], self._value(x1, x2, x3))

    def test_compact0(self):
        target = DictTensor(2)
        self.raw.unfolded(0).compact_to(target)
        self.assertEqual(len(target), 2 * 3 * 4)
        for x1, x2, x3 in self._all_indices():
            # Column (x2, x3) is expected at flat index x2 * 4 + x3.
            self.assertEqual(target[x1, x2 * 4 + x3],
                             self._value(x1, x2, x3))
Exemple #15
0
 def test_compact0(self):
     '''Compacting the mode-0 unfolding keeps all 24 entries, with
     column (x2, x3) found at flat index x2 * 4 + x3.'''
     unfolded = self.raw.unfolded(0)
     target = DictTensor(ndim=2)
     unfolded.compact_to(target)
     self.assertEqual(len(target), 2 * 3 * 4)
     indices = [(a, b, c)
                for a in range(2)
                for b in range(3)
                for c in range(4)]
     for a, b, c in indices:
         self.assertEqual(target[a, b * 4 + c], a * 100 + b * 10 + c)
Exemple #16
0
class UnfoldedSparseTensorTest(unittest.TestCase):
    '''Tests unfolded() and compact_to() on a 2x3x4 DictTensor filled
    so that the entry at (i, j, k) equals i*100 + j*10 + k.'''

    def _entries(self):
        # Yield ((i, j, k), expected value) pairs in nested-loop order.
        for i in range(2):
            for j in range(3):
                for k in range(4):
                    yield (i, j, k), i * 100 + j * 10 + k

    def setUp(self):
        self.raw = DictTensor(3)
        for index, value in self._entries():
            self.raw[index] = value

    def test_unfold0(self):
        uf = self.raw.unfolded(0)
        self.assertEqual(uf.shape, (2, 3 * 4))
        self.assertEqual(len(uf), 2 * 3 * 4)
        for (i, j, k), value in self._entries():
            self.assertEqual(uf[i, (j, k)], value)

    def test_unfold1(self):
        uf = self.raw.unfolded(1)
        self.assertEqual(uf.shape, (3, 2 * 4))
        for (i, j, k), value in self._entries():
            self.assertEqual(uf[j, (i, k)], value)

    def test_unfold2(self):
        uf = self.raw.unfolded(2)
        self.assertEqual(uf.shape, (4, 2 * 3))
        for (i, j, k), value in self._entries():
            self.assertEqual(uf[k, (i, j)], value)

    def test_compact0(self):
        uf = self.raw.unfolded(0)
        compact = DictTensor(2)
        uf.compact_to(compact)
        self.assertEqual(len(compact), 2 * 3 * 4)
        for (i, j, k), value in self._entries():
            # The pair (j, k) is expected at flat column j * 4 + k.
            self.assertEqual(compact[i, j * 4 + k], value)
Exemple #17
0
    def test_combine_by_element(self):
        '''combine_by_element applies the function entry by entry and
        raises IndexError on mismatched shape or dimensionality.'''
        first = DictTensor(ndim=2)
        second = DictTensor(ndim=2)
        first[1, 1] = 1
        first[1, 0] = 2
        second[1, 1] = 4
        second[0, 1] = 5

        def plus_double(x, y):
            return x + (2 * y)

        combined = first.combine_by_element(second, plus_double)
        expected = [[None, 10],
                    [2, 9]]
        assertTensorEqual(combined, expected)

        # Make sure errors are raised when the tensors don't have the
        # same shape or number of dimensions
        def add(x, y):
            return x + y

        wrong_shape = DictTensor(ndim=2)
        wrong_shape[0, 2] = 3
        wrong_shape[1, 0] = 5
        self.assertRaises(IndexError,
                          first.combine_by_element, wrong_shape, add)
        wrong_ndim = DictTensor(ndim=3)
        self.assertRaises(IndexError,
                          first.combine_by_element, wrong_ndim, add)
Exemple #18
0
    def build_tensor(self, tensor=None):
        '''
        Build the combined tensor. Done explicitly because it's slow.

        If `tensor` is not None, it is used as the underlying numeric
        storage tensor. It should have the same number of dimensions
        as the blend. It defaults to a new DictTensor.

        The result is stored in `self._tensor`; nothing is returned.
        '''
        log = self.logger
        log.info('building combined tensor.')
        labels = self._labels
        if tensor is None:
            tensor = DictTensor(ndim=self.ndim)
        assert tensor.ndim == self.ndim

        def to_indices(key):
            # Replace each label in the key by its position in the
            # label list of the corresponding dimension.
            return tuple(label_list.index(label)
                         for label_list, label in izip(labels, key))

        if not self._keys_never_overlap:
            # Keys may collide, so weighted values are accumulated.
            sources = zip(self._weights, self._tensors, self.names)
            for factor, cur_tensor, name in sources:
                log.info('slow-merging %s' % name)
                for key, val in cur_tensor.iteritems():
                    tensor.inc(to_indices(key), factor * val)
        else:
            log.info('fast-merging.')
            tensor.update((to_indices(key), val)
                          for key, val in self._fast_iteritems())
        self._tensor = tensor
        log.info('done building tensor.')
Exemple #19
0
    def test_tensordot(self):
        '''tensordot over sparse and dense operands (currently disabled:
        the method returns before running any check).'''
        # FIXME XXX: skip this test.
        if True:
            return
        # Test degenerate case of two 1-d vectors
        first = DictTensor(ndim=1)
        second = DictTensor(ndim=1)
        first[0] = 1
        first[2] = 2
        second[0] = 3
        second[1] = 4
        second[2] = 5
        self.assertEqual(13, first.tensordot(second, 0))
        self.assertEqual(13, first.tensordot(second.to_dense(), 0))
        self.assertEqual(13, first.to_dense().tensordot(second, 0))
        self.assertEqual(13, first.to_dense().tensordot(second.to_dense(), 0))

        for _ in range(5):
            # Make a random, randomly-shaped 3D tensor
            shape = random.sample(xrange(1, 30), 3)
            dense = DenseTensor(numpy.random.random(shape))

            # Pick a random one of those dimensions
            dim = random.randrange(3)

            # Make a random vector of that length
            vec = DenseTensor(numpy.random.random((shape[dim], )))

            # Try the dense result
            dense_result = dense.tensordot(vec, dim)
            remaining_shape = tuple(shape[:dim] + shape[dim + 1:])
            self.assertEqual(dense_result.shape, remaining_shape)

            # Try it with the tensor being sparse.
            sparse_result = dense.to_sparse().tensordot(vec, dim)
            self.assertEqual(sparse_result.shape, dense_result.shape)
            for key, val in dense_result.iteritems():
                self.assertAlmostEqual(val, sparse_result[key])
Exemple #20
0
class NormalizedSVD2DTest(unittest.TestCase):
    '''Sanity checks for the k=3 SVD of a normalized random 10x12 tensor.'''

    def setUp(self):
        self.tensor = DictTensor(2)
        self.tensor.update(
            nested_list_to_dict(numpy.random.random_sample((10, 12))))
        self.normalized_tensor = self.tensor.normalized()
        self.svd = self.normalized_tensor.svd(k=3)
        self.u, self.svals, self.v = self.svd.u, self.svd.svals, self.svd.v

    def test_decomposition(self):
        '''Factor shapes agree with the tensor and the singular values
        are strictly decreasing.'''
        self.assertEqual(self.u.shape[0], self.tensor.shape[0])
        self.assertEqual(len(self.svals), self.u.shape[1])
        self.assertEqual(len(self.svals), self.v.shape[1])
        self.assertEqual(self.v.shape[0], self.tensor.shape[1])

        # Assert that the singular values are decreasing.
        # (assertTrue replaces the deprecated assert_ alias.)
        for i in range(1, len(self.svals)):
            self.assertTrue(self.svals[i] < self.svals[i - 1])

    def test_reconstructed(self):
        pass  # TODO

    def test_orthonormality(self):
        '''u.T * u and v.T * v are identity matrices.'''
        assertTensorEqual(self.u.T * self.u, numpy.eye(self.u.shape[1]))
        # Size the identity from v itself rather than from u.
        assertTensorEqual(self.v.T * self.v, numpy.eye(self.v.shape[1]))

    def test_variance(self):
        # TODO: this test is disabled; everything below the return is
        # kept for when it is re-enabled.
        return
        # Assert that the SVD explained some of the variance.
        diff_k3 = self.tensor - self.svd.reconstructed
        tensor_mag = self.tensor.magnitude()
        diff_k3_mag = diff_k3.magnitude()
        self.assertTrue(tensor_mag > diff_k3_mag)

        # Check that a smaller SVD explains less of the variance, but still some.
        svd_k1 = self.tensor.svd(k=1)
        diff_k1 = self.tensor - svd_k1.reconstructed
        diff_k1_mag = diff_k1.magnitude()
        self.assertTrue(tensor_mag > diff_k1_mag > diff_k3_mag)
Exemple #21
0
class NormalizedSVD2DTest(unittest.TestCase):
    '''Checks on the k=3 SVD of a normalized, randomly filled 10x12
    tensor.'''

    def setUp(self):
        self.tensor = DictTensor(2)
        self.tensor.update(nested_list_to_dict(
                numpy.random.random_sample((10, 12))))
        self.normalized_tensor = self.tensor.normalized()
        self.svd = self.normalized_tensor.svd(k=3)
        self.u, self.svals, self.v = self.svd.u, self.svd.svals, self.svd.v

    def test_decomposition(self):
        '''The factor shapes match the tensor, and each singular value
        is smaller than the one before it.'''
        self.assertEqual(self.u.shape[0], self.tensor.shape[0])
        self.assertEqual(len(self.svals), self.u.shape[1])
        self.assertEqual(len(self.svals), self.v.shape[1])
        self.assertEqual(self.v.shape[0], self.tensor.shape[1])

        # Assert that the singular values are decreasing.
        # assert_ is a deprecated alias; assertTrue is the supported name.
        for i in range(1, len(self.svals)):
            self.assertTrue(self.svals[i] < self.svals[i-1])

    def test_reconstructed(self):
        pass # TODO

    def test_orthonormality(self):
        '''Both u.T * u and v.T * v equal the identity.'''
        assertTensorEqual(self.u.T * self.u, numpy.eye(self.u.shape[1]))
        # The identity for v is sized from v's own column count.
        assertTensorEqual(self.v.T * self.v, numpy.eye(self.v.shape[1]))

    def test_variance(self):
        # TODO: disabled for now; the checks after the return do not run.
        return
        # Assert that the SVD explained some of the variance.
        diff_k3 = self.tensor - self.svd.reconstructed
        tensor_mag = self.tensor.magnitude()
        diff_k3_mag = diff_k3.magnitude()
        self.assertTrue(tensor_mag > diff_k3_mag)

        # Check that a smaller SVD explains less of the variance, but still some.
        svd_k1 = self.tensor.svd(k=1)
        diff_k1 = self.tensor - svd_k1.reconstructed
        diff_k1_mag = diff_k1.magnitude()
        self.assertTrue(tensor_mag > diff_k1_mag > diff_k3_mag)
Exemple #22
0
class SVD2DTest(unittest.TestCase):
    '''Compares the k=3 SVD of the 2D test matrix, taken with row and
    column offsets, to hand-written expected results.'''

    def setUp(self):
        tensor = DictTensor(ndim=2)
        # Note: this update actually puts 20 values in the tensor!
        tensor.update(nested_list_to_dict(svd_2d_test_matrix))
        self.tensor = tensor
        self.svd = tensor.svd(k=3,
                              offset_for_row=offset_for_row,
                              offset_for_col=offset_for_col)
        self.u = self.svd.u
        self.svals = self.svd.svals
        self.v = self.svd.v

    def test_decomposition(self):
        '''Shapes of u, v and svals are consistent, and their values
        match the expectation (u and v compared with abs=True).'''
        rank = len(self.svals)
        self.assertEqual(self.u.shape[0], self.tensor.shape[0])
        self.assertEqual(rank, self.u.shape[1])
        self.assertEqual(rank, self.v.shape[1])
        self.assertEqual(self.v.shape[0], self.tensor.shape[1])

        u_rows = [
            [0, 0, 1],
            [0, -1, 0],
            [0, 0, 0],
            [-1, 0, 0],
        ]
        assertTensorEqual(self.u, u_rows, abs=True)

        v_rows = [
            [0, 0, sqrt(.2)],
            [-1, 0, 0],
            [0, -1, 0],
            [0, 0, 0],
            [0, 0, sqrt(.8)],
        ]
        assertTensorEqual(self.v, v_rows, abs=True)

        assertTensorEqual(self.svals, [4, 3, sqrt(5)])

    def test_reconstructed(self):
        '''The reconstructed matrix, its row 1 and its column 2 match
        the expected values.'''
        matrix_rows = [
            [1, 0, 0, 0, 2],
            [0, 0, 3, 0, 0],
            [0, 0, 0, 0, 0],
            [0, 4, 0, 0, 0],
        ]
        assertTensorEqual(self.svd.reconstructed, matrix_rows)
        assertTensorEqual(self.svd.reconstructed[1, :], [0, 0, 3, 0, 0])
        assertTensorEqual(self.svd.reconstructed[:, 2], [0, 3, 0, 0])

    def test_orthonormality(self):
        '''u.T * u and v.T * v are both the 3x3 identity.'''
        identity = [
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
        ]
        assertTensorEqual(self.u.T * self.u, identity)
        assertTensorEqual(self.v.T * self.v, identity)
Exemple #23
0
    def build_tensor(self, tensor=None):
        '''
        Build the combined tensor. Done explicitly because it's slow.

        If `tensor` is not None, it is used as the underlying numeric
        storage tensor. It should have the same number of dimensions
        as the blend. It defaults to a new DictTensor.

        On completion the storage tensor is kept in `self._tensor`.
        '''
        self.logger.info('building combined tensor.')
        labels = self._labels
        if tensor is None:
            tensor = DictTensor(ndim=self.ndim)
        assert tensor.ndim == self.ndim

        def numeric_key(labeled_key):
            # Look up each label's index in its dimension's label list.
            return tuple(label_list.index(label)
                         for label_list, label in izip(labels, labeled_key))

        if self._keys_never_overlap:
            self.logger.info('fast-merging.')
            merged = ((numeric_key(key), val)
                      for key, val in self._fast_iteritems())
            tensor.update(merged)
        else:
            weighted_sources = zip(self._weights, self._tensors, self.names)
            for factor, cur_tensor, name in weighted_sources:
                self.logger.info('slow-merging %s' % name)
                for key, val in cur_tensor.iteritems():
                    # Overlapping keys add up, each scaled by its weight.
                    tensor.inc(numeric_key(key), factor * val)
        self._tensor = tensor
        self.logger.info('done building tensor.')
Exemple #24
0
def test_DictMatrixMatrixDot():
    '''Multiply a 2x3 DictTensor by a 3x4 DictTensor and compare each
    entry the product yields with precomputed values.'''
    # Numbers computed using numpy separately (and checked by hand)...
    left_entries = {
        (0, 0): 0.97878770132160475,
        (0, 1): 0.38968165255179188,
        (0, 2): 0.62726841877492023,
        (1, 0): 0.077757604769237876,
        (1, 1): 0.081345677776447523,
        (1, 2): 0.64136810022648949,
    }
    right_entries = {
        (0, 0): 0.062059208836173663,
        (0, 1): 0.67286767409459525,
        (0, 2): 0.55410453533854442,
        (0, 3): 0.74671274663041698,
        (1, 0): 0.11565332983247767,
        (1, 1): 0.48262692547766795,
        (1, 2): 0.76280138705455269,
        (1, 3): 0.50230554417370143,
        (2, 0): 0.67149114912362429,
        (2, 1): 0.7656884479264322,
        (2, 2): 0.69286881606948747,
        (2, 3): 0.82598232206483091,
    }
    expected = {
        (0, 0): 0.52701596238696313,
        (0, 1): 1.3269576439118278,
        (0, 2): 1.2742151361864653,
        (0, 3): 1.4447251324591062,
        (1, 0): 0.444906476567622,
        (1, 1): 0.58266833824233299,
        (1, 2): 0.54952039356712779,
        (1, 3): 0.62868169229370208,
    }

    A = DictTensor(2)
    B = DictTensor(2)
    A.update(left_entries)
    B.update(right_entries)

    product = A * B

    for index, actual in product.iteritems():
        assert_almost_equal(actual, expected[index])
Exemple #25
0
    def test_combine_by_element(self):
        '''Entries are combined with the given function; tensors of a
        different shape or dimensionality cause an IndexError.'''
        base = DictTensor(ndim=2)
        other = DictTensor(ndim=2)
        for key, value in [((1, 1), 1), ((1, 0), 2)]:
            base[key] = value
        for key, value in [((1, 1), 4), ((0, 1), 5)]:
            other[key] = value

        combined = base.combine_by_element(other, lambda x, y: x + (2 * y))
        assertTensorEqual(combined, [[None, 10],
                                     [2, 9]])

        # Make sure errors are raised when the tensors don't have the
        # same shape or number of dimensions
        add = lambda x, y: x + y
        mismatched_shape = DictTensor(ndim=2)
        mismatched_shape[0, 2] = 3
        mismatched_shape[1, 0] = 5
        mismatched_ndim = DictTensor(ndim=3)
        for bad_operand in (mismatched_shape, mismatched_ndim):
            self.assertRaises(IndexError,
                              base.combine_by_element, bad_operand, add)
Exemple #26
0
def testDictDotProduct():
    '''The product of two sparse 1-D tensors matches the precomputed
    value 0.473138731464.'''
    left_entries = {
        1: 0.06198828,
        3: 0.24177249,
        6: 0.5256805,
        7: 0.46505895,
        8: 0.27791615,
        9: 0.02906779,
    }
    right_entries = {
        0: 0.2502674,
        2: 0.34907184,
        3: 0.2209139,
        5: 0.45788618,
        6: 0.37133328,
        7: 0.48278861,
    }
    tensor = DictTensor(1)
    tensor.update(left_entries)
    tensor2 = DictTensor(1)
    tensor2.update(right_entries)

    dot = tensor * tensor2
    assert_almost_equal(dot, 0.473138731464)
Exemple #27
0
    def test_tensordot(self):
        '''Disabled tensordot test: returns immediately, so none of the
        checks below are executed.'''
        # FIXME XXX: skip this test.
        if True:
            return
        # Test degenerate case of two 1-d vectors
        vec_a = DictTensor(ndim=1)
        vec_b = DictTensor(ndim=1)
        vec_a[0] = 1
        vec_a[2] = 2
        vec_b[0] = 3
        vec_b[1] = 4
        vec_b[2] = 5
        self.assertEqual(13, vec_a.tensordot(vec_b, 0))
        self.assertEqual(13, vec_a.tensordot(vec_b.to_dense(), 0))
        self.assertEqual(13, vec_a.to_dense().tensordot(vec_b, 0))
        self.assertEqual(13, vec_a.to_dense().tensordot(vec_b.to_dense(), 0))

        for _ in range(5):
            # Make a random, randomly-shaped 3D tensor
            shape = random.sample(xrange(1, 30), 3)
            tensor = DenseTensor(numpy.random.random(shape))

            # Pick a random one of those dimensions
            dim = random.randrange(3)

            # Make a random vector of that length
            vec = DenseTensor(numpy.random.random((shape[dim],)))

            # Try the dense result
            result = tensor.tensordot(vec, dim)
            shape_without_dim = shape[:dim] + shape[dim + 1:]
            self.assertEqual(result.shape, tuple(shape_without_dim))

            # Try it with the tensor being sparse.
            sparse_tensor = tensor.to_sparse()
            result_sparse = sparse_tensor.tensordot(vec, dim)
            self.assertEqual(result_sparse.shape, result.shape)
            for key, val in result.iteritems():
                self.assertAlmostEqual(val, result_sparse[key])
Exemple #28
0
def test_DictMatrixVectorDot():
    '''Multiply a 3x2 DictTensor by a length-2 DictTensor and compare
    each entry the product yields with precomputed values.'''
    # Numbers computed using numpy separately (and checked by hand)...
    matrix_entries = {
        (0, 0): 0.18850744743616121,
        (0, 1): 0.64380371397047509,
        (1, 0): 0.40673500155569442,
        (1, 1): 0.77961381386745443,
        (2, 0): 0.38745898104117782,
        (2, 1): 0.39479530812173591,
    }
    vector_entries = {0: 0.95308634444417639, 1: 0.41483520394218798}
    expected = {
        (0, ): 0.44673631896111365,
        (1, ): 0.71106483126206554,
        (2, ): 0.53305685602270081,
    }

    A = DictTensor(2)
    b = DictTensor(1)
    A.update(matrix_entries)
    b.update(vector_entries)

    product = A * b

    for index, actual in product.iteritems():
        assert_almost_equal(actual, expected[index])
Exemple #29
0
def make_sparse_labeled_tensor(ndim, labels=None,
                               initial=None, accumulate=None,
                               normalize=False):
    '''
    Create a sparse labeled tensor (a LabeledView over a DictTensor).

    ndim: number of dimensions (usually 2)

    labels: existing label lists, one per dimension. A None in this
    list means an unlabeled dimension. If you simply don't have labels
    yet, pass an OrderedSet(). When omitted, a new OrderedSet is
    created for every dimension.

    initial / accumulate: sequences of (key, value) pairs to add to
    the tensor. ``initial`` is applied first by ``.update``, meaning
    that later values will override earlier ones. ``accumulate`` is
    applied afterwards with ``.inc``, so its values add to anything
    already there.

    normalize: when truthy, the result of ``tensor.normalized(normalize)``
    is returned instead of the plain tensor. Accepted values:
     an int or tuple of ints: normalize along that dimension
     True: normalize along axis 0
     'tfidf': use tf-idf
     'tfidf.T': use tf-idf, transposed (matrix is documents by terms)
     a class: adds that class as a layer.
    '''
    if labels is None:
        labels = [OrderedSet() for _ in xrange(ndim)]
    labeled = LabeledView(DictTensor(ndim), labels)
    # Size the underlying storage to match the label lists given.
    sizes = [len(label_list) for label_list in labels]
    labeled.tensor._shape[:] = sizes

    if initial is not None:
        labeled.update(initial)
    if accumulate:
        for key, amount in accumulate:
            labeled.inc(key, amount)

    if not normalize:
        return labeled
    return labeled.normalized(normalize)
Exemple #30
0
def test_DictMatrixVectorDot():
    '''A 3x2 sparse matrix times a length-2 sparse vector gives the
    precomputed values for every entry the result yields.'''
    # Numbers computed using numpy separately (and checked by hand)...
    def filled(ndim, entries):
        # Create a DictTensor of the given dimensionality holding entries.
        built = DictTensor(ndim)
        built.update(entries)
        return built

    A = filled(2, {
        (0, 0): 0.18850744743616121,
        (0, 1): 0.64380371397047509,
        (1, 0): 0.40673500155569442,
        (1, 1): 0.77961381386745443,
        (2, 0): 0.38745898104117782,
        (2, 1): 0.39479530812173591,
    })
    b = filled(1, {0: 0.95308634444417639, 1: 0.41483520394218798})

    expected = {
        (0,): 0.44673631896111365,
        (1,): 0.71106483126206554,
        (2,): 0.53305685602270081,
    }

    for index, actual in (A * b).iteritems():
        assert_almost_equal(actual, expected[index])
Exemple #31
0
def testDictDotProduct():
    '''Multiplying two sparse 1-D tensors yields the precomputed value
    0.473138731464.'''
    first = DictTensor(1)
    second = DictTensor(1)
    first.update({
        1: 0.06198828,
        3: 0.24177249,
        6: 0.5256805,
        7: 0.46505895,
        8: 0.27791615,
        9: 0.02906779,
    })
    second.update({
        0: 0.2502674,
        2: 0.34907184,
        3: 0.2209139,
        5: 0.45788618,
        6: 0.37133328,
        7: 0.48278861,
    })

    assert_almost_equal(first * second, 0.473138731464)
Exemple #32
0
def test_DictMatrixMatrixDot():
    '''A 2x3 sparse matrix times a 3x4 sparse matrix gives the
    precomputed values for every entry the result yields.'''
    # Numbers computed using numpy separately (and checked by hand)...
    def filled(entries):
        # Create a 2-D DictTensor holding the given entries.
        built = DictTensor(2)
        built.update(entries)
        return built

    A = filled({
        (0, 0): 0.97878770132160475,
        (0, 1): 0.38968165255179188,
        (0, 2): 0.62726841877492023,
        (1, 0): 0.077757604769237876,
        (1, 1): 0.081345677776447523,
        (1, 2): 0.64136810022648949,
    })
    B = filled({
        (0, 0): 0.062059208836173663,
        (0, 1): 0.67286767409459525,
        (0, 2): 0.55410453533854442,
        (0, 3): 0.74671274663041698,
        (1, 0): 0.11565332983247767,
        (1, 1): 0.48262692547766795,
        (1, 2): 0.76280138705455269,
        (1, 3): 0.50230554417370143,
        (2, 0): 0.67149114912362429,
        (2, 1): 0.7656884479264322,
        (2, 2): 0.69286881606948747,
        (2, 3): 0.82598232206483091,
    })

    expected = {
        (0, 0): 0.52701596238696313,
        (0, 1): 1.3269576439118278,
        (0, 2): 1.2742151361864653,
        (0, 3): 1.4447251324591062,
        (1, 0): 0.444906476567622,
        (1, 1): 0.58266833824233299,
        (1, 2): 0.54952039356712779,
        (1, 3): 0.62868169229370208,
    }

    for index, actual in (A * B).iteritems():
        assert_almost_equal(actual, expected[index])
Exemple #33
0
 def setUp(self):
     '''Start each test with a new, empty two-dimensional DictTensor.'''
     empty = DictTensor(ndim=2)
     self.tensor = empty
Exemple #34
0
 def __init__(self):
     '''Create an empty 3-D tensor labeled (concept, relation, concept).

     The same OrderedSet object labels the first and the last
     dimension, so both share one concept vocabulary.
     '''
     # FIXME: yes this saves space, but it might make a row or column be zero.
     concept_labels = OrderedSet()
     relation_labels = OrderedSet()
     label_lists = [concept_labels, relation_labels, concept_labels]
     super(ConceptRelationConceptTensor, self).__init__(
         DictTensor(3), label_lists)
Exemple #35
0
 def setUp(self):
     '''Give the test a fresh, empty 2-D DictTensor as `self.tensor`.'''
     fresh = DictTensor(ndim=2)
     self.tensor = fresh
from csc.divisi.tensor import DictTensor
from csc.divisi.normalized_view import NormalizedView
from nose.tools import raises, assert_almost_equal
from tensor_util import assertTensorEqual, nones_removed, nested_list_to_dict

# Input matrix for the normalization tests. None marks an entry that is
# left out of the sparse tensor (see nones_removed below).
normalize_testcase = [[1, None], [3, 4]]

# Expected contents of the normalized view: the row [3, 4] is divided by 5.
normalize_expected_result = [[1, None], [3 / 5., 4 / 5.]]

# Shared fixtures: the raw sparse tensor and a NormalizedView over it.
# NOTE(review): the second argument (0) is presumably the mode/axis to
# normalize along -- confirm against NormalizedView.
raw = DictTensor(2)
raw.update(nones_removed(nested_list_to_dict(normalize_testcase)))
tensor = NormalizedView(raw, 0)


def test_result():
    """The normalized view matches the precomputed expected matrix."""
    expected = normalize_expected_result
    assertTensorEqual(tensor, expected)


def test_contains():
    """`in` and has_key() agree for one stored key and one missing key."""
    stored_key = (0, 0)
    missing_key = (0, 1)

    assert stored_key in tensor
    assert tensor.has_key(stored_key)
    assert missing_key not in tensor
    assert not tensor.has_key(missing_key)


def test_unnormalize():
    """Entry (1, 0) reads 3/5 through the view and 3 after unnormalized()."""
    through_view = tensor[1, 0]
    assert_almost_equal(through_view, 3 / 5.)

    through_raw = tensor.unnormalized()[1, 0]
    assert_almost_equal(through_raw, 3)


def test_labeled_unnormalize():
Exemple #37
0
import numpy as np
import unittest
from nose.tools import eq_, raises
from math import sqrt
from csc.divisi.tensor import DictTensor
from csc.divisi.util import nested_list_to_dict
from tensor_util import assertTensorEqual, zeros_removed

# Dense reference matrix; two of its twelve entries are zero.
data = np.array([
    [1, 2, 3, 4],
    [-1, 2, 3, 4],
    [0, 1, -1, 0],
])

# Tensor under test: the same data with the zero entries left out.
tensor = DictTensor(2)
tensor.update(zeros_removed(nested_list_to_dict(data)))
eq_(len(tensor), 10)

# For NumPy, "along an axis" means something different.
# Each row has its own mean subtracted from it.
ms_data = data - data.mean(axis=1)[:, np.newaxis]
ms_tensor = DictTensor(2)
ms_tensor.update(nested_list_to_dict(ms_data))

def test_means():
    """tensor.means() yields two sequences that match hand-computed means."""
    # Row sums of `data`, each divided by 4.
    expected_first = [(1+2+3+4)/4., (-1+2+3+4)/4., (0+1+-1+0)/4.]
    # Column sums of `data`; the last three are divided by 3.
    expected_second = [0, (2+2+1)/3., (3+3-1)/3., (4+4+0)/3.]

    means = tensor.means()
    eq_(len(means), 2)
    assert np.allclose(means[0], expected_first)
    assert np.allclose(means[1], expected_second)

def test_mean_subtracted():
    mean_subtracted = tensor.mean_subtracted()
    m = np.zeros(data.shape)
    for (r, c), v in mean_subtracted.iteritems():
Exemple #38
0
    def test_1D(self):
        """A DictTensor(1) with only index 2 set equals [None, None, 1]."""
        vector = DictTensor(1)
        vector[2] = 1
        expected = [None, None, 1]
        assertTensorEqual(vector, expected)
Exemple #39
0
class DictTensorTest(unittest.TestCase):
    """Tests for DictTensor storage, slicing, deletion and arithmetic."""

    # None marks an entry that is not stored in the tensor.
    slice_testcase = [[1, None, None],
                      [None, 2, 3],
                      [4, None, None],
                      [None, 5, None]]

    def setUp(self):
        """Start every test from a fresh DictTensor(2)."""
        self.tensor = DictTensor(2)

    def test_initial(self):
        """A new tensor has no keys, shape (0, 0), and reads 0 at any index."""
        self.assertEqual(len(self.tensor), 0)
        self.assertEqual(len(self.tensor.keys()), 0)
        assert_dims_consistent(self.tensor)
        self.assertEqual(self.tensor.shape, (0, 0))
        assert isinstance(self.tensor[4, 5], (float, int, long))
        self.assertEqual(self.tensor[5, 5], 0)
        self.assertEqual(self.tensor[2, 7], 0)

    def test_storage(self):
        """Stored entries are read back; everything else compares as None."""
        self.tensor[5, 5] = 1
        self.tensor[2, 7] = 2

        assertTensorEqual(
            self.tensor,
            [[None] * 8,
             [None] * 8,
             [None] * 7 + [2],
             [None] * 8,
             [None] * 8,
             [None] * 5 + [1, None, None]])

    def test_slice(self):
        """Slicing honours start/stop/step and drops integer-indexed modes."""
        self.tensor.update(
            nones_removed(nested_list_to_dict(self.slice_testcase)))

        # Test end conditions: start index
        # is included in slice, end index is not
        block = self.tensor[1:3, 0:2]
        assertTensorEqual(block, [[None, 2], [4, None]])

        # Test that slicing on some dims correctly
        # reduces the dimensionality of the tensor
        row = self.tensor[3, :]
        assertTensorEqual(row, [None, 5, None])

        # Test the step parameter
        stepped = self.tensor[1:4:2, :]
        assertTensorEqual(stepped, [[None, 2, 3], [None, 5, None]])

    def test_transpose(self):
        """transpose() swaps the two indices of every stored entry."""
        self.tensor[0, 0] = 1
        self.tensor[1, 2] = 3
        self.tensor[2, 0] = 4
        self.tensor[3, 1] = 5

        t = self.tensor.transpose()
        assertTensorEqual(
            t,
            [[1, None, 4, None],
             [None, None, None, 5],
             [None, 3, None, None]])

    def test_delete(self):
        """Deleting a key removes only that entry."""
        self.tensor.update(
            nones_removed(nested_list_to_dict(self.slice_testcase)))
        assertTensorEqual(self.tensor, self.slice_testcase)

        del self.tensor[0, 0]
        assertTensorEqual(self.tensor,
                          [[None, None, None],
                           [None, 2, 3],
                           [4, None, None],
                           [None, 5, None]])

    def test_contains(self):
        """Membership is true only for keys that were explicitly stored."""
        self.tensor[1, 2] = 1
        self.tensor[4, 5] = 2
        self.assertTrue((1, 2) in self.tensor)
        self.assertTrue(self.tensor.has_key((1, 2)))
        self.assertFalse((4, 2) in self.tensor)
        self.assertFalse((1, 5) in self.tensor)

    def test_1D(self):
        """A DictTensor(1) with only index 2 set equals [None, None, 1]."""
        tensor_1D = DictTensor(1)
        tensor_1D[2] = 1

        assertTensorEqual(tensor_1D, [None, None, 1])

    def test_combine_by_element(self):
        """combine_by_element applies the function entry-wise.

        The expected values show an entry missing from one operand being
        combined as 0. Mismatched shapes or mode counts raise IndexError.
        """
        t1 = DictTensor(2)
        t2 = DictTensor(2)
        t1[1, 1] = 1
        t1[1, 0] = 2
        t2[1, 1] = 4
        t2[0, 1] = 5

        t3 = t1.combine_by_element(t2, lambda x, y: x + (2 * y))
        assertTensorEqual(t3, [[None, 10], [2, 9]])

        # Make sure errors are raised when the tensors don't have the
        # same shape or number of dimensions
        t4 = DictTensor(2)
        t4[0, 2] = 3
        t4[1, 0] = 5
        self.assertRaises(
            IndexError, lambda: t1.combine_by_element(t4, lambda x, y: x + y))
        t4 = DictTensor(3)
        self.assertRaises(
            IndexError, lambda: t1.combine_by_element(t4, lambda x, y: x + y))

    def testAdd(self):
        """Adding two tensors sums shared keys and keeps the remaining ones."""
        t1 = DictTensor(2)
        t2 = DictTensor(2)
        t1[0, 0] = 1
        t1[1, 1] = 1
        t1[1, 0] = 2
        t2[2, 1] = 4
        t2[1, 0] = 5

        t3 = t1 + t2
        assertTensorEqual(t3, [[1, None], [7, 1], [None, 4]])

    def testICmul(self):
        """In-place multiplication by a scalar scales every entry."""
        t1 = tensor_from_nested_list([[1, 2], [3, 4]])
        assertTensorEqual(t1, [[1, 2], [3, 4]])
        t1 *= 2
        assertTensorEqual(t1, [[2, 4], [6, 8]])

    def testICdiv(self):
        """In-place division by a scalar divides every entry."""
        t1 = tensor_from_nested_list([[2, 4], [6, 8]])
        t1 /= 2
        assertTensorEqual(t1, [[1, 2], [3, 4]])

    def testReprOfEmpty(self):
        """repr() and example_key() must not raise on an empty tensor."""
        repr(self.tensor)
        self.tensor.example_key()

    def testNorm(self):
        """norm() and magnitude() both give sqrt(1**2 + 5**2)."""
        norm_test = [[0, 0, 0], [0, 1, 0], [0, 5.0, 0]]
        self.tensor.update(nested_list_to_dict(norm_test))
        expected = sqrt(26.0)
        # Floating-point results are compared approximately so that a
        # last-bit rounding difference cannot fail the test.
        self.assertAlmostEqual(self.tensor.norm(), expected)
        self.assertAlmostEqual(self.tensor.magnitude(), expected)
Exemple #40
0
 def setUp(self):
     """Load svd_2d_test_matrix and compute a k=3 SVD with the given offsets."""
     self.tensor = DictTensor(2)
     # Note: this command actually puts 20 values in tensor!
     matrix_entries = nested_list_to_dict(svd_2d_test_matrix)
     self.tensor.update(matrix_entries)
     self.svd = self.tensor.svd(
         k=3,
         offset_for_row=offset_for_row,
         offset_for_col=offset_for_col)
     # Read all three factors first, then publish them as shortcuts.
     left, singular_values, right = self.svd.u, self.svd.svals, self.svd.v
     self.u = left
     self.svals = singular_values
     self.v = right
from csc.divisi.tensor import DictTensor
from csc.divisi.normalized_view import NormalizedView
from nose.tools import raises, assert_almost_equal
from tensor_util import assertTensorEqual, nones_removed, nested_list_to_dict

# Input matrix; None marks an entry that is dropped before loading.
normalize_testcase = [[1, None], [3, 4]]

# Expected view contents: the (3, 4) row is divided by 5, the other row
# is unchanged.
normalize_expected_result = [[1, None], [3 / 5., 4 / 5.]]

# Underlying tensor, loaded without the None entries.
raw = DictTensor(2)
raw.update(
    nones_removed(
        nested_list_to_dict(normalize_testcase)))

# View under test, built over `raw` with mode argument 0.
tensor = NormalizedView(raw, 0)

def test_result():
    """The normalized view equals normalize_expected_result."""
    expected_matrix = normalize_expected_result
    assertTensorEqual(tensor, expected_matrix)

def test_contains():
    """Key (0, 0) is reported present and key (0, 1) absent, by both APIs."""
    present = (0, 0)
    absent = (0, 1)

    assert present in tensor
    assert tensor.has_key(present)
    assert absent not in tensor
    assert not tensor.has_key(absent)

def test_unnormalize():
    """Entry (1, 0) is 3/5 in the view and 3 in the unnormalized tensor."""
    normalized_value = tensor[1, 0]
    assert_almost_equal(normalized_value, 3 / 5.)

    original_value = tensor.unnormalized()[1, 0]
    assert_almost_equal(original_value, 3)

def test_labeled_unnormalize():
    """Looking up ('b', 'A') on the labeled view gives 3/5."""
    first_mode_labels = ['a', 'b']
    second_mode_labels = ['A', 'B']
    labeled_view = tensor.labeled([first_mode_labels, second_mode_labels])
    assert_almost_equal(labeled_view['b', 'A'], 3 / 5.)
Exemple #42
0
class SVD2DTest(unittest.TestCase):
    """Tests for svd() and incremental_svd() on svd_2d_test_matrix."""

    def setUp(self):
        """Load the test matrix and compute both k=3 decompositions."""
        self.tensor = DictTensor(2)
        # Note: this command actually puts 20 values in tensor!
        self.tensor.update(nested_list_to_dict(svd_2d_test_matrix))
        self.svd = self.tensor.svd(k=3)
        self.incremental = self.tensor.incremental_svd(k=3, niter=200)
        self.u, self.svals, self.v = self.svd.u, self.svd.svals, self.svd.v

    def test_incremental(self):
        """The incremental SVD has consistent shapes and the expected factors."""
        self.assertEqual(self.incremental.u.shape[0], self.tensor.shape[0])
        self.assertEqual(len(self.incremental.svals),
                         self.incremental.u.shape[1])
        self.assertEqual(len(self.incremental.svals),
                         self.incremental.v.shape[1])
        self.assertEqual(self.incremental.v.shape[0], self.tensor.shape[1])

        assertTensorEqual(self.incremental.u,
                          [[0, 0, 1],
                           [0, 1, 0],
                           [0, 0, 0],
                           [1, 0, 0]])

        assertTensorEqual(self.incremental.v,
                          [[0, 0, sqrt(.2)],
                           [1, 0, 0],
                           [0, 1, 0],
                           [0, 0, 0],
                           [0, 0, sqrt(.8)]])

        assertTensorEqual(self.incremental.svals,
                          [4, 3, sqrt(5)])

    def test_decomposition(self):
        """The batch SVD has consistent shapes and the expected factors."""
        self.assertEqual(self.u.shape[0], self.tensor.shape[0])
        self.assertEqual(len(self.svals), self.u.shape[1])
        self.assertEqual(len(self.svals), self.v.shape[1])
        self.assertEqual(self.v.shape[0], self.tensor.shape[1])

        # NOTE(review): abs=True presumably makes the comparison ignore the
        # sign of each entry -- confirm in tensor_util.assertTensorEqual.
        assertTensorEqual(self.u,
                          [[0, 0, 1],
                           [0, -1, 0],
                           [0, 0, 0],
                           [-1, 0, 0]], abs=True)

        assertTensorEqual(self.v,
                          [[0, 0, sqrt(.2)],
                           [-1, 0, 0],
                           [0, -1, 0],
                           [0, 0, 0],
                           [0, 0, sqrt(.8)]], abs=True)

        assertTensorEqual(self.svals,
                          [4, 3, sqrt(5)])

    def test_reconstructed(self):
        """The reconstruction matches the expected matrix, whole and sliced."""
        assertTensorEqual(self.svd.reconstructed,
                          [[1, 0, 0, 0, 2],
                           [0, 0, 3, 0, 0],
                           [0, 0, 0, 0, 0],
                           [0, 4, 0, 0, 0]])
        assertTensorEqual(self.svd.reconstructed[1, :],
                          [0, 0, 3, 0, 0])
        assertTensorEqual(self.svd.reconstructed[:, 2],
                          [0, 3, 0, 0])

    def test_orthonormality(self):
        """u.T * u and v.T * v both equal the 3x3 identity."""
        identity = [[1, 0, 0],
                    [0, 1, 0],
                    [0, 0, 1]]
        assertTensorEqual(self.u.T * self.u,
                          identity)

        assertTensorEqual(self.v.T * self.v,
                          identity)

    def test_variance(self):
        """A k=3 SVD leaves less residual than k=1, and both beat nothing."""
        # Assert that the SVD explained some of the variance.
        diff_k3 = self.tensor - self.svd.reconstructed
        tensor_mag = self.tensor.magnitude()
        diff_k3_mag = diff_k3.magnitude()
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(tensor_mag > diff_k3_mag)

        # Check that a smaller SVD explains less of the variance, but still some.
        svd_k1 = self.tensor.svd(k=1)
        diff_k1 = self.tensor - svd_k1.reconstructed
        diff_k1_mag = diff_k1.magnitude()
        self.assertTrue(tensor_mag > diff_k1_mag > diff_k3_mag)
Exemple #43
0
import numpy as np
import unittest
from nose.tools import eq_, raises
from math import sqrt
from csc.divisi.tensor import DictTensor
from csc.divisi.util import nested_list_to_dict
from tensor_util import assertTensorEqual, zeros_removed

# Dense reference matrix; two of its twelve entries are zero.
data = np.array([
    [1, 2, 3, 4],
    [-1, 2, 3, 4],
    [0, 1, -1, 0],
])

# Tensor under test: the same data with the zero entries left out.
tensor = DictTensor(2)
tensor.update(
    zeros_removed(
        nested_list_to_dict(data)))
eq_(len(tensor), 10)

# For NumPy, "along an axis" means something different.
# Each row has its own mean subtracted from it.
ms_data = data - data.mean(axis=1)[:, np.newaxis]
ms_tensor = DictTensor(2)
ms_tensor.update(nested_list_to_dict(ms_data))


def test_means():
    """tensor.means() yields two sequences that match hand-computed means."""
    # Row sums of `data`, each divided by 4.
    first_expected = [
        (1 + 2 + 3 + 4) / 4.,
        (-1 + 2 + 3 + 4) / 4.,
        (0 + 1 + -1 + 0) / 4.,
    ]
    # Column sums of `data`; the last three are divided by 3.
    second_expected = [
        0,
        (2 + 2 + 1) / 3.,
        (3 + 3 - 1) / 3.,
        (4 + 4 + 0) / 3.,
    ]

    means = tensor.means()
    eq_(len(means), 2)
    assert np.allclose(means[0], first_expected)
    assert np.allclose(means[1], second_expected)


def test_mean_subtracted():
    # Calls tensor.mean_subtracted() and keeps the result in a local; as
    # written here, no assertion is made on it.
    # NOTE(review): the body looks truncated -- confirm against the full test.
    mean_subtracted = tensor.mean_subtracted()
Exemple #44
0
 def setUp(self):
     """Fill a DictTensor(3) over indices 2 x 3 x 4 with index-encoding values."""
     self.raw = DictTensor(3)
     for i in range(2):
         for j in range(3):
             for k in range(4):
                 # The decimal digits of the value spell out (i, j, k).
                 self.raw[i, j, k] = 100 * i + 10 * j + k
Exemple #45
0
 def __init__(self):
     """Initialise the parent with a DictTensor(2) and two separate OrderedSets."""
     backing = DictTensor(2)
     per_mode_sets = [OrderedSet(), OrderedSet()]
     super(FeatureByConceptMatrix, self).__init__(backing, per_mode_sets)
Exemple #46
0
class DictTensorTest(unittest.TestCase):
    """Tests for DictTensor storage, slicing, deletion and arithmetic."""

    # None marks an entry that is not stored in the tensor.
    slice_testcase = [[1,    None, None],
                      [None, 2,    3   ],
                      [4,    None, None],
                      [None, 5,    None]]

    def setUp(self):
        """Start every test from a fresh DictTensor(2)."""
        self.tensor = DictTensor(2)

    def test_initial(self):
        """A new tensor has no keys, shape (0, 0), and reads 0 at any index."""
        self.assertEqual(len(self.tensor), 0)
        self.assertEqual(len(self.tensor.keys()), 0)
        assert_dims_consistent(self.tensor)
        self.assertEqual(self.tensor.shape, (0, 0))
        assert isinstance(self.tensor[4, 5], (float, int, long))
        self.assertEqual(self.tensor[5, 5], 0)
        self.assertEqual(self.tensor[2, 7], 0)

    def test_storage(self):
        """Stored entries are read back; everything else compares as None."""
        self.tensor[5, 5] = 1
        self.tensor[2, 7] = 2

        assertTensorEqual(self.tensor,
                          [[None]*8,
                           [None]*8,
                           [None]*7 + [2],
                           [None]*8,
                           [None]*8,
                           [None]*5 + [1, None, None]])

    def test_slice(self):
        """Slicing honours start/stop/step and drops integer-indexed modes."""
        self.tensor.update(nones_removed(nested_list_to_dict(self.slice_testcase)))

        # Test end conditions: start index
        # is included in slice, end index is not
        block = self.tensor[1:3, 0:2]
        assertTensorEqual(block,
                          [[None, 2],
                           [4, None]])

        # Test that slicing on some dims correctly
        # reduces the dimensionality of the tensor
        row = self.tensor[3, :]
        assertTensorEqual(row, [None, 5, None])

        # Test the step parameter
        stepped = self.tensor[1:4:2, :]
        assertTensorEqual(stepped,
                          [[None, 2, 3],
                           [None, 5, None]])

    def test_transpose(self):
        """transpose() swaps the two indices of every stored entry."""
        self.tensor[0, 0] = 1
        self.tensor[1, 2] = 3
        self.tensor[2, 0] = 4
        self.tensor[3, 1] = 5

        t = self.tensor.transpose()
        assertTensorEqual(t,
                          [[1, None, 4, None],
                           [None, None, None, 5],
                           [None, 3, None, None]])

    def test_delete(self):
        """Deleting a key removes only that entry."""
        self.tensor.update(nones_removed(nested_list_to_dict(self.slice_testcase)))
        assertTensorEqual(self.tensor, self.slice_testcase)

        del self.tensor[0, 0]
        assertTensorEqual(self.tensor,
                          [[None, None, None],
                           [None, 2,    3   ],
                           [4,    None, None],
                           [None, 5,    None]])

    def test_contains(self):
        """Membership is true only for keys that were explicitly stored."""
        self.tensor[1, 2] = 1
        self.tensor[4, 5] = 2
        self.assertTrue((1, 2) in self.tensor)
        self.assertTrue(self.tensor.has_key((1, 2)))
        self.assertFalse((4, 2) in self.tensor)
        self.assertFalse((1, 5) in self.tensor)

    def test_1D(self):
        """A DictTensor(1) with only index 2 set equals [None, None, 1]."""
        tensor_1D = DictTensor(1)
        tensor_1D[2] = 1

        assertTensorEqual(tensor_1D,
                          [None, None, 1])

    def test_combine_by_element(self):
        """combine_by_element applies the function entry-wise.

        The expected values show an entry missing from one operand being
        combined as 0. Mismatched shapes or mode counts raise IndexError.
        """
        t1 = DictTensor(2)
        t2 = DictTensor(2)
        t1[1, 1] = 1
        t1[1, 0] = 2
        t2[1, 1] = 4
        t2[0, 1] = 5

        t3 = t1.combine_by_element(t2, lambda x, y: x + (2*y))
        assertTensorEqual(t3,
                          [[None, 10],
                           [2, 9]])

        # Make sure errors are raised when the tensors don't have the
        # same shape or number of dimensions
        t4 = DictTensor(2)
        t4[0, 2] = 3
        t4[1, 0] = 5
        self.assertRaises(IndexError, lambda: t1.combine_by_element(t4, lambda x, y: x + y))
        t4 = DictTensor(3)
        self.assertRaises(IndexError, lambda: t1.combine_by_element(t4, lambda x, y: x + y))

    def testAdd(self):
        """Adding two tensors sums shared keys and keeps the remaining ones."""
        t1 = DictTensor(2)
        t2 = DictTensor(2)
        t1[0, 0] = 1
        t1[1, 1] = 1
        t1[1, 0] = 2
        t2[2, 1] = 4
        t2[1, 0] = 5

        t3 = t1 + t2
        assertTensorEqual(t3,
                          [[1, None],
                           [7, 1],
                           [None, 4]])

    def testICmul(self):
        """In-place multiplication by a scalar scales every entry."""
        t1 = tensor_from_nested_list([[1, 2], [3, 4]])
        assertTensorEqual(t1, [[1, 2], [3, 4]])
        t1 *= 2
        assertTensorEqual(t1, [[2, 4], [6, 8]])

    def testICdiv(self):
        """In-place division by a scalar divides every entry."""
        t1 = tensor_from_nested_list([[2, 4], [6, 8]])
        t1 /= 2
        assertTensorEqual(t1, [[1, 2], [3, 4]])

    def testReprOfEmpty(self):
        """repr() and example_key() must not raise on an empty tensor."""
        repr(self.tensor)
        self.tensor.example_key()

    def testNorm(self):
        """norm() and magnitude() both give sqrt(1**2 + 5**2)."""
        norm_test = [[0, 0, 0],
                     [0, 1, 0],
                     [0, 5.0, 0]]
        self.tensor.update(nested_list_to_dict(norm_test))
        expected = sqrt(26.0)
        # Floating-point results are compared approximately so that a
        # last-bit rounding difference cannot fail the test.
        self.assertAlmostEqual(self.tensor.norm(), expected)
        self.assertAlmostEqual(self.tensor.magnitude(), expected)
Exemple #47
0
class SVD2DTest(unittest.TestCase):
    """Tests for svd() and incremental_svd() on svd_2d_test_matrix."""

    def setUp(self):
        """Load the test matrix and compute both k=3 decompositions."""
        self.tensor = DictTensor(2)
        # Note: this command actually puts 20 values in tensor!
        self.tensor.update(nested_list_to_dict(svd_2d_test_matrix))
        self.svd = self.tensor.svd(k=3)
        self.incremental = self.tensor.incremental_svd(k=3, niter=200)
        self.u, self.svals, self.v = self.svd.u, self.svd.svals, self.svd.v

    def test_incremental(self):
        """The incremental SVD has consistent shapes and the expected factors."""
        self.assertEqual(self.incremental.u.shape[0], self.tensor.shape[0])
        self.assertEqual(len(self.incremental.svals),
                         self.incremental.u.shape[1])
        self.assertEqual(len(self.incremental.svals),
                         self.incremental.v.shape[1])
        self.assertEqual(self.incremental.v.shape[0], self.tensor.shape[1])

        assertTensorEqual(self.incremental.u,
                          [[0, 0, 1], [0, 1, 0], [0, 0, 0], [1, 0, 0]])

        assertTensorEqual(self.incremental.v,
                          [[0, 0, sqrt(.2)], [1, 0, 0], [0, 1, 0], [0, 0, 0],
                           [0, 0, sqrt(.8)]])

        assertTensorEqual(self.incremental.svals, [4, 3, sqrt(5)])

    def test_decomposition(self):
        """The batch SVD has consistent shapes and the expected factors."""
        self.assertEqual(self.u.shape[0], self.tensor.shape[0])
        self.assertEqual(len(self.svals), self.u.shape[1])
        self.assertEqual(len(self.svals), self.v.shape[1])
        self.assertEqual(self.v.shape[0], self.tensor.shape[1])

        # NOTE(review): abs=True presumably makes the comparison ignore the
        # sign of each entry -- confirm in tensor_util.assertTensorEqual.
        assertTensorEqual(self.u,
                          [[0, 0, 1], [0, -1, 0], [0, 0, 0], [-1, 0, 0]],
                          abs=True)

        assertTensorEqual(self.v, [[0, 0, sqrt(.2)], [-1, 0, 0], [0, -1, 0],
                                   [0, 0, 0], [0, 0, sqrt(.8)]],
                          abs=True)

        assertTensorEqual(self.svals, [4, 3, sqrt(5)])

    def test_reconstructed(self):
        """The reconstruction matches the expected matrix, whole and sliced."""
        assertTensorEqual(self.svd.reconstructed,
                          [[1, 0, 0, 0, 2], [0, 0, 3, 0, 0], [0, 0, 0, 0, 0],
                           [0, 4, 0, 0, 0]])
        assertTensorEqual(self.svd.reconstructed[1, :], [0, 0, 3, 0, 0])
        assertTensorEqual(self.svd.reconstructed[:, 2], [0, 3, 0, 0])

    def test_orthonormality(self):
        """u.T * u and v.T * v both equal the 3x3 identity."""
        identity = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
        assertTensorEqual(self.u.T * self.u, identity)

        assertTensorEqual(self.v.T * self.v, identity)

    def test_variance(self):
        """A k=3 SVD leaves less residual than k=1, and both beat nothing."""
        # Assert that the SVD explained some of the variance.
        diff_k3 = self.tensor - self.svd.reconstructed
        tensor_mag = self.tensor.magnitude()
        diff_k3_mag = diff_k3.magnitude()
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(tensor_mag > diff_k3_mag)

        # Check that a smaller SVD explains less of the variance, but still some.
        svd_k1 = self.tensor.svd(k=1)
        diff_k1 = self.tensor - svd_k1.reconstructed
        diff_k1_mag = diff_k1.magnitude()
        self.assertTrue(tensor_mag > diff_k1_mag > diff_k3_mag)
Exemple #48
0
 def test_oob(self):
     """Calling unfolded(3) on a DictTensor(3) must raise IndexError."""
     def unfold_out_of_range():
         return DictTensor(3).unfolded(3)

     self.assertRaises(IndexError, unfold_out_of_range)
Exemple #49
0
 def setUp(self):
     """Fill a DictTensor(3) over indices 2 x 3 x 4 with index-encoding values."""
     self.raw = DictTensor(3)
     # Same visiting order as three nested loops, outermost index first.
     index_triples = [(a, b, c)
                      for a in range(2)
                      for b in range(3)
                      for c in range(4)]
     for a, b, c in index_triples:
         # The decimal digits of the value spell out (a, b, c).
         self.raw[a, b, c] = 100 * a + 10 * b + c