Exemple #1
0
def test_identity():
    '''
    An IdentitySet of size 10 acts like the numbers 0..9, both when
    freshly built and after a pickle round trip.
    '''
    def check(candidate):
        # Item lookup, reverse lookup, length and equality with the
        # equivalent OrderedSet must all agree with range(10).
        eq_(candidate[5], 5)
        eq_(candidate.index(2), 2)
        eq_(len(candidate), 10)
        assert candidate == OrderedSet(range(10))

    fresh = IdentitySet(10)
    check(fresh)

    # The same properties must hold for the unpickled copy.
    check(pickle.loads(pickle.dumps(fresh)))
Exemple #2
0
    def svd(self, k=50, normalized=True):
        '''
        Run an SVD on this unfolding.

        The unfolding is compacted into a two-dimensional labeled tensor,
        optionally normalized with ``mode=0``, and decomposed by calling
        ``svd(k)`` on the result. The factors are wrapped so that their
        labels line up with the underlying tensor, and returned as an
        SVD2DResults.
        '''
        # The compact matrix's column labels live in an OrderedSet, which
        # maps the unfolded product keys to unique column indices.
        col_indices = OrderedSet()
        compact = LabeledView(DictTensor(2), [IdentitySet(0), col_indices])
        self.compact_to(compact)

        matrix = compact.normalized(mode=0) if normalized else compact
        decomposition = matrix.svd(k)

        # Wrap the output so that the labeling all works out.
        if hasattr(self.tensor, '_labels'):
            # Case for labeled view beneath
            # FIXME: try not to rely on private vars.
            # TODO: it would be nice to factor this in such a way that we
            #  didn't have to worry about the labeling case here.
            row_labels = self.tensor._labels[self.dim]
            u = LabeledView(decomposition.u, [row_labels, None])
            source_sets = self.tensor.label_sets()
        else:
            # Unlabeled tensor: u stays bare, and each dimension of the
            # tensor's shape gets an IdentitySet for labeling v.
            u = decomposition.u
            source_sets = [IdentitySet(dim) for dim in self.tensor.shape]

        unfolded_labels = UnfoldedSet.from_unfolding(self.dim, source_sets)
        v = LabeledView(decomposition.v, [unfolded_labels, None])

        from csc.divisi.svd import SVD2DResults
        return SVD2DResults(u, v, decomposition.svals)
Exemple #3
0
 def compressed_svd_u(self, k=100):
     """
     Not done yet. --Rob

     Unfinished: so far this only collects the union of the dimension-0
     labels of every tensor in ``self.weights`` into an OrderedSet and
     calls ``svd(k)`` on each of those tensors. The results are not yet
     combined, and nothing is returned.
     """
     labelset = set()
     for t in self.weights:
         # Built-in sets do not support ``+=``; ``update`` accumulates the
         # union in place.
         labelset.update(t.label_list(0))
     ulabels = OrderedSet(list(labelset))
     svds = [t.svd(k) for t in self.weights]
Exemple #4
0
 def __init__(self, *a, **kw):
     '''
     Initialise the view, optionally creating empty storage from ``ndim``.

     When an ``ndim`` keyword is given it is removed from ``kw``, and a
     new DictTensor(ndim) plus one empty OrderedSet per dimension are
     passed to LabeledView.__init__ ahead of the remaining arguments.
     Without it, all arguments are forwarded unchanged. In both cases the
     slice cache starts out empty.
     '''
     if 'ndim' not in kw:
         LabeledView.__init__(self, *a, **kw)
     else:
         dimensions = kw.pop('ndim')
         storage = DictTensor(dimensions)
         empty_labels = [OrderedSet() for _ in xrange(dimensions)]
         LabeledView.__init__(self, storage, empty_labels, *a, **kw)
     self._slice_cache = {}
Exemple #5
0
    def _set_tensors(self, tensors):
        '''
        Install the input tensors and rebuild the blend's label sets.

        Do not call this directly; assign to ``blend.tensors`` instead.

        ``tensors`` may be a ``dict`` (anything with ``items``) or a
        sequence. If its first element is a list or tuple, the elements
        are treated as ``(label, tensor)`` pairs and the labels become the
        tensor names; otherwise each tensor is named by its ``repr``.

        Raises TypeError if a single Tensor is passed, if any tensor's
        stack contains a MeanSubtractedView, or if the tensors do not all
        have the same number of dimensions.
        '''
        if isinstance(tensors, Tensor):
            raise TypeError(
                'Give Blend a _list_ (or dict or whatever) of tensors.')

        # Mapping-like input is reduced to its (label, tensor) items.
        if hasattr(tensors, 'items'):
            tensors = tensors.items()

        first = tensors[0]
        if isinstance(first, (list, tuple)):
            # Split the pairs without going through `dict()`, in case a
            # sequence was passed and two tensors have the same label.
            names, tensors = zip(*tensors)
        else:
            names = [repr(item) for item in tensors]

        for candidate in tensors:
            if candidate.stack_contains(MeanSubtractedView):
                raise TypeError(
                    "You can't blend MeanSubtractedViews. Try mean-subtracting the resulting blend."
                )

        self._tensors = tuple(tensors)
        self.names = tuple(names)
        self.logger.info('tensors: %s', ', '.join(self.names))

        ndim = tensors[0].ndim
        self.ndim = ndim
        if any(member.ndim != ndim for member in tensors):
            raise TypeError(
                'Blended tensors must have the same dimensionality.')

        self.logger.info('Making ordered sets')
        labels = [OrderedSet() for _ in xrange(ndim)]
        self._labels = labels
        label_overlap = [0] * ndim
        self.label_overlap = label_overlap

        # For every dimension, collect each tensor's labels into that
        # dimension's set, counting the labels that were already present.
        for tensor in self._tensors:
            for dim, label_set in enumerate(labels):
                for key in tensor.label_list(dim):
                    # XXX(kcarnold) This checks containment twice.
                    if key in label_set:
                        label_overlap[dim] += 1
                    else:
                        label_set.add(key)

        self._shape = tuple(len(label_set) for label_set in labels)
        # True as soon as at least one dimension saw no repeated label.
        self._keys_never_overlap = not all(label_overlap)
        self.logger.info('Done making ordered sets. label_overlap: %r',
                         label_overlap)
        if not any(label_overlap):
            self.logger.warn('No labels overlap.')

        # Invalidate other data
        self._weights = None
        self._tensor = None
        self._svals = None
Exemple #6
0
def test_delete_and_pickle():
    '''
    Deleting an element doesn't affect the remaining elements'
    indices, and the result is unchanged by a pickle round trip.
    '''
    s = OrderedSet(['dog', 'cat', 'banana'])
    del s[1]
    # The vacated slot reads back as None; later items keep their index.
    eq_(s[1], None)
    eq_(s.index('banana'), 2)

    # Pickling doesn't change things.
    s2 = pickle.loads(pickle.dumps(s))

    eq_(s, s2)
    eq_(s2[1], None)
    eq_(s2.index('banana'), 2)

    # The None placeholder must not count as a member of either the
    # original set or its unpickled copy.
    assert None not in s
    assert None not in s2
Exemple #7
0
def test_delete_and_pickle():
    '''
    Deleting an element doesn't affect the remaining elements'
    indices, and the result is unchanged by a pickle round trip.
    '''
    s = OrderedSet(['dog', 'cat', 'banana'])
    del s[1]
    # The vacated slot reads back as None; later items keep their index.
    eq_(s[1], None)
    eq_(s.index('banana'), 2)

    # Pickling doesn't change things.
    s2 = pickle.loads(pickle.dumps(s))

    eq_(s, s2)
    eq_(s2[1], None)
    eq_(s2.index('banana'), 2)

    # The None placeholder must not count as a member of either the
    # original set or its unpickled copy.
    assert None not in s
    assert None not in s2
Exemple #8
0
def test_pickle():
    '''
    An OrderedSet comes back from a pickle round trip equal to the
    original, with item lookup and index lookup still working.
    '''
    original = OrderedSet(['dog', 'cat', 'banana'])
    import cPickle as pickle
    serialized = pickle.dumps(original)
    restored = pickle.loads(serialized)

    eq_(original, restored)
    eq_(restored[0], 'dog')
    eq_(restored.index('cat'), 1)
Exemple #9
0
def make_sparse_labeled_tensor(ndim, labels=None,
                               initial=None, accumulate=None,
                               normalize=False):
    '''
    Create a sparse labeled tensor: a LabeledView over a new DictTensor.

    ndim: number of dimensions (usually 2)

    labels: if you already have label lists, pass them in here. (A
    None in this list means an unlabeled dimension. If you simply
    don't have labels yet, pass an OrderedSet().) When omitted, every
    dimension gets a new empty OrderedSet.

    initial / accumulate: sequences of (key, value) pairs to add to
    the tensor. ``initial`` is applied first by ``.update``, meaning
    that later values will override earlier ones. ``accumulate`` is
    applied afterwards, and all values add to anything already there.

    normalize: passed to ``tensor.normalized`` when truthy; any falsy
    value (including the int 0) skips normalization. Accepted values:
     an int or tuple of ints: normalize along that dimension
     True: normalize along axis 0
     'tfidf': use tf-idf
     'tfidf.T': use tf-idf, transposed (matrix is documents by terms)
     a class: adds that class as a layer.

    Returns the LabeledView, or its normalized form if requested.
    '''
    if labels is None:
        labels = [OrderedSet() for _ in xrange(ndim)]
    tensor = LabeledView(DictTensor(ndim), labels)

    # Size the underlying tensor to match the supplied labels. An
    # unlabeled (None) dimension has no length to take, so its existing
    # shape entry is kept instead of calling len(None).
    shape = tensor.tensor._shape
    shape[:] = [shape[axis] if label_list is None else len(label_list)
                for axis, label_list in enumerate(labels)]

    if initial is not None:
        tensor.update(initial)
    for k, v in accumulate or []:
        tensor.inc(k, v)

    if normalize:
        return tensor.normalized(normalize)
    else:
        return tensor
Exemple #10
0
 def concatenate(self, other):
     '''
     Return a new LabeledView joining this view with ``other``.

     The underlying tensors are combined with their own ``concatenate``.
     The first label list of the result is this view's dimension-0 labels
     followed by ``other``'s, collected in an OrderedSet; the remaining
     label lists are taken from this view.
     '''
     joined_tensor = self.tensor.concatenate(other.tensor)
     own_labels = list(self.label_list(0))
     other_labels = list(other.label_list(0))
     first_labels = OrderedSet(own_labels + other_labels)
     trailing_labels = self.label_lists()[1:]
     return LabeledView(joined_tensor, [first_labels] + trailing_labels)
Exemple #11
0
 def __init__(self):
     '''
     Start as an empty two-dimensional DictTensor with a separate empty
     OrderedSet of labels for each dimension.
     '''
     storage = DictTensor(2)
     label_sets = [OrderedSet(), OrderedSet()]
     super(FeatureByConceptMatrix, self).__init__(storage, label_sets)
Exemple #12
0
 def __init__(self):
     '''
     Start as an empty three-dimensional DictTensor. The first and third
     dimensions share one OrderedSet of labels; the second dimension has
     its own.
     '''
     # FIXME: yes this saves space, but it might make a row or column be zero.
     concept_labels = OrderedSet()
     relation_labels = OrderedSet()
     super(ConceptRelationConceptTensor, self).__init__(
         DictTensor(3),
         [concept_labels, relation_labels, concept_labels])
Exemple #13
0
 def setUp(self):
     '''
     Build the fixture: three label sets of different sizes, stored in
     order on ``self.sets``.
     '''
     even_numbers = OrderedSet([2, 4, 6, 8, 10])  # 5 items
     identity = IdentitySet(10)  # 10 items
     letters = OrderedSet(['a', 'b', 'c', 'd'])  # 4 items
     self.sets = [even_numbers, identity, letters]
Exemple #14
0
def test_make_shape():
    '''
    A tensor built from an existing label set reports that set's length
    as its shape, both on the labeled view and on the tensor beneath it.
    '''
    letters = OrderedSet(list('abcde'))
    labeled = make_sparse_labeled_tensor(ndim=1, labels=[letters])
    eq_(labeled.shape[0], len(letters))
    underlying = labeled.tensor
    eq_(underlying.shape[0], len(letters))
Exemple #15
0
def test_reprOfEmpty():
    '''
    Calling repr() on an OrderedSet with no items must not raise.
    '''
    empty = OrderedSet()
    repr(empty)