Exemple #1
0
def test_predicted_svals():
    '''
    predicted_svals reports the singular values the blend expects,
    optionally paired with the index of the tensor each one came from.
    '''
    first = MockTensor()
    first.svals = range(5, 0, -1)
    second = MockTensor()
    second.svals = range(10, 0, -2)

    # Put nearly all the weight on one tensor so only its svals show up.
    weight = 0.999999
    blend = Blend([first, second], weights=[weight, 1-weight], k_values=1)
    query = dict(num=5, for_each_tensor=5)

    # Origin tracking on: every entry is a (value, source index) pair.
    tracked = blend.predicted_svals(track_origin=True, **query)
    for want, (got, origin) in zip(first.svals, tracked):
        assert_almost_equal(got/weight, want)
        eq_(origin, 0)

    # Origin tracking off: entries are bare values.
    untracked = blend.predicted_svals(**query)
    for want, got in zip(first.svals, untracked):
        assert_almost_equal(got/weight, want)

    # Swap the weights so the second tensor dominates instead.
    blend.weights = [1-weight, weight]
    flipped = blend.predicted_svals(track_origin=True, **query)
    # zip(*pairs) splits the pairs into a values tuple and an origins tuple.
    values, origins = zip(*flipped)
    for got, want in zip(values, second.svals):
        assert_almost_equal(got/weight, want)
    eq_(origins, (1,)*5)
Exemple #2
0
def test_autoblend():
    '''
    When no weights are given, Blend picks them itself so as to maximize
    the interaction between the two matrices.

    That is hard to check in general, so this test blends two matrices
    with the same structure (hence matching singular values up to scale)
    but different labels.
    '''

    base = ez_matrix('0013', '0421', [1,2,3,4])
    # Shares one label with `base`; every value is tripled.
    tripled = ez_matrix('2214', '0421', [3,6,9,12])
    blend = Blend([base, tripled]) # no weights given => autoblend

    overlap = b_overlap = blend.label_overlap
    eq_(overlap[0], 1)
    eq_(b_overlap[1], 4)

    # The tripled matrix should end up with 1/3 the weight of the base one.
    computed_weights = blend.weights
    logging.info(computed_weights)
    assert allclose(blend.weights, [.75, .25])

    # Check entries of the blended tensor.
    blend.build_tensor()
    # Entries present in only one input (inputs are not unit-valued):
    assert base['0', '0'] == 1
    assert_almost_equal(blend['0', '0'], .75*1)
    assert tripled['4', '1'] == 4*3
    assert_almost_equal(blend['4', '1'], .25*4*3)
    # Entry present in both inputs:
    assert base['1', '2'] == 3
    assert tripled['1', '2'] == 3*3
    both = 0.75*3 + 0.25*3*3 # spelled out term by term
    assert_almost_equal(blend['1', '2'], both)
Exemple #3
0
def test_manual_weights():
    '''
    With explicit weights, the blend is the weighted sum of its inputs.
    '''
    blend = Blend([t1, t2], weights=[.75,.25])
    blend.build_tensor()
    # Every (row label, column label) entry the built tensor must contain.
    expected = {
        ('a', '1'): .75,
        ('a', '2'): 1.0,
        ('b', '1'): .75,
        ('c', '1'): .25,
    }
    assertTensorEqualCompleteDict(blend, expected)
Exemple #4
0
def test_manual_weights():
    '''
    Manually supplied weights yield the weighted sum of the tensors.
    '''
    blended = Blend([t1, t2], weights=[.75, .25])
    blended.build_tensor()
    # Pair up row and column labels, then attach the expected values.
    label_pairs = zip('aabc', '1211')
    values = (.75, 1.0, .75, .25)
    assertTensorEqualCompleteDict(blended, dict(zip(label_pairs, values)))
Exemple #5
0
def test_no_overlap():
    '''
    Blend inputs whose entries do not overlap (a case that allows
    certain optimizations).
    '''
    disjoint = ez_matrix('dc', '21', [1, 1])
    blend = Blend([t1, disjoint], weights=[.75, .25])
    blend.build_tensor()
    # Every (row label, column label) entry the built tensor must contain.
    expected = {
        ('a', '1'): .75,
        ('a', '2'): .75,
        ('b', '1'): .75,
        ('d', '2'): .25,
        ('c', '1'): .25,
    }
    assertTensorEqualCompleteDict(blend, expected)
Exemple #6
0
def test_no_overlap():
    '''
    With no overlap between the inputs, certain optimizations are possible.
    '''
    other = ez_matrix('dc', '21', [1]*2)
    blended = Blend([t1, other], weights=[.75,.25])
    blended.build_tensor()
    # Pair up row and column labels, then attach the expected values.
    label_pairs = zip('aabdc', '12121')
    values = (.75, .75, .75, .25, .25)
    assertTensorEqualCompleteDict(blended, dict(zip(label_pairs, values)))
Exemple #7
0
def test_fake_tensor():
    '''
    A Blend exposes a fake tensor whose iteritems is really fast.
    '''
    # Combined labels will be abc (rows) and 12 (columns).
    single = ez_matrix('c', '1', [1])
    blend = Blend([t1, single], weights=[.75, .25])
    observed = set(blend.fake_tensor().iteritems())
    # (index pair, weighted value) entries the fake tensor should yield.
    wanted = set([
        ((0, 0), .75),
        ((0, 1), .75),
        ((1, 0), .75),
        ((2, 0), .25),
    ])
    assertSetEquals(observed, wanted)
Exemple #8
0
def test_fake_tensor():
    '''
    Blends provide a fake tensor with a very fast iteritems.
    '''
    # labels will be abc, 12
    extra = ez_matrix('c', '1', [1])
    blended = Blend([t1, extra], weights=[.75,.25])
    fake = blended.fake_tensor()
    seen = set(fake.iteritems())

    # Build the expected (index, value) entries one at a time.
    wanted = set()
    for index, value in (((0,0), .75), ((0,1), .75), ((1,0), .75), ((2,0), .25)):
        wanted.add((index, value))
    assertSetEquals(seen, wanted)
Exemple #9
0
	def __init__(self, emoticon_file=path+'/data/emoticons.csv',
			affect_wordnet_file=path+'/data/affectiveWNmatrix.pickle'):
		"""
		Load the emoticon table and build the affect-WordNet/ConceptNet SVD.

		emoticon_file -- path of a CSV file whose rows are
			(emoticon, meaning) pairs.
		affect_wordnet_file -- path handed to get_picklecached_thing to
			load the raw affect WordNet matrix.
		"""
		# Build emoticon dictionary
		self.emoticon = {}
		# csv.reader is lazy, so the rows must be consumed while the file is
		# still open; the with-block closes the handle even if a row is bad.
		with open(emoticon_file, 'r') as emoticon_handle:
			for emoticon, meaning in csv.reader(emoticon_handle):
				self.emoticon[emoticon.decode('utf-8')] = meaning
		self.emoticon_list = self.emoticon.keys()
		# Create blending of affect WordNet and ConceptNet
		cnet = conceptnet_2d_from_db('en')
		affectwn_raw = get_picklecached_thing(affect_wordnet_file)
		affectwn_normalized = affectwn_raw.normalized()
		theblend = Blend([affectwn_normalized, cnet])
		self.affectwn = theblend.svd()
		# Get natural language processing tool
		self.nl = get_nl('en')
Exemple #10
0
def test_rough_blend2():
    '''
    Singular values may be passed in, in which case Blend does not try to
    compute them.
    '''
    tensors = [MockTensor(), MockTensor()]
    given_svals = [range(5, 0, -1), range(15, 0, -3)]
    blend = Blend(tensors, k_values=1, svals=given_svals)
    eq_(blend.weights, (.75, .25))
Exemple #11
0
def test_names():
    '''
    A Blend keeps a name for each of its tensors.
    '''
    # With a plain list, the names default to repr() of each tensor.
    unnamed = Blend([t1, t2])
    eq_(unnamed.names, (repr(t1), repr(t2)))

    # A dict maps names to tensors; sort the pairs before comparing.
    from_dict = Blend(dict(t1=t1, t2=t2))
    pairs = sorted(zip(from_dict.names, from_dict.tensors))
    eq_(pairs, [('t1', t1), ('t2', t2)])

    # A sequence of (name, tensor) items works as well.
    items = [('t1', t1), ('t2', t2)]
    from_items = Blend(items)
    eq_(from_items.names, ('t1', 't2'))
    eq_(from_items.tensors, (t1, t2))
Exemple #12
0
 def __init__(self):
     '''
     Load ConceptNet and the affect WordNet matrix, then store the SVD of
     their blend along with the English NL tools.
     '''
     self.helper = DivsiHelper()

     cnet = conceptnet_2d_from_db('en')
     self.cnet_normalized = cnet

     raw_affect = get_picklecached_thing('data/divsi/affectiveWNmatrix.pickle')
     self.affectwn_raw = raw_affect
     normalized_affect = raw_affect.normalized()
     self.affectWN = normalized_affect

     blend = Blend([normalized_affect, cnet])
     self.analogySpace = blend.svd()

     self.EN_NL = get_nl('en')
Exemple #13
0
def test_rough_blend():
    '''
    Rough weights are 1/svals[0] for each tensor, normalized.
    '''
    small = MockTensor()
    small.svals = range(5, 0, -1)
    large = MockTensor()
    large.svals = range(15, 0, -3)

    # Leading svals are 5 and 15, so the expected weights are 3:1.
    blend = Blend([small, large], k_values=1)
    expected_weights = (.75, .25)
    eq_(blend.weights, expected_weights)
Exemple #14
0
def test_predicted_svals():
    '''
    predicted_svals returns the blend's predicted singular values, with or
    without the index of the tensor each one originates from.
    '''
    dominant = MockTensor()
    dominant.svals = range(5, 0, -1)
    other = MockTensor()
    other.svals = range(10, 0, -2)

    # A weight this close to 1 makes one tensor's svals the only ones shown.
    weight = 0.999999
    blend = Blend([dominant, other], weights=[weight, 1 - weight], k_values=1)

    # with origin tracking: (value, source index) pairs
    pairs = blend.predicted_svals(num=5, for_each_tensor=5, track_origin=True)
    for want, (got, origin) in zip(dominant.svals, pairs):
        assert_almost_equal(got / weight, want)
        eq_(origin, 0)

    # without origin tracking: bare values
    plain = blend.predicted_svals(num=5, for_each_tensor=5)
    for want, got in zip(dominant.svals, plain):
        assert_almost_equal(got / weight, want)

    # Flip the weights so the other tensor dominates.
    blend.weights = [1 - weight, weight]
    pairs = blend.predicted_svals(num=5, for_each_tensor=5, track_origin=True)
    # Transpose the list of pairs into (values, origins).
    values, origins = zip(*pairs)
    for got, want in zip(values, other.svals):
        assert_almost_equal(got / weight, want)
    eq_(origins, (1, ) * 5)
Exemple #15
0
class Divsi:
    """
    Wraps the SVD of an affect-WordNet/ConceptNet blend and scores how
    similar pairs of tags are within it.
    """
    svd = None

    def __init__(self):
        """Load both matrices and store the SVD of their blend."""
        self.helper = DivsiHelper()
        self.cnet_normalized = conceptnet_2d_from_db('en')
        self.affectwn_raw = get_picklecached_thing('data/divsi/affectiveWNmatrix.pickle')
        self.affectWN = self.affectwn_raw.normalized()
        self.analogySpace = Blend([self.affectWN, self.cnet_normalized]).svd()

        self.EN_NL = get_nl('en')

    def load_svd(self, k=100):
        """
        Return self.tensor.svd(k=k).

        NOTE(review): nothing in this class assigns self.tensor, so this
        raises AttributeError unless a caller sets it first -- confirm
        where it is meant to come from.
        """
        return self.tensor.svd(k=k)

    def concept_similarity(self, universal_word):
        """
        Score each tag pair the helper reports for `universal_word`.

        Returns a (sorted_similarity, common) tuple:
        sorted_similarity -- result of helper.sortDictionary on a dict
            mapping similarity score -> [L, R].
        common -- dict mapping each tag to its top 10 items from
            u_dotproducts_with.

        Pairs whose lookup or scoring raises are skipped (best effort).
        """
        import logging

        similarity = {}
        common = {}
        for interesting in self.helper.interestingTags(universal_word):
            L = interesting[0]
            R = interesting[1]
            try:
                left = self.analogySpace.weighted_u_vec(L)
                right = self.analogySpace.weighted_u_vec(R)
                similar = left.hat() * right.hat()

                common[L] = self.analogySpace.u_dotproducts_with(left).top_items(10)
                common[R] = self.analogySpace.u_dotproducts_with(right).top_items(10)

                # NOTE(review): keyed by score, so two pairs with the same
                # score overwrite each other.
                similarity[similar] = [L, R]
            except Exception as E:
                # Still best effort, but leave a trace instead of hiding
                # the failure completely.
                logging.getLogger(__name__).debug(
                    'Skipping pair (%r, %r) for %r: %s',
                    L, R, universal_word, E)

        sorted_similarity = self.helper.sortDictionary(similarity)
        return (sorted_similarity, common)
Exemple #16
0
def test_autoblend():
    '''
    Without explicit weights, Blend computes them automatically so as to
    maximize the amount of interaction between the two matrices.

    Testing that in general is hard, so two matrices with the same
    structure (matching singular values up to scale) but different
    labels are blended here.
    '''

    original = ez_matrix('0013', '0421', [1, 2, 3, 4])
    # one overlapping label, 3x the values
    scaled = ez_matrix('2214', '0421', [3, 6, 9, 12])
    blend = Blend([original, scaled])  # no weights => autoblend

    eq_(blend.label_overlap[0], 1)
    eq_(blend.label_overlap[1], 4)

    # `scaled` should receive 1/3 the weight that `original` gets.
    logging.info(blend.weights)
    expected_weights = [.75, .25]
    assert allclose(blend.weights, expected_weights)

    blend.build_tensor()

    # Entries found in only one input (the inputs are not unit-valued).
    assert original['0', '0'] == 1
    only_original = .75 * 1
    assert_almost_equal(blend['0', '0'], only_original)
    assert scaled['4', '1'] == 4 * 3
    only_scaled = .25 * 4 * 3
    assert_almost_equal(blend['4', '1'], only_scaled)

    # Entry found in both inputs: the weighted contributions add up.
    assert original['1', '2'] == 3
    assert scaled['1', '2'] == 3 * 3
    shared = 0.75 * 3 + 0.25 * 3 * 3
    assert_almost_equal(blend['1', '2'], shared)
Exemple #17
0
def test_factor_too_big():
    "Factor is between 0 and 1."
    # NOTE(review): presumably run under an expected-exception decorator
    # that is not visible here -- confirm.
    out_of_range = 1.5
    Blend([t1, t2], factor=out_of_range)
Exemple #18
0
def test_specifying_factor():
    '''
    A supplied factor becomes the weight of the second matrix.
    '''
    blend = Blend([t1, t2], factor=.25)
    expected_weights = (0.75, 0.25)
    eq_(blend.weights, expected_weights)
Exemple #19
0
def test_bare_blend():
    '''
    Blending a single matrix with nothing else leaves it unchanged.
    '''
    blend = Blend([t1], weights=[1])
    # Compare the mode-0 labels of the input and of the blend.
    original_labels = set(t1.label_list(0))
    blended_labels = set(blend.label_list(0))
    assertSetEquals(original_labels, blended_labels)
Exemple #20
0
def make_blend(other):
    '''
    Return a Blend of `cnet` (defined outside this function) and `other`.
    '''
    tensors = [cnet, other]
    return Blend(tensors)
Exemple #21
0
def test_bare_blend():
    '''
    A lone matrix blended by itself changes nothing.
    '''
    solo = Blend([t1], weights=[1])
    before = set(t1.label_list(0))
    after = set(solo.label_list(0))
    # The mode-0 label sets must match.
    assertSetEquals(before, after)
Exemple #22
0
def test_nonlist():
    '''
    Hand Blend a bare tensor instead of a list of tensors.
    '''
    # NOTE(review): presumably run under an expected-exception decorator
    # that is not visible here -- confirm.
    not_a_list = t1
    Blend(not_a_list, weights=[1])
Exemple #23
0
def test_factor_wrong_dims():
    "Factor only applies to two matrices."
    # Three inputs together with a factor.
    # NOTE(review): presumably run under an expected-exception decorator
    # that is not visible here -- confirm.
    three_tensors = [t1, t2, t1]
    Blend(three_tensors, factor=0.5)
Exemple #24
0
def test_blend_mean_subtracted():
    '''
    Build a Blend from a single mean-subtracted tensor.
    '''
    from csc.divisi.blend import Blend
    centered = tensor.mean_subtracted()
    Blend([centered])
Exemple #25
0
def veering_by_overlap(mat1, mat2, row_overlap, col_overlap):
    '''
    Return the blend's total veering (num=15) at each value in `factors`.

    NOTE(review): `factors` is not a parameter here, so it is read from
    the enclosing scope -- confirm that this is intended.
    '''
    t1, t2 = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend([t1, t2], factor=0)
    veerings = []
    for factor in factors:
        veerings.append(blend.total_veering_at_factor(factor, num=15))
    return veerings
Exemple #26
0
def predicted_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    '''
    Return the predicted singular values (num=15) of the overlapped blend
    at each factor in `factors`.
    '''
    overlapped = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend(overlapped, factor=0)
    predictions = []
    for factor in factors:
        predictions.append(blend.predicted_svals_at_factor(factor, num=15))
    return predictions
Exemple #27
0
def svals_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    '''
    Return the singular values (k=15) of the overlapped blend at each
    factor in `factors`.
    '''
    overlapped = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend(overlapped, factor=0)
    results = []
    for factor in factors:
        results.append(blend.svals_at_factor(factor, k=15))
    return results
Exemple #28
0
def test_specifying_factor_and_weights():
    "Shouldn't specify both factor and weights at the same time."
    # NOTE(review): presumably run under an expected-exception decorator
    # that is not visible here -- confirm.
    conflicting = dict(factor=0.5, weights=[0.5, 0.5])
    Blend([t1, t2], **conflicting)
from csc.conceptnet4.analogyspace import conceptnet_by_relations, identities_for_all_relations
from csc.divisi.blend import Blend
from csc.divisi import export_svdview

# Load the by-relation ConceptNet data for 'en' and the matching identities.
byrel = conceptnet_by_relations('en')
t = identities_for_all_relations(byrel)
# list(...) keeps the concatenation working when values() returns a view
# rather than a list (Python 3 dicts).
b = Blend(list(byrel.values()) + [t])
s = b.svd()
# Write the u matrix under the name 'littleblend'; labels pass through as-is.
export_svdview.write_packed(s.u, 'littleblend', lambda x: x)
s.summarize()
Exemple #30
0
def _get_color_blend():
    '''
    Load the pickle-cached color and ConceptNet matrices and return their
    blend, normalized over modes 0 and 1 and then baked.
    '''
    def _load_cnet():
        return conceptnet_2d_from_db('en')

    color_path = FILEPATH+os.sep+'colormatrix.pickle.gz'
    colors = get_picklecached_thing(color_path, _make_color_matrix)
    cnet_path = FILEPATH+os.sep+'cnet.pickle.gz'
    cnet = get_picklecached_thing(cnet_path, _load_cnet)

    blended = Blend([colors, cnet])
    normalized = blended.normalized(mode=[0,1])
    return normalized.bake()
Exemple #31
0
def veering_by_overlap(mat1, mat2, row_overlap, col_overlap):
    '''
    Compute the blend's total veering (num=15) for every value in `factors`.

    NOTE(review): `factors` comes from the enclosing scope rather than the
    argument list -- confirm that this is intended.
    '''
    t1, t2 = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend([t1, t2], factor=0)
    totals = []
    for factor in factors:
        total = blend.total_veering_at_factor(factor, num=15)
        totals.append(total)
    return totals
Exemple #32
0
def svals_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    '''
    Compute the overlapped blend's singular values (k=15) for each factor.
    '''
    tensors = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend(tensors, factor=0)
    svals = []
    for factor in factors:
        svals.append(blend.svals_at_factor(factor, k=15))
    return svals
Exemple #33
0
def predicted_by_overlap(factors, mat1, mat2, row_overlap, col_overlap):
    '''
    Compute the overlapped blend's predicted singular values (num=15) for
    each factor.
    '''
    tensors = overlap_matrices(mat1, mat2, row_overlap, col_overlap)
    blend = Blend(tensors, factor=0)
    predicted = []
    for factor in factors:
        predicted.append(blend.predicted_svals_at_factor(factor, num=15))
    return predicted
Exemple #34
0
def test_wrong_dims():
    '''
    Blend a 1-dimensional tensor together with a 2-dimensional one.
    '''
    # NOTE(review): presumably run under an expected-exception decorator
    # that is not visible here -- confirm.
    from csc.divisi.labeled_view import make_sparse_labeled_tensor
    one_dim = make_sparse_labeled_tensor(ndim=1)
    two_dim = make_sparse_labeled_tensor(ndim=2)
    Blend([one_dim, two_dim])
Exemple #35
0
from csc.conceptnet4.analogyspace import conceptnet_by_relations, identities_for_all_relations
from csc.divisi.blend import Blend
from csc.divisi import export_svdview

# Load the by-relation ConceptNet data for 'en' and the matching identities.
byrel = conceptnet_by_relations('en')
t = identities_for_all_relations(byrel)
# list(...) keeps the concatenation working when values() returns a view
# rather than a list (Python 3 dicts).
b = Blend(list(byrel.values()) + [t])
s = b.svd()
# Write the u matrix under the name 'littleblend'; labels pass through as-is.
export_svdview.write_packed(s.u, 'littleblend', lambda x: x)
s.summarize()