def test_name():
    """Names propagate through SemanticPointer operators and methods."""
    named_a = SemanticPointer(np.ones(4), name='a')
    named_b = SemanticPointer(np.ones(4), name='b')
    anonymous = SemanticPointer(np.ones(4), name=None)

    assert str(named_a) == "SemanticPointer<a>"
    # NOTE(review): the expected repr has no closing ')'; presumably this
    # mirrors the implementation's __repr__ — confirm against the class.
    expected = (
        "SemanticPointer({!r}, vocab={!r}, algebra={!r}, name={!r}".format(
            named_a.v, named_a.vocab, named_a.algebra, named_a.name))
    assert repr(named_a) == expected

    # Unary operations wrap the operand's name.
    assert (-named_a).name == "-(a)"
    assert (~named_a).name == "~(a)"
    assert named_a.normalized().name == "(a).normalized()"
    assert named_a.unitary().name == "(a).unitary()"

    # Binary operations combine both operand names.
    assert (named_a + named_b).name == "(a)+(b)"
    assert (named_a * named_b).name == "(a)*(b)"
    assert (2. * named_a).name == "(2.0)*(a)"

    # Combining with an unnamed pointer yields no name at all.
    assert (named_a + anonymous).name is None
    assert (named_a * anonymous).name is None
def test_normalized():
    """normalized() returns a fresh pointer of unit length."""
    pointer = SemanticPointer([1, 1]).normalized()
    renormalized = pointer.normalized()
    # A new object is returned rather than the pointer itself ...
    assert renormalized is not pointer
    # ... and its length is (numerically) one.
    assert np.allclose(renormalized.length(), 1)
def text(v, vocab, minimum_count=1, maximum_count=None, threshold=0.1,
         join=';', terms=None, normalize=False):
    """Return a human-readable text version of the provided vector.

    This is meant to give a quick text version of a vector for display
    purposes. To do this, compute the dot product between the vector and all
    the terms in the vocabulary. The top few vectors are chosen for inclusion
    in the text. It will try to only return terms with a match above the
    *threshold*, but will always return at least *minimum_count* and at most
    *maximum_count* terms. Terms are sorted from most to least similar.

    Parameters
    ----------
    v : SemanticPointer or array_like
        The vector to convert into text.
    vocab : Vocabulary
        The vocabulary whose terms are compared against *v*.
    minimum_count : int, optional
        Always return at least this many terms in the text.
    maximum_count : int, optional
        Never return more than this many terms in the text.
        If None, all terms will be returned.
    threshold : float, optional
        How small a similarity for a term to be ignored.
    join : str, optional
        The text separator to use between terms.
    terms : list, optional
        Only consider terms in this list of strings.
    normalize : bool
        Whether to normalize the vector before computing similarity.
    """
    if not isinstance(v, SemanticPointer):
        v = SemanticPointer(v)
    if normalize:
        v = v.normalized()

    if terms is None:
        terms = vocab.keys()
        vectors = vocab.vectors
    else:
        vectors = vocab.parse_n(*terms)

    # Pair each term with its similarity and order from most to least
    # similar (ties broken by reverse term order, as tuples compare
    # element-wise).
    matches = sorted(zip(similarity(v, vectors), terms), reverse=True)

    r = []
    for m in matches:
        if minimum_count is not None and len(r) < minimum_count:
            # Below the guaranteed minimum: include regardless of threshold.
            r.append(m)
        elif maximum_count is not None and len(r) == maximum_count:
            # Hit the cap: stop collecting.
            break
        elif threshold is None or m[0] > threshold:
            r.append(m)
        else:
            # Matches are sorted, so everything after is below threshold too.
            break

    return join.join(['%0.2f%s' % (sim, key) for (sim, key) in r])