def test_decode_primitive_list_in_dict(self):
    """
    Test that a dictionary holding a list of primitives comes back unchanged
    after it is encoded and then decoded.
    """

    original = {'a': [1, 2], 'b': 3}
    round_trip = Exportable.decode(Exportable.encode(original))
    self.assertEqual(original, round_trip)
def test_decode_dict_in_list(self):
    """
    Test that a list holding a dictionary comes back unchanged after it is
    encoded and then decoded.
    """

    original = [{'a': [1, 2], 'b': 3}, 5]
    round_trip = Exportable.decode(Exportable.encode(original))
    self.assertEqual(original, round_trip)
def test_decode_primitive_list(self):
    """
    Test that a list of primitives comes back unchanged after it is encoded
    and then decoded.
    """

    original = [1, 2]
    round_trip = Exportable.decode(Exportable.encode(original))
    self.assertEqual(original, round_trip)
def test_decode_vector(self):
    """
    Test that a vector that is encoded and then decoded keeps the same
    dimensions and attributes.
    """

    vector = Vector({'a': 1}, {'b': 2})
    restored = Exportable.decode(Exportable.encode(vector))
    self.assertEqual(vector.dimensions, restored.dimensions)
    self.assertEqual(vector.attributes, restored.attributes)
def test_decode_vector(self):
    """
    Test that when decoding a dictionary that holds an encoded vector, the
    vector is restored under the same key with the same state.
    """

    # NOTE(review): another ``test_decode_vector`` is visible in this file; if
    # both live in the same class, the later definition hides the earlier one
    # and only one of them runs - confirm and rename if so.
    v = Vector({'a': 1}, {'b': 2})
    data = Exportable.encode({'vector': v})
    decoded = Exportable.decode(data)

    # Use assertEqual for the key check: assertTrue(a, b) would treat b as the
    # failure message and pass for any truthy a.
    self.assertEqual({'vector'}, set(decoded.keys()))
    self.assertEqual(v.__dict__, decoded['vector'].__dict__)
def test_decode_nested(self):
    """
    Test that when decoding an exportable object that itself holds exportable
    objects, the nested objects are decoded as well.

    The TF-IDF scheme is stored under the ``tfidf`` key; after the round trip,
    its local and global schemes must have the same state as the originals.
    """

    tfidf = TFIDF(idf={'a': 1}, documents=10)
    data = Exportable.encode({'tfidf': tfidf})
    decoded = Exportable.decode(data)

    # Use assertEqual for the key check: assertTrue(a, b) would treat b as the
    # failure message and pass for any truthy a.
    self.assertEqual({'tfidf'}, set(decoded.keys()))
    self.assertEqual(tfidf.local_scheme.__dict__,
                     decoded['tfidf'].local_scheme.__dict__)
    self.assertEqual(tfidf.global_scheme.__dict__,
                     decoded['tfidf'].global_scheme.__dict__)
def test_decode_vector_list(self):
    """
    Test that when decoding a list of vectors, the list's items are decoded as well.
    """

    vectors = [Vector({'a': 1}, {'b': 2}), Vector({'c': 3}, {'d': 4})]
    encoded = Exportable.encode(vectors)
    decoded = Exportable.decode(encoded)

    # zip() stops at the shorter input, so check the length first; otherwise an
    # empty or truncated result would pass the comparisons below vacuously.
    self.assertEqual(len(vectors), len(decoded))
    self.assertTrue(all(original.dimensions == restored.dimensions
                        for original, restored in zip(vectors, decoded)))
    self.assertTrue(all(original.attributes == restored.attributes
                        for original, restored in zip(vectors, decoded)))
def test_get_class_class_only(self):
    """
    Test that a class string without any module path yields the class name itself.
    """

    class_string = "<class 'Document'>"
    self.assertEqual('Document', Exportable.get_class(class_string))
def test_encode_primitive_dict(self):
    """
    Test that encoding a dictionary of primitive values returns an equal dictionary.
    """

    expected = {'a': 1, 'b': [1, 2]}
    encoding = Exportable.encode({'a': 1, 'b': [1, 2]})
    self.assertEqual(expected, encoding)
def test_decode_vector_list_in_dict(self):
    """
    Test that when decoding a dictionary with a list of vectors in it, the list's items are decoded as well.
    """

    # The list and the loop variables have distinct names so that the
    # comprehensions below do not shadow the list they iterate over.
    vectors = [Vector({'a': 1}, {'b': 2}), Vector({'c': 3}, {'d': 4})]
    data = {'a': vectors, 'e': 5}
    encoded = Exportable.encode(data)
    decoded = Exportable.decode(encoded)

    # zip() stops at the shorter input, so check the length first; otherwise an
    # empty or truncated result would pass the comparisons below vacuously.
    self.assertEqual(len(vectors), len(decoded['a']))
    self.assertTrue(all(original.dimensions == restored.dimensions
                        for original, restored in zip(vectors, decoded['a'])))
    self.assertTrue(all(original.attributes == restored.attributes
                        for original, restored in zip(vectors, decoded['a'])))
    self.assertEqual(5, decoded['e'])
def test_encode_primitive_recursive_dict(self):
    """
    Test that encoding a dictionary whose primitive values are nested in
    another dictionary returns an equal dictionary.
    """

    expected = {'a': 1, 'b': {'c': 1}}
    encoding = Exportable.encode({'a': 1, 'b': {'c': 1}})
    self.assertEqual(expected, encoding)
def test_encode_vector(self):
    """
    Test that encoding a vector directly gives the same result as the vector's
    own ``to_array`` function.
    """

    vector = Vector({'a': 1}, {'b': 2})
    expected = vector.to_array()
    self.assertEqual(expected, Exportable.encode(vector))
def test_encode_vector_list_in_dict(self):
    """
    Test that encoding a dictionary that holds a list of vectors also encodes
    every vector in that list, leaving the primitive values untouched.
    """

    vectors = [Vector({'a': 1}, {'b': 2}), Vector({'c': 3}, {'d': 4})]
    data = {'a': vectors, 'e': 5}

    first = {
        'class': "<class 'vsm.vector.Vector'>",
        'attributes': {'b': 2},
        'dimensions': {'a': 1},
    }
    second = {
        'class': "<class 'vsm.vector.Vector'>",
        'attributes': {'d': 4},
        'dimensions': {'c': 3},
    }
    expected = {'a': [first, second], 'e': 5}

    self.assertEqual(expected, Exportable.encode(data))
def test_get_class(self):
    """
    Test that the class name is extracted from a class string that includes a module path.
    """

    class_string = "<class 'nlp.document.Document'>"
    self.assertEqual('Document', Exportable.get_class(class_string))
def test_get_module_alias(self):
    """
    Test that an aliased module path is replaced when getting the module:
    ``nlp.term_weighting.tfidf`` is expected to resolve to ``nlp.weighting.tfidf``.
    """

    class_string = "<class 'nlp.term_weighting.tfidf.TFIDF'>"
    self.assertEqual('nlp.weighting.tfidf', Exportable.get_module(class_string))
def from_array(array):
    """
    Build a :class:`~vsm.clustering.cluster.Cluster` from its associative array representation.

    Every entry in the array's ``vectors`` list is re-created by the class named
    in that entry's ``class`` field, using that class' own ``from_array`` function.

    :param array: The associative array with the attributes to create the cluster.
    :type array: dict

    :return: A new cluster with the restored vectors and the array's attributes.
    :rtype: :class:`~vsm.clustering.cluster.Cluster`
    """

    def restore(encoded):
        # Resolve the module and the class from the stored class string, then
        # let that class rebuild the object from the encoded entry.
        module_name = Exportable.get_module(encoded.get('class'))
        module = importlib.import_module(module_name)
        class_name = Exportable.get_class(encoded.get('class'))
        return getattr(module, class_name).from_array(encoded)

    restored = [restore(encoded) for encoded in array.get('vectors')]
    return Cluster(vectors=restored, attributes=array.get('attributes'))
def test_encode_primitive_copy(self):
    """
    Test that when encoding a dictionary of primitives, the encoding is a copy.

    The encoding must not share nested objects with the input: changing the
    input after encoding must leave the encoding untouched.
    """

    data = {'a': 1, 'b': {'c': 1}}
    # Encode `data` itself. Encoding a separate, equal literal would make the
    # checks below pass even if the encoding shared objects with its input.
    encoding = Exportable.encode(data)
    self.assertEqual(data, encoding)

    data['b']['c'] = 2
    self.assertEqual(2, data['b']['c'])
    self.assertEqual(1, encoding['b']['c'])
def test_encode_vector(self):
    """
    Test that encoding a dictionary with a vector in it converts the vector
    into a dictionary with its class, dimensions and attributes.
    """

    # NOTE(review): another ``test_encode_vector`` is visible in this file; if
    # both live in the same class, the later definition hides the earlier one
    # and only one of them runs - confirm and rename if so.
    vector = Vector({'a': 1}, {'b': 2})
    encoding = Exportable.encode({'vector': vector})

    # The result is discarded; the round trip only has to complete without
    # raising, which it would do if the encoding could not be dumped to JSON.
    json.loads(json.dumps(encoding))

    encoded_vector = encoding['vector']
    self.assertEqual("<class 'vsm.vector.Vector'>", encoded_vector['class'])
    self.assertEqual({'a': 1}, encoded_vector['dimensions'])
    self.assertEqual({'b': 2}, encoded_vector['attributes'])
def test_tokenize_corpus_normalized(self):
    """
    Test that the documents returned by the corpus tokenization are normalized.

    A document counts as normalized here if its magnitude, rounded to 10
    decimal places, is either 0 or 1.
    """

    # Load the corpus: the file is read line by line, one JSON tweet per line.
    filename = os.path.join(os.path.dirname(__file__), '..', '..', 'tests',
                            'corpora', 'understanding', 'CRYCHE.json')
    corpus = []
    with open(filename) as f:
        for line in f:
            tweet = json.loads(line)

            # Follow the chain of retweets down to the innermost tweet.
            while "retweeted_status" in tweet:
                tweet = tweet["retweeted_status"]

            # Prefer the full text of extended tweets, falling back to the
            # normal text, and to an empty string if there is none.
            if "extended_tweet" in tweet:
                text = tweet["extended_tweet"].get("full_text",
                                                   tweet.get("text", ""))
            else:
                text = tweet.get("text", "")

            corpus.append(Document(text))

    # Load the TF-IDF scheme from the first line of the IDF file.
    idf_filename = os.path.join(os.path.dirname(__file__), '..', '..', 'tests',
                                'corpora', 'idf.json')
    with open(idf_filename) as f:
        scheme = Exportable.decode(json.loads(f.readline()))['tfidf']

    # Tokenize the corpus.
    tokenizer = Tokenizer(stopwords=stopwords.words('english'),
                          normalize_words=True,
                          character_normalization_count=3,
                          remove_unicode_entities=True)
    apd = ELDParticipantDetector(extractor=EntityExtractor())
    corpus = apd._tokenize_corpus(corpus, scheme, tokenizer)

    self.assertTrue(all(round(vector_math.magnitude(document), 10) in [0, 1]
                        for document in corpus))
def test_encode_vector_list(self):
    """
    Test that encoding a list of vectors encodes every vector in the list.
    """

    vectors = [Vector({'a': 1}, {'b': 2}), Vector({'c': 3}, {'d': 4})]

    first = {
        'class': "<class 'vsm.vector.Vector'>",
        'attributes': {'b': 2},
        'dimensions': {'a': 1},
    }
    second = {
        'class': "<class 'vsm.vector.Vector'>",
        'attributes': {'d': 4},
        'dimensions': {'c': 3},
    }

    self.assertEqual([first, second], Exportable.encode(vectors))
def test_encode_empty_dict(self):
    """
    Test that encoding an empty dictionary returns an empty dictionary.
    """

    encoding = Exportable.encode({})
    self.assertEqual({}, encoding)
def test_get_module_class_only(self):
    """
    Test that a class string without any module path yields an empty module name.
    """

    class_string = "<class 'Document'>"
    self.assertEqual('', Exportable.get_module(class_string))