Exemple #1
0
    def test_decode_primitive_list_in_dict(self):
        """
        Check that a dictionary holding a list of primitives is unchanged after being encoded and decoded again.
        """

        original = {'a': [1, 2], 'b': 3}
        encoded = Exportable.encode(original)
        restored = Exportable.decode(encoded)
        self.assertEqual(original, restored)
Exemple #2
0
    def test_decode_dict_in_list(self):
        """
        Check that a list holding a dictionary is unchanged after being encoded and decoded again.
        """

        original = [{'a': [1, 2], 'b': 3}, 5]
        encoded = Exportable.encode(original)
        restored = Exportable.decode(encoded)
        self.assertEqual(original, restored)
Exemple #3
0
    def test_decode_primitive_list(self):
        """
        Check that a list of primitives is unchanged after being encoded and decoded again.
        """

        original = [1, 2]
        encoded = Exportable.encode(original)
        restored = Exportable.decode(encoded)
        self.assertEqual(original, restored)
Exemple #4
0
    def test_decode_vector(self):
        """
        Check that a vector restored from its encoding has the same dimensions and attributes as the original.
        """

        original = Vector({'a': 1}, {'b': 2})
        restored = Exportable.decode(Exportable.encode(original))
        self.assertEqual(original.dimensions, restored.dimensions)
        self.assertEqual(original.attributes, restored.attributes)
Exemple #5
0
    def test_decode_vector(self):
        """
        Test that when decoding a dictionary that contains an encoded vector, the vector is restored.

        The decoded dictionary must have exactly the original key, and the restored vector must have the same instance attributes as the original.
        """

        v = Vector({'a': 1}, {'b': 2})
        data = Exportable.encode({'vector': v})
        decoded = Exportable.decode(data)
        # `assertTrue(x, y)` treats `y` as the failure message and passes for any truthy `x`, so compare the keys explicitly.
        self.assertEqual({'vector'}, set(decoded.keys()))
        self.assertEqual(v.__dict__, decoded['vector'].__dict__)
Exemple #6
0
    def test_decode_nested(self):
        """
        Test that when decoding an exportable object that itself holds exportable objects, the nested objects are decoded too.

        The decoded dictionary must have exactly the original key, and the decoded term-weighting scheme's local and global schemes must have the same instance attributes as the originals.
        """

        tfidf = TFIDF(idf={'a': 1}, documents=10)
        data = Exportable.encode({'tfidf': tfidf})
        decoded = Exportable.decode(data)
        # `assertTrue(x, y)` treats `y` as the failure message and passes for any truthy `x`, so compare the keys explicitly.
        self.assertEqual({'tfidf'}, set(decoded.keys()))
        self.assertEqual(tfidf.local_scheme.__dict__,
                         decoded['tfidf'].local_scheme.__dict__)
        self.assertEqual(tfidf.global_scheme.__dict__,
                         decoded['tfidf'].global_scheme.__dict__)
Exemple #7
0
    def test_decode_vector_list(self):
        """
        Test that when decoding a list of vectors, the list's items are decoded as well.

        The decoded list must contain as many vectors as the original, with matching dimensions and attributes in the same order.
        """

        vectors = [Vector({'a': 1}, {'b': 2}), Vector({'c': 3}, {'d': 4})]
        encoded = Exportable.encode(vectors)
        decoded = Exportable.decode(encoded)
        # Compare whole lists rather than `all(... zip(...))`: `zip` stops at the shorter input,
        # so an empty or truncated decoding would otherwise pass vacuously.
        self.assertEqual([vector.dimensions for vector in vectors],
                         [vector.dimensions for vector in decoded])
        self.assertEqual([vector.attributes for vector in vectors],
                         [vector.attributes for vector in decoded])
Exemple #8
0
    def test_get_class_class_only(self):
        """
        Check that a class string without any module path yields the class name itself.
        """

        class_string = "<class 'Document'>"
        name = Exportable.get_class(class_string)
        self.assertEqual('Document', name)
Exemple #9
0
    def test_encode_primitive_dict(self):
        """
        Check that encoding a dictionary whose values are primitives yields an equal dictionary.
        """

        expected = {'a': 1, 'b': [1, 2]}
        encoded = Exportable.encode({'a': 1, 'b': [1, 2]})
        self.assertEqual(expected, encoded)
Exemple #10
0
    def test_decode_vector_list_in_dict(self):
        """
        Test that when decoding a dictionary with a list of vectors in it, the list's items are decoded as well.

        The decoded list must contain as many vectors as the original, with matching dimensions and attributes in the same order, and the primitive value stored next to it must be unchanged.
        """

        vectors = [Vector({'a': 1}, {'b': 2}), Vector({'c': 3}, {'d': 4})]
        data = {'a': vectors, 'e': 5}
        encoded = Exportable.encode(data)
        decoded = Exportable.decode(encoded)
        # Compare whole lists rather than `all(... zip(...))`: `zip` stops at the shorter input,
        # so an empty or truncated decoding would otherwise pass vacuously.
        self.assertEqual([vector.dimensions for vector in vectors],
                         [vector.dimensions for vector in decoded['a']])
        self.assertEqual([vector.attributes for vector in vectors],
                         [vector.attributes for vector in decoded['a']])
        self.assertEqual(5, decoded['e'])
Exemple #11
0
    def test_encode_primitive_recursive_dict(self):
        """
        Check that encoding a dictionary with primitives nested in an inner dictionary yields an equal dictionary.
        """

        expected = {'a': 1, 'b': {'c': 1}}
        encoded = Exportable.encode({'a': 1, 'b': {'c': 1}})
        self.assertEqual(expected, encoded)
Exemple #12
0
    def test_encode_vector(self):
        """
        Check that encoding a vector directly gives the same result as the vector's own `to_array`.
        """

        vector = Vector({'a': 1}, {'b': 2})
        expected = vector.to_array()
        encoded = Exportable.encode(vector)
        self.assertEqual(expected, encoded)
Exemple #13
0
    def test_encode_vector_list_in_dict(self):
        """
        Check that encoding a dictionary holding a list of vectors encodes every vector in that list and keeps the primitive value next to it.
        """

        first = Vector({'a': 1}, {'b': 2})
        second = Vector({'c': 3}, {'d': 4})
        data = {'a': [first, second], 'e': 5}

        vector_class = "<class 'vsm.vector.Vector'>"
        expected = {
            'a': [
                {'class': vector_class, 'attributes': {'b': 2}, 'dimensions': {'a': 1}},
                {'class': vector_class, 'attributes': {'d': 4}, 'dimensions': {'c': 3}},
            ],
            'e': 5,
        }
        self.assertEqual(expected, Exportable.encode(data))
Exemple #14
0
    def test_get_class(self):
        """
        Check that the class name is extracted from a fully-qualified class string.
        """

        class_string = "<class 'nlp.document.Document'>"
        name = Exportable.get_class(class_string)
        self.assertEqual('Document', name)
Exemple #15
0
    def test_get_module_alias(self):
        """
        Check that the module extracted from a class string under `nlp.term_weighting` is reported under `nlp.weighting`.
        """

        class_string = "<class 'nlp.term_weighting.tfidf.TFIDF'>"
        module = Exportable.get_module(class_string)
        self.assertEqual('nlp.weighting.tfidf', module)
Exemple #16
0
    def from_array(array):
        """
        Build a :class:`~vsm.clustering.cluster.Cluster` from its associative array representation.

        Each entry under the array's ``vectors`` key is restored by importing the module named in its ``class`` string and calling that class' own ``from_array``.

        :param array: The associative array with the attributes to create the cluster.
        :type array: dict

        :return: A new cluster made up of the restored vectors and the array's ``attributes``.
        :rtype: :class:`~vsm.clustering.cluster.Cluster`
        """

        def _restore(encoded):
            # Resolve the concrete class from the encoded class string and let it rebuild the object.
            class_string = encoded.get('class')
            module = importlib.import_module(Exportable.get_module(class_string))
            restored_class = getattr(module, Exportable.get_class(class_string))
            return restored_class.from_array(encoded)

        restored = [_restore(encoded) for encoded in array.get('vectors')]
        return Cluster(vectors=restored, attributes=array.get('attributes'))
Exemple #17
0
    def test_encode_primitive_copy(self):
        """
        Test that when encoding a dictionary of primitives, the encoding is a copy.

        The same dictionary that was encoded is mutated afterwards, and the change must not show up in the encoding.
        """

        data = {'a': 1, 'b': {'c': 1}}
        # Encode `data` itself: encoding a separate literal could never reveal shared state with `data`.
        encoding = Exportable.encode(data)
        self.assertEqual(data, encoding)
        data['b']['c'] = 2
        self.assertEqual(2, data['b']['c'])
        self.assertEqual(1, encoding['b']['c'])
Exemple #18
0
    def test_encode_vector(self):
        """
        Check that a vector stored in a dictionary is encoded into a JSON-serializable dictionary with its class, dimensions and attributes.
        """

        vector = Vector({'a': 1}, {'b': 2})
        encoding = Exportable.encode({'vector': vector})

        # Round-trip through JSON; `json.dumps` raises if the encoding is not serializable.
        json.loads(json.dumps(encoding))

        encoded_vector = encoding['vector']
        self.assertEqual("<class 'vsm.vector.Vector'>", encoded_vector['class'])
        self.assertEqual({'a': 1}, encoded_vector['dimensions'])
        self.assertEqual({'b': 2}, encoded_vector['attributes'])
Exemple #19
0
    def test_tokenize_corpus_normalized(self):
        """
        Test that the documents returned by the corpus tokenization are normalized.

        Every tokenized document must have a magnitude of 0 or 1 once rounded to 10 decimal places.
        """

        # Load the corpus: one JSON-encoded tweet per line.
        filename = os.path.join(os.path.dirname(__file__), '..', '..', 'tests',
                                'corpora', 'understanding', 'CRYCHE.json')
        corpus = []
        with open(filename, encoding='utf-8') as f:
            for line in f:
                tweet = json.loads(line)
                # Follow retweets down to the innermost retweeted status.
                while "retweeted_status" in tweet:
                    tweet = tweet["retweeted_status"]

                # Prefer the extended tweet's full text, falling back to the regular text.
                if "extended_tweet" in tweet:
                    text = tweet["extended_tweet"].get("full_text",
                                                       tweet.get("text", ""))
                else:
                    text = tweet.get("text", "")

                corpus.append(Document(text))

        # Load the TF-IDF scheme from the first line of the IDF file.
        idf_filename = os.path.join(os.path.dirname(__file__), '..', '..',
                                    'tests', 'corpora', 'idf.json')
        with open(idf_filename, encoding='utf-8') as f:
            scheme = Exportable.decode(json.loads(f.readline()))['tfidf']

        # Tokenize the corpus.
        tokenizer = Tokenizer(stopwords=stopwords.words('english'),
                              normalize_words=True,
                              character_normalization_count=3,
                              remove_unicode_entities=True)
        apd = ELDParticipantDetector(extractor=EntityExtractor())
        corpus = apd._tokenize_corpus(corpus, scheme, tokenizer)
        self.assertTrue(
            all(
                round(vector_math.magnitude(document), 10) in [0, 1]
                for document in corpus))
Exemple #20
0
    def test_encode_vector_list(self):
        """
        Check that encoding a list of vectors encodes every vector in the list.
        """

        vectors = [Vector({'a': 1}, {'b': 2}), Vector({'c': 3}, {'d': 4})]

        vector_class = "<class 'vsm.vector.Vector'>"
        expected = [
            {'class': vector_class, 'attributes': {'b': 2}, 'dimensions': {'a': 1}},
            {'class': vector_class, 'attributes': {'d': 4}, 'dimensions': {'c': 3}},
        ]
        self.assertEqual(expected, Exportable.encode(vectors))
Exemple #21
0
    def test_encode_empty_dict(self):
        """
        Check that encoding an empty dictionary yields an empty dictionary.
        """

        encoded = Exportable.encode({})
        self.assertEqual({}, encoded)
Exemple #22
0
    def test_get_module_class_only(self):
        """
        Check that a class string without any module path yields an empty module name.
        """

        class_string = "<class 'Document'>"
        module = Exportable.get_module(class_string)
        self.assertEqual('', module)