class UtilityMethodsTests(unittest.TestCase):
    def setUp(self):
        self.phraseVector = {'project':1, 'cluster':1, 'highdimensional':1, 'streams':1}
        self.phraseTextAndDimensionMap = TwoWayMap()
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'project', 0)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'cluster', 1)
        self.phraseTextToPhraseObjectMap = {'project': Phrase('project', test_time, score=8), 'cluster': Phrase('cluster', test_time, score=8), 'abcd': Phrase('abcd', test_time-3*stream_settings['max_phrase_inactivity_time_in_seconds'], score=8)}
        self.vector = Vector({0:1, 1:1, 2:1, 3:1})
        self.initial_max_dimensions = stream_settings['dimensions']
        stream_settings['dimensions'] = 2
    def tearDown(self): stream_settings['dimensions'] = self.initial_max_dimensions
    def test_updatedPhraseObject_PhraseObjectScoresAreUpdatedCorrectly(self): 
        UtilityMethods.updatePhraseTextToPhraseObject(self.phraseVector, test_time+timedelta(seconds=60), self.phraseTextToPhraseObjectMap, **stream_settings)
        self.assertEqual(5, len(self.phraseTextToPhraseObjectMap))
        self.assertEqual(5, self.phraseTextToPhraseObjectMap['project'].score)
        self.assertEqual(1, self.phraseTextToPhraseObjectMap['streams'].score)
    
    def test_updatedPhraseObject_phrase_does_not_exist_in_phraseToIdMap_but_exists_in_phraseTextToPhraseObjectMap_with_dimensions_full(self): 
        stream_settings['dimensions'] = 1
        self.phraseTextAndDimensionMap.remove(TwoWayMap.MAP_FORWARD, 'cluster')
        UtilityMethods.updatePhraseTextToPhraseObject(self.phraseVector, test_time+timedelta(seconds=60), self.phraseTextToPhraseObjectMap, **stream_settings)
        self.assertEqual({'project':0}, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))
        self.assertEqual(5, len(self.phraseTextToPhraseObjectMap))
        self.assertEqual(5, self.phraseTextToPhraseObjectMap['project'].score)
        self.assertEqual(5, self.phraseTextToPhraseObjectMap['cluster'].score)
        self.assertEqual(1, self.phraseTextToPhraseObjectMap['streams'].score)

    def test_createOrAddNewPhraseObject(self):
        UtilityMethods.createOrAddNewPhraseObject('new_phrase', self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
        UtilityMethods.createOrAddNewPhraseObject('project', self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
        self.assertEqual(4, len(self.phraseTextToPhraseObjectMap))
        self.assertEqual(1, self.phraseTextToPhraseObjectMap['new_phrase'].score)
        self.assertEqual(9, self.phraseTextToPhraseObjectMap['project'].score)
    
    def test_updateDimensions_when_phraseTextToIdMap_is_filled_to_max_dimensions(self):
        for phrase, score in zip(['added'], range(10,11)): self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase, test_time, score=score)
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
        self.assertEqual({'project':0, 'added': 1}, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))
    
    def test_updateDimensions_when_phraseTextToIdMap_is_filled_to_max_dimensions_and_entire_map_is_changed(self):
        for phrase, score in zip(['added', 'are'], range(10,12)): self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase, test_time, score=score)
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
        self.assertEqual({'added':1, 'are': 0}, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))
    
    def test_updateDimensions_when_phraseTextToIdMap_has_lesser_than_max_dimensions(self):
        stream_settings['dimensions'] = 4
        for phrase, score in zip(['new', 'phrases', 'are', 'added'], range(7,11)): self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase, test_time, score=score)
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
        self.assertEqual(set({'project':0, 'phrases': 1, 'are':2, 'added':3}), set(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD)))
        self.assertEqual(4, len(self.phraseTextAndDimensionMap))
    
    def test_updateDimensions_when_phrases_with_lower_id_are_removed_from_phraseTextToIdMap(self):
        stream_settings['dimensions'] = 3
        for phrase, score in zip(['new', 'phrases', 'are'], range(100,103)): self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase, test_time, score=score)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'cluster', 2)
        self.phraseTextToPhraseObjectMap['cluster'].score=100
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
        self.assertEqual(range(3), sorted(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD).values()))
    
    def test_updateDimensions_remove_old_phrases(self):
        originalTime=self.phraseTextToPhraseObjectMap['abcd'].latestOccuranceTime
        self.phraseTextToPhraseObjectMap['abcd'].latestOccuranceTime=test_time
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
        self.assertTrue('abcd' in self.phraseTextToPhraseObjectMap)
        self.phraseTextToPhraseObjectMap['abcd'].latestOccuranceTime=originalTime
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
        self.assertTrue('abcd' not in self.phraseTextToPhraseObjectMap)
    
    
    def test_updateDimensions_when_dimensions_have_to_be_removed(self):
        stream_settings['dimensions'] = 4
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'abcdx', 2)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'abcdxy', 3)
        for phrase, score in zip(['new_text'], range(7,8)): self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase, test_time, score=score)
        self.phraseTextToPhraseObjectMap['cluster'].latestOccuranceTime=test_time-3*stream_settings['max_phrase_inactivity_time_in_seconds']
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
        self.assertEqual(set({'project':0, 'new_text': 1}), set(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD)))
    
    def test_checkCriticalErrorsInPhraseTextToIdMap_larger_than_expected_dimensions(self):
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'sdfsd', 3)
        print 'Ignore this message: ',
        self.assertRaises(SystemExit, UtilityMethods.checkCriticalErrorsInPhraseTextToIdMap, self.phraseTextAndDimensionMap, **stream_settings)
    
    def test_pruningConditionDeterministic(self):
        phrase1 = Phrase('dsf', test_time-3*stream_settings['max_phrase_inactivity_time_in_seconds'], 1)
        phrase2 = Phrase('dsf', test_time, 1)
        self.assertTrue(UtilityMethods.pruningConditionDeterministic(phrase1, test_time, **stream_settings))
        self.assertFalse(UtilityMethods.pruningConditionDeterministic(phrase2, test_time, **stream_settings))
    
    def test_pruningConditionRandom(self):
        phrase1 = Phrase('dsf', test_time-3*stream_settings['max_phrase_inactivity_time_in_seconds'], 1)
        phrase2 = Phrase('dsf', test_time, 1)
        self.assertTrue(UtilityMethods.pruningConditionRandom(phrase1, test_time, **stream_settings))
        self.assertFalse(UtilityMethods.pruningConditionRandom(phrase2, test_time, **stream_settings))
    
    def test_pruneUnnecessaryPhrases(self):
        phraseTextToPhraseObjectMap = {'dsf': Phrase('dsf', test_time-3*stream_settings['max_phrase_inactivity_time_in_seconds'], 1), 'abc': Phrase('abc', test_time, 1)}
        UtilityMethods.pruneUnnecessaryPhrases(phraseTextToPhraseObjectMap, test_time, UtilityMethods.pruningConditionRandom, **stream_settings)
        self.assertTrue('dsf' not in phraseTextToPhraseObjectMap)
        self.assertTrue('abc' in phraseTextToPhraseObjectMap)
# Example no. 2
# 0
class UtilityMethodsTests(unittest.TestCase):
    def setUp(self):
        self.phraseVector = {
            'project': 1,
            'cluster': 1,
            'highdimensional': 1,
            'streams': 1
        }
        self.phraseTextAndDimensionMap = TwoWayMap()
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'project', 0)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'cluster', 1)
        self.phraseTextToPhraseObjectMap = {
            'project':
            Phrase('project', test_time, score=8),
            'cluster':
            Phrase('cluster', test_time, score=8),
            'abcd':
            Phrase(
                'abcd',
                test_time -
                3 * stream_settings['max_phrase_inactivity_time_in_seconds'],
                score=8)
        }
        self.vector = Vector({0: 1, 1: 1, 2: 1, 3: 1})
        self.initial_max_dimensions = stream_settings['dimensions']
        stream_settings['dimensions'] = 2

    def tearDown(self):
        stream_settings['dimensions'] = self.initial_max_dimensions

    def test_updatedPhraseObject_PhraseObjectScoresAreUpdatedCorrectly(self):
        UtilityMethods.updatePhraseTextToPhraseObject(
            self.phraseVector, test_time + timedelta(seconds=60),
            self.phraseTextToPhraseObjectMap, **stream_settings)
        self.assertEqual(5, len(self.phraseTextToPhraseObjectMap))
        self.assertEqual(5, self.phraseTextToPhraseObjectMap['project'].score)
        self.assertEqual(1, self.phraseTextToPhraseObjectMap['streams'].score)

    def test_updatedPhraseObject_phrase_does_not_exist_in_phraseToIdMap_but_exists_in_phraseTextToPhraseObjectMap_with_dimensions_full(
            self):
        stream_settings['dimensions'] = 1
        self.phraseTextAndDimensionMap.remove(TwoWayMap.MAP_FORWARD, 'cluster')
        UtilityMethods.updatePhraseTextToPhraseObject(
            self.phraseVector, test_time + timedelta(seconds=60),
            self.phraseTextToPhraseObjectMap, **stream_settings)
        self.assertEqual({'project': 0},
                         self.phraseTextAndDimensionMap.getMap(
                             TwoWayMap.MAP_FORWARD))
        self.assertEqual(5, len(self.phraseTextToPhraseObjectMap))
        self.assertEqual(5, self.phraseTextToPhraseObjectMap['project'].score)
        self.assertEqual(5, self.phraseTextToPhraseObjectMap['cluster'].score)
        self.assertEqual(1, self.phraseTextToPhraseObjectMap['streams'].score)

    def test_createOrAddNewPhraseObject(self):
        UtilityMethods.createOrAddNewPhraseObject(
            'new_phrase', self.phraseTextToPhraseObjectMap, test_time,
            **stream_settings)
        UtilityMethods.createOrAddNewPhraseObject(
            'project', self.phraseTextToPhraseObjectMap, test_time,
            **stream_settings)
        self.assertEqual(4, len(self.phraseTextToPhraseObjectMap))
        self.assertEqual(1,
                         self.phraseTextToPhraseObjectMap['new_phrase'].score)
        self.assertEqual(9, self.phraseTextToPhraseObjectMap['project'].score)

    def test_updateDimensions_when_phraseTextToIdMap_is_filled_to_max_dimensions(
            self):
        for phrase, score in zip(['added'], range(10, 11)):
            self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase,
                                                              test_time,
                                                              score=score)
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap,
                                        self.phraseTextToPhraseObjectMap,
                                        test_time, **stream_settings)
        self.assertEqual({
            'project': 0,
            'added': 1
        }, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))

    def test_updateDimensions_when_phraseTextToIdMap_is_filled_to_max_dimensions_and_entire_map_is_changed(
            self):
        for phrase, score in zip(['added', 'are'], range(10, 12)):
            self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase,
                                                              test_time,
                                                              score=score)
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap,
                                        self.phraseTextToPhraseObjectMap,
                                        test_time, **stream_settings)
        self.assertEqual({
            'added': 1,
            'are': 0
        }, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))

    def test_updateDimensions_when_phraseTextToIdMap_has_lesser_than_max_dimensions(
            self):
        stream_settings['dimensions'] = 4
        for phrase, score in zip(['new', 'phrases', 'are', 'added'],
                                 range(7, 11)):
            self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase,
                                                              test_time,
                                                              score=score)
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap,
                                        self.phraseTextToPhraseObjectMap,
                                        test_time, **stream_settings)
        self.assertEqual(
            set({
                'project': 0,
                'phrases': 1,
                'are': 2,
                'added': 3
            }),
            set(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD)))
        self.assertEqual(4, len(self.phraseTextAndDimensionMap))

    def test_updateDimensions_when_phrases_with_lower_id_are_removed_from_phraseTextToIdMap(
            self):
        stream_settings['dimensions'] = 3
        for phrase, score in zip(['new', 'phrases', 'are'], range(100, 103)):
            self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase,
                                                              test_time,
                                                              score=score)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'cluster', 2)
        self.phraseTextToPhraseObjectMap['cluster'].score = 100
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap,
                                        self.phraseTextToPhraseObjectMap,
                                        test_time, **stream_settings)
        self.assertEqual(
            range(3),
            sorted(
                self.phraseTextAndDimensionMap.getMap(
                    TwoWayMap.MAP_FORWARD).values()))

    def test_updateDimensions_remove_old_phrases(self):
        originalTime = self.phraseTextToPhraseObjectMap[
            'abcd'].latestOccuranceTime
        self.phraseTextToPhraseObjectMap[
            'abcd'].latestOccuranceTime = test_time
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap,
                                        self.phraseTextToPhraseObjectMap,
                                        test_time, **stream_settings)
        self.assertTrue('abcd' in self.phraseTextToPhraseObjectMap)
        self.phraseTextToPhraseObjectMap[
            'abcd'].latestOccuranceTime = originalTime
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap,
                                        self.phraseTextToPhraseObjectMap,
                                        test_time, **stream_settings)
        self.assertTrue('abcd' not in self.phraseTextToPhraseObjectMap)

    def test_updateDimensions_when_dimensions_have_to_be_removed(self):
        stream_settings['dimensions'] = 4
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'abcdx', 2)
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'abcdxy', 3)
        for phrase, score in zip(['new_text'], range(7, 8)):
            self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase,
                                                              test_time,
                                                              score=score)
        self.phraseTextToPhraseObjectMap[
            'cluster'].latestOccuranceTime = test_time - 3 * stream_settings[
                'max_phrase_inactivity_time_in_seconds']
        UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap,
                                        self.phraseTextToPhraseObjectMap,
                                        test_time, **stream_settings)
        self.assertEqual(
            set({
                'project': 0,
                'new_text': 1
            }),
            set(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD)))

    def test_checkCriticalErrorsInPhraseTextToIdMap_larger_than_expected_dimensions(
            self):
        self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'sdfsd', 3)
        print 'Ignore this message: ',
        self.assertRaises(
            SystemExit, UtilityMethods.checkCriticalErrorsInPhraseTextToIdMap,
            self.phraseTextAndDimensionMap, **stream_settings)

    def test_pruningConditionDeterministic(self):
        phrase1 = Phrase(
            'dsf', test_time -
            3 * stream_settings['max_phrase_inactivity_time_in_seconds'], 1)
        phrase2 = Phrase('dsf', test_time, 1)
        self.assertTrue(
            UtilityMethods.pruningConditionDeterministic(
                phrase1, test_time, **stream_settings))
        self.assertFalse(
            UtilityMethods.pruningConditionDeterministic(
                phrase2, test_time, **stream_settings))

    def test_pruningConditionRandom(self):
        phrase1 = Phrase(
            'dsf', test_time -
            3 * stream_settings['max_phrase_inactivity_time_in_seconds'], 1)
        phrase2 = Phrase('dsf', test_time, 1)
        self.assertTrue(
            UtilityMethods.pruningConditionRandom(phrase1, test_time,
                                                  **stream_settings))
        self.assertFalse(
            UtilityMethods.pruningConditionRandom(phrase2, test_time,
                                                  **stream_settings))

    def test_pruneUnnecessaryPhrases(self):
        phraseTextToPhraseObjectMap = {
            'dsf':
            Phrase(
                'dsf', test_time -
                3 * stream_settings['max_phrase_inactivity_time_in_seconds'],
                1),
            'abc':
            Phrase('abc', test_time, 1)
        }
        UtilityMethods.pruneUnnecessaryPhrases(
            phraseTextToPhraseObjectMap, test_time,
            UtilityMethods.pruningConditionRandom, **stream_settings)
        self.assertTrue('dsf' not in phraseTextToPhraseObjectMap)
        self.assertTrue('abc' in phraseTextToPhraseObjectMap)