def getClusterForDocument(self, document):
    # Update the phrase-to-dimension map with the document's phrases and
    # compute the document's LSH signature.
    UtilityMethods.updatePhraseTextAndDimensionsMap(document, self.phraseTextAndDimensionMap, **self.clustering_settings)
    document.setSignatureUsingVectorPermutations(self.unitVector, self.vectorPermutations, self.phraseTextAndDimensionMap)
    predictedCluster = None
    # Collect candidate clusters from every signature permutation, then rank
    # the candidates by exact cosine similarity.
    possibleNearestNeighbors = reduce(lambda x, y: x.union(y), (permutation.getNearestDocuments(document) for permutation in self.signaturePermutations), set())
    if possibleNearestNeighbors:
        predictedCluster = max(((clusterId, self.clusters[clusterId].cosineSimilarity(document)) for clusterId in possibleNearestNeighbors), key=itemgetter(1))
    if predictedCluster and predictedCluster[1] >= self.thresholdForDocumentToBeInACluster:
        return predictedCluster[0]

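# A minimal, self-contained sketch of the candidate-retrieval pattern used in
# getClusterForDocument above: each permutation (mocked here as a plain set of
# candidate ids) contributes nearby ids, and the union of candidates is ranked
# by exact cosine similarity. The names `toy_vectors`, `toy_permutations` and
# `cosine` are illustrative assumptions, not part of the library.
from functools import reduce
from math import sqrt
from operator import itemgetter

def cosine(u, v):
    # Cosine similarity of two sparse vectors stored as dicts.
    dot = sum(u[k] * v.get(k, 0.0) for k in u)
    norm = lambda w: sqrt(sum(x * x for x in w.values()))
    return dot / (norm(u) * norm(v)) if u and v else 0.0

toy_vectors = {'c1': {'lsh': 1.0, 'stream': 2.0}, 'c2': {'cluster': 3.0}}
toy_permutations = [{'c1'}, {'c1', 'c2'}]              # candidate ids per permutation
query = {'stream': 1.0, 'lsh': 0.5}

candidates = reduce(lambda x, y: x.union(y), toy_permutations, set())
best = max(((cid, cosine(toy_vectors[cid], query)) for cid in candidates), key=itemgetter(1))
assert best[0] == 'c1'                                  # highest cosine similarity wins
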
def test_updatedPhraseObject_PhraseObjectScoresAreUpdatedCorrectly(self):
    UtilityMethods.updatePhraseTextToPhraseObject(self.phraseVector, test_time + timedelta(seconds=60), self.phraseTextToPhraseObjectMap, **stream_settings)
    self.assertEqual(5, len(self.phraseTextToPhraseObjectMap))
    self.assertEqual(5, self.phraseTextToPhraseObjectMap['project'].score)
    self.assertEqual(1, self.phraseTextToPhraseObjectMap['streams'].score)

def cluster(self, dataIterator):
    i = 1
    for data in dataIterator:
        message = self.convertDataToMessageMethod(data, **self.stream_settings)
        # message = data
        # Process messages only if they arrive in timestamp order.
        if DataStreamMethods.messageInOrder(message.timeStamp):
            UtilityMethods.updatePhraseTextToPhraseObject(message.vector, message.timeStamp, self.phraseTextToPhraseObjectMap, **self.stream_settings)
            # Create a new stream object for unseen stream ids; otherwise fold the
            # message into the existing stream vector with exponential decay.
            if message.streamId not in self.streamIdToStreamObjectMap:
                self.streamIdToStreamObjectMap[message.streamId] = Stream(message.streamId, message)
            else:
                self.streamIdToStreamObjectMap[message.streamId].updateForMessage(message, VectorUpdateMethods.exponentialDecay, **self.stream_settings)
            streamObject = self.streamIdToStreamObjectMap[message.streamId]
            self.updateDimensionsMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
            self.clusterFilteringMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
            self.clusterAnalysisMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
            self.getClusterAndUpdateExistingClusters(streamObject)

def cluster(self, dataIterator):
    i = 1
    for data in dataIterator:
        message = self.convertDataToMessageMethod(data, **self.stream_settings)
        # message = data
        if DataStreamMethods.messageInOrder(message.timeStamp):
            UtilityMethods.updatePhraseTextToPhraseObject(message.vector, message.timeStamp, self.phraseTextToPhraseObjectMap, **self.stream_settings)
            if message.streamId not in self.streamIdToStreamObjectMap:
                # New stream: create it and assign it to a cluster right away.
                self.streamIdToStreamObjectMap[message.streamId] = Stream(message.streamId, message)
                self.getClusterAndUpdateExistingClusters(self.streamIdToStreamObjectMap[message.streamId])
            else:
                # Existing stream: update its vector and re-cluster only if it
                # has drifted far enough from its previous state.
                previousStreamObject = Vector(vectorInitialValues=self.streamIdToStreamObjectMap[message.streamId])
                self.streamIdToStreamObjectMap[message.streamId].updateForMessage(message, VectorUpdateMethods.exponentialDecay, **self.stream_settings)
                streamObject = self.streamIdToStreamObjectMap[message.streamId]
                distance = Vector.euclideanDistance(streamObject, previousStreamObject)
                if distance > 10:
                    # print i, len(self.clusters), distance
                    self.getClusterAndUpdateExistingClusters(self.streamIdToStreamObjectMap[message.streamId])
            self.updateDimensionsMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
            self.clusterFilteringMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
            # self.clusterAnalysisMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
            self.clusterAnalysisMethod.call(time.time(), hdStreamClusteringObject=self, currentMessageTime=message.timeStamp, numberOfMessages=i)
            # print i, len(self.clusters)
            i += 1

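# A minimal sketch of the drift check in cluster() above, assuming a plain
# dict-based vector, an illustrative `exponential_decay` update and the fixed
# threshold of 10; none of these names come from the library itself.
from math import sqrt

def exponential_decay(vector, message_vector, decay=0.5):
    # Decay existing weights, then fold in the new message's weights.
    updated = dict((k, v * decay) for k, v in vector.items())
    for k, v in message_vector.items():
        updated[k] = updated.get(k, 0.0) + v
    return updated

def euclidean_distance(u, v):
    keys = set(u) | set(v)
    return sqrt(sum((u.get(k, 0.0) - v.get(k, 0.0)) ** 2 for k in keys))

stream_vector = {'lsh': 2.0, 'stream': 4.0}
previous = dict(stream_vector)                       # snapshot before the update
stream_vector = exponential_decay(stream_vector, {'cluster': 20.0})
if euclidean_distance(stream_vector, previous) > 10:
    pass  # only now would the stream be re-assigned to a cluster
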
def test_updateDimensions_when_phrases_with_lower_id_are_removed_from_phraseTextToIdMap(self):
    stream_settings['dimensions'] = 3
    for phrase, score in zip(['new', 'phrases', 'are'], range(100, 103)):
        self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase, test_time, score=score)
    self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'cluster', 2)
    self.phraseTextToPhraseObjectMap['cluster'].score = 100
    UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
    self.assertEqual(range(3), sorted(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD).values()))

def test_updateDimensions_when_dimensions_have_to_be_removed(self):
    stream_settings['dimensions'] = 4
    self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'abcdx', 2)
    self.phraseTextAndDimensionMap.set(TwoWayMap.MAP_FORWARD, 'abcdxy', 3)
    for phrase, score in zip(['new_text'], range(7, 8)):
        self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase, test_time, score=score)
    self.phraseTextToPhraseObjectMap['cluster'].latestOccuranceTime = test_time - 3 * stream_settings['max_phrase_inactivity_time_in_seconds']
    UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
    self.assertEqual(set({'project': 0, 'new_text': 1}), set(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD)))

def getNearestDocument(self, document):
    # Update the phrase-to-dimension map and compute the query document's
    # LSH signature before probing the signature permutations.
    UtilityMethods.updatePhraseTextAndDimensionsMap(document, self.phraseTextAndDimensionMap, **self.settings)
    document.setSignatureUsingVectorPermutations(self.unitVector, self.vectorPermutations, self.phraseTextAndDimensionMap)
    predictedNeighbor = None
    possibleNearestNeighbors = reduce(lambda x, y: x.union(y), (permutation.getNearestDocuments(document) for permutation in self.signaturePermutations), set())
    if possibleNearestNeighbors:
        predictedNeighbor = max(((docId, self.documentIdToDocumentMap[docId].cosineSimilarity(document)) for docId in possibleNearestNeighbors), key=itemgetter(1))
    # Debug output.
    print predictedNeighbor
    if predictedNeighbor and predictedNeighbor[1] >= self.nearestNeighborThreshold:
        return predictedNeighbor[0]

def update(self, newDocument):
    UtilityMethods.updatePhraseTextAndDimensionsMap(newDocument, self.phraseTextAndDimensionMap, **self.settings)
    currentDocument = self.documentIdToDocumentMap.get(newDocument.docId, None)
    self.documentIdToDocumentMap[newDocument.docId] = newDocument
    newDocument.setSignatureUsingVectorPermutations(self.unitVector, self.vectorPermutations, self.phraseTextAndDimensionMap)
    # Replace the previous version of the document in every signature
    # permutation with the freshly signed one.
    for permutation in self.signaturePermutations:
        if currentDocument is not None:
            permutation.removeDocument(currentDocument)
        permutation.addDocument(newDocument)

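# A minimal sketch of the bookkeeping in update() above, assuming each
# permutation keeps a bucket of document ids per signature and that a stale
# copy is removed before the re-signed copy is added. `BucketSketch` is an
# illustrative stand-in, not the library's signature-permutation structure
# (which uses nearest-signature lookups rather than exact buckets).
from collections import defaultdict

class BucketSketch(object):
    def __init__(self):
        self.buckets = defaultdict(set)        # signature -> ids hashed there
        self.signatures = {}                   # id -> last signature seen
    def addDocument(self, docId, signature):
        self.signatures[docId] = signature
        self.buckets[signature].add(docId)
    def removeDocument(self, docId):
        signature = self.signatures.pop(docId, None)
        if signature is not None:
            self.buckets[signature].discard(docId)
    def getNearestDocuments(self, signature):
        return set(self.buckets.get(signature, set()))

sketch = BucketSketch()
sketch.addDocument('d1', '0101')
sketch.removeDocument('d1')                    # re-signing replaces the old entry
sketch.addDocument('d1', '0111')
assert sketch.getNearestDocuments('0111') == set(['d1'])
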
def updateDimensions(hdStreamClusteringObject, currentMessageTime):
    # Update dimensions.
    UtilityMethods.updateDimensions(hdStreamClusteringObject.phraseTextAndDimensionMap, hdStreamClusteringObject.phraseTextToPhraseObjectMap, currentMessageTime, **hdStreamClusteringObject.stream_settings)
    DataStreamMethods._resetClustersInSignatureTries(hdStreamClusteringObject, currentMessageTime)

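# A simplified sketch, under assumed semantics drawn from the updateDimensions
# tests in this file, of what a dimension refresh amounts to: after inactive
# phrases are pruned, only the highest-scoring phrases are kept, up to the
# configured number of dimensions. The real method also reuses freed dimension
# ids (e.g. 'project' keeps id 0 in the tests); this sketch ignores that and
# the names below are illustrative only.
def refresh_dimensions(phrase_scores, dimensions):
    # phrase_scores: phrase text -> current score, already pruned of inactive
    # phrases. Returns a phrase -> dimension-id map of size <= dimensions.
    top = sorted(phrase_scores, key=phrase_scores.get, reverse=True)[:dimensions]
    return dict((phrase, i) for i, phrase in enumerate(top))

assert refresh_dimensions({'project': 8, 'added': 10, 'are': 2}, 2) == {'added': 0, 'project': 1}
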
def run(self, dataIterator, estimationMethod, parameterSpecificDataCollectionMethod=None):
    # Wrap the estimation method so that it fires at most once per time unit.
    estimationMethod = FixedIntervalMethod(estimationMethod, self.timeUnitInSeconds)
    for data in dataIterator:
        message = self.convertDataToMessageMethod(data, **self.stream_settings)
        if CDA.messageInOrder(message.timeStamp):
            if parameterSpecificDataCollectionMethod is not None:
                parameterSpecificDataCollectionMethod(estimationObject=self, message=message)
            UtilityMethods.updatePhraseTextToPhraseObject(message.vector, message.timeStamp, self.phraseTextToPhraseObjectMap, **self.stream_settings)
            estimationMethod.call(message.timeStamp, estimationObject=self, currentMessageTime=message.timeStamp)

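# A minimal sketch of the fixed-interval wrapper assumed above: the wrapped
# callable fires only when at least `intervalInSeconds` have elapsed since its
# last invocation. This is an illustrative stand-in with guessed semantics,
# not the library's FixedIntervalMethod.
import datetime

class FixedIntervalSketch(object):
    def __init__(self, method, intervalInSeconds):
        self.method, self.interval, self.lastCallTime = method, intervalInSeconds, None
    def call(self, currentTime, **kwargs):
        # `currentTime` is a datetime; skip the call if the interval has not passed.
        if self.lastCallTime is None or (currentTime - self.lastCallTime).total_seconds() >= self.interval:
            self.lastCallTime = currentTime
            return self.method(currentTime, **kwargs)

calls = []
sketch = FixedIntervalSketch(lambda t, **kw: calls.append(t), 60)
t0 = datetime.datetime(2011, 1, 1)
sketch.call(t0)
sketch.call(t0 + datetime.timedelta(seconds=30))   # skipped: interval not reached
sketch.call(t0 + datetime.timedelta(seconds=90))   # fires again
assert len(calls) == 2
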
def test_updatePhraseTextAndDimensionsMap_PhraseMapHasLesserDimensions(self):
    settings['dimensions'] = 4
    UtilityMethods.updatePhraseTextAndDimensionsMap(self.phraseVector, self.phraseTextAndDimensionMap, **settings)
    self.assertEqual(self.finalPhraseToIdMap, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))

def test_updateDimensions_remove_old_phrases(self):
    originalTime = self.phraseTextToPhraseObjectMap['abcd'].latestOccuranceTime
    self.phraseTextToPhraseObjectMap['abcd'].latestOccuranceTime = test_time
    UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
    self.assertTrue('abcd' in self.phraseTextToPhraseObjectMap)
    self.phraseTextToPhraseObjectMap['abcd'].latestOccuranceTime = originalTime
    UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
    self.assertTrue('abcd' not in self.phraseTextToPhraseObjectMap)

def test_updatedPhraseObject_phrase_does_not_exist_in_phraseToIdMap_but_exists_in_phraseTextToPhraseObjectMap_with_dimensions_full(self):
    stream_settings['dimensions'] = 1
    self.phraseTextAndDimensionMap.remove(TwoWayMap.MAP_FORWARD, 'cluster')
    UtilityMethods.updatePhraseTextToPhraseObject(self.phraseVector, test_time + timedelta(seconds=60), self.phraseTextToPhraseObjectMap, **stream_settings)
    self.assertEqual({'project': 0}, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))
    self.assertEqual(5, len(self.phraseTextToPhraseObjectMap))
    self.assertEqual(5, self.phraseTextToPhraseObjectMap['project'].score)
    self.assertEqual(5, self.phraseTextToPhraseObjectMap['cluster'].score)
    self.assertEqual(1, self.phraseTextToPhraseObjectMap['streams'].score)

def test_updatePhraseTextAndDimensionsMap_PhraseMapHasMaximumDimensions(self):
    UtilityMethods.updatePhraseTextAndDimensionsMap(self.phraseVector, self.phraseTextAndDimensionMap, **settings)
    for k in ['streams', 'highdimensional']:
        del self.finalPhraseToIdMap[k]
    self.assertEqual(self.finalPhraseToIdMap, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))

def test_createOrAddNewPhraseObject(self):
    UtilityMethods.createOrAddNewPhraseObject('new_phrase', self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
    UtilityMethods.createOrAddNewPhraseObject('project', self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
    self.assertEqual(4, len(self.phraseTextToPhraseObjectMap))
    self.assertEqual(1, self.phraseTextToPhraseObjectMap['new_phrase'].score)
    self.assertEqual(9, self.phraseTextToPhraseObjectMap['project'].score)

def test_pruningConditionRandom(self):
    phrase1 = Phrase('dsf', test_time - 3 * stream_settings['max_phrase_inactivity_time_in_seconds'], 1)
    phrase2 = Phrase('dsf', test_time, 1)
    self.assertTrue(UtilityMethods.pruningConditionRandom(phrase1, test_time, **stream_settings))
    self.assertFalse(UtilityMethods.pruningConditionRandom(phrase2, test_time, **stream_settings))

def test_updateDimensions_when_phraseTextToIdMap_is_filled_to_max_dimensions(self):
    for phrase, score in zip(['added'], range(10, 11)):
        self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase, test_time, score=score)
    UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
    self.assertEqual({'project': 0, 'added': 1}, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))

def test_pruneUnnecessaryPhrases(self):
    phraseTextToPhraseObjectMap = {'dsf': Phrase('dsf', test_time - 3 * stream_settings['max_phrase_inactivity_time_in_seconds'], 1),
                                   'abc': Phrase('abc', test_time, 1)}
    UtilityMethods.pruneUnnecessaryPhrases(phraseTextToPhraseObjectMap, test_time, UtilityMethods.pruningConditionRandom, **stream_settings)
    self.assertTrue('dsf' not in phraseTextToPhraseObjectMap)
    self.assertTrue('abc' in phraseTextToPhraseObjectMap)

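# A minimal, self-contained sketch of the pruning pass exercised by the test
# above, assuming the condition flags phrases whose last occurrence is older
# than the configured inactivity window. `_PhraseStub`, `now` and `window`
# are illustrative stand-ins, not library objects.
import datetime

class _PhraseStub(object):
    def __init__(self, latestOccuranceTime):
        self.latestOccuranceTime = latestOccuranceTime

now = datetime.datetime(2011, 1, 1)
window = datetime.timedelta(seconds=60)
phrase_map = {'dsf': _PhraseStub(now - 3 * window), 'abc': _PhraseStub(now)}
for text in list(phrase_map):            # iterate over a copy so deletion is safe
    if now - phrase_map[text].latestOccuranceTime > window:
        del phrase_map[text]
assert sorted(phrase_map) == ['abc']
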
def test_updateDimensions_when_phraseTextToIdMap_has_lesser_than_max_dimensions(self):
    stream_settings['dimensions'] = 4
    for phrase, score in zip(['new', 'phrases', 'are', 'added'], range(7, 11)):
        self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase, test_time, score=score)
    UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
    self.assertEqual(set({'project': 0, 'phrases': 1, 'are': 2, 'added': 3}), set(self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD)))
    self.assertEqual(4, len(self.phraseTextAndDimensionMap))

def test_updateDimensions_when_phraseTextToIdMap_is_filled_to_max_dimensions_and_entire_map_is_changed(self):
    for phrase, score in zip(['added', 'are'], range(10, 12)):
        self.phraseTextToPhraseObjectMap[phrase] = Phrase(phrase, test_time, score=score)
    UtilityMethods.updateDimensions(self.phraseTextAndDimensionMap, self.phraseTextToPhraseObjectMap, test_time, **stream_settings)
    self.assertEqual({'added': 1, 'are': 0}, self.phraseTextAndDimensionMap.getMap(TwoWayMap.MAP_FORWARD))