def run(self, dataIterator, estimationMethod, parameterSpecificDataCollectionMethod=None):
     """Feed every in-order message of ``dataIterator`` to ``estimationMethod``.

     Each item is converted with ``self.convertDataToMessageMethod``; messages
     rejected by ``CDA.messageInOrder`` are skipped.  For every accepted
     message the optional collection hook is invoked, the phrase map is
     updated, and the wrapped estimation method is called.

     dataIterator -- iterable of raw items accepted by
         ``self.convertDataToMessageMethod``.
     estimationMethod -- callable wrapped in ``FixedIntervalMethod`` together
         with ``self.timeUnitInSeconds``; it is handed ``estimationObject``
         and ``currentMessageTime`` keyword arguments.
     parameterSpecificDataCollectionMethod -- optional callable invoked as
         ``hook(estimationObject=self, message=message)`` before the phrase
         map is updated; ``None`` disables it.
     """
     # Keep the caller's argument intact instead of rebinding the parameter.
     intervalEstimation = FixedIntervalMethod(estimationMethod, self.timeUnitInSeconds)
     for data in dataIterator:
         message = self.convertDataToMessageMethod(data, **self.stream_settings)
         if not CDA.messageInOrder(message.timeStamp):
             continue
         # Identity test: None is the "no hook" sentinel, so do not rely on
         # the hook object's own __ne__ implementation.
         if parameterSpecificDataCollectionMethod is not None:
             parameterSpecificDataCollectionMethod(estimationObject=self, message=message)
         UtilityMethods.updatePhraseTextToPhraseObject(message.vector, message.timeStamp, self.phraseTextToPhraseObjectMap, **self.stream_settings)
         intervalEstimation.call(message.timeStamp, estimationObject=self, currentMessageTime=message.timeStamp)
 def run(self, dataIterator, estimationMethod, parameterSpecificDataCollectionMethod=None):
     """Run ``estimationMethod`` over the in-order messages of ``dataIterator``.

     Items are turned into messages by ``self.convertDataToMessageMethod``.
     A message is processed only when ``CDA.messageInOrder`` accepts its
     time stamp: the optional hook runs first, then the phrase map is
     updated, then the interval-wrapped estimation method is called.

     dataIterator -- iterable of raw items for
         ``self.convertDataToMessageMethod``.
     estimationMethod -- callable passed to ``FixedIntervalMethod`` with
         ``self.timeUnitInSeconds``; called with ``estimationObject`` and
         ``currentMessageTime`` keyword arguments.
     parameterSpecificDataCollectionMethod -- optional hook called with
         ``estimationObject=self`` and ``message=message``; skipped when
         ``None``.
     """
     # A separate local avoids shadowing the ``estimationMethod`` argument.
     scheduledEstimation = FixedIntervalMethod(estimationMethod, self.timeUnitInSeconds)
     for data in dataIterator:
         message = self.convertDataToMessageMethod(data, **self.stream_settings)
         if not CDA.messageInOrder(message.timeStamp):
             continue
         # ``is not None`` is the reliable test for the absent-hook sentinel.
         if parameterSpecificDataCollectionMethod is not None:
             parameterSpecificDataCollectionMethod(estimationObject=self, message=message)
         UtilityMethods.updatePhraseTextToPhraseObject(message.vector, message.timeStamp, self.phraseTextToPhraseObjectMap, **self.stream_settings)
         scheduledEstimation.call(message.timeStamp, estimationObject=self, currentMessageTime=message.timeStamp)
    def __init__(self, **stream_settings):
        """Store the settings and build the periodic maintenance callbacks.

        Required keys in ``stream_settings`` (``KeyError`` when missing):
        'dimension_update_frequency_in_seconds',
        'cluster_analysis_frequency_in_seconds',
        'cluster_filtering_frequency_in_seconds' and
        'convert_data_to_message_method'.

        Optional keys: 'update_dimensions_method', 'cluster_analysis_method'
        and 'cluster_filtering_method' fall back to the matching
        ``DataStreamMethods`` function; 'combine_clusters_method' falls back
        to ``None``.

        Side effect: resets ``DataStreamMethods.messageInOrderVariable`` to
        ``None``.
        """
        super(HDStreaminClustering, self).__init__(**stream_settings)
        self.stream_settings = stream_settings
        self.phraseTextToPhraseObjectMap = {}
        self.streamIdToStreamObjectMap = {}

        # How often each maintenance step is scheduled.
        self.dimensionsUpdatingFrequency = stream_settings['dimension_update_frequency_in_seconds']
        self.clustersAnalysisFrequency = stream_settings['cluster_analysis_frequency_in_seconds']
        self.clustersFilteringFrequency = stream_settings['cluster_filtering_frequency_in_seconds']

        # Each maintenance step may be overridden through the settings.
        dimensionsUpdater = stream_settings.get('update_dimensions_method', DataStreamMethods.updateDimensions)
        self.updateDimensionsMethod = FixedIntervalMethod(dimensionsUpdater, self.dimensionsUpdatingFrequency)
        clusterAnalyser = stream_settings.get('cluster_analysis_method', DataStreamMethods.clusterAnalysisMethod)
        self.clusterAnalysisMethod = FixedIntervalMethod(clusterAnalyser, self.clustersAnalysisFrequency)
        clusterFilter = stream_settings.get('cluster_filtering_method', DataStreamMethods.clusterFilteringMethod)
        self.clusterFilteringMethod = FixedIntervalMethod(clusterFilter, self.clustersFilteringFrequency)

        self.combineClustersMethod = stream_settings.get('combine_clusters_method', None)
        self.convertDataToMessageMethod = stream_settings['convert_data_to_message_method']

        # Shared (class-level) ordering state is reset for every new instance.
        DataStreamMethods.messageInOrderVariable = None
# Example no. 4
# 0
    def __init__(self, **stream_settings):
        """Initialise stream bookkeeping and the interval-driven callbacks.

        ``stream_settings`` must contain the three
        '*_frequency_in_seconds' entries read below and
        'convert_data_to_message_method'; a missing one raises ``KeyError``.
        The three '*_method' callbacks are optional and default to the
        corresponding ``DataStreamMethods`` function, while
        'combine_clusters_method' defaults to ``None``.

        ``DataStreamMethods.messageInOrderVariable`` is reset to ``None`` as
        a side effect.
        """
        super(HDStreaminClustering, self).__init__(**stream_settings)
        self.stream_settings = stream_settings
        self.phraseTextToPhraseObjectMap = {}
        self.streamIdToStreamObjectMap = {}

        frequencyKey = 'dimension_update_frequency_in_seconds'
        self.dimensionsUpdatingFrequency = stream_settings[frequencyKey]
        frequencyKey = 'cluster_analysis_frequency_in_seconds'
        self.clustersAnalysisFrequency = stream_settings[frequencyKey]
        frequencyKey = 'cluster_filtering_frequency_in_seconds'
        self.clustersFilteringFrequency = stream_settings[frequencyKey]

        # Wrap each (possibly user-supplied) callback with its frequency.
        callback = stream_settings.get('update_dimensions_method',
                                       DataStreamMethods.updateDimensions)
        self.updateDimensionsMethod = FixedIntervalMethod(
            callback, self.dimensionsUpdatingFrequency)

        callback = stream_settings.get('cluster_analysis_method',
                                       DataStreamMethods.clusterAnalysisMethod)
        self.clusterAnalysisMethod = FixedIntervalMethod(
            callback, self.clustersAnalysisFrequency)

        callback = stream_settings.get('cluster_filtering_method',
                                       DataStreamMethods.clusterFilteringMethod)
        self.clusterFilteringMethod = FixedIntervalMethod(
            callback, self.clustersFilteringFrequency)

        self.combineClustersMethod = stream_settings.get(
            'combine_clusters_method', None)
        converterKey = 'convert_data_to_message_method'
        self.convertDataToMessageMethod = stream_settings[converterKey]

        # Shared ordering state starts fresh for each new clustering object.
        DataStreamMethods.messageInOrderVariable = None
class HDSkipStreamClustering(StreamingLSHClustering):
    """Stream clustering that skips re-clustering of streams that barely moved.

    A stream seen for the first time is clustered immediately.  A stream seen
    again is updated with the new message and is only re-clustered (followed
    by the dimension-update and cluster-filtering callbacks) when its vector
    moved by more than ``STREAM_MOVEMENT_THRESHOLD``.
    """

    # Euclidean distance between a stream's vector before and after an update
    # that must be exceeded for the stream to be re-clustered.  Kept as a class
    # attribute so subclasses or instances can tune it.
    STREAM_MOVEMENT_THRESHOLD = 10

    def __init__(self, **stream_settings):
        """Store the settings and build the interval-wrapped callbacks.

        Required keys (``KeyError`` when missing):
        'dimension_update_frequency_in_seconds',
        'cluster_analysis_frequency_in_seconds',
        'cluster_filtering_frequency_in_seconds' and
        'convert_data_to_message_method'.  The '*_method' callbacks are
        optional and default to the matching ``DataStreamMethods`` function;
        'combine_clusters_method' defaults to ``None``.

        Side effect: resets ``DataStreamMethods.messageInOrderVariable``.
        """
        super(HDSkipStreamClustering, self).__init__(**stream_settings)
        self.stream_settings = stream_settings
        self.phraseTextToPhraseObjectMap, self.streamIdToStreamObjectMap = {}, {}

        self.dimensionsUpdatingFrequency = stream_settings['dimension_update_frequency_in_seconds']
        self.clustersAnalysisFrequency = stream_settings['cluster_analysis_frequency_in_seconds']
        self.clustersFilteringFrequency = stream_settings['cluster_filtering_frequency_in_seconds']

        self.updateDimensionsMethod = FixedIntervalMethod(stream_settings.get('update_dimensions_method', DataStreamMethods.updateDimensions), self.dimensionsUpdatingFrequency)
        self.clusterAnalysisMethod = FixedIntervalMethod(stream_settings.get('cluster_analysis_method', DataStreamMethods.clusterAnalysisMethod), self.clustersAnalysisFrequency)
        self.clusterFilteringMethod = FixedIntervalMethod(stream_settings.get('cluster_filtering_method', DataStreamMethods.clusterFilteringMethod), self.clustersFilteringFrequency)

        self.combineClustersMethod = stream_settings.get('combine_clusters_method', None)
        self.convertDataToMessageMethod = stream_settings['convert_data_to_message_method']

        DataStreamMethods.messageInOrderVariable = None

    def cluster(self, dataIterator):
        """Consume ``dataIterator`` and cluster the streams its messages belong to.

        Items are converted with ``self.convertDataToMessageMethod``; messages
        rejected by ``DataStreamMethods.messageInOrder`` are ignored and do not
        count towards ``numberOfMessages``.
        """
        # 1-based count of in-order messages, including the current one.
        messageNumber = 1
        for data in dataIterator:
            message = self.convertDataToMessageMethod(data, **self.stream_settings)
            if not DataStreamMethods.messageInOrder(message.timeStamp):
                continue
            UtilityMethods.updatePhraseTextToPhraseObject(message.vector, message.timeStamp, self.phraseTextToPhraseObjectMap, **self.stream_settings)
            if message.streamId not in self.streamIdToStreamObjectMap:
                # First message of this stream: register it and cluster it right away.
                streamObject = Stream(message.streamId, message)
                self.streamIdToStreamObjectMap[message.streamId] = streamObject
                self.getClusterAndUpdateExistingClusters(streamObject)
            else:
                streamObject = self.streamIdToStreamObjectMap[message.streamId]
                # Snapshot the vector before the update so the movement can be measured.
                previousStreamObject = Vector(vectorInitialValues=streamObject)
                streamObject.updateForMessage(message, VectorUpdateMethods.exponentialDecay, **self.stream_settings)
                distance = Vector.euclideanDistance(streamObject, previousStreamObject)
                if distance > self.STREAM_MOVEMENT_THRESHOLD:
                    self.getClusterAndUpdateExistingClusters(streamObject)
                    self.updateDimensionsMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
                    self.clusterFilteringMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
                # NOTE(review): the analysis callback is keyed on wall-clock time
                # (time.time()) while the two callbacks above use the message time
                # stamp, and it only runs for already-known streams -- confirm
                # that both are intentional.
                self.clusterAnalysisMethod.call(time.time(), hdStreamClusteringObject=self, currentMessageTime=message.timeStamp, numberOfMessages=messageNumber)
            messageNumber += 1

    def getClusterAndUpdateExistingClusters(self, stream):
        """Add ``stream`` to its predicted cluster, or open a new cluster for it.

        ``self.getClusterForDocument`` and the attributes ``clusters``,
        ``unitVector``, ``vectorPermutations``, ``phraseTextAndDimensionMap``
        and ``signaturePermutations`` are not defined in this class;
        presumably they come from ``StreamingLSHClustering`` -- TODO confirm.
        """
        predictedCluster = self.getClusterForDocument(stream)
        # Identity test: a falsy-but-valid cluster id must still count as a hit.
        if predictedCluster is not None:
            self.clusters[predictedCluster].addDocument(stream, **self.stream_settings)
            return
        newCluster = StreamCluster(stream)
        newCluster.setSignatureUsingVectorPermutations(self.unitVector, self.vectorPermutations, self.phraseTextAndDimensionMap)
        for permutation in self.signaturePermutations:
            permutation.addDocument(newCluster)
        self.clusters[newCluster.clusterId] = newCluster
class HDStreaminClustering(StreamingLSHClustering):
    """Stream clustering that re-clusters a stream on every in-order message.

    Each message updates (or creates) the ``Stream`` object of its stream id,
    the three interval-wrapped maintenance callbacks are called, and the
    stream is then assigned to a cluster.
    """

    def __init__(self, **stream_settings):
        """Store the settings and build the interval-wrapped callbacks.

        Required keys (``KeyError`` when missing):
        'dimension_update_frequency_in_seconds',
        'cluster_analysis_frequency_in_seconds',
        'cluster_filtering_frequency_in_seconds' and
        'convert_data_to_message_method'.  The '*_method' callbacks are
        optional and default to the matching ``DataStreamMethods`` function;
        'combine_clusters_method' defaults to ``None``.

        Side effect: resets ``DataStreamMethods.messageInOrderVariable``.
        """
        super(HDStreaminClustering, self).__init__(**stream_settings)
        self.stream_settings = stream_settings
        self.phraseTextToPhraseObjectMap, self.streamIdToStreamObjectMap = {}, {}

        self.dimensionsUpdatingFrequency = stream_settings['dimension_update_frequency_in_seconds']
        self.clustersAnalysisFrequency = stream_settings['cluster_analysis_frequency_in_seconds']
        self.clustersFilteringFrequency = stream_settings['cluster_filtering_frequency_in_seconds']

        self.updateDimensionsMethod = FixedIntervalMethod(stream_settings.get('update_dimensions_method', DataStreamMethods.updateDimensions), self.dimensionsUpdatingFrequency)
        self.clusterAnalysisMethod = FixedIntervalMethod(stream_settings.get('cluster_analysis_method', DataStreamMethods.clusterAnalysisMethod), self.clustersAnalysisFrequency)
        self.clusterFilteringMethod = FixedIntervalMethod(stream_settings.get('cluster_filtering_method', DataStreamMethods.clusterFilteringMethod), self.clustersFilteringFrequency)

        self.combineClustersMethod = stream_settings.get('combine_clusters_method', None)
        self.convertDataToMessageMethod = stream_settings['convert_data_to_message_method']

        DataStreamMethods.messageInOrderVariable = None

    def cluster(self, dataIterator):
        """Consume ``dataIterator`` and cluster the stream of every in-order message.

        Items are converted with ``self.convertDataToMessageMethod``; messages
        rejected by ``DataStreamMethods.messageInOrder`` are ignored.
        """
        for data in dataIterator:
            message = self.convertDataToMessageMethod(data, **self.stream_settings)
            if not DataStreamMethods.messageInOrder(message.timeStamp):
                continue
            UtilityMethods.updatePhraseTextToPhraseObject(message.vector, message.timeStamp, self.phraseTextToPhraseObjectMap, **self.stream_settings)
            if message.streamId not in self.streamIdToStreamObjectMap:
                self.streamIdToStreamObjectMap[message.streamId] = Stream(message.streamId, message)
            else:
                self.streamIdToStreamObjectMap[message.streamId].updateForMessage(message, VectorUpdateMethods.exponentialDecay, **self.stream_settings)
            # Fetched before the maintenance callbacks run, as they receive this object.
            streamObject = self.streamIdToStreamObjectMap[message.streamId]
            self.updateDimensionsMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
            self.clusterFilteringMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
            self.clusterAnalysisMethod.call(message.timeStamp, hdStreamClusteringObject=self, currentMessageTime=message.timeStamp)
            self.getClusterAndUpdateExistingClusters(streamObject)

    def getClusterAndUpdateExistingClusters(self, stream):
        """Add ``stream`` to its predicted cluster, or open a new cluster for it.

        ``self.getClusterForDocument`` and the attributes ``clusters``,
        ``unitVector``, ``vectorPermutations``, ``phraseTextAndDimensionMap``
        and ``signaturePermutations`` are not defined in this class;
        presumably they come from ``StreamingLSHClustering`` -- TODO confirm.
        """
        predictedCluster = self.getClusterForDocument(stream)
        # Do not remove this comment. Might need this if StreamCluster is used again in future.
        # if predictedCluster!=None: self.clusters[predictedCluster].addStream(stream, **self.stream_settings)
        # Identity test: a falsy-but-valid cluster id must still count as a hit.
        if predictedCluster is not None:
            self.clusters[predictedCluster].addDocument(stream, **self.stream_settings)
            return
        newCluster = StreamCluster(stream)
        newCluster.setSignatureUsingVectorPermutations(self.unitVector, self.vectorPermutations, self.phraseTextAndDimensionMap)
        for permutation in self.signaturePermutations:
            permutation.addDocument(newCluster)
        self.clusters[newCluster.clusterId] = newCluster
# Example no. 7
# 0
class HDStreaminClustering(StreamingLSHClustering):
    """Stream clustering that re-clusters a stream on every in-order message.

    Every accepted message creates or updates the ``Stream`` object for its
    stream id, triggers the three interval-wrapped maintenance callbacks and
    finally assigns the stream to a cluster.
    """

    def __init__(self, **stream_settings):
        """Store the settings and build the interval-wrapped callbacks.

        Required keys (``KeyError`` when missing):
        'dimension_update_frequency_in_seconds',
        'cluster_analysis_frequency_in_seconds',
        'cluster_filtering_frequency_in_seconds' and
        'convert_data_to_message_method'.  The '*_method' callbacks are
        optional and default to the matching ``DataStreamMethods`` function;
        'combine_clusters_method' defaults to ``None``.

        Side effect: resets ``DataStreamMethods.messageInOrderVariable``.
        """
        super(HDStreaminClustering, self).__init__(**stream_settings)
        self.stream_settings = stream_settings
        self.phraseTextToPhraseObjectMap = {}
        self.streamIdToStreamObjectMap = {}

        self.dimensionsUpdatingFrequency = stream_settings[
            'dimension_update_frequency_in_seconds']
        self.clustersAnalysisFrequency = stream_settings[
            'cluster_analysis_frequency_in_seconds']
        self.clustersFilteringFrequency = stream_settings[
            'cluster_filtering_frequency_in_seconds']

        self.updateDimensionsMethod = FixedIntervalMethod(
            stream_settings.get('update_dimensions_method',
                                DataStreamMethods.updateDimensions),
            self.dimensionsUpdatingFrequency)
        self.clusterAnalysisMethod = FixedIntervalMethod(
            stream_settings.get('cluster_analysis_method',
                                DataStreamMethods.clusterAnalysisMethod),
            self.clustersAnalysisFrequency)
        self.clusterFilteringMethod = FixedIntervalMethod(
            stream_settings.get('cluster_filtering_method',
                                DataStreamMethods.clusterFilteringMethod),
            self.clustersFilteringFrequency)

        self.combineClustersMethod = stream_settings.get(
            'combine_clusters_method', None)
        self.convertDataToMessageMethod = stream_settings[
            'convert_data_to_message_method']

        DataStreamMethods.messageInOrderVariable = None

    def cluster(self, dataIterator):
        """Consume ``dataIterator``, clustering the stream of each message.

        Items are converted with ``self.convertDataToMessageMethod``;
        messages rejected by ``DataStreamMethods.messageInOrder`` are
        ignored.
        """
        for data in dataIterator:
            message = self.convertDataToMessageMethod(data,
                                                      **self.stream_settings)
            if not DataStreamMethods.messageInOrder(message.timeStamp):
                continue
            UtilityMethods.updatePhraseTextToPhraseObject(
                message.vector, message.timeStamp,
                self.phraseTextToPhraseObjectMap, **self.stream_settings)
            if message.streamId not in self.streamIdToStreamObjectMap:
                self.streamIdToStreamObjectMap[message.streamId] = Stream(
                    message.streamId, message)
            else:
                self.streamIdToStreamObjectMap[
                    message.streamId].updateForMessage(
                        message, VectorUpdateMethods.exponentialDecay,
                        **self.stream_settings)
            # Fetched before the maintenance callbacks run, as they receive
            # this clustering object.
            streamObject = self.streamIdToStreamObjectMap[message.streamId]
            # All three callbacks share the same scheduling arguments.
            callbackArguments = dict(hdStreamClusteringObject=self,
                                     currentMessageTime=message.timeStamp)
            self.updateDimensionsMethod.call(message.timeStamp,
                                             **callbackArguments)
            self.clusterFilteringMethod.call(message.timeStamp,
                                             **callbackArguments)
            self.clusterAnalysisMethod.call(message.timeStamp,
                                            **callbackArguments)
            self.getClusterAndUpdateExistingClusters(streamObject)

    def getClusterAndUpdateExistingClusters(self, stream):
        """Add ``stream`` to its predicted cluster, or open a new cluster.

        ``self.getClusterForDocument`` and the attributes ``clusters``,
        ``unitVector``, ``vectorPermutations``,
        ``phraseTextAndDimensionMap`` and ``signaturePermutations`` are not
        defined in this class; presumably they come from
        ``StreamingLSHClustering`` -- TODO confirm.
        """
        predictedCluster = self.getClusterForDocument(stream)
        # Do not remove this comment. Might need this if StreamCluster is used
        # again in future.
        # if predictedCluster!=None: self.clusters[predictedCluster].addStream(stream, **self.stream_settings)
        # Identity test: a falsy-but-valid cluster id must still be a hit.
        if predictedCluster is not None:
            self.clusters[predictedCluster].addDocument(
                stream, **self.stream_settings)
            return
        newCluster = StreamCluster(stream)
        newCluster.setSignatureUsingVectorPermutations(
            self.unitVector, self.vectorPermutations,
            self.phraseTextAndDimensionMap)
        for permutation in self.signaturePermutations:
            permutation.addDocument(newCluster)
        self.clusters[newCluster.clusterId] = newCluster
# Example no. 8
# 0
class HDSkipStreamClustering(StreamingLSHClustering):
    """Stream clustering that skips re-clustering of barely-changed streams.

    The first message of a stream clusters it immediately.  Later messages
    update the stream and re-cluster it (then call the dimension-update and
    cluster-filtering callbacks) only when the stream's vector moved by more
    than ``STREAM_MOVEMENT_THRESHOLD``.
    """

    # Euclidean distance between a stream's vector before and after an
    # update that must be exceeded for the stream to be re-clustered.  A
    # class attribute so subclasses or instances can tune it.
    STREAM_MOVEMENT_THRESHOLD = 10

    def __init__(self, **stream_settings):
        """Store the settings and build the interval-wrapped callbacks.

        Required keys (``KeyError`` when missing):
        'dimension_update_frequency_in_seconds',
        'cluster_analysis_frequency_in_seconds',
        'cluster_filtering_frequency_in_seconds' and
        'convert_data_to_message_method'.  The '*_method' callbacks are
        optional and default to the matching ``DataStreamMethods`` function;
        'combine_clusters_method' defaults to ``None``.

        Side effect: resets ``DataStreamMethods.messageInOrderVariable``.
        """
        super(HDSkipStreamClustering, self).__init__(**stream_settings)
        self.stream_settings = stream_settings
        self.phraseTextToPhraseObjectMap = {}
        self.streamIdToStreamObjectMap = {}

        self.dimensionsUpdatingFrequency = stream_settings[
            'dimension_update_frequency_in_seconds']
        self.clustersAnalysisFrequency = stream_settings[
            'cluster_analysis_frequency_in_seconds']
        self.clustersFilteringFrequency = stream_settings[
            'cluster_filtering_frequency_in_seconds']

        self.updateDimensionsMethod = FixedIntervalMethod(
            stream_settings.get('update_dimensions_method',
                                DataStreamMethods.updateDimensions),
            self.dimensionsUpdatingFrequency)
        self.clusterAnalysisMethod = FixedIntervalMethod(
            stream_settings.get('cluster_analysis_method',
                                DataStreamMethods.clusterAnalysisMethod),
            self.clustersAnalysisFrequency)
        self.clusterFilteringMethod = FixedIntervalMethod(
            stream_settings.get('cluster_filtering_method',
                                DataStreamMethods.clusterFilteringMethod),
            self.clustersFilteringFrequency)

        self.combineClustersMethod = stream_settings.get(
            'combine_clusters_method', None)
        self.convertDataToMessageMethod = stream_settings[
            'convert_data_to_message_method']

        DataStreamMethods.messageInOrderVariable = None

    def cluster(self, dataIterator):
        """Consume ``dataIterator`` and cluster the streams of its messages.

        Items are converted with ``self.convertDataToMessageMethod``;
        messages rejected by ``DataStreamMethods.messageInOrder`` are ignored
        and do not count towards ``numberOfMessages``.
        """
        # 1-based count of in-order messages, including the current one.
        messageNumber = 1
        for data in dataIterator:
            message = self.convertDataToMessageMethod(data,
                                                      **self.stream_settings)
            if not DataStreamMethods.messageInOrder(message.timeStamp):
                continue
            UtilityMethods.updatePhraseTextToPhraseObject(
                message.vector, message.timeStamp,
                self.phraseTextToPhraseObjectMap, **self.stream_settings)
            if message.streamId not in self.streamIdToStreamObjectMap:
                # First message of this stream: register and cluster it.
                streamObject = Stream(message.streamId, message)
                self.streamIdToStreamObjectMap[message.streamId] = streamObject
                self.getClusterAndUpdateExistingClusters(streamObject)
            else:
                streamObject = self.streamIdToStreamObjectMap[message.streamId]
                # Snapshot the vector before the update so the movement can
                # be measured afterwards.
                previousStreamObject = Vector(
                    vectorInitialValues=streamObject)
                streamObject.updateForMessage(
                    message, VectorUpdateMethods.exponentialDecay,
                    **self.stream_settings)
                distance = Vector.euclideanDistance(streamObject,
                                                    previousStreamObject)
                if distance > self.STREAM_MOVEMENT_THRESHOLD:
                    self.getClusterAndUpdateExistingClusters(streamObject)
                    self.updateDimensionsMethod.call(
                        message.timeStamp,
                        hdStreamClusteringObject=self,
                        currentMessageTime=message.timeStamp)
                    self.clusterFilteringMethod.call(
                        message.timeStamp,
                        hdStreamClusteringObject=self,
                        currentMessageTime=message.timeStamp)
                # NOTE(review): the analysis callback is keyed on wall-clock
                # time (time.time()) while the two callbacks above use the
                # message time stamp, and it only runs for already-known
                # streams -- confirm that both are intentional.
                self.clusterAnalysisMethod.call(
                    time.time(),
                    hdStreamClusteringObject=self,
                    currentMessageTime=message.timeStamp,
                    numberOfMessages=messageNumber)
            messageNumber += 1

    def getClusterAndUpdateExistingClusters(self, stream):
        """Add ``stream`` to its predicted cluster, or open a new cluster.

        ``self.getClusterForDocument`` and the attributes ``clusters``,
        ``unitVector``, ``vectorPermutations``,
        ``phraseTextAndDimensionMap`` and ``signaturePermutations`` are not
        defined in this class; presumably they come from
        ``StreamingLSHClustering`` -- TODO confirm.
        """
        predictedCluster = self.getClusterForDocument(stream)
        # Identity test: a falsy-but-valid cluster id must still be a hit.
        if predictedCluster is not None:
            self.clusters[predictedCluster].addDocument(
                stream, **self.stream_settings)
            return
        newCluster = StreamCluster(stream)
        newCluster.setSignatureUsingVectorPermutations(
            self.unitVector, self.vectorPermutations,
            self.phraseTextAndDimensionMap)
        for permutation in self.signaturePermutations:
            permutation.addDocument(newCluster)
        self.clusters[newCluster.clusterId] = newCluster