def __init__(self, **stream_settings):
    """Set up the clustering object from keyword stream settings.

    Required keys (a missing one raises ``KeyError``):
        dimension_update_frequency_in_seconds
        cluster_analysis_frequency_in_seconds
        cluster_filtering_frequency_in_seconds
        convert_data_to_message_method

    Optional keys:
        update_dimensions_method  (default: DataStreamMethods.updateDimensions)
        cluster_analysis_method   (default: DataStreamMethods.clusterAnalysisMethod)
        cluster_filtering_method  (default: DataStreamMethods.clusterFilteringMethod)
        combine_clusters_method   (default: None)

    All settings are also forwarded unchanged to the parent initialiser.
    """
    super(HDStreaminClustering, self).__init__(**stream_settings)
    self.stream_settings = stream_settings

    # Lookup tables, empty until messages are processed.
    self.phraseTextToPhraseObjectMap = {}
    self.streamIdToStreamObjectMap = {}

    # Mandatory intervals; read in this order so the first missing key
    # is the one reported.
    self.dimensionsUpdatingFrequency = stream_settings[
        'dimension_update_frequency_in_seconds']
    self.clustersAnalysisFrequency = stream_settings[
        'cluster_analysis_frequency_in_seconds']
    self.clustersFilteringFrequency = stream_settings[
        'cluster_filtering_frequency_in_seconds']

    # Each periodic task is a callable (caller-supplied or the
    # DataStreamMethods default) paired with its interval.
    # NOTE(review): FixedIntervalMethod presumably rate-limits the wrapped
    # callable to that interval -- confirm against its definition.
    dimensionsUpdater = stream_settings.get(
        'update_dimensions_method', DataStreamMethods.updateDimensions)
    self.updateDimensionsMethod = FixedIntervalMethod(
        dimensionsUpdater, self.dimensionsUpdatingFrequency)

    clusterAnalyser = stream_settings.get(
        'cluster_analysis_method', DataStreamMethods.clusterAnalysisMethod)
    self.clusterAnalysisMethod = FixedIntervalMethod(
        clusterAnalyser, self.clustersAnalysisFrequency)

    clusterFilter = stream_settings.get(
        'cluster_filtering_method', DataStreamMethods.clusterFilteringMethod)
    self.clusterFilteringMethod = FixedIntervalMethod(
        clusterFilter, self.clustersFilteringFrequency)

    # Optional hook; stays None when the caller does not provide one.
    self.combineClustersMethod = stream_settings.get(
        'combine_clusters_method', None)
    self.convertDataToMessageMethod = stream_settings[
        'convert_data_to_message_method']

    # Reset the class-level ordering state held on DataStreamMethods.
    DataStreamMethods.messageInOrderVariable = None
def run(self, dataIterator, estimationMethod,
        parameterSpecificDataCollectionMethod=None):
    """Feed every item of ``dataIterator`` through the estimation pipeline.

    Each item is converted to a message with
    ``self.convertDataToMessageMethod``. Messages for which
    ``CDA.messageInOrder(message.timeStamp)`` is falsy are skipped. For the
    remaining messages the optional collection hook is called, the phrase
    map is updated, and the interval-wrapped estimation method is invoked.

    Args:
        dataIterator: iterable of raw data items.
        estimationMethod: callable wrapped in a ``FixedIntervalMethod``
            with ``self.timeUnitInSeconds`` as its interval.
        parameterSpecificDataCollectionMethod: optional callable invoked as
            ``f(estimationObject=self, message=message)`` for each
            processed message; skipped when ``None``.
    """
    # NOTE(review): the original statements were collapsed onto one line, so
    # their nesting is inferred: the optional hook is treated as a one-line
    # ``if`` and the two calls after it run for every in-order message.
    # Confirm against the upstream layout.
    intervalEstimator = FixedIntervalMethod(
        estimationMethod, self.timeUnitInSeconds)
    for data in dataIterator:
        message = self.convertDataToMessageMethod(
            data, **self.stream_settings)
        if not CDA.messageInOrder(message.timeStamp):
            continue
        # Identity check: ``!= None`` would dispatch to a user-defined
        # ``__ne__`` on the hook object.
        if parameterSpecificDataCollectionMethod is not None:
            parameterSpecificDataCollectionMethod(
                estimationObject=self, message=message)
        UtilityMethods.updatePhraseTextToPhraseObject(
            message.vector, message.timeStamp,
            self.phraseTextToPhraseObjectMap, **self.stream_settings)
        intervalEstimator.call(
            message.timeStamp, estimationObject=self,
            currentMessageTime=message.timeStamp)