def generateExperimentData(self, with_vanilla_lsh):
    """Cluster the experts tweet stream (2011-03-19 .. 2011-03-27) with
    either vanilla LSH or the modified LSH configuration.

    The run start time is recorded in the module-global ``previousTime``.

    :param with_vanilla_lsh: truthy selects the vanilla-LSH setup (decay,
        cluster filtering and dimension updates disabled); falsy selects
        the modified LSH with its modified cluster-analysis hook.
    """
    global previousTime
    if with_vanilla_lsh:
        # Vanilla-LSH baseline: coefficients of 1.0 disable decay, and the
        # empty methods disable filtering and dynamic dimension updates.
        overrides = [
            ('lsh_type', JustifyNotUsingVanillaLSH.with_vanilla_lsh),
            ('phrase_decay_coefficient', 1.0),
            ('stream_decay_coefficient', 1.0),
            ('stream_cluster_decay_coefficient', 1.0),
            ('cluster_filtering_method', emptyClusterFilteringMethod),
            ('signature_type', 'signature_type_list'),
            ('dimensions', getLargestPrimeLesserThan(10000)),
            ('update_dimensions_method', emptyUpdateDimensionsMethod),
        ]
    else:
        # Modified LSH: keep the default settings, swap in the modified
        # analysis method.
        overrides = [
            ('lsh_type', JustifyNotUsingVanillaLSH.with_modified_lsh),
            ('cluster_analysis_method',
             JustifyNotUsingVanillaLSH.modifiedClusterAnalysisMethod),
        ]
    for key, value in overrides:
        experts_twitter_stream_settings[key] = value
    previousTime = time.time()
    tweet_iterator = TwitterIterators.iterateTweetsFromExperts(
        expertsDataStartTime=datetime(2011, 3, 19),
        expertsDataEndTime=datetime(2011, 3, 27))
    HDStreaminClustering(**experts_twitter_stream_settings).cluster(tweet_iterator)
def generateExperimentData2(self, fixedType):
    """Run the dimension-estimation experiment over the experts stream.

    The run start time is recorded in the module-global ``previousTime``
    before each clustering pass.

    :param fixedType: when truthy, sweep 'first n' dimension counts from
        10**4 up to 2*10**5 in 10**4 steps (decay disabled); when falsy,
        run a single pass in the 'top n' dimension mode.
    """
    global previousTime
    settings = experts_twitter_stream_settings
    settings['cluster_analysis_method'] = \
        JustifyDimensionsEstimation.modifiedClusterAnalysisMethod2
    # All passes cover the same tweet window.
    window_start = datetime(2011, 3, 19)
    window_end = datetime(2011, 3, 20, 5)
    if fixedType:
        settings['dimensions_performance_type'] = \
            JustifyDimensionsEstimation.first_n_dimension
        # Coefficients of 1.0 disable decay so the sweep isolates the
        # effect of the dimension count.
        settings['phrase_decay_coefficient'] = 1.0
        settings['stream_decay_coefficient'] = 1.0
        settings['stream_cluster_decay_coefficient'] = 1.0
        for dimensions in range(10 ** 4, 21 * 10 ** 4, 10 ** 4):
            # Use the largest prime below the target dimension count.
            settings['dimensions'] = getLargestPrimeLesserThan(dimensions)
            previousTime = time.time()
            HDStreaminClustering(**settings).cluster(
                TwitterIterators.iterateTweetsFromExperts(
                    expertsDataStartTime=window_start,
                    expertsDataEndTime=window_end))
    else:
        settings['dimensions_performance_type'] = \
            JustifyDimensionsEstimation.top_n_dimension
        previousTime = time.time()
        HDStreaminClustering(**settings).cluster(
            TwitterIterators.iterateTweetsFromExperts(
                expertsDataStartTime=window_start,
                expertsDataEndTime=window_end))
def generateExperimentData2(self, fixedType):
    # NOTE(review): generateExperimentData2 is defined more than once in
    # this file; in Python the later definition silently replaces the
    # earlier one — confirm which version is intended to survive.
    """Run the dimension-estimation experiment over the experts stream.

    :param fixedType: when truthy, sweep 'first n' dimension counts from
        10**4 up to 2*10**5 in 10**4 steps with decay disabled; when
        falsy, run a single pass in the 'top n' dimension mode. The run
        start time is stored in the module-global ``previousTime``.
    """
    global previousTime
    experts_twitter_stream_settings[
        "cluster_analysis_method"
    ] = JustifyDimensionsEstimation.modifiedClusterAnalysisMethod2
    if fixedType:
        experts_twitter_stream_settings[
            "dimensions_performance_type"
        ] = JustifyDimensionsEstimation.first_n_dimension
        # experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
        # Coefficients of 1.0 disable phrase/stream/cluster decay for
        # this sweep.
        experts_twitter_stream_settings["phrase_decay_coefficient"] = 1.0
        experts_twitter_stream_settings["stream_decay_coefficient"] = 1.0
        experts_twitter_stream_settings["stream_cluster_decay_coefficient"] = 1.0
        for dimensions in range(10 ** 4, 21 * 10 ** 4, 10 ** 4):
            # Use the largest prime below the target dimension count.
            experts_twitter_stream_settings["dimensions"] = getLargestPrimeLesserThan(dimensions)
            previousTime = time.time()
            HDStreaminClustering(**experts_twitter_stream_settings).cluster(
                TwitterIterators.iterateTweetsFromExperts(
                    expertsDataStartTime=datetime(2011, 3, 19),
                    expertsDataEndTime=datetime(2011, 3, 20, 5)
                )
            )
    else:
        experts_twitter_stream_settings["dimensions_performance_type"] = JustifyDimensionsEstimation.top_n_dimension
        previousTime = time.time()
        HDStreaminClustering(**experts_twitter_stream_settings).cluster(
            TwitterIterators.iterateTweetsFromExperts(
                expertsDataStartTime=datetime(2011, 3, 19),
                expertsDataEndTime=datetime(2011, 3, 20, 5)
            )
        )
def generateExperimentData(self):
    """Sweep fixed ('first n') dimension counts and cluster the full
    experts stream once per setting.

    The run start time for each pass is stored in the module-global
    ``previousTime``. A failure at one dimension setting is reported and
    the sweep continues with the next setting (best-effort), instead of
    being silently swallowed as before.
    """
    global previousTime
    experts_twitter_stream_settings['dimensions_performance_type'] = \
        JustifyDimensionsEstimation.first_n_dimension
    # Dynamic dimension updates are disabled so the fixed count holds.
    experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
    experts_twitter_stream_settings['cluster_analysis_method'] = \
        JustifyDimensionsEstimation.modifiedClusterAnalysisMethod
    for dimensions in range(10 ** 4, 21 * 10 ** 4, 10 ** 4):
        # Use the largest prime below the target dimension count.
        experts_twitter_stream_settings['dimensions'] = getLargestPrimeLesserThan(dimensions)
        previousTime = time.time()
        try:
            HDStreaminClustering(**experts_twitter_stream_settings).cluster(
                TwitterIterators.iterateTweetsFromExperts())
        except Exception as e:
            # Best-effort: keep sweeping the remaining dimension settings,
            # but surface the failure instead of discarding it silently.
            print('Clustering failed for dimensions=%s: %s' % (
                experts_twitter_stream_settings['dimensions'], e))
def generateExperimentData(self, with_vanilla_lsh):
    # NOTE(review): generateExperimentData is defined more than once in
    # this file; in Python the later definition silently replaces the
    # earlier one — confirm which version is intended to survive.
    """Cluster the experts tweet stream (2011-03-19 .. 2011-03-27) with
    either vanilla LSH or the modified LSH configuration.

    :param with_vanilla_lsh: truthy selects the vanilla-LSH setup (decay,
        cluster filtering and dimension updates disabled); falsy selects
        the modified LSH with its modified cluster-analysis hook. The run
        start time is stored in the module-global ``previousTime``.
    """
    global previousTime
    if with_vanilla_lsh:
        experts_twitter_stream_settings['lsh_type'] = JustifyNotUsingVanillaLSH.with_vanilla_lsh
        # Coefficients of 1.0 disable phrase/stream/cluster decay.
        experts_twitter_stream_settings['phrase_decay_coefficient'] = 1.0;
        experts_twitter_stream_settings['stream_decay_coefficient'] = 1.0;
        experts_twitter_stream_settings['stream_cluster_decay_coefficient'] = 1.0;
        experts_twitter_stream_settings['cluster_filtering_method'] = emptyClusterFilteringMethod;
        experts_twitter_stream_settings['signature_type'] = 'signature_type_list'
        # Use the largest prime below 10000 as the dimension count.
        experts_twitter_stream_settings['dimensions'] = getLargestPrimeLesserThan(10000)
        experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
    else:
        experts_twitter_stream_settings['lsh_type'] = JustifyNotUsingVanillaLSH.with_modified_lsh
        experts_twitter_stream_settings['cluster_analysis_method'] = JustifyNotUsingVanillaLSH.modifiedClusterAnalysisMethod
    previousTime = time.time()
    HDStreaminClustering(**experts_twitter_stream_settings).cluster(
        TwitterIterators.iterateTweetsFromExperts(
            expertsDataStartTime=datetime(2011, 3, 19),
            expertsDataEndTime=datetime(2011, 3, 27)))
# Houston stream: plotting, timing and output-folder configuration.
houston_twitter_stream_settings.plot_color = '#CC00FF'
houston_twitter_stream_settings.plot_label = 'Houston stream'
houston_twitter_stream_settings.dimension_update_frequency_in_seconds = time_unit_in_seconds * 2
houston_twitter_stream_settings.max_phrase_inactivity_time_in_seconds = time_unit_in_seconds * 126
houston_twitter_stream_settings.cluster_filter_threshold = 5
houston_twitter_stream_settings.cluster_inactivity_time_in_seconds = time_unit_in_seconds * 3
houston_twitter_stream_settings.twitter_users_tweets_folder = '%shouston/' % twitterDataFolder
houston_twitter_stream_settings.lsh_clusters_folder = '%slsh_crowds/houston_stream/clusters/' % twitterDataFolder
houston_twitter_stream_settings.parameter_estimation_folder = '%slsh_crowds/houston_stream/parameter_estimation/' % twitterDataFolder
# Settings for expert specific streams with default values.
default_experts_twitter_stream_settings = Settings()
default_experts_twitter_stream_settings.update(twitter_stream_settings)
# 199999 dimensions — presumably the largest prime below 200000 (see the
# __main__ check at the bottom of this file); TODO confirm.
default_experts_twitter_stream_settings.dimensions = 199999
default_experts_twitter_stream_settings.stream_id = 'default_experts_twitter_stream'
default_experts_twitter_stream_settings.plot_color = '#0085F2'
default_experts_twitter_stream_settings.plot_label = 'Un-optimized experts stream'
default_experts_twitter_stream_settings.dimension_update_frequency_in_seconds = time_unit_in_seconds * 5
#default_experts_twitter_stream_settings.clustering_frequency_in_seconds=time_unit_in_seconds*24
default_experts_twitter_stream_settings.cluster_analysis_frequency_in_seconds = time_unit_in_seconds * 12 * 4
default_experts_twitter_stream_settings.max_phrase_inactivity_time_in_seconds = time_unit_in_seconds * 12
default_experts_twitter_stream_settings.cluster_filter_threshold = 2
default_experts_twitter_stream_settings.cluster_inactivity_time_in_seconds = time_unit_in_seconds * 120
default_experts_twitter_stream_settings.twitter_users_tweets_folder = '%susers/tweets/' % twitterDataFolder
# Remaining default-experts-stream paths.
default_experts_twitter_stream_settings.users_to_crawl_file = '%susers/crawl/users_to_crawl' % twitterDataFolder
default_experts_twitter_stream_settings.lsh_clusters_folder = '%slsh_crowds/default_experts_stream/clusters/' % twitterDataFolder
default_experts_twitter_stream_settings.parameter_estimation_folder = '%slsh_crowds/default_experts_stream/parameter_estimation/' % twitterDataFolder

if __name__ == '__main__':
    # Use the call form of print: with a single argument it behaves
    # identically under Python 2 (parenthesized expression) and Python 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(getLargestPrimeLesserThan(200000))