コード例 #1
0
 def generateExperimentData(self, with_vanilla_lsh):
     """Cluster the experts' tweets using either vanilla or modified LSH.

     The chosen configuration is written into the shared
     ``experts_twitter_stream_settings`` mapping (the changes persist after
     this call), the start time is stored in the global ``previousTime``,
     and the clustering is run over the experts' tweet iterator requested
     with ``expertsDataStartTime=datetime(2011, 3, 19)`` and
     ``expertsDataEndTime=datetime(2011, 3, 27)``.

     Args:
         with_vanilla_lsh: When truthy, select the vanilla LSH type and
             override the decay coefficients, cluster filtering method,
             signature type, dimensions and dimension-update method.
             Otherwise only the modified LSH type is selected.
     """
     global previousTime
     settings = experts_twitter_stream_settings
     if not with_vanilla_lsh:
         settings['lsh_type'] = JustifyNotUsingVanillaLSH.with_modified_lsh
     else:
         # Overrides applied only for the vanilla LSH run, in this order.
         vanilla_overrides = (
             ('lsh_type', JustifyNotUsingVanillaLSH.with_vanilla_lsh),
             ('phrase_decay_coefficient', 1.0),
             ('stream_decay_coefficient', 1.0),
             ('stream_cluster_decay_coefficient', 1.0),
             ('cluster_filtering_method', emptyClusterFilteringMethod),
             ('signature_type', 'signature_type_list'),
             ('dimensions', getLargestPrimeLesserThan(10000)),
             ('update_dimensions_method', emptyUpdateDimensionsMethod),
         )
         for key, value in vanilla_overrides:
             settings[key] = value
     settings['cluster_analysis_method'] = (
         JustifyNotUsingVanillaLSH.modifiedClusterAnalysisMethod)
     # Stored in the module-level global just before clustering starts.
     previousTime = time.time()
     clusterer = HDStreaminClustering(**settings)
     expert_tweets = TwitterIterators.iterateTweetsFromExperts(
         expertsDataStartTime=datetime(2011, 3, 19),
         expertsDataEndTime=datetime(2011, 3, 27))
     clusterer.cluster(expert_tweets)
コード例 #2
0
 def generateExperimentData2(self, fixedType):
     """Run the dimension-estimation experiment for one performance type.

     Writes its configuration into the shared
     ``experts_twitter_stream_settings`` mapping and stores the start time
     of every clustering run in the global ``previousTime``.

     Args:
         fixedType: When truthy, use ``first_n_dimension``, set the three
             decay coefficients to 1.0 and run one clustering per
             dimension value derived from 10**4, 2*10**4, ..., 20*10**4.
             Otherwise use ``top_n_dimension`` and run a single
             clustering with the dimensions already present in the
             settings.
     """
     global previousTime
     settings = experts_twitter_stream_settings
     settings['cluster_analysis_method'] = (
         JustifyDimensionsEstimation.modifiedClusterAnalysisMethod2)

     if not fixedType:
         settings['dimensions_performance_type'] = (
             JustifyDimensionsEstimation.top_n_dimension)
         previousTime = time.time()
         clusterer = HDStreaminClustering(**settings)
         clusterer.cluster(
             TwitterIterators.iterateTweetsFromExperts(
                 expertsDataStartTime=datetime(2011, 3, 19),
                 expertsDataEndTime=datetime(2011, 3, 20, 5)))
         return

     settings['dimensions_performance_type'] = (
         JustifyDimensionsEstimation.first_n_dimension)
     # Disabled override kept from the original:
     # experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
     for decay_key in ('phrase_decay_coefficient',
                       'stream_decay_coefficient',
                       'stream_cluster_decay_coefficient'):
         settings[decay_key] = 1.0

     # Upper bounds 10**4 .. 20*10**4 in steps of 10**4 (20 runs).
     for multiple in range(1, 21):
         settings['dimensions'] = getLargestPrimeLesserThan(multiple * 10**4)
         previousTime = time.time()
         clusterer = HDStreaminClustering(**settings)
         clusterer.cluster(
             TwitterIterators.iterateTweetsFromExperts(
                 expertsDataStartTime=datetime(2011, 3, 19),
                 expertsDataEndTime=datetime(2011, 3, 20, 5)))
コード例 #3
0
 def generateExperimentData2(self, fixedType):
     """Generate dimension-estimation data for a fixed or top-n dimension type.

     The shared ``experts_twitter_stream_settings`` mapping is modified in
     place. Before each clustering run the current time is written to the
     global ``previousTime``.

     Args:
         fixedType: Truthy selects ``first_n_dimension`` with all three
             decay coefficients set to 1.0 and sweeps the ``dimensions``
             setting over ``getLargestPrimeLesserThan(d)`` for
             ``d`` in ``range(10**4, 21 * 10**4, 10**4)``. Falsy selects
             ``top_n_dimension`` and performs a single run.
     """

     def cluster_once():
         # One timed clustering pass with whatever the settings hold now.
         global previousTime
         previousTime = time.time()
         stream_clustering = HDStreaminClustering(**experts_twitter_stream_settings)
         stream_clustering.cluster(
             TwitterIterators.iterateTweetsFromExperts(
                 expertsDataStartTime=datetime(2011, 3, 19),
                 expertsDataEndTime=datetime(2011, 3, 20, 5),
             )
         )

     estimation = JustifyDimensionsEstimation
     experts_twitter_stream_settings["cluster_analysis_method"] = estimation.modifiedClusterAnalysisMethod2
     if fixedType:
         experts_twitter_stream_settings["dimensions_performance_type"] = estimation.first_n_dimension
         # Disabled override kept from the original:
         # experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
         experts_twitter_stream_settings["phrase_decay_coefficient"] = 1.0
         experts_twitter_stream_settings["stream_decay_coefficient"] = 1.0
         experts_twitter_stream_settings["stream_cluster_decay_coefficient"] = 1.0
         step = 10 ** 4
         for upper_bound in range(step, 21 * step, step):
             experts_twitter_stream_settings["dimensions"] = getLargestPrimeLesserThan(upper_bound)
             cluster_once()
     else:
         experts_twitter_stream_settings["dimensions_performance_type"] = estimation.top_n_dimension
         cluster_once()
コード例 #4
0
 def generateExperimentData(self):
     """Sweep the ``dimensions`` setting and cluster the experts' tweets each time.

     Writes ``first_n_dimension``, ``emptyUpdateDimensionsMethod`` and
     ``JustifyDimensionsEstimation.modifiedClusterAnalysisMethod`` into the
     shared ``experts_twitter_stream_settings`` mapping, then runs one
     clustering per value of
     ``getLargestPrimeLesserThan(d)`` for ``d`` in
     ``range(10**4, 21 * 10**4, 10**4)``. The start time of each run is
     stored in the global ``previousTime``.

     A run that raises does not abort the sweep: the exception is logged
     with its traceback and the next dimension value is tried.
     """
     import logging

     global previousTime
     experts_twitter_stream_settings['dimensions_performance_type'] = JustifyDimensionsEstimation.first_n_dimension
     experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
     experts_twitter_stream_settings['cluster_analysis_method'] = JustifyDimensionsEstimation.modifiedClusterAnalysisMethod
     for dimensions in range(10**4, 21 * 10**4, 10**4):
         experts_twitter_stream_settings['dimensions'] = getLargestPrimeLesserThan(dimensions)
         previousTime = time.time()
         try:
             HDStreaminClustering(**experts_twitter_stream_settings).cluster(TwitterIterators.iterateTweetsFromExperts())
         except Exception:
             # Still best-effort (the sweep continues), but no longer silent:
             # a failed run previously left no trace of why data was missing.
             logging.getLogger(__name__).exception(
                 'Clustering run failed for dimensions upper bound %s; continuing.', dimensions)
コード例 #5
0
 def generateExperimentData(self):
     """Run the fixed-dimension experiment over a range of dimension sizes.

     Configures the shared ``experts_twitter_stream_settings`` mapping with
     ``first_n_dimension``, ``emptyUpdateDimensionsMethod`` and
     ``JustifyDimensionsEstimation.modifiedClusterAnalysisMethod``. For each
     ``d`` in ``range(10**4, 21 * 10**4, 10**4)`` it sets ``dimensions`` to
     ``getLargestPrimeLesserThan(d)``, stores the current time in the
     global ``previousTime`` and clusters the experts' tweets.

     Exceptions raised by a single run are logged (with traceback) and the
     loop moves on to the next dimension value instead of stopping.
     """
     import logging

     global previousTime
     experts_twitter_stream_settings['dimensions_performance_type'] = JustifyDimensionsEstimation.first_n_dimension
     experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
     experts_twitter_stream_settings['cluster_analysis_method'] = JustifyDimensionsEstimation.modifiedClusterAnalysisMethod
     for dimensions in range(10**4, 21 * 10**4, 10**4):
         experts_twitter_stream_settings['dimensions'] = getLargestPrimeLesserThan(dimensions)
         previousTime = time.time()
         try:
             HDStreaminClustering(**experts_twitter_stream_settings).cluster(TwitterIterators.iterateTweetsFromExperts())
         except Exception:
             # Keep going as before, but record the failure rather than
             # discarding it, so missing results can be explained later.
             logging.getLogger(__name__).exception(
                 'Clustering run failed for dimensions upper bound %s; continuing.', dimensions)
コード例 #6
0
 def generateExperimentData(self, with_vanilla_lsh):
     """Generate experiment data for the vanilla-versus-modified LSH comparison.

     Updates the shared ``experts_twitter_stream_settings`` mapping in
     place, records the start time in the global ``previousTime`` and
     clusters the experts' tweets requested with
     ``expertsDataStartTime=datetime(2011, 3, 19)`` and
     ``expertsDataEndTime=datetime(2011, 3, 27)``.

     Args:
         with_vanilla_lsh: Truthy selects ``with_vanilla_lsh`` and applies
             the vanilla-specific overrides below; falsy selects
             ``with_modified_lsh`` and leaves the other settings as they
             are.
     """
     global previousTime
     experiment = JustifyNotUsingVanillaLSH
     if with_vanilla_lsh:
         experts_twitter_stream_settings['lsh_type'] = experiment.with_vanilla_lsh
         # All three decay coefficients are set to 1.0 for the vanilla run.
         for coefficient_key in ('phrase_decay_coefficient',
                                 'stream_decay_coefficient',
                                 'stream_cluster_decay_coefficient'):
             experts_twitter_stream_settings[coefficient_key] = 1.0
         experts_twitter_stream_settings['cluster_filtering_method'] = emptyClusterFilteringMethod
         experts_twitter_stream_settings['signature_type'] = 'signature_type_list'
         experts_twitter_stream_settings['dimensions'] = getLargestPrimeLesserThan(10000)
         experts_twitter_stream_settings['update_dimensions_method'] = emptyUpdateDimensionsMethod
     else:
         experts_twitter_stream_settings['lsh_type'] = experiment.with_modified_lsh
     experts_twitter_stream_settings['cluster_analysis_method'] = experiment.modifiedClusterAnalysisMethod
     previousTime = time.time()
     stream_clustering = HDStreaminClustering(**experts_twitter_stream_settings)
     stream_clustering.cluster(
         TwitterIterators.iterateTweetsFromExperts(
             expertsDataStartTime=datetime(2011, 3, 19),
             expertsDataEndTime=datetime(2011, 3, 27)))
コード例 #7
0
# Overrides for the Houston stream.
# NOTE(review): `houston_twitter_stream_settings` is created outside this
# excerpt - confirm which base values it starts from.

# Plot styling used for this stream.
houston_twitter_stream_settings.plot_color = '#CC00FF'
houston_twitter_stream_settings.plot_label = 'Houston stream'
# Time-based values are expressed as multiples of `time_unit_in_seconds`.
houston_twitter_stream_settings.dimension_update_frequency_in_seconds=time_unit_in_seconds*2
houston_twitter_stream_settings.max_phrase_inactivity_time_in_seconds=time_unit_in_seconds*126
houston_twitter_stream_settings.cluster_filter_threshold = 5
houston_twitter_stream_settings.cluster_inactivity_time_in_seconds=time_unit_in_seconds*3
# Folder paths are formed by prefixing `twitterDataFolder` directly, with no
# separator added here - presumably it already ends with '/'; verify.
houston_twitter_stream_settings.twitter_users_tweets_folder='%shouston/'%twitterDataFolder
houston_twitter_stream_settings.lsh_clusters_folder='%slsh_crowds/houston_stream/clusters/'%twitterDataFolder
houston_twitter_stream_settings.parameter_estimation_folder='%slsh_crowds/houston_stream/parameter_estimation/'%twitterDataFolder

# Default ("un-optimized") settings for the experts stream: start from a copy
# of the generic `twitter_stream_settings` values and override the fields below.
default_experts_twitter_stream_settings = Settings()
default_experts_twitter_stream_settings.update(twitter_stream_settings)

# Identity and plot styling.
default_experts_twitter_stream_settings.stream_id = 'default_experts_twitter_stream'
default_experts_twitter_stream_settings.plot_label = 'Un-optimized experts stream'
default_experts_twitter_stream_settings.plot_color = '#0085F2'

# Signature size and cluster filtering threshold.
default_experts_twitter_stream_settings.dimensions = 199999
default_experts_twitter_stream_settings.cluster_filter_threshold = 2

# Time-based values, expressed as multiples of `time_unit_in_seconds`.
default_experts_twitter_stream_settings.dimension_update_frequency_in_seconds = time_unit_in_seconds * 5
# Disabled override kept from the original:
# default_experts_twitter_stream_settings.clustering_frequency_in_seconds = time_unit_in_seconds * 24
default_experts_twitter_stream_settings.cluster_analysis_frequency_in_seconds = time_unit_in_seconds * 12 * 4
default_experts_twitter_stream_settings.max_phrase_inactivity_time_in_seconds = time_unit_in_seconds * 12
default_experts_twitter_stream_settings.cluster_inactivity_time_in_seconds = time_unit_in_seconds * 120

# Input and output locations, all prefixed with `twitterDataFolder`.
default_experts_twitter_stream_settings.twitter_users_tweets_folder = '%susers/tweets/' % twitterDataFolder
default_experts_twitter_stream_settings.users_to_crawl_file = '%susers/crawl/users_to_crawl' % twitterDataFolder
default_experts_twitter_stream_settings.lsh_clusters_folder = '%slsh_crowds/default_experts_stream/clusters/' % twitterDataFolder
default_experts_twitter_stream_settings.parameter_estimation_folder = '%slsh_crowds/default_experts_stream/parameter_estimation/' % twitterDataFolder

if __name__ == '__main__':
    # Script entry point: print the value returned by
    # getLargestPrimeLesserThan(200000).
    # The parenthesised single-argument form prints the same text on
    # Python 2 and is also valid on Python 3, where the bare `print`
    # statement is a SyntaxError.
    print(getLargestPrimeLesserThan(200000))