Python FileLog Exemples, FileLog Python Exemples

Exemple #1

0

Afficher le fichier

def log(message):
    """Helper method to modularize the format of log messages.

    Forwards ``message`` to ``FileLog.log`` together with ``_LOG_FILE``,
    a name defined outside this function.

    Keyword Arguments:
    message -- A string to log.
    """
    FileLog.log(_LOG_FILE, message)

Exemple #2

0

Afficher le fichier

Fichier : a_rate_increase.py Projet : chucheng/TwitterResearch

def log(message):
  """Helper method to modularize the format of log messages.

  Forwards ``message`` to ``FileLog.log`` together with ``_LOG_FILE``,
  a name defined outside this function.

  Keyword Arguments:
  message -- A string to log.
  """
  FileLog.log(_LOG_FILE, message)

Exemple #3

0

Afficher le fichier

Fichier : StreamFileSaverWrapper.py Projet : chucheng/EmhTwitterCrwaler

    def __init__(self, input_filter_words, verbose_log=False):
        """Prepare logging, the TSV output directory and the worker processes.

        Arguments:
        input_filter_words -- stored unchanged on ``self.filter_words``.
        verbose_log -- stored unchanged on ``self.verbose_log``.
        """
        # Logging is configured first because self.log() may be called below.
        FileLog.set_log_dir()
        self.log_filename = "StreamFileSaverWrapper.log"
        # NOTE(review): the original comment reads "suppress all debug log";
        # confirm the polarity of this flag where it is consumed.
        self.verbose_log = verbose_log

        # Directory for the TSV files; it must end with a path separator.
        self.data_path = "../data/tsv/"
        if not self.data_path.endswith(os.sep):
            self.data_path = self.data_path + os.sep
        # Create the directory on first use.
        if not os.path.exists(self.data_path):
            self.log("Creating dir: " + self.data_path)
            os.makedirs(self.data_path)

        self.filter_words = input_filter_words
        # Date string, e.g. '2011_03_09'
        self.today_str = StreamFileSaverWrapper.static_get_date_string()

        # --- Worker processes ---
        # Holder for obsolete worker processes; nothing is obsolete yet.
        self.obsolete_process_dict = None
        # Build the dictionary of worker processes, keep it, then start them.
        workers = self.build_new_worker_process_dict()
        self.current_process_dict = workers
        self.start_all_worker(workers)

Exemple #4

0

Afficher le fichier

Fichier : a_tweet_lifespan.py Projet : cmoghbel/TwitterResearch

def run():
    """Run the analysis pipeline from seed loading through graph drawing.

    Each step feeds the next: seeds -> total tweet counts -> 90% times,
    and seeds -> ground-truth rankings -> top news; both branches are then
    aggregated by hour and drawn.
    """
    FileLog.set_log_dir()

    seed_data = load_seeds()
    tweet_counts = find_total_tweet_count(seed_data)
    # The return value is not used here.
    # NOTE(review): presumably this updates tweet_counts in place - confirm.
    calc_90_percent_count(tweet_counts)
    ninety_pct_times = find_90_times(tweet_counts)

    gt_rankings = get_gt_rankings(seed_data)
    top_stories = get_top_news(gt_rankings, _SIZE_TOP_NEWS)

    hourly, hourly_top = aggr_by_hour(ninety_pct_times, top_stories)
    draw_graph(hourly, hourly_top)

Exemple #5

0

Afficher le fichier

Fichier : a_tweet_lifespan.py Projet : chucheng/TwitterResearch

def run():
  """Run the analysis pipeline from seed loading through graph drawing.

  Each step feeds the next: seeds -> total tweet counts -> 90% times,
  and seeds -> ground-truth rankings -> top news; both branches are then
  aggregated by hour and drawn.
  """
  FileLog.set_log_dir()

  seed_data = load_seeds()
  tweet_counts = find_total_tweet_count(seed_data)
  # The return value is not used here.
  # NOTE(review): presumably this updates tweet_counts in place - confirm.
  calc_90_percent_count(tweet_counts)
  ninety_pct_times = find_90_times(tweet_counts)

  gt_rankings = get_gt_rankings(seed_data)
  top_stories = get_top_news(gt_rankings, _SIZE_TOP_NEWS)

  hourly, hourly_top = aggr_by_hour(ninety_pct_times, top_stories)
  draw_graph(hourly, hourly_top)

Exemple #6

0

Afficher le fichier

Fichier : StreamFileSaverWrapper.py Projet : chucheng/EmhTwitterCrwaler

    def static_status_to_tweet_tsv(status, filter_words):
        """Extract data related to Tweets Table, and return a tab splitted string.

        Arguments:
        status -- object exposing id_str, user.id, text, created_at and
                  retweet_count; retweeted_status, source and source_url
                  are read only when present.
        filter_words -- written to the filter column via str().

        Returns:
        line -- a tab splitted string terminated by a newline
        None -- some exception happened while building the line; it is
                logged to "FileSaver.log" together with its traceback
        """
        retweeted = False
        origin_user_id = ''
        origin_tweet_id = ''
        source = ''
        source_url = ''

        # If it is a retweet, record the original author and tweet.
        if hasattr(status, 'retweeted_status'):
            origin_user_id = str(status.retweeted_status.user.id)
            origin_tweet_id = str(status.retweeted_status.id)
            retweeted = True

        if hasattr(status, 'source'):
            source = status.source

        if hasattr(status, 'source_url'):
            source_url = status.source_url

        # A count reported as the string '100+' is replaced by 101 so that
        # int() below succeeds. This deliberately keeps the original side
        # effect of overwriting the attribute on the caller's object.
        if str(status.retweet_count) == '100+':
            status.retweet_count = 101

        try:
            columns = [
                str(status.id_str),  # tweet_id
                str(status.user.id),  # user_id
                StreamFileSaverWrapper.ensure_escape_for_mysql(
                    status.text.encode('utf8')),  # content
                str(status.created_at),  # created_at
                str(retweeted),  # retweeted
                str(int(status.retweet_count)),  # retweeted_count
                str(origin_user_id),  # origin_user_id
                str(origin_tweet_id),  # origin_tweet_id
                str(source.encode('utf8')),  # source
                str(source_url),  # source_url
                str(filter_words),  # filter
                datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),  # insert_timestamp
                # leave it blank: HASH
            ]
            return "\t".join(columns) + "\n"
        except Exception as e:
            # sys.exc_info()[2] is the supported way to obtain the current
            # traceback; sys.exc_traceback is deprecated and not thread-safe.
            FileLog.log("FileSaver.log", "FileSaver(pid:{0}): "
                        .format(str(os.getpid())) + str(e),
                        exception_tb=sys.exc_info()[2])
        return None

Exemple #7

0

Afficher le fichier

Fichier : StreamingCrawler.py Projet : chucheng/EmhTwitterCrwaler

 def __init__(self, keywords, username, password):
     """Set up logging, the listener and the authenticated stream.

     Arguments:
     keywords -- filter keywords; stored on ``self.key`` and handed to
                 ``CrawlerListener``.
     username, password -- credentials passed to ``BasicAuthHandler``.
     """
     self.pid = str(os.getpid())

     # --- Log ---
     self.log_file = "StreamingCrawler.log"
     # Check the log dir, and create it if necessary.
     FileLog.set_log_dir()

     # --- Run ---
     self.key = keywords  # keyword for filtering
     self.listener = CrawlerListener(keywords)
     pid_tag = "(" + self.pid + ") "
     self.log(pid_tag +
              "Start monitoring streaming... keywords: " + str(keywords))

     credentials = BasicAuthHandler(username, password)
     # No timeout and the largest possible retry count.
     # (The original carried a disabled note here: retry_time = 10.0)
     self.stream = Stream(
         credentials,
         self.listener,
         timeout=None,
         retry_count=sys.maxint,
     )

Exemple #8

0

Afficher le fichier

def run():
    """Read the 100-bin popularity data file, log both series and plot them.

    Each line of the input is split on tabs; field 1 (0-based) supplies the
    x value and field 4, multiplied by 100.0, supplies the y value.
    """
    FileLog.set_log_dir()

    x_values = []
    y_values = []

    bins_path = _DATA_DIR + 'popularity.graph.data.100bins'
    with open(bins_path) as in_file:
        for row in in_file:
            fields = row.split('\t')
            x_value = float(fields[1])
            y_value = float(fields[4]) * 100.0
            x_values.append(x_value)
            y_values.append(y_value)

    log('x: %s' % x_values)
    log('y: %s' % y_values)

    draw_graph(x_values, y_values)

Exemple #9

0

Afficher le fichier

Fichier : a_tweet_popularity.py Projet : chucheng/TwitterResearch

def run():
  """Read the 100-bin popularity data file, log both series and plot them.

  Each line of the input is split on tabs; field 1 (0-based) supplies the
  x value and field 4, multiplied by 100.0, supplies the y value.
  """
  FileLog.set_log_dir()

  x_values = []
  y_values = []

  bins_path = _DATA_DIR + 'popularity.graph.data.100bins'
  with open(bins_path) as in_file:
    for row in in_file:
      fields = row.split('\t')
      x_value = float(fields[1])
      y_value = float(fields[4]) * 100.0
      x_values.append(x_value)
      y_values.append(y_value)

  log('x: %s' % x_values)
  log('y: %s' % y_values)

  draw_graph(x_values, y_values)

Exemple #10

0

Afficher le fichier

Fichier : StreamingCrawler.py Projet : chucheng/EmhTwitterCrwaler

    def __init__(self, filter):
        """Initialise listener state, the file saver and the per-process log.

        Arguments:
        filter -- indexable collection of filter strings; element 0 is given
                  to the file saver and the whole value is kept on
                  ``self.filter``. (The name shadows the builtin but is part
                  of the existing signature.)
        """
        # Setting this to False will stop a listener.
        self.running = True
        # True while data is being processed.
        self.on_data_running = False
        self.pid = str(os.getpid())
        # Initial log file; replaced by a per-process name further down.
        self.log_file = "StreamingCrawler.log"

        # Database module: a DBConnection hookup was left disabled here, so
        # only the "not supported" message is logged.
        if gSave_to_DB:
            self.log("not support: storing to DB")

        # FileSaver, e.g. for ['http nyti ms'] it receives 'http nyti ms'.
        first_filter = filter[0]
        self.fileconn = StreamFileSaverWrapper.StreamFileSaverWrapper(first_filter)

        StreamListener.__init__(self)
        self.filter = filter

        timestamp = datetime.datetime.now().strftime('%m%d%Y_%H%M%S')
        pid_part = "pid_{0}".format(self.pid)
        self.log_file = "CrawlerListener." + timestamp + "." + pid_part + ".log"
        # Check the log dir, and create it if necessary.
        FileLog.set_log_dir()

Exemple #11

0

Afficher le fichier

Fichier : a_source_device.py Projet : chucheng/TwitterResearch

def run():
  """Main logic for this analysis.

  When ``_REGENERATE_DATA`` is truthy, device counts are computed, sorted
  and written out once per delta. When ``_REDRAW_GRAPH`` is truthy, the
  data for each delta is loaded and a graph is drawn. Both phases iterate
  over ``[sys.maxint] + _DELTAS``.
  """
  FileLog.set_log_dir()
  Util.ensure_dir_exist(_OUTPUT_DIR)

  if _REGENERATE_DATA:
    deltas = find_deltas()
    cache = Util.load_cache()
    seeds = Util.load_seeds()

    # Find top news.
    # (A param_str built from _SIZE_TOP_NEWS used to be assigned here; it was
    # overwritten inside the loop before ever being read, so it was removed.)
    gts = ground_truths.get_gt_rankings(seeds, DataSet.ALL)
    top_news = ground_truths.find_target_news(gts, _SIZE_TOP_NEWS)

    # Do analysis for all delta, including sys.maxint to do analysis with
    # no delta.
    for delta in [sys.maxint] + _DELTAS:
      param_str = _get_param_str(delta)

      (all_counts, original_counts,
       retweet_counts, top_counts) = find_device_counts(delta, deltas,
                                                        top_news, cache)

      (sorted_all_counts, sorted_original_counts,
       sorted_retweet_counts, sorted_top_counts) = sort_data(all_counts,
                                                             original_counts,
                                                             retweet_counts,
                                                             top_counts)

      output_data(sorted_all_counts, sorted_original_counts,
                  sorted_retweet_counts, sorted_top_counts, param_str)

  if _REDRAW_GRAPH:
    for delta in [sys.maxint] + _DELTAS:
      param_str = _get_param_str(delta)

      (top, original_dict, retweet_dict) = load_data(param_str)
      log('Drawing graph for delta %s...' % delta)
      draw_graph(top, original_dict, retweet_dict, param_str)

  log('Analysis complete.')

Exemple #12

0

Afficher le fichier

Fichier : StreamFileSaverWrapper.py Projet : chucheng/EmhTwitterCrwaler

    def static_status_to_user_tsv(status_user):
        """Build one tab-separated, newline-terminated row describing a user.

        Arguments:
        status_user -- object exposing id_str, screen_name, statuses_count,
                       created_at, followers_count, friends_count and
                       listed_count; time_zone, utc_offset and location are
                       used only when present and not None.

        Returns:
        line -- the tab-separated row followed by a newline
        None -- some exception happened while building the row; it is
                logged to "FileSaver.log" together with its traceback
        """
        # Optional fields default to the empty string.
        location = ""
        utc_offset = ""
        time_zone = ""

        if hasattr(status_user, 'time_zone') and status_user.time_zone is not None:
            time_zone = status_user.time_zone

        if hasattr(status_user, 'utc_offset') and status_user.utc_offset is not None:
            utc_offset = status_user.utc_offset

        if hasattr(status_user, 'location') and status_user.location is not None:
            location = status_user.location.encode('utf8')

        try:
            columns = [
                str(status_user.id_str),  # user_id
                StreamFileSaverWrapper.ensure_escape_for_mysql(
                    str(status_user.screen_name)),  # screen_id
                str(status_user.statuses_count),  # tweets_count
                str(status_user.created_at),  # created_at
                str(status_user.followers_count),  # followers_count
                str(status_user.friends_count),  # friends_count
                StreamFileSaverWrapper.ensure_escape_for_mysql(
                    str(location)),  # location
                str(status_user.listed_count),  # listed_count
                str(time_zone),  # time_zone
                str(utc_offset),  # utc_offset
            ]
            return "\t".join(columns) + "\n"
        except Exception as e:
            # sys.exc_info()[2] is the supported way to obtain the current
            # traceback; sys.exc_traceback is deprecated and not thread-safe.
            FileLog.log("FileSaver.log", "FileSaver(pid:{0}): "
                        .format(str(os.getpid())) + str(e),
                        exception_tb=sys.exc_info()[2])
        return None

Exemple #13

0

Afficher le fichier

Fichier : a_source_device.py Projet : cmoghbel/TwitterResearch

def run():
    """Main logic for this analysis.

    When ``_REGENERATE_DATA`` is truthy, device counts are computed, sorted
    and written out once per delta. When ``_REDRAW_GRAPH`` is truthy, the
    data for each delta is loaded and a graph is drawn. Both phases iterate
    over ``[sys.maxint] + _DELTAS``.
    """
    FileLog.set_log_dir()
    Util.ensure_dir_exist(_OUTPUT_DIR)

    if _REGENERATE_DATA:
        deltas = find_deltas()
        cache = Util.load_cache()
        seeds = Util.load_seeds()

        # Find top news.
        # (A param_str built from _SIZE_TOP_NEWS used to be assigned here; it
        # was overwritten inside the loop before ever being read, so it was
        # removed.)
        gts = ground_truths.get_gt_rankings(seeds, DataSet.ALL)
        top_news = ground_truths.find_target_news(gts, _SIZE_TOP_NEWS)

        # Do analysis for all delta, including sys.maxint to do analysis
        # with no delta.
        for delta in [sys.maxint] + _DELTAS:
            param_str = _get_param_str(delta)

            (all_counts, original_counts, retweet_counts,
             top_counts) = find_device_counts(delta, deltas, top_news, cache)

            (sorted_all_counts, sorted_original_counts, sorted_retweet_counts,
             sorted_top_counts) = sort_data(all_counts, original_counts,
                                            retweet_counts, top_counts)

            output_data(sorted_all_counts, sorted_original_counts,
                        sorted_retweet_counts, sorted_top_counts, param_str)

    if _REDRAW_GRAPH:
        for delta in [sys.maxint] + _DELTAS:
            param_str = _get_param_str(delta)

            (top, original_dict, retweet_dict) = load_data(param_str)
            log('Drawing graph for delta %s...' % delta)
            draw_graph(top, original_dict, retweet_dict, param_str)

    log('Analysis complete.')

Exemple #14

0

Afficher le fichier

Fichier : aFolkWisdom.py Projet : cmoghbel/TwitterResearch

def run():
    """Contains the main logic for this analysis."""
    FileLog.set_log_dir()

    seeds = Util.load_seeds()
    for category in _CATEGORIES:
        log('Preforming analysis for category: %s' % category)
        size_top_news = _SIZE_TOP_NEWS
        if category:
            size_top_news = .10

        data_set = DataSet.TESTING
        retweets = set()
        if _SWITCHED:
            data_set = DataSet.TRAINING
        if _EXCLUDE_RETWEETS:
            retweets = ground_truths.find_retweets(_TESTING_SET_MONTHS)
        log('Num retweets to exclude: %s' % len(retweets))
        gt_rankings = ground_truths.get_gt_rankings(
            seeds,
            data_set,
            category,
            exclude_tweets_within_delta=_EXCLUDE_TWEETS_WITHIN_DELTA,
            retweets=retweets)
        log('Num ground_truth_rankings: %s' % len(gt_rankings))

        # Format for use later.
        ground_truth_url_to_rank = {}
        for rank, (url, count) in enumerate(gt_rankings):
            ground_truth_url_to_rank[url] = rank

        target_news = ground_truths.find_target_news(gt_rankings,
                                                     size_top_news)
        log('Size target_news: %s' % len(target_news))

        for delta in _DELTAS:
            run_params_str = 'd%s_t%s_e%s_%s' % (delta, int(
                size_top_news * 100), int(_SIZE_EXPERTS * 100), category)
            info_output_dir = '../graph/FolkWisdom/%s/info/' % run_params_str
            Util.ensure_dir_exist(info_output_dir)

            groups, d_num_followers = user_groups.get_all_user_groups(
                delta, category)
            log('Num experts (precision): %s' % len(groups.precision))
            log('Num experts (fscore): %s' % len(groups.fscore))
            log('Num experts (ci): %s' % len(groups.ci))
            log('Num Super Experts: %s' % len(groups.super_experts))
            log('Num Social Bias Experts: %s' % len(groups.social_bias))

            log('Finding rankings with an %s hour delta.' % delta)
            ranks = rankings.get_rankings(delta, seeds, groups, category,
                                          d_num_followers)

            # Output some interesting info to file
            size_market_unfiltered = '0'
            with open('../data/FolkWisdom/size_of_market_unfiltered.txt'
                      ) as in_file:
                size_market_unfiltered = in_file.readline().strip()

            with open(
                    '%suser_demographics_%s.txt' %
                (info_output_dir, run_params_str), 'w') as output_file:
                output_file.write('Number of Newsaholics: %s\n' %
                                  len(groups.newsaholics))
                output_file.write('Number of Active Users: %s\n' %
                                  len(groups.active_users))
                output_file.write('Number of Common Users: %s\n' %
                                  len(groups.common_users))
                output_file.write('\n')
                output_file.write('Number of Precision Experts: %s\n' %
                                  len(groups.precision))
                output_file.write('Number of F-Score Experts: %s\n' %
                                  len(groups.fscore))
                output_file.write('Number of CI Experts: %s\n' %
                                  len(groups.ci))
                output_file.write('Number of Social Bias Experts: %s\n' %
                                  len(groups.social_bias))
                output_file.write('Total number of unique experts: %s\n' %
                                  len(groups.all_experts))
                output_file.write(
                    'Number of Precision and F-Score Experts: %s\n' %
                    len(groups.precision.intersection(groups.fscore)))
                output_file.write(
                    'Number of Precision and CI Experts: %s\n' %
                    len(groups.precision.intersection(groups.ci)))
                output_file.write('Number of F-Score and CI Experts: %s\n' %
                                  len(groups.fscore.intersection(groups.ci)))
                output_file.write('Number of Super Experts: %s\n' %
                                  len(groups.super_experts))
                output_file.write('\n')
                output_file.write(
                    'Number of Users (Total): %s\n' %
                    (len(groups.newsaholics) + len(groups.active_users) +
                     len(groups.common_users)))
                output_file.write('Size of market (unfiltered): %s\n' %
                                  size_market_unfiltered)
                output_file.write('\n')
                # output_file.write('Number of votes by Newsaholics: %s\n'
                #                   % num_votes_newsaholics)
                # output_file.write('Number of votes by Market: %s\n' % num_votes_market)
                # output_file.write('Number of votes by Active Users: %s\n'
                #                   % num_votes_active)
                # output_file.write('Number of votes by Common Users: %s\n'
                #                   % num_votes_common)
                # output_file.write('\n');
                # output_file.write('Number of votes by Expert (Precision) Users: %s\n'
                #         % num_votes_expert_precision)
                # output_file.write('Number of votes by Expert (fscore) Users: %s\n'
                #         % num_votes_expert_fscore)
                # output_file.write('Number of votes by Expert (ci) Users: %s\n'
                #         % num_votes_expert_ci)
                # output_file.write('Number of votes by Super Experts: %s\n'
                #                   % num_votes_expert_s)
                # output_file.write('Number of votes by Social Bias Experts: %s\n'
                #                   % num_votes_expert_sb)
                # output_file.write('\n')
                # output_file.write('Total Number of votes cast: %s\n'
                #                   % (num_votes_newsaholics + num_votes_active
                #                      + num_votes_common))
                # output_file.write('\n')
                output_file.write('Total Number of Good News: %s\n' %
                                  len(target_news))

            log('Ground Truth Top 50')
            for i in range(min(len(gt_rankings), 50)):
                url, count = gt_rankings[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Newsaholic Top 5')
            for i in range(min(len(ranks.newsaholics), 5)):
                url, count = ranks.newsaholics[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Active Top 5')
            for i in range(min(len(ranks.active_users), 5)):
                url, count = ranks.active_users[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Common Top 5')
            for i in range(min(len(ranks.common_users), 5)):
                url, count = ranks.common_users[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('nonexpert Top 5')
            for i in range(min(len(ranks.non_experts), 5)):
                url, count = ranks.non_experts[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Expert (Precision) Top 5')
            for i in range(min(len(ranks.precision), 5)):
                url, count = ranks.precision[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Expert (fscore) Top 5')
            for i in range(min(len(ranks.fscore), 5)):
                url, count = ranks.fscore[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Expert (ci) Top 5')
            for i in range(min(len(ranks.ci), 5)):
                url, count = ranks.ci[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Super Expert Top 5')
            for i in range(min(len(ranks.super_experts), 5)):
                url, count = ranks.super_experts[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))
            log('-----------------------------------')
            log('Social Bias Expert Top 5')
            for i in range(min(len(ranks.social_bias), 5)):
                url, count = ranks.social_bias[i]
                log('[%s] %s\t%s' % (i, url.strip(), count))

            market_rank_to_url = {}
            newsaholic_rank_to_url = {}
            active_rank_to_url = {}
            common_rank_to_url = {}
            expert_p_rank_to_url = {}
            expert_f_rank_to_url = {}
            expert_c_rank_to_url = {}
            expert_s_rank_to_url = {}
            for rank, (url, count) in enumerate(ranks.newsaholics):
                newsaholic_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.population):
                market_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.active_users):
                active_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.common_users):
                common_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.precision):
                expert_p_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.fscore):
                expert_f_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.ci):
                expert_c_rank_to_url[rank] = url
            for rank, (url, count) in enumerate(ranks.super_experts):
                expert_s_rank_to_url[rank] = url

            population_url_to_rank = {}
            market_url_to_rank = {}
            precision_url_to_rank = {}
            fscore_url_to_rank = {}
            ci_url_to_rank = {}
            ci_1_url_to_rank = {}
            ci_2_url_to_rank = {}
            ci_3_url_to_rank = {}
            common_url_to_rank = {}
            for rank, (url, count) in enumerate(ranks.population):
                population_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.non_experts):
                market_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.precision):
                precision_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.fscore):
                fscore_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.ci):
                ci_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.ci_1):
                ci_1_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.ci_2):
                ci_2_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.ci_3):
                ci_3_url_to_rank[url] = rank
            for rank, (url, count) in enumerate(ranks.common_users):
                common_url_to_rank[url] = rank

            precisions, recalls = precision_recall.get_precision_recalls(
                gt_rankings, ranks)

            mixed_rankings = mixed_model.get_mixed_rankings(
                market_url_to_rank, precisions.non_experts,
                precision_url_to_rank, precisions.precision,
                fscore_url_to_rank, precisions.fscore, ci_url_to_rank,
                precisions.ci, ground_truth_url_to_rank)

            mixed_inact_rankings = mixed_model.get_mixed_rankings(
                common_url_to_rank, precisions.common_users,
                precision_url_to_rank, precisions.precision,
                fscore_url_to_rank, precisions.fscore, ci_url_to_rank,
                precisions.ci, ground_truth_url_to_rank)

            mixed_ci_rankings = mixed_model.get_mixed_rankings(
                market_url_to_rank, precisions.non_experts, ci_1_url_to_rank,
                precisions.ci_1, ci_2_url_to_rank, precisions.ci_2,
                ci_3_url_to_rank, precisions.ci_3, ground_truth_url_to_rank)

            mixed_precisions, mixed_recalls = precision_recall.calc_precision_recall(
                gt_rankings, mixed_rankings)

            mixed_inact_precisions, mixed_inact_recalls = precision_recall.calc_precision_recall(
                gt_rankings, mixed_inact_rankings)

            mixed_ci_precisions, mixed_ci_recalls = precision_recall.calc_precision_recall(
                gt_rankings, mixed_ci_rankings)

            log('-----------------------------------')
            log('Mixed (min) Top 5')
            for i in range(min(len(mixed_rankings), 5)):
                url, count = mixed_rankings[i]
                log('[%s] %s\t%s' % (i + 1, url, count))
            log('-----------------------------------')

            with open(
                    '%sranking_comparisons_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as out_file:
                for gt_rank, (gt_url, _) in enumerate(gt_rankings):
                    market_rank = 0
                    precision_rank = 0
                    ci_rank = 0
                    fscore_rank = 0
                    inactive_crowd_rank = 0
                    if gt_url in market_url_to_rank:
                        market_rank = market_url_to_rank[gt_url] + 1
                    if gt_url in precision_url_to_rank:
                        precision_rank = precision_url_to_rank[gt_url] + 1
                    if gt_url in ci_url_to_rank:
                        ci_rank = ci_url_to_rank[gt_url] + 1
                    if gt_url in fscore_url_to_rank:
                        fscore_rank = fscore_url_to_rank[gt_url] + 1
                    if gt_url in common_url_to_rank:
                        inactive_crowd_rank = common_url_to_rank[gt_url] + 1
                    line = '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
                        gt_url, gt_rank + 1, market_rank, inactive_crowd_rank,
                        precision_rank, ci_rank, fscore_rank)
                    out_file.write(line)

            with open(
                    '%sground_truth_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for url, count in gt_rankings:
                    output_file.write('%s\t%s\n' % (url.strip(), count))
            with open(
                    '%smarket_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.common_users):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%snewsaholic_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.newsaholics):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%sactive_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.active_users):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%scommon_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.common_users):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%snonexpert_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.non_experts):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%sexpert_p_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.precision):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%sexpert_f_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.fscore):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%sexpert_c_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.ci):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%sexpert_s_user_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(ranks.super_experts):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))
            with open(
                    '%smixed_rankings_%s.tsv' %
                (info_output_dir, run_params_str), 'w') as output_file:
                for rank, (url, count) in enumerate(mixed_rankings):
                    output_file.write('%s\t%s\t(%s,%s)\n' %
                                      (url.strip(), count, rank,
                                       ground_truth_url_to_rank[url]))

            with open(
                    '../data/FolkWisdom/market_precisions_%s.txt' %
                    run_params_str, 'w') as out_file:
                for precision in precisions.common_users:
                    out_file.write('%s\n' % precision)

            with open(
                    '../data/FolkWisdom/nonexpert_precisions_%s.txt' %
                    run_params_str, 'w') as out_file:
                for precision in precisions.non_experts:
                    out_file.write('%s\n' % precision)

            with open(
                    '../data/FolkWisdom/expert_p_precisions_%s.txt' %
                    run_params_str, 'w') as out_file:
                for precision in precisions.precision:
                    out_file.write('%s\n' % precision)

            with open(
                    '../data/FolkWisdom/expert_f_precisions_%s.txt' %
                    run_params_str, 'w') as out_file:
                for precision in precisions.fscore:
                    out_file.write('%s\n' % precision)

            with open(
                    '../data/FolkWisdom/expert_c_precisions_%s.txt' %
                    run_params_str, 'w') as out_file:
                for precision in precisions.ci:
                    out_file.write('%s\n' % precision)

            log('Drawing summary precision-recall graphs...')
            # draw_precision_recall_graph(market_precisions, market_recalls,
            precision_recall.draw([
                precisions.newsaholics, precisions.active_users,
                precisions.common_users, precisions.precision,
                precisions.fscore, precisions.ci, precisions.super_experts
            ], [
                recalls.newsaholics, recalls.active_users,
                recalls.common_users, recalls.precision, recalls.fscore,
                recalls.ci, recalls.super_experts
            ], [
                'Newsaholics', 'Active', 'Common', 'Precision', 'F-score',
                'CI', 'Super Experts'
            ], 'precision_recall_all', run_params_str)

            # Draw via old method because it has fancy markings.
            experts.draw_precision_recall_experts(
                precisions.non_experts, recalls.non_experts,
                precisions.precision, recalls.precision, precisions.fscore,
                recalls.fscore, precisions.ci, recalls.ci, run_params_str)

            log('Drawing experts precision-recall graph...')
            # precision_recall.draw_with_markers([precisions.population, precisions.non_experts, precisions.precision,
            #                                     precisions.fscore, precisions.ci],
            #                                    [recalls.population, recalls.non_experts, recalls.precision,
            #                                     recalls.fscore, recalls.ci],
            #                                    ['Population', 'Crowd', 'Precision', 'F-score', 'CI'],
            #                                    'precision_recall_experts',
            #                                    0, run_params_str)

            log('Drawing mixed + inact graph...')
            precision_recall.draw_with_markers(
                [
                    precisions.non_experts, precisions.common_users,
                    mixed_inact_precisions
                ], [
                    recalls.non_experts, recalls.common_users,
                    mixed_inact_recalls
                ], ['Crowd', 'Inactive Crowd', 'Mixed + Inactive'],
                'precision_recall_mixed_and_inactive',
                3,
                run_params_str,
                zoom=True)

            log('Drawing ci breakdown by followers precisions-recall graph...')
            precision_recall.draw([
                precisions.non_experts, precisions.ci, precisions.ci_hi,
                precisions.ci_li
            ], [recalls.non_experts, recalls.ci, recalls.ci_hi, recalls.ci_li],
                                  ['Crowd', 'CI', 'CI High', 'CI Low'],
                                  'precision_recall_ci_followers_breakdown',
                                  run_params_str)

            log('Drawing social bias precision-recall graph...')
            precision_recall.draw([
                precisions.non_experts, precisions.social_bias,
                precisions.precision, precisions.fscore, precisions.ci
            ], [
                recalls.non_experts, recalls.social_bias, recalls.precision,
                recalls.fscore, recalls.ci
            ], ['Crowd', 'Influence Experts', 'Precision', 'F-score', 'CI'],
                                  'precision_recall_social_bias',
                                  run_params_str)

            log('Drawing basic groups precision-recall graph...')
            precision_recall.draw([
                precisions.newsaholics, precisions.active_users,
                precisions.common_users
            ], [
                recalls.newsaholics, recalls.active_users, recalls.common_users
            ], ['Newsaholics', 'Active Users', 'Common Users'],
                                  'precision_recall_basic_groups',
                                  run_params_str)

            log('Drawing crowd def precision-recall graph...')
            precision_recall.draw(
                [precisions.non_experts, precisions.common_users],
                [recalls.non_experts, recalls.common_users],
                ['Crowd', 'Inactive Crowd'],
                'precision_recall_crowd_def',
                run_params_str,
                zoom=True)

            log('Drawing non_expert_sampling precision-recall graph...')
            precision_recall.draw_with_markers(
                [
                    precisions.non_experts, precisions.non_experts_sampled,
                    precisions.non_experts_10, precisions.non_experts_25,
                    precisions.non_experts_1, precisions.ci
                ], [
                    recalls.non_experts, recalls.non_experts_sampled,
                    recalls.non_experts_10, recalls.non_experts_25,
                    recalls.non_experts_1, recalls.ci
                ], [
                    'Crowd', 'Crowd (33% sample)', 'Crowd (10% sample)',
                    'Crowd (5% sample)', 'Crowd (2% sample)', 'Experts (CI)'
                ],
                'precision_recall_non_expert_sampling',
                3,
                run_params_str,
                ncol=2)

            # TODO: Replace with new method.
            log('Drawing mixed model precision-recall graph...')
            mixed_model.draw_precision_recall_mixed(precisions.non_experts,
                                                    recalls.non_experts,
                                                    mixed_precisions,
                                                    mixed_recalls,
                                                    run_params_str,
                                                    zoom=True)

            log('Drawing mixed ci model precision-recall graph...')
            precision_recall.draw(
                [precisions.non_experts, mixed_ci_precisions],
                [recalls.non_experts, mixed_ci_recalls], ['Crowd', 'Mixed'],
                'precision_recall_mixed_ci', run_params_str)

            log('Drawing weighted followers precision-recall graph...')
            precision_recall.draw([
                precisions.non_experts, precisions.weighted_followers,
                precisions.ci
            ], [recalls.non_experts, recalls.weighted_followers, recalls.ci],
                                  ['Crowd', 'Weighted Followers', 'CI'],
                                  'precision_recall_weighted_followers',
                                  run_params_str)

            log('Drawing ci weighted graph...')
            precision_recall.draw(
                [precisions.population, precisions.ci, precisions.ci_weighted],
                [recalls.population, recalls.ci, recalls.ci_weighted],
                ['Crowd', 'CI', 'CI (Weighted)'],
                'precision_recall_ci_weighted', run_params_str)

            log('Drawing weighted graph...')
            precision_recall.draw([precisions.population, precisions.weighted],
                                  [recalls.population, recalls.weighted],
                                  ['Crowd', 'Crowd (Weighted)'],
                                  'precision_recall_weighted', run_params_str)

            log('Drawing weighted both graph...')
            precision_recall.draw(
                [
                    precisions.population, precisions.weighted,
                    precisions.weighted_both
                ],
                [recalls.population, recalls.weighted, recalls.weighted_both],
                ['Crowd', 'Crowd (Weighted)', 'Crowd (Weighted Both)'],
                'precision_recall_weighted_both', run_params_str)

Exemple #15

0

Afficher le fichier

Fichier : StreamFileSaverWrapper.py Projet : chucheng/EmhTwitterCrwaler

 def log(self, message, tb=None):
     """Write a message to this wrapper's log file, tagged with the class name.

     Keyword Arguments:
     message -- Value to log; it is converted with str() and prefixed with
                "StreamFileSaverWrapper: ".
     tb -- Optional traceback, handed to FileLog.log as exception_tb.
     """
     tagged_message = "StreamFileSaverWrapper: " + str(message)
     FileLog.log(self.log_filename, tagged_message, exception_tb=tb)

Exemple #16

0

Afficher le fichier

Fichier : a_time_constraint.py Projet : cmoghbel/TwitterResearch

# Line prefixes used by the report; kept verbatim (including the double space
# after 'Common Users 1:') so existing output files keep their layout.
_TIME_WINDOW_PREFIXES = ('0 - 1 hours: ', '1 - 4 hours: ', '4 - 8 hours: ')
_COMMON_USER_PREFIXES = ('Common Users 1:  ', 'Common Users 2: ',
                         'Common Users 3: ')


def _percent_of_total(count, total):
  """Return count as a percentage of total, or 0.0 when total is zero."""
  if not total:
    # Without this guard a category with no data raises ZeroDivisionError
    # and aborts the analysis of every remaining category.
    return 0.0
  return 100 * (float(count) / total)


def _labelled_rows(prefixes, counts):
  """Pair each line prefix with its count, in order."""
  return list(zip(prefixes, counts))


def _write_section(out_file, title, underline, rows, total):
  """Write a heading, its underline, and one 'count (percent)' line per row.

  Keyword Arguments:
  out_file -- An open, writable text file.
  title -- Section heading, written without a leading blank line.
  underline -- The dashes written on the line below the heading.
  rows -- Sequence of (prefix, count) pairs; prefix is emitted verbatim in
          front of the count.
  total -- Denominator used for the percentage on every row.
  """
  out_file.write('%s\n' % title)
  out_file.write('%s\n' % underline)
  for prefix, count in rows:
    out_file.write('%s%s (%s percent of total)\n'
                   % (prefix, count, _percent_of_total(count, total)))


def run():
  """Write one time-constraint report per category.

  For each entry of _CATEGORIES the counts returned by find_counts are
  written to '../data/TimeConstraint/<category>.txt'. The report has one
  section per user group (common, non-common, the expert groups, all users),
  each listing the count for the 0-1, 1-4 and 4-8 hour windows together with
  its percentage of the category total, followed by the total itself.
  """
  FileLog.set_log_dir()
  output_dir = '../data/TimeConstraint/'
  Util.ensure_dir_exist(output_dir)

  seeds = Util.load_seeds()

  for category in _CATEGORIES:
    run_params_str = '%s' % (category)
    log('Preforming analysis: Cateogry = %s' % run_params_str)

    # Find counts.
    (num_0_1, num_0_1_common, num_0_1_experts_p, num_0_1_experts_f, num_0_1_experts_ci, num_0_1_experts_all,
     num_1_4, num_1_4_common, num_1_4_experts_p, num_1_4_experts_f, num_1_4_experts_ci, num_1_4_experts_all,
     num_4_8, num_4_8_common, num_4_8_experts_p, num_4_8_experts_f, num_4_8_experts_ci, num_4_8_experts_all,
     num_cu_1_1, num_cu_1_2, num_cu_1_3,
     num_cu_4_1, num_cu_4_2, num_cu_4_3,
     num_cu_8_1, num_cu_8_2, num_cu_8_3,
     num_after_8, num_total) = find_counts(seeds, category)

    # Calculate non-common users.
    num_0_1_noncommon = num_0_1 - num_0_1_common
    num_1_4_noncommon = num_1_4 - num_1_4_common
    num_4_8_noncommon = num_4_8 - num_4_8_common

    # (title, underline, rows) for every section, in output order.
    sections = [
        ('Common Users', '------------',
         _labelled_rows(_TIME_WINDOW_PREFIXES,
                        (num_0_1_common, num_1_4_common, num_4_8_common))),
        # Each breakdown row reports the percentage of its own count.
        ('Common Users (Breakdown, Delta 1)', '------------',
         _labelled_rows(_COMMON_USER_PREFIXES,
                        (num_cu_1_1, num_cu_1_2, num_cu_1_3))),
        ('Common Users (Breakdown, Delta 4)', '------------',
         _labelled_rows(_COMMON_USER_PREFIXES,
                        (num_cu_4_1, num_cu_4_2, num_cu_4_3))),
        ('Common Users (Breakdown, Delta 8)', '------------',
         _labelled_rows(_COMMON_USER_PREFIXES,
                        (num_cu_8_1, num_cu_8_2, num_cu_8_3))),
        ('Non-Common Users', '----------------',
         _labelled_rows(_TIME_WINDOW_PREFIXES,
                        (num_0_1_noncommon, num_1_4_noncommon,
                         num_4_8_noncommon))),
        ('Expert Precision Users', '----------------',
         _labelled_rows(_TIME_WINDOW_PREFIXES,
                        (num_0_1_experts_p, num_1_4_experts_p,
                         num_4_8_experts_p))),
        ('Expert Fscore Users', '----------------',
         _labelled_rows(_TIME_WINDOW_PREFIXES,
                        (num_0_1_experts_f, num_1_4_experts_f,
                         num_4_8_experts_f))),
        ('Expert CI Users', '----------------',
         _labelled_rows(_TIME_WINDOW_PREFIXES,
                        (num_0_1_experts_ci, num_1_4_experts_ci,
                         num_4_8_experts_ci))),
        ('Expert All Users', '----------------',
         _labelled_rows(_TIME_WINDOW_PREFIXES,
                        (num_0_1_experts_all, num_1_4_experts_all,
                         num_4_8_experts_all))),
        ('All Users', '---------',
         _labelled_rows(_TIME_WINDOW_PREFIXES + ('8 - + hours: ',),
                        (num_0_1, num_1_4, num_4_8, num_after_8))),
    ]

    with open('%s%s.txt' % (output_dir, run_params_str), 'w') as out_file:
      for index, (title, underline, rows) in enumerate(sections):
        if index:
          # Sections are separated by exactly one blank line.
          out_file.write('\n')
        _write_section(out_file, title, underline, rows, num_total)

      out_file.write('\ntotal: %s' % num_total)

Exemple #17

0

Afficher le fichier

Fichier : StreamingCrawler.py Projet : chucheng/EmhTwitterCrwaler

 def log(self, msg, tb=None):
     """Forward a message to FileLog, writing to this object's log_file.

     Keyword Arguments:
     msg -- The message to log; passed through unchanged.
     tb -- Optional value handed to FileLog.log as exception_tb
           (presumably a traceback; confirm against FileLog).
     """
     destination = self.log_file
     FileLog.log(destination, msg, exception_tb=tb)

Exemple #18

0

Afficher le fichier

Fichier : aFolkWisdom.py Projet : chucheng/TwitterResearch

def run():
  """Contains the main logic for this analysis."""
  FileLog.set_log_dir()

  seeds = Util.load_seeds()
  for category in _CATEGORIES:
    log('Preforming analysis for category: %s' % category)
    size_top_news = _SIZE_TOP_NEWS
    if category:
      size_top_news = .10

    data_set = DataSet.TESTING
    retweets = set()
    if _SWITCHED:
      data_set = DataSet.TRAINING
    if _EXCLUDE_RETWEETS:
      retweets = ground_truths.find_retweets(_TESTING_SET_MONTHS)
    log('Num retweets to exclude: %s' % len(retweets))
    gt_rankings = ground_truths.get_gt_rankings(seeds, data_set, category,
                                                exclude_tweets_within_delta=_EXCLUDE_TWEETS_WITHIN_DELTA,
                                                retweets=retweets)
    log('Num ground_truth_rankings: %s' % len(gt_rankings))

    # Format for use later.
    ground_truth_url_to_rank = {}
    for rank, (url, count) in enumerate(gt_rankings):
      ground_truth_url_to_rank[url] = rank

    target_news = ground_truths.find_target_news(gt_rankings, size_top_news)
    log('Size target_news: %s' % len(target_news))

    for delta in _DELTAS:
      run_params_str = 'd%s_t%s_e%s_%s' % (delta, int(size_top_news * 100),
                                           int(_SIZE_EXPERTS * 100), category)
      info_output_dir = '../graph/FolkWisdom/%s/info/' % run_params_str
      Util.ensure_dir_exist(info_output_dir)


      groups, d_num_followers  = user_groups.get_all_user_groups(delta, category)
      log('Num experts (precision): %s' % len(groups.precision))
      log('Num experts (fscore): %s' % len(groups.fscore))
      log('Num experts (ci): %s' % len(groups.ci))
      log('Num Super Experts: %s' %len(groups.super_experts))
      log('Num Social Bias Experts: %s' % len(groups.social_bias))

      log('Finding rankings with an %s hour delta.' % delta)
      ranks = rankings.get_rankings(delta, seeds, groups, category, d_num_followers)

      # Output some interesting info to file
      size_market_unfiltered = '0'
      with open('../data/FolkWisdom/size_of_market_unfiltered.txt') as in_file:
        size_market_unfiltered = in_file.readline().strip()

      with open('%suser_demographics_%s.txt'
                % (info_output_dir, run_params_str), 'w') as output_file:
        output_file.write('Number of Newsaholics: %s\n' % len(groups.newsaholics))
        output_file.write('Number of Active Users: %s\n' % len(groups.active_users))
        output_file.write('Number of Common Users: %s\n' % len(groups.common_users))
        output_file.write('\n');
        output_file.write('Number of Precision Experts: %s\n' % len(groups.precision))
        output_file.write('Number of F-Score Experts: %s\n' % len(groups.fscore))
        output_file.write('Number of CI Experts: %s\n' % len(groups.ci))
        output_file.write('Number of Social Bias Experts: %s\n' % len(groups.social_bias))
        output_file.write('Total number of unique experts: %s\n' % len(groups.all_experts))
        output_file.write('Number of Precision and F-Score Experts: %s\n'
                          % len(groups.precision.intersection(groups.fscore)))
        output_file.write('Number of Precision and CI Experts: %s\n'
                          % len(groups.precision.intersection(groups.ci)))
        output_file.write('Number of F-Score and CI Experts: %s\n'
                          % len(groups.fscore.intersection(groups.ci)))
        output_file.write('Number of Super Experts: %s\n' % len(groups.super_experts))
        output_file.write('\n');
        output_file.write('Number of Users (Total): %s\n'
                          % (len(groups.newsaholics) + len(groups.active_users)
                             + len(groups.common_users)))
        output_file.write('Size of market (unfiltered): %s\n'
                          % size_market_unfiltered)
        output_file.write('\n')
        # output_file.write('Number of votes by Newsaholics: %s\n'
        #                   % num_votes_newsaholics)
        # output_file.write('Number of votes by Market: %s\n' % num_votes_market)
        # output_file.write('Number of votes by Active Users: %s\n'
        #                   % num_votes_active)
        # output_file.write('Number of votes by Common Users: %s\n'
        #                   % num_votes_common)
        # output_file.write('\n');
        # output_file.write('Number of votes by Expert (Precision) Users: %s\n'
        #         % num_votes_expert_precision) 
        # output_file.write('Number of votes by Expert (fscore) Users: %s\n'
        #         % num_votes_expert_fscore) 
        # output_file.write('Number of votes by Expert (ci) Users: %s\n'
        #         % num_votes_expert_ci) 
        # output_file.write('Number of votes by Super Experts: %s\n'
        #                   % num_votes_expert_s)
        # output_file.write('Number of votes by Social Bias Experts: %s\n'
        #                   % num_votes_expert_sb)
        # output_file.write('\n')
        # output_file.write('Total Number of votes cast: %s\n'
        #                   % (num_votes_newsaholics + num_votes_active
        #                      + num_votes_common))
        # output_file.write('\n')
        output_file.write('Total Number of Good News: %s\n' % len(target_news))

      log('Ground Truth Top 50')
      for i in range(min(len(gt_rankings), 50)):
        url, count = gt_rankings[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Newsaholic Top 5')
      for i in range(min(len(ranks.newsaholics), 5)):
        url, count = ranks.newsaholics[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Active Top 5')
      for i in range(min(len(ranks.active_users), 5)):
        url, count = ranks.active_users[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Common Top 5')
      for i in range(min(len(ranks.common_users), 5)):
        url, count = ranks.common_users[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('nonexpert Top 5')
      for i in range(min(len(ranks.non_experts), 5)):
        url, count = ranks.non_experts[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Expert (Precision) Top 5')
      for i in range(min(len(ranks.precision), 5)):
        url, count = ranks.precision[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Expert (fscore) Top 5')
      for i in range(min(len(ranks.fscore), 5)):
        url, count = ranks.fscore[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Expert (ci) Top 5')
      for i in range(min(len(ranks.ci), 5)):
        url, count = ranks.ci[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Super Expert Top 5')
      for i in range(min(len(ranks.super_experts), 5)):
        url, count = ranks.super_experts[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))
      log('-----------------------------------')
      log('Social Bias Expert Top 5')
      for i in range(min(len(ranks.social_bias), 5)):
        url, count = ranks.social_bias[i]
        log('[%s] %s\t%s' %(i, url.strip(), count))

        
      market_rank_to_url = {}
      newsaholic_rank_to_url = {}
      active_rank_to_url = {}
      common_rank_to_url = {}
      expert_p_rank_to_url = {}
      expert_f_rank_to_url = {}
      expert_c_rank_to_url = {}
      expert_s_rank_to_url = {}
      for rank, (url, count) in enumerate(ranks.newsaholics):
        newsaholic_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.population):
        market_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.active_users):
        active_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.common_users):
        common_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.precision):
        expert_p_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.fscore):
        expert_f_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.ci):
        expert_c_rank_to_url[rank] = url
      for rank, (url, count) in enumerate(ranks.super_experts):
        expert_s_rank_to_url[rank] = url

      population_url_to_rank = {}
      market_url_to_rank = {}
      precision_url_to_rank = {}
      fscore_url_to_rank = {}
      ci_url_to_rank = {}
      ci_1_url_to_rank = {}
      ci_2_url_to_rank = {}
      ci_3_url_to_rank = {}
      common_url_to_rank = {}
      for rank, (url, count) in enumerate(ranks.population):
        population_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.non_experts):
        market_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.precision):
        precision_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.fscore):
        fscore_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.ci):
        ci_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.ci_1):
        ci_1_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.ci_2):
        ci_2_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.ci_3):
        ci_3_url_to_rank[url] = rank
      for rank, (url, count) in enumerate(ranks.common_users):
        common_url_to_rank[url] = rank

      precisions, recalls = precision_recall.get_precision_recalls(gt_rankings, ranks)

      mixed_rankings = mixed_model.get_mixed_rankings(market_url_to_rank,
                                                      precisions.non_experts,
                                                      precision_url_to_rank,
                                                      precisions.precision,
                                                      fscore_url_to_rank,
                                                      precisions.fscore,
                                                      ci_url_to_rank,
                                                      precisions.ci,
                                                      ground_truth_url_to_rank)

      mixed_inact_rankings = mixed_model.get_mixed_rankings(common_url_to_rank,
                                                            precisions.common_users,
                                                            precision_url_to_rank,
                                                            precisions.precision,
                                                            fscore_url_to_rank,
                                                            precisions.fscore,
                                                            ci_url_to_rank,
                                                            precisions.ci,
                                                            ground_truth_url_to_rank)

      mixed_ci_rankings = mixed_model.get_mixed_rankings(market_url_to_rank,
                                                         precisions.non_experts,
                                                         ci_1_url_to_rank,
                                                         precisions.ci_1,
                                                         ci_2_url_to_rank,
                                                         precisions.ci_2,
                                                         ci_3_url_to_rank,
                                                         precisions.ci_3,
                                                         ground_truth_url_to_rank)
                                                         

      mixed_precisions, mixed_recalls = precision_recall.calc_precision_recall(gt_rankings, 
                                                                               mixed_rankings)

      mixed_inact_precisions, mixed_inact_recalls = precision_recall.calc_precision_recall(gt_rankings, 
                                                                                           mixed_inact_rankings)

      mixed_ci_precisions, mixed_ci_recalls = precision_recall.calc_precision_recall(gt_rankings, 
                                                                                     mixed_ci_rankings)

      log('-----------------------------------')
      log('Mixed (min) Top 5')
      for i in range(min(len(mixed_rankings), 5)):
        url, count = mixed_rankings[i]
        log('[%s] %s\t%s' %(i + 1, url, count))
      log('-----------------------------------')

      with open('%sranking_comparisons_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as out_file:
        for gt_rank, (gt_url, _) in enumerate(gt_rankings):
          market_rank = 0
          precision_rank = 0
          ci_rank = 0
          fscore_rank = 0
          inactive_crowd_rank = 0
          if gt_url in market_url_to_rank:
            market_rank = market_url_to_rank[gt_url] + 1
          if gt_url in precision_url_to_rank:
            precision_rank = precision_url_to_rank[gt_url] + 1
          if gt_url in ci_url_to_rank:
            ci_rank = ci_url_to_rank[gt_url] + 1
          if gt_url in fscore_url_to_rank:
            fscore_rank = fscore_url_to_rank[gt_url] + 1
          if gt_url in common_url_to_rank:
            inactive_crowd_rank = common_url_to_rank[gt_url] + 1
          line = '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (gt_url, gt_rank + 1,
                                                   market_rank,
                                                   inactive_crowd_rank,
                                                   precision_rank, ci_rank,
                                                   fscore_rank)
          out_file.write(line)


      with open('%sground_truth_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for url, count in gt_rankings:
          output_file.write('%s\t%s\n' % (url.strip(), count))
      with open('%smarket_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.common_users):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%snewsaholic_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.newsaholics):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%sactive_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.active_users):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%scommon_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.common_users):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%snonexpert_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.non_experts):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%sexpert_p_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.precision):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%sexpert_f_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.fscore):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%sexpert_c_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.ci):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                               ground_truth_url_to_rank[url]))
      with open('%sexpert_s_user_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(ranks.super_experts):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                            ground_truth_url_to_rank[url]))
      with open('%smixed_rankings_%s.tsv'
                % (info_output_dir, run_params_str), 'w') as output_file:
        for rank, (url, count) in enumerate(mixed_rankings):
          output_file.write('%s\t%s\t(%s,%s)\n'
                            % (url.strip(), count, rank,
                            ground_truth_url_to_rank[url]))

      with open('../data/FolkWisdom/market_precisions_%s.txt'
                % run_params_str, 'w') as out_file:
        for precision in precisions.common_users:
          out_file.write('%s\n' % precision)

      with open('../data/FolkWisdom/nonexpert_precisions_%s.txt'
                % run_params_str, 'w') as out_file:
        for precision in precisions.non_experts:
          out_file.write('%s\n' % precision)

      with open('../data/FolkWisdom/expert_p_precisions_%s.txt'
                % run_params_str, 'w') as out_file:
        for precision in precisions.precision:
          out_file.write('%s\n' % precision)

      with open('../data/FolkWisdom/expert_f_precisions_%s.txt'
                % run_params_str, 'w') as out_file:
        for precision in precisions.fscore:
          out_file.write('%s\n' % precision)

      with open('../data/FolkWisdom/expert_c_precisions_%s.txt'
                % run_params_str, 'w') as out_file:
        for precision in precisions.ci:
          out_file.write('%s\n' % precision)

      log('Drawing summary precision-recall graphs...')
      # draw_precision_recall_graph(market_precisions, market_recalls,
      precision_recall.draw([precisions.newsaholics, precisions.active_users,
                             precisions.common_users, precisions.precision,
                             precisions.fscore, precisions.ci,
                             precisions.super_experts],
                            [recalls.newsaholics, recalls.active_users,
                             recalls.common_users, recalls.precision,
                             recalls.fscore, recalls.ci, recalls.super_experts],
                            ['Newsaholics', 'Active', 'Common', 'Precision',
                             'F-score', 'CI', 'Super Experts'],
                            'precision_recall_all',
                            run_params_str)

      # Draw via old method because it has fancy markings.
      experts.draw_precision_recall_experts(precisions.non_experts, recalls.non_experts,
                                            precisions.precision, recalls.precision,
                                            precisions.fscore, recalls.fscore,
                                            precisions.ci, recalls.ci,
                                            run_params_str)

      log('Drawing experts precision-recall graph...')
      # precision_recall.draw_with_markers([precisions.population, precisions.non_experts, precisions.precision,
      #                                     precisions.fscore, precisions.ci],
      #                                    [recalls.population, recalls.non_experts, recalls.precision,
      #                                     recalls.fscore, recalls.ci],
      #                                    ['Population', 'Crowd', 'Precision', 'F-score', 'CI'],
      #                                    'precision_recall_experts',
      #                                    0, run_params_str)

      log('Drawing mixed + inact graph...')
      precision_recall.draw_with_markers([precisions.non_experts, precisions.common_users, mixed_inact_precisions],
                                         [recalls.non_experts, recalls.common_users, mixed_inact_recalls],
                                         ['Crowd', 'Inactive Crowd', 'Mixed + Inactive'],
                                         'precision_recall_mixed_and_inactive',
                                         3, run_params_str, zoom=True)

      log('Drawing ci breakdown by followers precisions-recall graph...')
      precision_recall.draw([precisions.non_experts, precisions.ci,
                             precisions.ci_hi, precisions.ci_li],
                            [recalls.non_experts, recalls.ci,
                             recalls.ci_hi, recalls.ci_li],
                            ['Crowd', 'CI', 'CI High', 'CI Low'],
                            'precision_recall_ci_followers_breakdown',
                            run_params_str)

      log('Drawing social bias precision-recall graph...')
      precision_recall.draw([precisions.non_experts, precisions.social_bias,
                             precisions.precision, precisions.fscore,
                             precisions.ci],
                            [recalls.non_experts, recalls.social_bias,
                             recalls.precision, recalls.fscore,
                             recalls.ci],
                            ['Crowd', 'Influence Experts', 'Precision',
                             'F-score', 'CI'],
                            'precision_recall_social_bias',
                            run_params_str)

      log('Drawing basic groups precision-recall graph...')
      precision_recall.draw([precisions.newsaholics, precisions.active_users,
                             precisions.common_users],
                            [recalls.newsaholics, recalls.active_users,
                             recalls.common_users],
                            ['Newsaholics', 'Active Users', 'Common Users'],
                            'precision_recall_basic_groups',
                            run_params_str)

      log('Drawing crowd def precision-recall graph...')
      precision_recall.draw([precisions.non_experts, precisions.common_users],
                            [recalls.non_experts, recalls.common_users],
                            ['Crowd', 'Inactive Crowd'],
                            'precision_recall_crowd_def',
                            run_params_str, zoom=True)

      log('Drawing non_expert_sampling precision-recall graph...')
      precision_recall.draw_with_markers([precisions.non_experts, precisions.non_experts_sampled,
                                          precisions.non_experts_10, precisions.non_experts_25,
                                          precisions.non_experts_1, precisions.ci],
                                          [recalls.non_experts, recalls.non_experts_sampled,
                                           recalls.non_experts_10, recalls.non_experts_25,
                                           recalls.non_experts_1, recalls.ci],
                                          ['Crowd', 'Crowd (33% sample)', 'Crowd (10% sample)',
                                           'Crowd (5% sample)', 'Crowd (2% sample)', 'Experts (CI)'],
                                          'precision_recall_non_expert_sampling',
                                          3, run_params_str, ncol=2)

      # TODO: Replace with new method.
      log('Drawing mixed model precision-recall graph...')
      mixed_model.draw_precision_recall_mixed(precisions.non_experts, recalls.non_experts,
                                              mixed_precisions, mixed_recalls,
                                              run_params_str, zoom=True)

      log('Drawing mixed ci model precision-recall graph...')
      precision_recall.draw([precisions.non_experts, mixed_ci_precisions],
                            [recalls.non_experts, mixed_ci_recalls],
                            ['Crowd', 'Mixed'],
                            'precision_recall_mixed_ci',
                            run_params_str)

      log('Drawing weighted followers precision-recall graph...')
      precision_recall.draw([precisions.non_experts, precisions.weighted_followers, precisions.ci],
                            [recalls.non_experts, recalls.weighted_followers, recalls.ci],
                            ['Crowd', 'Weighted Followers', 'CI'],
                            'precision_recall_weighted_followers',
                            run_params_str)

      log('Drawing ci weighted graph...')
      precision_recall.draw([precisions.population, precisions.ci, precisions.ci_weighted],
                            [recalls.population, recalls.ci, recalls.ci_weighted],
                            ['Crowd', 'CI', 'CI (Weighted)'],
                            'precision_recall_ci_weighted',
                            run_params_str)

      log('Drawing weighted graph...')
      precision_recall.draw([precisions.population, precisions.weighted],
                            [recalls.population, recalls.weighted],
                            ['Crowd', 'Crowd (Weighted)'],
                            'precision_recall_weighted',
                            run_params_str)

      log('Drawing weighted both graph...')
      precision_recall.draw([precisions.population, precisions.weighted, precisions.weighted_both],
                            [recalls.population, recalls.weighted, recalls.weighted_both],
                            ['Crowd', 'Crowd (Weighted)', 'Crowd (Weighted Both)'],
                            'precision_recall_weighted_both',
                            run_params_str)

Exemple #19

0

Afficher le fichier

Fichier : StreamingCrawler.py Projet : chucheng/EmhTwitterCrwaler

 def log(self, msg, tb=None, screen_only=False):
     """Emit ``msg`` through FileLog with a "StreamingCrawler:" prefix.

     Arguments:
     msg -- text appended to the prefix with ``+``.
     tb -- forwarded to FileLog.log as ``exception_tb``.
     screen_only -- when true, None is passed instead of self.log_file; per
                    the original note the message is then only printed to
                    the screen and not saved to the log file.
     """
     destination = None if screen_only else self.log_file
     FileLog.log(destination, "StreamingCrawler:" + msg, exception_tb=tb)

Exemple #20

0

Afficher le fichier

Fichier : streaming.py Projet : chucheng/EmhTwitterCrwaler

 def log(self, msg, tb=None):
     """Send ``msg`` to self.log_file via FileLog, tagged with self.pid.

     Intended for future debugging; per the original note it is not called
     in this class yet.

     Arguments:
     msg -- any object; it is converted with str() before being logged.
     tb -- forwarded to FileLog.log as ``exception_tb``.
     """
     target = self.log_file
     tagged_message = "(pid:{0})".format(self.pid) + str(msg)
     FileLog.log(target, tagged_message, exception_tb=tb)

Exemple #21

0

Afficher le fichier

Fichier : streaming.py Projet : chucheng/EmhTwitterCrwaler

 def log(self, msg, tb=None):
     """Send ``msg`` to self.log_file via FileLog (debugging aid).

     Arguments:
     msg -- any object; it is converted with str() before being logged.
     tb -- forwarded to FileLog.log as ``exception_tb``.
     """
     target = self.log_file
     text = str(msg)
     FileLog.log(target, text, exception_tb=tb)

Exemple #22

0

Afficher le fichier

Fichier : a_crowd_wisdom_def.py Projet : chucheng/TwitterResearch

def run():
  """Contains the main logic for this analysis.

  For every entry in _CATEGORIES the module-level _SIZE_TOP_NEWS is rebound
  (.10 for a truthy category, .02 otherwise), the ground-truth rankings and
  target news are loaded, and then for each delta this:

    * groups users (basic groups, common-user buckets, expert selections),
    * fetches the rankings of those groups and logs their vote totals,
    * writes user_demographics_<run params>.txt into the info directory,
    * logs the first five entries of the main rankings, and
    * computes precision/recall and draws the common-group graphs.
  """
  global _SIZE_TOP_NEWS
  FileLog.set_log_dir()

  seeds = Util.load_seeds()
  for category in _CATEGORIES:
    log('Preforming analysis for category: %s' % category)
    # The target-news fraction depends on whether a category is set.
    _SIZE_TOP_NEWS = .10 if category else .02

    gt_rankings = ground_truths.get_gt_rankings(seeds, DataSet.TESTING,
                                                category)
    log('Num ground_truth_rankings: %s' % len(gt_rankings))

    target_news = ground_truths.find_target_news(gt_rankings, _SIZE_TOP_NEWS)
    log('Size target_news: %s' % len(target_news))

    # Only a 4 hour delta is run here; the original left the full sweep
    # disabled:
    # for delta in _DELTAS:
    for delta in [4]:
      run_params_str = 'd%s_t%s_e%s_%s' % (delta, int(_SIZE_TOP_NEWS * 100),
                                           int(_SIZE_EXPERTS * 100), category)
      # Create the graph directory once (the original repeated this step).
      output_dir = '../graph/CrowdWisdomDef/%s/' % run_params_str
      Util.ensure_dir_exist(output_dir)

      info_output_dir = output_dir + 'info/'
      Util.ensure_dir_exist(info_output_dir)

      (num_users, newsaholics,
       active_users, common_users) = basic_groups.group_users(delta, category)
      log('Num newsaholics: %s' % len(newsaholics))
      log('Num active: %s' % len(active_users))
      log('Num common: %s' % len(common_users))

      common_user_buckets = common_user_groups.group_users(common_users,
                                                           _NUM_GROUPS)
      for i, common_user_bucket in enumerate(common_user_buckets):
        # Parenthesised single argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Number users in common user bucket %s: %s'
              % (i, len(common_user_bucket)))

      experts_precision = experts.select_experts_precision(
          newsaholics.union(active_users), num_users, delta, _SIZE_EXPERTS,
          category)
      experts_fscore = experts.select_experts_fscore(len(target_news),
                                                     num_users,
                                                     delta, _SIZE_EXPERTS,
                                                     category)
      experts_ci = experts.select_experts_ci(num_users, delta, _SIZE_EXPERTS,
                                             category)
      super_experts = experts.select_super_experts(experts_precision,
                                                   experts_fscore,
                                                   experts_ci)

      log('Num experts (precision): %s' % len(experts_precision))
      log('Num experts (fscore): %s' % len(experts_fscore))
      log('Num experts (ci): %s' % len(experts_ci))

      log('Finding rankings with an %s hour delta.' % delta)
      (market_rankings, newsaholic_rankings,
       active_rankings,
       common_rankings) = basic_groups.get_rankings(delta, seeds, newsaholics,
                                                    active_users, category)
      (expert_precision_rankings, expert_fscore_rankings,
       expert_ci_rankings,
       expert_s_rankings) = experts.get_rankings(delta, seeds,
                                                 experts_precision,
                                                 experts_fscore,
                                                 experts_ci,
                                                 super_experts,
                                                 category)

      common_groups_rankings = common_user_groups.get_rankings(
          delta, seeds, common_user_buckets, category)

      num_votes_common = _count_votes(common_rankings)
      log('Num common_rankings: %s' % len(common_rankings))
      log('Num common votes: %s' % num_votes_common)
      num_votes_expert_precision = _count_votes(expert_precision_rankings)
      log('Num expert_precision rankings: %s' % len(expert_precision_rankings))
      log('Num expert_precision votes: %s' % num_votes_expert_precision)
      num_votes_expert_fscore = _count_votes(expert_fscore_rankings)
      log('Num expert_fscore rankings: %s' % len(expert_fscore_rankings))
      log('Num expert_fscore votes: %s' % num_votes_expert_fscore)
      num_votes_expert_ci = _count_votes(expert_ci_rankings)
      log('Num expert_ci rankings: %s' % len(expert_ci_rankings))
      log('Num expert_ci votes: %s' % num_votes_expert_ci)
      num_votes_buckets = []
      for i, common_group_rankings in enumerate(common_groups_rankings):
        num_votes = _count_votes(common_group_rankings)
        num_votes_buckets.append(num_votes)
        log('Num common rankings (%s buckets): %s'
            % (i, len(common_group_rankings)))
        # These are the common-bucket votes; the label used to read
        # "expert_ci votes" by copy-paste mistake.
        log('Num common votes (%s buckets): %s' % (i, num_votes))

      # Resolve the bucket size before opening the file so an empty bucket
      # list reports 0 instead of raising IndexError mid-write.
      users_per_bucket = (len(common_user_buckets[0])
                          if common_user_buckets else 0)

      with open('%suser_demographics_%s.txt'
                % (info_output_dir, run_params_str), 'w') as output_file:
        output_file.write('Number of Common Users: %s\n' % len(common_users))
        output_file.write('\n')
        output_file.write('Number of Precision Experts: %s\n'
                          % len(experts_precision))
        output_file.write('Number of F-Score Experts: %s\n'
                          % len(experts_fscore))
        output_file.write('Number of CI Experts: %s\n' % len(experts_ci))
        output_file.write('Number users per common user bucket: %s\n'
                          % users_per_bucket)
        output_file.write('Number of Precision and F-Score Experts: %s\n'
                          % len(experts_precision.intersection(experts_fscore)))
        output_file.write('Number of Precision and CI Experts: %s\n'
                          % len(experts_precision.intersection(experts_ci)))
        output_file.write('Number of F-Score and CI Experts: %s\n'
                          % len(experts_fscore.intersection(experts_ci)))
        output_file.write('\n')
        output_file.write('Number of Users (Total): %s\n'
                          % (len(newsaholics) + len(active_users)
                             + len(common_users)))
        output_file.write('\n')
        output_file.write('Number of votes by Common Users: %s\n'
                          % num_votes_common)
        output_file.write('\n')
        output_file.write('Number of votes by Expert (Precision) Users: %s\n'
                          % num_votes_expert_precision)
        output_file.write('Number of votes by Expert (fscore) Users: %s\n'
                          % num_votes_expert_fscore)
        output_file.write('Number of votes by Expert (ci) Users: %s\n'
                          % num_votes_expert_ci)
        output_file.write('Number of votes per bucket: %s\n'
                          % num_votes_buckets)
        output_file.write('\n')
        output_file.write('Total Number of Good News: %s\n' % len(target_news))

      _log_top_rankings('Ground Truth Top 5', gt_rankings)
      _log_top_rankings('Common Top 5', common_rankings)
      _log_top_rankings('Expert (Precision) Top 5', expert_precision_rankings)
      _log_top_rankings('Expert (fscore) Top 5', expert_fscore_rankings)
      _log_top_rankings('Expert (ci) Top 5', expert_ci_rankings)

      # The common precision/recall result is not used below; the call is
      # kept so the sequence of calls matches the original.
      common_precisions, common_recalls = calc_precision_recall(
          gt_rankings, common_rankings)
      (expert_p_precisions,
       expert_p_recalls) = calc_precision_recall(gt_rankings,
                                                 expert_precision_rankings)
      (expert_f_precisions,
       expert_f_recalls) = calc_precision_recall(gt_rankings,
                                                 expert_fscore_rankings)
      (expert_c_precisions,
       expert_c_recalls) = calc_precision_recall(gt_rankings,
                                                 expert_ci_rankings)

      common_group_ps = []
      common_group_rs = []
      for common_group_ranking in common_groups_rankings:
        common_group_p, common_group_r = calc_precision_recall(
            gt_rankings, common_group_ranking)
        common_group_ps.append(common_group_p)
        common_group_rs.append(common_group_r)

      log('Drawing common group model precision-recall graph...')
      common_user_groups.draw_precision_recall(common_group_ps, common_group_rs,
                                               expert_p_precisions, expert_p_recalls,
                                               expert_f_precisions, expert_f_recalls,
                                               expert_c_precisions, expert_c_recalls,
                                               run_params_str)

      log('Drawing common group model precision graph...')
      common_user_groups.draw_precision(common_group_ps, expert_p_precisions,
                                        expert_f_precisions, expert_c_precisions,
                                        run_params_str)


def _count_votes(rankings):
  """Returns the sum of the counts in an iterable of (url, count) pairs."""
  return sum(count for _, count in rankings)


def _log_top_rankings(title, rankings, limit=5):
  """Logs title, the first `limit` (url, count) entries, then a separator.

  Keyword Arguments:
  title -- Heading line logged before the entries.
  rankings -- A sliceable sequence of (url, count) pairs.
  limit -- Maximum number of entries to log (default 5).
  """
  log(title)
  for i, (url, count) in enumerate(rankings[:limit]):
    log('[%s] %s\t%s' % (i, url.strip(), count))
  log('-----------------------------------')

Python FileLog, ops Exemples