def reducer2(self, utm_id, it_utm_id_and_hashtags):
        """Compare the hashtags of ``utm_id`` with those of its larger-id neighbors.

        ``it_utm_id_and_hashtags`` yields ``(neighbor_utm_id, neighbor_hashtags)``
        pairs. The pair whose id equals ``utm_id`` supplies this location's own
        hashtags; pairs with a larger id are kept as neighbors, and pairs with a
        smaller id are ignored.

        For every kept neighbor whose hashtag overlap (common / union) is at
        least 0.10, a 0/1 pattern with one ``1`` per common hashtag and one ``0``
        per remaining hashtag is passed to
        ``MonteCarloSimulation.mean_probability`` together with a random 0/1
        pattern of the same length. This is repeated three times and the
        results are averaged.

        Yields:
            ``('', record)`` for each neighbor whose averaged value is at most
            0.05. ``record`` holds ``utm_id``, ``neighbor_utm_id``,
            ``mean_probability`` and ``num_common_hashtags``.
        """
        min_overlap_ratio = 0.10
        max_mean_probability = 0.05
        num_simulation_runs = 3

        own_hashtags = None
        larger_neighbors = []
        for neighbor_utm_id, neighbor_hashtags in it_utm_id_and_hashtags:
            if neighbor_utm_id == utm_id:
                own_hashtags = set(neighbor_hashtags)
            elif utm_id < neighbor_utm_id:
                larger_neighbors.append((neighbor_utm_id, set(neighbor_hashtags)))

        # Nothing to compare against when this location's own hashtags are
        # missing or empty. This also guarantees the union below is non-empty.
        if not own_hashtags:
            return

        for neighbor_utm_id, neighbor_hashtags in larger_neighbors:
            # Keep both counts as integers: they are used as list lengths, and
            # range()/sequence repetition reject floats.
            num_common_hashtags = len(own_hashtags & neighbor_hashtags)
            total_hashtags = len(own_hashtags | neighbor_hashtags)
            if float(num_common_hashtags) / total_hashtags < min_overlap_ratio:
                continue

            observed_hashtag_pattern = (
                [1] * num_common_hashtags
                + [0] * (total_hashtags - num_common_hashtags)
            )
            mean_probability = np.mean([
                MonteCarloSimulation.mean_probability(
                    MonteCarloSimulation.probability_of_data_extracted_from_same_sample,
                    observed_hashtag_pattern,
                    # Fresh random 0/1 baseline for every simulation run.
                    [random.choice((0, 1)) for _ in range(total_hashtags)]
                )
                for _ in range(num_simulation_runs)
            ])
            if mean_probability <= max_mean_probability:
                yield '', {
                    'utm_id': utm_id,
                    'neighbor_utm_id': neighbor_utm_id,
                    'mean_probability': mean_probability,
                    # Emitted as a float to keep the record format unchanged.
                    'num_common_hashtags': float(num_common_hashtags)
                }
 def reducer_with_monte_carlo_simulation(self, location_pair, it_propagation_statuses):
     """Emit one Monte Carlo summary per threshold that ``location_pair`` exceeds.

     All status sequences in ``it_propagation_statuses`` are flattened into a
     single list. The thresholds in
     ``ImpactOfUsingLocationsToPredict.MIN_COMMON_HASHTAGS`` are then visited
     in order. While the number of statuses is strictly greater than the
     threshold, the statuses are passed to
     ``MonteCarloSimulation.mean_probability`` together with an equally long
     list drawn at random from STATUS_BEFORE / STATUS_AFTER, and a record is
     yielded keyed by that threshold.

     Iteration stops at the first threshold that is not exceeded.
     NOTE(review): this presumably relies on the thresholds being listed in
     ascending order -- confirm against the constant's definition.
     """
     flattened_statuses = list(chain.from_iterable(it_propagation_statuses))
     num_statuses = len(flattened_statuses)
     for min_common_hashtag in ImpactOfUsingLocationsToPredict.MIN_COMMON_HASHTAGS:
         if num_statuses <= min_common_hashtag:
             break
         status_choices = [
             ImpactOfUsingLocationsToPredict.STATUS_BEFORE,
             ImpactOfUsingLocationsToPredict.STATUS_AFTER,
         ]
         # Random baseline with one status per observed status.
         random_statuses = []
         for _ in range(num_statuses):
             random_statuses.append(random.sample(status_choices, 1)[0])
         mean_probability = MonteCarloSimulation.mean_probability(
             MonteCarloSimulation.probability_of_data_extracted_from_same_sample,
             flattened_statuses,
             random_statuses
         )
         summary = {
             'location_pair': location_pair,
             'mean_probability': mean_probability,
             'len_propagation_statuses': num_statuses,
             # The key name is historical; the value is the mean of the statuses.
             'propagation_statuses': np.mean(flattened_statuses)
         }
         yield min_common_hashtag, summary