def significant_nei_utm_ids():
    """Plot every UTM id together with its significant neighbours.

    Iterates the records of ``f_significant_nei_utm_ids``. For each record
    that has at least one neighbour, the neighbours and the UTM id itself are
    plotted on a world map, a great circle is drawn from the UTM id to each
    neighbour, and the figure is saved as ``<lat>_<long>.png`` inside the
    folder derived from the current method id. Records without neighbours
    are skipped.
    """
    output_folder = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'/%s.png'
    records = FileIO.iterateJsonFromFile(f_significant_nei_utm_ids, remove_params_dict=True)
    for i, data in enumerate(records):
        utm_lat_long = UTMConverter.getLatLongUTMIdInLatLongForm(data['utm_id'])
        nei_utm_lat_longs = [
            UTMConverter.getLatLongUTMIdInLatLongForm(nei_utm_id)
            for nei_utm_id in data['nei_utm_ids']
        ]
        if not nei_utm_lat_longs:
            # Nothing to draw for a UTM id without neighbours.
            continue
        output_file = output_folder%('%s_%s'%(utm_lat_long))
        # Neighbours first, then the UTM id itself on top with a larger marker.
        plotPointsOnWorldMap(
            nei_utm_lat_longs,
            blueMarble=False,
            bkcolor='#CFCFCF',
            lw=0,
            color='#EA00FF',
            alpha=1.,
        )
        _, m = plotPointsOnWorldMap(
            [utm_lat_long],
            blueMarble=False,
            bkcolor='#CFCFCF',
            lw=0,
            color='#2BFF00',
            s=40,
            returnBaseMapObject=True,
            alpha=1.,
        )
        # Points are indexed [0], [1]; drawgreatcircle receives them swapped
        # (element 1 before element 0) for both end points.
        for nei_utm_lat_long in nei_utm_lat_longs:
            m.drawgreatcircle(
                utm_lat_long[1], utm_lat_long[0],
                nei_utm_lat_long[1], nei_utm_lat_long[0],
                color='#FFA600', lw=1.5, alpha=1.0,
            )
        print('Saving %s'%(i+1))
        savefig(output_file)
def mapper(self, key, hashtag_object):
    """Yield ``(hashtag, spatial_metrics)`` for one hashtag object.

    ``hashtag_object`` must provide ``'hashtag'`` and
    ``'ltuo_occ_time_and_occ_location'`` (a list of
    ``(occ_time, occ_location)`` tuples). Nothing is yielded when that list
    is empty. ``key`` is not used.

    The yielded dict holds the occurrence count, the interval id with the
    most occurrences, and the focus, entropy and radius of gyration computed
    over the hashtag's locations.
    """
    hashtag = hashtag_object['hashtag']
    occurrences = hashtag_object['ltuo_occ_time_and_occ_location']
    if not occurrences:
        return

    # Snap every occurrence time onto a TIME_UNIT_IN_SECONDS boundary.
    bucketed_occurrences = [
        (GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS), occ_location)
        for occ_time, occ_location in occurrences
    ]
    points = [
        UTMConverter.getLatLongUTMIdInLatLongForm(occ_location)
        for _, occ_location in occurrences
    ]

    # Occurrences per time interval, with interval ids relative to the
    # earliest interval.
    intervals = GeneralMethods.group_items_by(bucketed_occurrences, key=itemgetter(0))
    intervals.sort(key=itemgetter(0))
    first_time = intervals[0][0]
    ltuo_iid_and_occ_count = [
        ((interval_time-first_time)/TIME_UNIT_IN_SECONDS, len(items))
        for interval_time, items in intervals
    ]

    # Occurrences per location.
    locations = GeneralMethods.group_items_by(bucketed_occurrences, key=itemgetter(1))
    mf_location_to_occ_count = dict(
        (location, len(items)) for location, items in locations
    )

    yield hashtag, {
        'hashtag': hashtag,
        'num_of_occurrenes': len(occurrences),
        'peak_iid': max(ltuo_iid_and_occ_count, key=itemgetter(1))[0],
        'focus': focus(mf_location_to_occ_count),
        'entropy': entropy(mf_location_to_occ_count, as_bits=False),
        'spread': getRadiusOfGyration(points),
    }
def mapper(self, key, line):
    """Collect ``(occ_time, utm_id)`` pairs per hashtag from one input line.

    Nothing is ever yielded; results are appended to
    ``self.mf_hastag_to_ltuo_occ_time_and_occ_location``. ``key`` is not used.
    """
    # The unreachable yield only exists to make this function a generator.
    if False: yield
    for hashtag, (lat_long, occ_time) in iterateHashtagObjectInstances(line):
        utm_id = UTMConverter.getUTMIdInLatLongFormFromLatLong(
            lat_long[0], lat_long[1], accuracy=LOCATION_ACCURACY
        )
        self.mf_hastag_to_ltuo_occ_time_and_occ_location[hashtag].append(
            (occ_time, utm_id)
        )
def map_tweet_to_hashtag_object_at_varying_accuracies(self, key, line):
    """Record each hashtag occurrence once per accuracy in ``ACCURACIES``.

    Nothing is ever yielded; for every occurrence found in ``line`` and every
    accuracy, ``(occ_time, utm_id)`` is appended to
    ``self.mf_hastag_to_ltuo_occ_time_and_occ_utm_id[hashtag]``.
    ``key`` is not used.
    """
    # The unreachable yield only exists to make this function a generator.
    if False: yield
    for hashtag, (lat_long, occ_time) in iterateHashtagObjectInstances(line):
        latitude, longitude = lat_long[0], lat_long[1]
        for accuracy in ACCURACIES:
            utm_id = UTMConverter.getUTMIdInLatLongFormFromLatLong(
                latitude, longitude, accuracy=accuracy
            )
            self.mf_hastag_to_ltuo_occ_time_and_occ_utm_id[hashtag].append(
                (occ_time, utm_id)
            )
def mapper(self, hashtag, hashtag_object):
    """Yield per-interval locality statistics aligned to the hashtag's peak.

    ``hashtag_object['ltuo_occ_time_and_occ_location']`` is a list of
    ``(occ_time, occ_location)`` tuples; nothing is yielded when it is empty.
    ``hashtag`` is not used.

    For every time interval the generator yields
    ``(iid - peak_iid, stats)`` where ``stats`` is the list::

        [fraction of all occurrences that fall in this interval,
         entropy, focus, coverage of this interval,
         overall entropy  - interval entropy,
         overall focus    - interval focus,
         overall coverage - interval coverage]

    "focus" is element ``[1]`` of the value returned by ``focus()`` and
    "coverage" is ``getRadiusOfGyration`` of the interval's points.
    """
    def distance_from_overall_locality_stat(overall_stat, current_stat):
        return overall_stat-current_stat

    ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
    if not ltuo_occ_time_and_occ_location:
        return

    # Snap every occurrence time onto a TIME_UNIT_IN_SECONDS boundary.
    ltuo_intvl_time_and_occ_location = [
        (GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS), occ_location)
        for occ_time, occ_location in ltuo_occ_time_and_occ_location
    ]

    # Group by interval; interval ids are relative to the earliest interval.
    ltuo_intvl_time_and_items = GeneralMethods.group_items_by(
        ltuo_intvl_time_and_occ_location, key=itemgetter(0)
    )
    ltuo_intvl_time_and_items.sort(key=itemgetter(0))
    first_time = ltuo_intvl_time_and_items[0][0]
    ltuo_iid_and_tuo_interval_and_lids = [
        (
            (intvl_time-first_time)/TIME_UNIT_IN_SECONDS,
            (intvl_time, [item[1] for item in items]),
        )
        for intvl_time, items in ltuo_intvl_time_and_items
    ]

    # The peak is the interval holding the most occurrences.
    peak_tuo_iid_and_tuo_interval_and_lids = max(
        ltuo_iid_and_tuo_interval_and_lids,
        key=lambda entry: len(entry[1][1]),
    )
    peak_iid = peak_tuo_iid_and_tuo_interval_and_lids[0]

    # Locality statistics over the hashtag's whole lifetime.
    ltuo_location_and_items = GeneralMethods.group_items_by(
        ltuo_intvl_time_and_occ_location, key=itemgetter(1)
    )
    overall_mf_lid_to_occurrence_count = dict(
        (location, len(items)) for location, items in ltuo_location_and_items
    )
    overall_points = [
        UTMConverter.getLatLongUTMIdInLatLongForm(loc)
        for _, loc in ltuo_occ_time_and_occ_location
    ]
    overall_entropy = entropy(overall_mf_lid_to_occurrence_count, False)
    overall_focus = focus(overall_mf_lid_to_occurrence_count)[1]
    overall_coverage = getRadiusOfGyration(overall_points)

    total_occurrences = sum(
        len(lids) for _, (_, lids) in ltuo_iid_and_tuo_interval_and_lids
    )
    for iid, (_, lids) in ltuo_iid_and_tuo_interval_and_lids:
        mf_lid_to_occurrence_count = defaultdict(float)
        for lid in lids:
            mf_lid_to_occurrence_count[lid] += 1
        points = [UTMConverter.getLatLongUTMIdInLatLongForm(lid) for lid in lids]
        current_entropy = entropy(mf_lid_to_occurrence_count, False)
        current_focus = focus(mf_lid_to_occurrence_count)[1]
        current_coverage = getRadiusOfGyration(points)
        # Both operands are ints; force float division so the fraction is
        # not truncated to 0 under Python 2's classic integer division.
        occurrence_fraction = float(len(lids))/total_occurrences
        yield iid-peak_iid, [
            occurrence_fraction,
            current_entropy,
            current_focus,
            current_coverage,
            distance_from_overall_locality_stat(overall_entropy, current_entropy),
            distance_from_overall_locality_stat(overall_focus, current_focus),
            distance_from_overall_locality_stat(overall_coverage, current_coverage),
        ]
def utm_ids_on_map():
    """Plot all UTM ids on a world map, coloured by log(total_hashtag_count).

    Reads the records of ``f_hashtags_by_utm_id``, orders the points by
    ascending log count, plots them with the ``cool`` colour map and saves
    the figure to the ``.png`` path derived from the current method id.
    """
    output_file = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'.png'
    ltuo_point_and_total_hashtag_count = [
        (
            UTMConverter.getLatLongUTMIdInLatLongForm(utm_object['utm_id']),
            log(utm_object['total_hashtag_count']),
        )
        for utm_object in FileIO.iterateJsonFromFile(
            f_hashtags_by_utm_id, remove_params_dict=True
        )
    ]
    # Stable ascending sort on the log count.
    ltuo_point_and_total_hashtag_count.sort(key=itemgetter(1))
    points, total_hashtag_counts = zip(*ltuo_point_and_total_hashtag_count)
    plotPointsOnWorldMap(
        points,
        blueMarble=False,
        bkcolor='#CFCFCF',
        c=total_hashtag_counts,
        cmap=matplotlib.cm.cool,
        lw=0,
        alpha=1.,
    )
    savefig(output_file)
def mapper(self, key, line):
    """Collect approximated occurrence times per valid location.

    Nothing is ever yielded. Each occurrence in ``line`` is converted to a
    UTM id at accuracy 1000; if that id is in ``self.valid_locations`` the
    occurrence time, approximated to a 60*60 second unit, is appended to
    ``self.mf_location_to_occ_times[utm_id]``. ``key`` is not used.
    """
    # The unreachable yield only exists to make this function a generator.
    if False: yield
    for hashtag, (lat_long, occ_time) in iterateHashtagObjectInstances(line):
        utm_id = UTMConverter.getUTMIdFromLatLong(lat_long[0], lat_long[1], accuracy=1000)
        if utm_id not in self.valid_locations:
            continue
        approximated_time = GeneralMethods.approximateEpoch(occ_time, 60*60)
        self.mf_location_to_occ_times[utm_id].append(approximated_time)
def get_time_and_lat_long(t_and_lid):
    """Convert a ``(t, lid)`` pair into ``{'t': t, 'loc': <lat/long of lid>}``.

    ``t_and_lid`` is a single two-element sequence. It is unpacked in the
    body rather than in the signature because tuple parameters were removed
    from the language by PEP 3113; positional callers are unaffected.
    """
    t, lid = t_and_lid
    return {'t': t, 'loc': UTMConverter.getLatLongUTMIdInLatLongForm(lid)}
def _haversine_distance(self, location, neighbor_location):
    """Return the haversine distance between two UTM-id locations.

    Both ids are first converted with
    ``UTMConverter.getLatLongUTMIdInLatLongForm`` and the results are passed
    to ``getHaversineDistance``.
    """
    to_lat_long = UTMConverter.getLatLongUTMIdInLatLongForm
    return getHaversineDistance(to_lat_long(location), to_lat_long(neighbor_location))
def red_tuo_utm_id_and_hashtag_counts_to_accuracy_and_hashtag_dist(self, utm_id, hashtag_counts):
    """Yield ``(accuracy, total_count)`` for a sufficiently active UTM id.

    ``hashtag_counts`` is summed; nothing is yielded when the total is below
    ``MIN_HASHTAG_OCCURRENCES_PER_UTM_ID``. Otherwise the accuracy is read
    from ``utm_id`` via ``UTMConverter.getAccuracyFromUTMIdInLatLongForm``.
    """
    hashtags_dist = sum(hashtag_counts)
    if hashtags_dist < MIN_HASHTAG_OCCURRENCES_PER_UTM_ID:
        return
    yield UTMConverter.getAccuracyFromUTMIdInLatLongForm(utm_id), hashtags_dist