コード例 #1
0
ファイル: plots.py プロジェクト: kykamath/hashtags_and_geo
 def significant_nei_utm_ids():
     '''Save one world-map image per UTM id that has significant neighbors.

     Records are read from f_significant_nei_utm_ids. For every record with
     at least one neighbor, the neighbors and the record's own point are
     plotted, a great circle is drawn from the point to each neighbor, and
     the figure is saved as '<a>_<b>.png' (the two components of the
     record's point) inside the analysis folder for this method.
     Records without neighbors are skipped.
     '''
     output_folder = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'/%s.png'
     # Styling shared by both scatter layers.
     shared_style = dict(blueMarble=False, bkcolor='#CFCFCF', lw=0, alpha=1.)
     records = FileIO.iterateJsonFromFile(f_significant_nei_utm_ids, remove_params_dict=True)
     for record_index, record in enumerate(records):
         center = UTMConverter.getLatLongUTMIdInLatLongForm(record['utm_id'])
         neighbors = [UTMConverter.getLatLongUTMIdInLatLongForm(nei_utm_id)
                      for nei_utm_id in record['nei_utm_ids']]
         if not neighbors:
             continue
         output_file = output_folder%('%s_%s'%center)
         plotPointsOnWorldMap(neighbors, color='#EA00FF', **shared_style)
         _, m = plotPointsOnWorldMap([center],
                                     color='#2BFF00',
                                     s=40,
                                     returnBaseMapObject=True,
                                     **shared_style)
         for neighbor in neighbors:
             # Index 1 is passed before index 0 for both end points;
             # presumably m is a Basemap, whose drawgreatcircle expects
             # longitude before latitude -- TODO confirm.
             m.drawgreatcircle(center[1], center[0],
                               neighbor[1], neighbor[0],
                               color='#FFA600', lw=1.5, alpha=1.0)
         # The counter is the 1-based position of the record in the input,
         # including records that were skipped.
         print('Saving %s'%(record_index+1))
         savefig(output_file)
コード例 #2
0
 def mapper(self, key, hashtag_object):
     '''Yield (hashtag, spatial_metrics) for a hashtag that has occurrences.

     hashtag_object must provide 'hashtag' and
     'ltuo_occ_time_and_occ_location', a list of (occ_time, occ_location)
     pairs. Nothing is yielded when that list is empty.

     The metrics dict holds the occurrence count, the interval id with the
     most occurrences ('peak_iid', counted from the first interval), and
     the focus, entropy and radius of gyration over all occurrences.
     '''
     hashtag = hashtag_object['hashtag']
     occurrences = hashtag_object['ltuo_occ_time_and_occ_location']
     if not occurrences:
         return
     # Snap every occurrence time onto its TIME_UNIT_IN_SECONDS interval.
     bucketed_occurrences = [
         (GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS), occ_location)
         for occ_time, occ_location in occurrences
     ]
     points = [UTMConverter.getLatLongUTMIdInLatLongForm(occ_location)
               for _, occ_location in occurrences]
     grouped_by_interval = GeneralMethods.group_items_by(bucketed_occurrences, key=itemgetter(0))
     grouped_by_interval.sort(key=itemgetter(0))
     first_time = grouped_by_interval[0][0]
     # Interval id = number of time units elapsed since the first interval.
     ltuo_iid_and_occ_count = [
         ((interval_time-first_time)/TIME_UNIT_IN_SECONDS, len(interval_items))
         for interval_time, interval_items in grouped_by_interval
     ]
     grouped_by_location = GeneralMethods.group_items_by(bucketed_occurrences, key=itemgetter(1))
     mf_location_to_occ_count = dict(
         (location, len(location_items))
         for location, location_items in grouped_by_location
     )
     yield hashtag, {
         'hashtag': hashtag,
         'num_of_occurrenes': len(occurrences),
         'peak_iid': max(ltuo_iid_and_occ_count, key=itemgetter(1))[0],
         'focus': focus(mf_location_to_occ_count),
         'entropy': entropy(mf_location_to_occ_count, as_bits=False),
         'spread': getRadiusOfGyration(points),
     }
コード例 #3
0
 def mapper(self, key, line):
     '''Collect (occ_time, location) pairs per hashtag found in line.

     Nothing is emitted; each occurrence is appended to
     self.mf_hastag_to_ltuo_occ_time_and_occ_location[hashtag], with the
     location converted through
     UTMConverter.getUTMIdInLatLongFormFromLatLong at LOCATION_ACCURACY.
     '''
     # The unreachable yield turns this function into a generator even
     # though it never produces a value.
     if False: yield
     for hashtag, occurrence in iterateHashtagObjectInstances(line):
         lat_long, occ_time = occurrence
         utm_location = UTMConverter.getUTMIdInLatLongFormFromLatLong(
             lat_long[0], lat_long[1], accuracy=LOCATION_ACCURACY
         )
         self.mf_hastag_to_ltuo_occ_time_and_occ_location[hashtag].append((occ_time, utm_location))
コード例 #4
0
 def map_tweet_to_hashtag_object_at_varying_accuracies(self, key, line):
     '''Collect each hashtag occurrence once per accuracy in ACCURACIES.

     Nothing is emitted; for every occurrence found in line and every
     accuracy, (occ_time, utm_id) is appended to
     self.mf_hastag_to_ltuo_occ_time_and_occ_utm_id[hashtag].
     '''
     # The unreachable yield turns this function into a generator even
     # though it never produces a value.
     if False: yield
     for hashtag, occurrence in iterateHashtagObjectInstances(line):
         lat_long, occ_time = occurrence
         for accuracy in ACCURACIES:
             utm_id = UTMConverter.getUTMIdInLatLongFormFromLatLong(
                 lat_long[0], lat_long[1], accuracy=accuracy
             )
             self.mf_hastag_to_ltuo_occ_time_and_occ_utm_id[hashtag].append((occ_time, utm_id))
コード例 #5
0
 def mapper(self, hashtag, hashtag_object):
     '''Yield locality statistics per time interval, keyed relative to the peak.

     hashtag_object must provide 'ltuo_occ_time_and_occ_location', a list
     of (occ_time, occ_location) pairs. Nothing is yielded when that list
     is empty.

     For every interval the generator yields (iid - peak_iid, stats) where
     peak_iid is the id of the interval with the most occurrences and
     stats is the list:
         [share of all occurrences that fall in this interval,
          interval entropy, interval focus, interval coverage,
          overall entropy  - interval entropy,
          overall focus    - interval focus,
          overall coverage - interval coverage]
     Coverage is the radius of gyration of the occurrence points.
     '''
     def distance_from_overall_locality_stat(overall_stat, current_stat): return overall_stat-current_stat
     ltuo_occ_time_and_occ_location = hashtag_object['ltuo_occ_time_and_occ_location']
     if not ltuo_occ_time_and_occ_location:
         return
     # Snap every occurrence time onto its TIME_UNIT_IN_SECONDS interval.
     ltuo_intvl_time_and_occ_location = [
         (GeneralMethods.approximateEpoch(occ_time, TIME_UNIT_IN_SECONDS), occ_location)
         for occ_time, occ_location in ltuo_occ_time_and_occ_location
     ]
     ltuo_intvl_time_and_items =\
         GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(0))
     ltuo_intvl_time_and_items.sort(key=itemgetter(0))
     first_time = ltuo_intvl_time_and_items[0][0]
     # Each entry: (interval id, (interval time, location ids in interval)).
     ltuo_iid_and_tuo_interval_and_lids = [
         (
             (intvl_time-first_time)/TIME_UNIT_IN_SECONDS,
             (intvl_time, [intvl_item[1] for intvl_item in intvl_items])
         )
         for intvl_time, intvl_items in ltuo_intvl_time_and_items
     ]
     # The peak is the interval holding the largest number of occurrences.
     peak_tuo_iid_and_tuo_interval_and_lids = \
         max(ltuo_iid_and_tuo_interval_and_lids, key=lambda entry: len(entry[1][1]))
     peak_iid = peak_tuo_iid_and_tuo_interval_and_lids[0]
     ltuo_location_and_items =\
         GeneralMethods.group_items_by(ltuo_intvl_time_and_occ_location, key=itemgetter(1))
     overall_mf_lid_to_occurrence_count = dict(
         (location, len(location_items)) for location, location_items in ltuo_location_and_items
     )
     overall_points =\
         [UTMConverter.getLatLongUTMIdInLatLongForm(loc) for _, loc in ltuo_occ_time_and_occ_location]
     overall_entropy = entropy(overall_mf_lid_to_occurrence_count, False)
     # Only element [1] of focus()'s result is used; presumably it is the
     # numeric focus value -- TODO confirm against focus().
     overall_focus = focus(overall_mf_lid_to_occurrence_count)[1]
     overall_coverage = getRadiusOfGyration(overall_points)
     total_occurrences = sum(len(entry[1][1]) for entry in ltuo_iid_and_tuo_interval_and_lids)
     for iid, (_, lids) in ltuo_iid_and_tuo_interval_and_lids:
         mf_lid_to_occurrence_count = defaultdict(float)
         for lid in lids: mf_lid_to_occurrence_count[lid]+=1
         points = [UTMConverter.getLatLongUTMIdInLatLongForm(lid) for lid in lids]
         current_entropy = entropy(mf_lid_to_occurrence_count, False)
         current_focus = focus(mf_lid_to_occurrence_count)[1]
         current_coverage = getRadiusOfGyration(points)
         # float() forces true division: dividing the two int counts
         # truncates to 0 under Python 2 for every interval that does not
         # contain all occurrences.
         occurrence_share = float(len(lids))/total_occurrences
         yield iid-peak_iid, [occurrence_share, current_entropy, current_focus, current_coverage,
                                 distance_from_overall_locality_stat(overall_entropy, current_entropy),
                                 distance_from_overall_locality_stat(overall_focus, current_focus),
                                 distance_from_overall_locality_stat(overall_coverage, current_coverage),]
コード例 #6
0
ファイル: plots.py プロジェクト: kykamath/hashtags_and_geo
 def utm_ids_on_map():
     '''Plot every utm id on a world map, colored by log(total_hashtag_count).

     Records are read from f_hashtags_by_utm_id and plotted in ascending
     order of their logged count; the figure is saved as a .png named
     after this method in the analysis folder.
     '''
     output_file = fld_google_drive_data_analysis%GeneralMethods.get_method_id()+'.png'
     ltuo_point_and_log_count = [
         (
             UTMConverter.getLatLongUTMIdInLatLongForm(utm_object['utm_id']),
             log(utm_object['total_hashtag_count'])
         )
         for utm_object in FileIO.iterateJsonFromFile(f_hashtags_by_utm_id, remove_params_dict=True)
     ]
     # Order by the logged count, then split into parallel sequences.
     ltuo_point_and_log_count.sort(key=itemgetter(1))
     points, total_hashtag_counts = zip(*ltuo_point_and_log_count)
     plot_options = dict(blueMarble=False,
                         bkcolor='#CFCFCF',
                         c=total_hashtag_counts,
                         cmap=matplotlib.cm.cool,
                         lw=0,
                         alpha=1.)
     plotPointsOnWorldMap(points, **plot_options)
     savefig(output_file)
コード例 #7
0
 def mapper(self, key, line):
     '''Collect approximated occurrence times per valid location.

     Nothing is emitted; for each hashtag occurrence in line whose UTM id
     (computed with accuracy=1000) is in self.valid_locations, the
     occurrence time approximated with a 60*60 unit is appended to
     self.mf_location_to_occ_times[utm_id].
     '''
     # The unreachable yield turns this function into a generator even
     # though it never produces a value.
     if False: yield
     for hashtag, (lat_long, occ_time) in iterateHashtagObjectInstances(line):
         utm_id = UTMConverter.getUTMIdFromLatLong(lat_long[0], lat_long[1], accuracy=1000)
         if utm_id not in self.valid_locations:
             continue
         approximated_time = GeneralMethods.approximateEpoch(occ_time, 60*60)
         self.mf_location_to_occ_times[utm_id].append(approximated_time)
コード例 #8
0
 def get_time_and_lat_long(t_and_lid):
     '''Return {'t': t, 'loc': <lat-long form of lid>} for a (t, lid) pair.

     t_and_lid is a single two-element sequence (t, lid), passed
     positionally exactly as before. The pair is unpacked in the body
     rather than in the signature because tuple parameters were removed
     from the language (PEP 3113) and hide the argument from introspection.
     '''
     t, lid = t_and_lid
     return {'t': t, 'loc': UTMConverter.getLatLongUTMIdInLatLongForm(lid)}
コード例 #9
0
 def _haversine_distance(self, location, neighbor_location):
     '''Return getHaversineDistance between two locations.

     Both arguments are converted with
     UTMConverter.getLatLongUTMIdInLatLongForm before the distance is
     computed.
     '''
     to_lat_long = UTMConverter.getLatLongUTMIdInLatLongForm
     return getHaversineDistance(to_lat_long(location), to_lat_long(neighbor_location))
コード例 #10
0
 def red_tuo_utm_id_and_hashtag_counts_to_accuracy_and_hashtag_dist(self, utm_id, hashtag_counts):
     '''Yield (accuracy, summed hashtag count) for sufficiently busy utm ids.

     hashtag_counts is summed; nothing is yielded unless the sum reaches
     MIN_HASHTAG_OCCURRENCES_PER_UTM_ID. The accuracy is derived from
     utm_id via UTMConverter.getAccuracyFromUTMIdInLatLongForm.
     '''
     total_hashtag_count = sum(hashtag_counts)
     if not (total_hashtag_count >= MIN_HASHTAG_OCCURRENCES_PER_UTM_ID):
         return
     yield UTMConverter.getAccuracyFromUTMIdInLatLongForm(utm_id), total_hashtag_count