def _divideAndCount(self, n):
    """Return the smoothed share of event photos in each cell of an n*n grid.

    The event region is divided into ``n * n`` subregions. Every cell starts
    with a pseudo-count of 1 (Laplace smoothing) and gains 1 for each photo
    located inside it. The counts are then divided by their overall total,
    giving the per-cell values used for the entropy computation.

    Returns a list with one float per subregion, indexed in the order
    returned by ``divideRegions``.

    Raises ``Exception('bad data')`` if one photo lies inside more than one
    subregion.
    """
    # The value is not used below; the call itself is kept unchanged.
    photo_number = self.getPhotoNumber()

    cells = Region(self._event['region']).divideRegions(n, n)

    # Laplacian smoothed: one pseudo-count per cell, included in the total.
    counts = [1.0] * n * n
    total = n * n

    for photo in self._event['photos']:
        lat = photo['location']['latitude']
        lng = photo['location']['longitude']
        already_placed = False
        for idx, cell in enumerate(cells):
            if not cell.insideRegion([lat, lng]):
                continue
            counts[idx] += 1.0
            total += 1
            if already_placed:
                # A second matching cell means the subregions overlap here.
                raise Exception('bad data')
            already_placed = True

    return [counts[idx] / total for idx in range(n * n)]
def _divideAndCount(self, n):
    """Weight each cell of an n*n grid by the event photos located in it.

    The event region is divided into ``n * n`` subregions. Each cell starts
    at ``1 / (n * n)`` (the Laplace-smoothing term) and gains another
    ``1 / (n * n)`` for every photo inside it, so a cell's value is
    ``(1 + photos in cell) / (number of cells)``. The values are not
    rescaled afterwards.

    Returns a list with one float per subregion, indexed in the order
    returned by ``divideRegions``.

    Raises ``Exception("bad data")`` if one photo lies inside more than one
    subregion.
    """
    # The value is not used below; the call itself is kept unchanged.
    photo_number = self.getPhotoNumber()

    region = Region(self._event["region"])
    subregions = region.divideRegions(n, n)

    # Laplacian smoothed
    unit = 1.0 / n / n
    weights = [unit] * n * n

    for photo in self._event["photos"]:
        lat = photo["location"]["latitude"]
        lng = photo["location"]["longitude"]
        owner = None  # index of the first subregion containing this photo
        for position, subregion in enumerate(subregions):
            if subregion.insideRegion([lat, lng]):
                weights[position] += unit
                if owner is not None:
                    raise Exception("bad data")
                owner = position

    return weights
def _divideAndCount(self, n):
    """Compute a per-cell photo weight over an n*n split of the event region.

    Each of the ``n * n`` subregions starts with ``1 / (n * n)`` (Laplace
    smoothing) and receives an additional ``1 / (n * n)`` per photo found
    inside it. The list is returned as accumulated, without normalisation.

    Returns one float per subregion, in ``divideRegions`` order.

    Raises ``Exception('bad data')`` when a photo is inside two subregions.
    """
    # Result unused in this method; the call is retained as-is.
    photo_number = self.getPhotoNumber()

    subregions = Region(self._event['region']).divideRegions(n, n)

    # Laplacian smoothed
    step = 1.0 / n / n
    shares = [step] * n * n

    photos = self._event['photos']
    for photo in photos:
        lat = photo['location']['latitude']
        lng = photo['location']['longitude']
        hits = 0
        for slot, subregion in enumerate(subregions):
            if subregion.insideRegion([lat, lng]):
                shares[slot] += step
                hits += 1
                if hits > 1:
                    # The same photo matched a second subregion.
                    raise Exception('bad data')

    return shares
def _divideAndCount(self, n):
    """Return the smoothed share of event elements in each cell of an n*n grid.

    The elements are read from ``self._event[self._element_type]``. Every
    one of the ``n * n`` subregions starts with a pseudo-count of 1 (Laplace
    smoothing) and gains 1 per element located inside it; all counts are
    finally divided by their total.

    Returns a list with one float per subregion, in ``divideRegions`` order.

    Raises ``Exception("bad data")`` if an element lies inside more than one
    subregion.
    """
    # Not used below; the call is kept exactly as in the original.
    element_number = self.getElementNumber()

    grid = Region(self._event["region"]).divideRegions(n, n)

    # Laplacian smoothed
    tallies = [1.0] * n * n
    denominator = n * n

    for element in self._event[self._element_type]:
        lat = element["location"]["latitude"]
        lng = element["location"]["longitude"]
        seen = False
        for slot, cell in enumerate(grid):
            if cell.insideRegion([lat, lng]):
                tallies[slot] += 1.0
                denominator += 1
                if seen:
                    raise Exception("bad data")
                seen = True

    # Normalise in place by the smoothed total.
    for slot in range(n * n):
        tallies[slot] /= denominator
    return tallies
def buildAllCorpus(element_type='photos', time_interval_length=14, debug=False, paras=None):
    """Build one ``Corpus`` per filtered grid region and return them by key.

    Args:
        element_type: ``'photos'`` (bounds taken from ``InstagramConfig``) or
            ``'tweets'`` (bounds taken from ``TwitterConfig``).
        time_interval_length: length of the look-back window in days; each
            corpus covers ``[now - time_interval_length days, now]``.
        debug: when true, only the first region's corpus is built, to speed
            up debugging.
        paras: extra parameters forwarded to ``Corpus.buildCorpus``.
            ``None`` (the default) is replaced by a fresh empty dict on each
            call, so no dict is shared between calls.

    Returns:
        dict mapping ``region.getKey()`` to the ``Corpus`` built for it.
    """
    assert element_type in ['photos', 'tweets']
    if paras is None:
        paras = {}

    config = InstagramConfig if element_type == 'photos' else TwitterConfig
    coordinates = [config.min_lat, config.min_lng, config.max_lat, config.max_lng]
    nyc = Region(coordinates)
    region_list = nyc.divideRegions(25, 25)
    region_list = nyc.filterRegions(region_list, test=True, n=25, m=25,
                                    element_type=element_type)

    now = int(tool.getCurrentStampUTC())
    window_start = now - time_interval_length * 3600 * 24

    all_corpus = {}
    num = 0
    for region in region_list:
        if debug and num > 0:
            # Debug mode needs a single corpus; nothing else happens per region.
            break
        cor = Corpus()
        cor.buildCorpus(region, [window_start, now], element_type, paras)
        all_corpus[region.getKey()] = cor
        num += 1
        print('build corpus %d' % (num))
    return all_corpus
def test():
    """Print the Instagram time series of the first 25 cells of a 5x5 grid.

    The grid covers the photo bounding box from ``InstagramConfig``; every
    series spans 30 days starting at timestamp 1355765315.
    """
    bounds = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    # Warning: DO NOT SET THIS BELOW 5 OR MEMORY OVERFLOW
    cells = Region(bounds).divideRegions(5, 5)

    for index in range(25):
        cell = cells[index]
        cell.display()
        series = InstagramTimeSeries(cell, 1355765315, 1355765315 + 30 * 24 * 3600)
        print(series.buildTimeSeries())
def test():
    """Build and print a 30-day Instagram time series for 25 grid cells.

    The photo bounding box from ``InstagramConfig`` is divided 5x5 and the
    cells at indices 0..24 are displayed and processed one after another.
    """
    huge_region = Region([
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ])
    # Warning: DO NOT SET THIS BELOW 5 OR MEMORY OVERFLOW
    regions = huge_region.divideRegions(5, 5)

    position = 0
    while position < 25:
        current = regions[position]
        current.display()
        builder = InstagramTimeSeries(current, 1355765315,
                                      1355765315 + 30 * 24 * 3600)
        print(builder.buildTimeSeries())
        position += 1
def test():
    """Print the Instagram time series for one hard-coded bounding box.

    A grid cell is taken from a 5x5 split of the ``InstagramConfig`` photo
    bounding box, displayed, and then its ``_region`` bounds are overwritten
    with fixed coordinates before the time series is built and printed.
    """
    bounds = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    # Warning: DO NOT SET THIS BELOW 5 OR MEMORY OVERFLOW
    cells = Region(bounds).divideRegions(5, 5)

    fixed_box = (
        ('min_lat', 40.7329),
        ('min_lng', -73.9957),
        ('max_lat', 40.7383),
        ('max_lng', -73.9844),
    )
    for index in range(25):
        cell = cells[index]
        cell.display()
        for key, value in fixed_box:
            cell._region[key] = value
        series = InstagramTimeSeries(cell, str(1360519908), str(1365519908))
        series = series.buildTimeSeries()
        for entry in series:
            print(entry)
        # NOTE(review): read as stopping after the first region; confirm the
        # break belongs to this outer loop and not to the printing loop.
        break
def getRegions(self):
    """Store the grid squares inside ``self.valid_poly`` with their photo counts.

    ``self.coordinates`` is divided into a 25x25 grid. A square is kept when
    its mid point lies within ``self.valid_poly``; for each kept square the
    items returned by ``ei.rangeQuery`` are counted. The resulting list of
    ``(region, count)`` tuples is stored in ``self.plaza_squares``.
    """
    grid = Region(self.coordinates).divideRegions(25, 25)
    kept = []
    ei = ElementInterface('citybeat_production', 'photos', 'photos')

    # NOTE(review): bad_number is incremented for squares that PASS the
    # polygon test, despite its name and printed label; confirm the intent.
    bad_number = 0
    all_number = 0
    for square in grid:
        all_number += 1
        centre = Point(square.getMidCoordinates())
        if not centre.within(self.valid_poly):
            print('not valid ')
            continue
        bad_number += 1
        cnt = sum(1 for _ in ei.rangeQuery(square))
        kept.append((square, cnt))
        # Joined with single spaces to match `print a, b` statement output.
        print(' '.join(['cnt = ', str(cnt)]))

    self.plaza_squares = kept
    print(' '.join(["all number = ", str(all_number),
                    " bad_number = ", str(bad_number)]))
def test():
    """Print every entry of the Twitter time series for one hard-coded box.

    The first cell of a 25x25 split of the ``InstagramConfig`` photo bounding
    box has its ``_region`` bounds overwritten with fixed coordinates, is
    displayed, and is then used to build a ``TwitterTimeSeries`` between the
    timestamps '1364829908' and '1365693908'.
    """
    alarm_region_size = 25
    huge_region = Region([
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ])
    regions = huge_region.divideRegions(alarm_region_size, alarm_region_size)
    # The filtered list is not used below; the call is kept unchanged.
    filtered_regions = huge_region.filterRegions(region_list=regions,
                                                 test=True,
                                                 n=alarm_region_size,
                                                 m=alarm_region_size)

    # Only the first cell is processed.
    target = regions[0]
    box = target._region
    box['min_lat'] = 40.7329
    box['min_lng'] = -73.9957
    box['max_lat'] = 40.7383
    box['max_lng'] = -73.9844
    target.display()

    series = TwitterTimeSeries(target, '1364829908', '1365693908').buildTimeSeries()
    for entry in series:
        print(entry)
from region import Region
import sys
import pymongo
import os
import datetime

# Bounding box that is wrapped in a Region and divided into a 25x25 grid.
coordinates = {
    "min_lat": 40.7485,
    "min_lng": -74.0140,
    "max_lat": 40.7930,
    "max_lng": -73.9530,
}

reg = Region(coordinates)
print("Regions Created")

div = reg.divideRegions(25, 25)
print("Regions divided")

# Only the undivided region is printed; `div` is not used further here.
print(reg.toJSON())

# Disabled code, kept as a bare string literal: it would copy each cell's
# corner coordinates into p1..p4 keys.
"""
for i in div:
    i._region["p1lat"] = i._region["max_lat"]
    i._region["p1lng"] = i._region["min_lng"]
    i._region["p2lat"] = i._region["max_lat"]
    i._region["p2lng"] = i._region["max_lng"]
    i._region["p3lat"] = i._region["min_lat"]
    i._region["p3lng"] = i._region["max_lng"]
    i._region["p4lat"] = i._region["min_lat"]
    i._region["p4lng"] = i._region["min_lng"]
"""
def getRegions(self):
    """Collect grid squares inside ``self.valid_poly`` and dump their photos to CSV.

    ``self.coordinates`` is divided into a 25x25 grid. For every square whose
    mid point lies within ``self.valid_poly``, the photos returned by
    ``ei.rangeQuery`` are written as ``latitude,longitude,url`` rows:

    * users listed in ``local_users.txt`` go to ``local_distribution.csv``
      and, with a trailing ``,0``, to ``merged.csv``;
    * other users listed in ``all_users.txt`` go to
      ``non_local_distribution.csv`` and, with a trailing ``,1``, to
      ``merged.csv`` (roughly 0.01% of them are dropped at random);
    * photos of users in neither list are skipped. Previously such a photo
      was written to whichever file the preceding photo had used.

    All file names are prefixed with ``self.file_name_prefix``. Squares with
    more than 5000 written photos are displayed and left out; the remaining
    ``(region, count)`` tuples are stored in ``self.plaza_squares``. Summary
    counters are printed at the end.
    """
    def csv_row(photo):
        # "lat,lng,url" part shared by all three output files.
        return (str(photo['location']['latitude']) + ","
                + str(photo['location']['longitude']) + ','
                + photo['images']['standard_resolution']['url'])

    prefix = self.file_name_prefix
    plaza_squares = Region(self.coordinates).divideRegions(25, 25)
    valid_squares = []
    ei = ElementInterface('citybeat_production', 'photos', 'photos')

    # Context managers close the handles, which the original never did.
    with open(prefix + 'all_users.txt', 'r') as users_file:
        non_local_users = set(u.strip() for u in users_file)
    with open(prefix + 'local_users.txt', 'r') as users_file:
        local_users = set(u.strip() for u in users_file)

    # NOTE(review): bad_number counts squares that PASS the polygon test,
    # despite its name and printed label; confirm the intent.
    bad_number = 0
    all_number = 0
    all_photo_number = 0
    user_photos_cnt = {}

    with open(prefix + 'local_distribution.csv', 'w') as f_local, \
            open(prefix + 'non_local_distribution.csv', 'w') as f_non_local, \
            open(prefix + 'merged.csv', 'w') as f_merge:
        for region in plaza_squares:
            all_number += 1
            point = Point(region.getMidCoordinates())
            if not point.within(self.valid_poly):
                continue
            cnt = 0
            bad_number += 1
            for p in ei.rangeQuery(region):
                un = p['user']['username']
                # The tally starts at 0 on first sight, so it holds
                # (photos seen - 1) for each user.
                if un in user_photos_cnt:
                    user_photos_cnt[un] += 1
                else:
                    user_photos_cnt[un] = 0

                if un in local_users:
                    f_w = f_local
                    f_merge.write(csv_row(p) + ',0' + '\n')
                elif un in non_local_users:
                    if random.uniform(0, 1) > 0.9999:
                        continue
                    try:
                        f_merge.write(csv_row(p) + ',1' + '\n')
                    except Exception:
                        # Best effort: skip photos that cannot be written.
                        continue
                    f_w = f_non_local
                else:
                    # Unclassified user: there is no target file for it.
                    continue

                try:
                    f_w.write(csv_row(p) + '\n')
                except Exception:
                    continue
                cnt += 1

            if cnt > 5000:
                region.display()
                continue
            valid_squares.append((region, cnt))
            # Joined with single spaces to match `print a, b` statement output.
            print(' '.join(['cnt = ', str(cnt)]))
            all_photo_number += cnt

    self.plaza_squares = valid_squares
    print(' '.join(["all number = ", str(all_number),
                    " bad_number = ", str(bad_number)]))
    print(' '.join(['all photos = ', str(all_photo_number)]))

    # With the zero-based tally, >= 2 selects users with at least 3 photos.
    larger_than_two = sum(1 for tally in user_photos_cnt.values() if tally >= 2)
    # NOTE(review): the label literal was split across a line break in the
    # reviewed text; confirm no extra character belongs in it.
    print(' '.join(['larger than 2 = ', str(larger_than_two)]))