예제 #1
0
    def _divideAndCount(self, n):
        """Return the Laplace-smoothed share of photos per cell of an n*n grid.

        The event region is split into n*n subregions.  Every cell starts with
        a pseudo-count of 1 and gains 1 for each photo located inside it; the
        counts are finally divided by their grand total.

        Returns:
            A list of n*n floats, one per subregion in the order produced by
            ``divideRegions``.

        Raises:
            Exception: ('bad data') when one photo falls inside more than one
                subregion.
        """
        # The result is not used here; the call itself is kept in case it has
        # side effects.
        self.getPhotoNumber()
        cells = Region(self._event['region']).divideRegions(n, n)

        # Laplace smoothing: one pseudo-count per cell.
        counts = [1.0] * n * n
        total = n * n
        for photo in self._event['photos']:
            lat = photo['location']['latitude']
            lng = photo['location']['longitude']
            already_placed = False
            for index, cell in enumerate(cells):
                if not cell.insideRegion([lat, lng]):
                    continue
                counts[index] += 1.0
                total += 1
                if already_placed:
                    raise Exception('bad data')
                already_placed = True
        return [count / total for count in counts]
예제 #2
0
    def _divideAndCount(self, n):
        """Return a smoothed per-cell photo weight for an n*n grid.

        The event region is split into n*n subregions.  Every cell starts at
        1/n**2 and gains another 1/n**2 for each photo located inside it.

        NOTE(review): the weights are not renormalised, so they add up to
        1 + (number of counted photos) / n**2 rather than 1 -- confirm that
        the caller expects this.

        Returns:
            A list of n*n floats, one per subregion in the order produced by
            ``divideRegions``.

        Raises:
            Exception: ("bad data") when one photo falls inside more than one
                subregion.
        """
        # The result is not used here; the call itself is kept in case it has
        # side effects.
        self.getPhotoNumber()
        cells = Region(self._event["region"]).divideRegions(n, n)

        # Smoothing: every cell starts with one unit of weight.
        unit = 1.0 / n / n
        weights = [unit] * n * n

        for photo in self._event["photos"]:
            lat = photo["location"]["latitude"]
            lng = photo["location"]["longitude"]
            already_placed = False
            for index, cell in enumerate(cells):
                if not cell.insideRegion([lat, lng]):
                    continue
                weights[index] += unit
                if already_placed:
                    raise Exception("bad data")
                already_placed = True
        return weights
예제 #3
0
    def _divideAndCount(self, n):
        """Weight each cell of an n*n grid by the photos it contains.

        The event region is divided into n*n subregions.  Each cell is seeded
        with 1/n**2 (smoothing) and receives a further 1/n**2 per photo whose
        coordinates lie inside it.

        NOTE(review): no renormalisation happens, so the returned values sum
        to 1 + (number of counted photos) / n**2 -- verify against the caller.

        Returns:
            A list with one float per subregion, in ``divideRegions`` order.

        Raises:
            Exception: ('bad data') if a photo is inside two or more
                subregions.
        """
        # Return value unused; the call is retained in case it has side
        # effects.
        self.getPhotoNumber()
        grid_cells = Region(self._event['region']).divideRegions(n, n)

        share = 1.0 / n / n
        weights = [share] * n * n

        for photo in self._event['photos']:
            lat = photo['location']['latitude']
            lng = photo['location']['longitude']
            hit = False
            for position, grid_cell in enumerate(grid_cells):
                if grid_cell.insideRegion([lat, lng]):
                    weights[position] += share
                    if hit:
                        raise Exception('bad data')
                    hit = True
        return weights
예제 #4
0
    def _divideAndCount(self, n):
        """Return the Laplace-smoothed share of elements per cell of an n*n grid.

        The event region is split into n*n subregions.  Every cell starts with
        a pseudo-count of 1 and gains 1 for each element (taken from
        ``self._event[self._element_type]``) located inside it; the counts are
        finally divided by their grand total.

        Returns:
            A list of n*n floats, one per subregion in the order produced by
            ``divideRegions``.

        Raises:
            Exception: ("bad data") when one element falls inside more than
                one subregion.
        """
        # The result is not used here; the call itself is kept in case it has
        # side effects.
        self.getElementNumber()
        cells = Region(self._event["region"]).divideRegions(n, n)

        # Laplace smoothing: one pseudo-count per cell.
        counts = [1.0] * n * n
        total = n * n
        for element in self._event[self._element_type]:
            lat = element["location"]["latitude"]
            lng = element["location"]["longitude"]
            already_placed = False
            for index, cell in enumerate(cells):
                if not cell.insideRegion([lat, lng]):
                    continue
                counts[index] += 1.0
                total += 1
                if already_placed:
                    raise Exception("bad data")
                already_placed = True
        return [count / total for count in counts]
예제 #5
0
def buildAllCorpus(element_type='photos', time_interval_length=14, debug=False, paras=None):
    """Build one local corpus per filtered grid region.

    The bounding box of the matching config (InstagramConfig for 'photos',
    TwitterConfig for 'tweets') is divided into a 25 x 25 grid, the grid is
    passed through ``filterRegions``, and a ``Corpus`` is built for each
    remaining region over the window
    ``[now - time_interval_length * 3600 * 24, now]``, where ``now`` comes from
    ``tool.getCurrentStampUTC()``.

    Args:
        element_type: 'photos' or 'tweets'.
        time_interval_length: window length; multiplied by 3600 * 24 before
            being subtracted from ``now`` (i.e. days, if stamps are seconds).
        debug: when true, only the first region's corpus is built and that
            same object is stored under every other region key.
        paras: options forwarded to ``Corpus.buildCorpus``; defaults to a
            fresh empty dict per call.

    Returns:
        A dict mapping ``region.getKey()`` to that region's corpus.

    Raises:
        AssertionError: if ``element_type`` is not 'photos' or 'tweets'.
    """
    assert element_type in ['photos', 'tweets']

    # A dict literal as the default would be shared between calls.
    if paras is None:
        paras = {}

    if element_type == 'photos':
        config = InstagramConfig
    else:
        config = TwitterConfig

    coordinates = [config.min_lat, config.min_lng,
                   config.max_lat, config.max_lng]

    nyc = Region(coordinates)
    region_list = nyc.divideRegions(25, 25)
    region_list = nyc.filterRegions(region_list, test=True, n=25, m=25, element_type=element_type)

    # The window ends now and reaches back time_interval_length days.
    now = int(tool.getCurrentStampUTC())
    window_start = now - time_interval_length * 3600 * 24

    all_corpus = {}
    for num, region in enumerate(region_list, 1):
        # In debug mode only the first corpus is built; later regions reuse it
        # to speed up debugging.
        if not debug or num == 1:
            cor = Corpus()
            cor.buildCorpus(region, [window_start, now], element_type, paras)
        all_corpus[region.getKey()] = cor
        print('build corpus %d' % num)
    return all_corpus
예제 #6
0
def test():
    """Print the Instagram time series of each cell of a 5 x 5 grid.

    The grid covers the InstagramConfig photo bounding box.  Every series
    starts at stamp 1355765315 and ends 30 * 24 * 3600 later (30 days if the
    stamps are in seconds).
    """
    bounds = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    # Warning: DO NOT SET THIS BELOW 5 OR MEMORY OVERFLOW
    cells = Region(bounds).divideRegions(5, 5)

    start = 1355765315
    end = start + 30 * 24 * 3600
    for index in range(25):
        cell = cells[index]
        cell.display()
        series = InstagramTimeSeries(cell, start, end)
        print(series.buildTimeSeries())
예제 #7
0
def test():
    """Display each cell of a 5 x 5 grid and print its Instagram time series.

    The grid spans the InstagramConfig photo bounding box; each series runs
    from stamp 1355765315 to 1355765315 + 30 * 24 * 3600.
    """
    box = Region([
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ])
    # Warning: DO NOT SET THIS BELOW 5 OR MEMORY OVERFLOW
    grid = box.divideRegions(5, 5)

    window_start = 1355765315
    window_end = window_start + 30 * 24 * 3600
    for position in range(25):
        square = grid[position]
        square.display()
        print(InstagramTimeSeries(square, window_start, window_end).buildTimeSeries())
def test():
    """Print the Instagram time series of one hand-picked bounding box.

    A 5 x 5 grid is built over the InstagramConfig photo bounding box, but
    only its first cell is used: the cell is displayed, its bounds are
    overwritten with fixed coordinates, and the entries of its time series
    between stamps '1360519908' and '1365519908' are printed one per line.
    """
    bounds = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    # Warning: DO NOT SET THIS BELOW 5 OR MEMORY OVERFLOW
    cells = Region(bounds).divideRegions(5, 5)

    cell = cells[0]
    cell.display()
    # Replace the cell's own bounds with the box under test.
    overrides = (
        ('min_lat', 40.7329),
        ('min_lng', -73.9957),
        ('max_lat', 40.7383),
        ('max_lng', -73.9844),
    )
    for key, value in overrides:
        cell._region[key] = value

    series = InstagramTimeSeries(cell, str(1360519908), str(1365519908))
    for entry in series.buildTimeSeries():
        print(entry)
예제 #9
0
파일: plot_plaza.py 프로젝트: oeddyo/plaza
 def getRegions(self):
     plaza_squares = Region(self.coordinates)
     plaza_squares = plaza_squares.divideRegions(25,25)
     valid_squares = []
     ei = ElementInterface('citybeat_production', 'photos', 'photos')
     bad_number = 0
     all_number = 0
     for region in plaza_squares:
         all_number += 1
         mid_point = region.getMidCoordinates()
         point = Point( mid_point )
         if not point.within( self.valid_poly ):
             print 'not valid ' 
             continue
         cnt = 0
         bad_number += 1
         for p in ei.rangeQuery(region):
             cnt += 1
         valid_squares.append( (region, cnt) )
         print 'cnt = ',cnt 
     self.plaza_squares = valid_squares
     print "all number = ",all_number, " bad_number = ",bad_number
예제 #10
0
def test():
    """Print the Twitter time series of one hand-picked bounding box.

    A 25 x 25 grid is built over the InstagramConfig photo bounding box and
    passed through ``filterRegions``.  The first unfiltered cell then has its
    bounds overwritten with fixed coordinates, is displayed, and the entries
    of its time series between stamps '1364829908' and '1365693908' are
    printed one per line.
    """
    alarm_region_size = 25
    huge_region = Region([
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ])
    regions = huge_region.divideRegions(alarm_region_size, alarm_region_size)
    # The filtered list is not used below; the call is retained as is.
    huge_region.filterRegions(region_list=regions, test=True,
                              n=alarm_region_size, m=alarm_region_size)

    cell = regions[0]
    # Replace the cell's own bounds with the box under test.
    overrides = (
        ('min_lat', 40.7329),
        ('min_lng', -73.9957),
        ('max_lat', 40.7383),
        ('max_lng', -73.9844),
    )
    for key, value in overrides:
        cell._region[key] = value
    cell.display()

    series = TwitterTimeSeries(cell, '1364829908', '1365693908')
    for entry in series.buildTimeSeries():
        print(entry)
예제 #11
0
from region import Region
import sys
import pymongo
import os
import datetime


# Bounding box handed to Region.
coordinates = {
    "min_lat": 40.7485,
    "min_lng": -74.0140,
    "max_lat": 40.7930,
    "max_lng": -73.9530,
}
reg = Region(coordinates)
print("Regions Created")

# Split the box into a 25 x 25 grid of sub-regions.
div = reg.divideRegions(25, 25)
print("Regions divided")

print(reg.toJSON())

# Disabled snippet, kept as an unused string literal: it would store the four
# corner points (p1..p4) of every sub-region in its _region mapping.
"""
for i in div:
    i._region["p1lat"] = i._region["max_lat"]
    i._region["p1lng"] = i._region["min_lng"]

    i._region["p2lat"] = i._region["max_lat"]
    i._region["p2lng"] = i._region["max_lng"]

    i._region["p3lat"] = i._region["min_lat"]
    i._region["p3lng"] = i._region["max_lng"]

    i._region["p4lat"] = i._region["min_lat"]
    i._region["p4lng"] = i._region["min_lng"]
"""

예제 #12
0
파일: plaza_data.py 프로젝트: oeddyo/plaza
    def getRegions(self):
        plaza_squares = Region(self.coordinates)
        plaza_squares = plaza_squares.divideRegions(25,25)
        valid_squares = []
        ei = ElementInterface('citybeat_production', 'photos', 'photos')

        non_local_users = set([u.strip() for u in open(self.file_name_prefix+'all_users.txt','r').readlines()])
        local_users = set([u.strip() for u in open(self.file_name_prefix+'local_users.txt','r').readlines()])
        f_local = file(self.file_name_prefix+'local_distribution.csv', 'w')
        f_non_local = file(self.file_name_prefix+'non_local_distribution.csv','w')
        f_merge = file(self.file_name_prefix+'merged.csv','w')

        bad_number = 0
        all_number = 0
        all_photo_number = 0
        
        user_photos_cnt = {}


        for region in plaza_squares:
            all_number += 1
            mid_point = region.getMidCoordinates()
            point = Point( mid_point )
            if not point.within( self.valid_poly ):
                continue
            cnt = 0
            bad_number += 1

            for p in ei.rangeQuery(region):
                un = p['user']['username']
                if un in user_photos_cnt:
                    user_photos_cnt[un] += 1
                else:
                    user_photos_cnt[un] = 0

                if p['user']['username'] in local_users:
                    f_w = f_local
                    f_merge.write(str(p['location']['latitude'])+","+str(p['location']['longitude'])+','+p['images']['standard_resolution']['url']+',0'+'\n')
                elif p['user']['username'] in non_local_users:
                    if random.uniform(0,1)>0.9999:
                        continue
                    else:
                        try:
                            f_merge.write(str(p['location']['latitude'])+","+str(p['location']['longitude'])+','+p['images']['standard_resolution']['url']+',1'+'\n')
                        except:
                            continue
                    f_w = f_non_local
                try:
                    f_w.write(str(p['location']['latitude'])+","+str(p['location']['longitude'])+','+p['images']['standard_resolution']['url']+'\n')
                except:
                    continue
                cnt += 1
            if cnt>5000:
                region.display()
                continue
            valid_squares.append( (region, cnt) )
            print 'cnt = ',cnt
            all_photo_number+=cnt
        self.plaza_squares = valid_squares
        print "all number = ",all_number, " bad_number = ",bad_number
        print 'all photos = ',all_photo_number
        
        larger_than_two = 0
        for u in user_photos_cnt:
            if user_photos_cnt[u]>=2:
                larger_than_two += 1
                print 'larger than 2 = ',larger_than_two