def buildAllCorpus(element_type='photos', time_interval_length=14, debug=False, paras=None):
    """Build one Corpus per filtered region and return them keyed by region.

    Args:
        element_type: 'photos' (bounding box taken from InstagramConfig) or
            'tweets' (bounding box taken from TwitterConfig).
        time_interval_length: Length of the look-back window in days. The
            window handed to Corpus.buildCorpus is
            [now - time_interval_length * 86400, now].
        debug: When true, only the first region's corpus is built; the
            remaining regions are skipped to speed up debugging (they are
            still counted in the progress output).
        paras: Extra parameters forwarded as-is to Corpus.buildCorpus.
            Defaults to a fresh empty dict for every call.

    Returns:
        A dict mapping region.getKey() to the Corpus built for that region.

    Raises:
        AssertionError: if element_type is neither 'photos' nor 'tweets'.
    """
    # Kept as an assert so callers observe the same exception type as before.
    assert element_type in ('photos', 'tweets')

    # A `{}` default would be a single dict shared by every call; anything
    # buildCorpus stored in it would leak into later calls.
    if paras is None:
        paras = {}

    config = InstagramConfig if element_type == 'photos' else TwitterConfig
    coordinates = [config.min_lat, config.min_lng, config.max_lat, config.max_lng]

    nyc = Region(coordinates)
    region_list = nyc.divideRegions(25, 25)
    region_list = nyc.filterRegions(region_list, test=True, n=25, m=25,
                                    element_type=element_type)

    # NOTE(review): presumably a UTC epoch timestamp in seconds -- confirm
    # against tool.getCurrentStampUTC.
    now = int(tool.getCurrentStampUTC())
    window_start = now - time_interval_length * 3600 * 24

    all_corpus = {}
    for num, region in enumerate(region_list, 1):
        # In debug mode only the first region is processed.
        if not debug or num == 1:
            cor = Corpus()
            # A new list per call, so regions never share one window object.
            cor.buildCorpus(region, [window_start, now], element_type, paras)
            all_corpus[region.getKey()] = cor
        # Single-argument parenthesised form prints identically on Python 2 and 3.
        print('build corpus %d' % num)
    return all_corpus
def test():
    """Manual smoke test: print a Twitter time series for one fixed region.

    Divides the InstagramConfig photo bounding box into a 25 x 25 grid,
    overwrites the first cell's bounds with hard-coded coordinates, displays
    that region, and prints every entry returned by
    TwitterTimeSeries.buildTimeSeries for the fixed timestamp range.
    """
    bounding_box = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    huge_region = Region(bounding_box)
    alarm_region_size = 25

    regions = huge_region.divideRegions(25, 25)
    # The filtered list is never used below; the call is kept in case
    # filterRegions has side effects.
    huge_region.filterRegions(region_list=regions, test=True,
                              n=alarm_region_size, m=alarm_region_size)

    # Only the first grid cell is exercised.
    test_region = regions[0]

    # Replace the cell's bounds with a fixed box, in the original key order.
    fixed_bounds = (
        ('min_lat', 40.7329),
        ('min_lng', -73.9957),
        ('max_lat', 40.7383),
        ('max_lng', -73.9844),
    )
    for bound_name, bound_value in fixed_bounds:
        test_region._region[bound_name] = bound_value
    test_region.display()

    series_builder = TwitterTimeSeries(test_region, '1364829908', '1365693908')
    series = series_builder.buildTimeSeries()
    for entry in series:
        print(entry)