def run():
    """Print the coordinates of photos stored for a fixed one-week window.

    The bounding box configured in ``InstagramConfig`` is split with
    ``divideRegions(25, 25)``; every resulting region is queried in the
    ``photos`` collection of ``tmp_citybeat`` and each hit is printed as
    ``<latitude> , <longitude>``.  Photos without usable location data are
    skipped.
    """
    coordinates = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    huge_region = Region(coordinates)
    alarm_region_size = 25
    regions = huge_region.divideRegions(alarm_region_size, alarm_region_size)
    # Filtering through huge_region.filterRegions(regions, test=True) is
    # currently disabled, so every region is scanned.
    print("all regions %d" % len(regions))

    pi = PhotoInterface("tmp_citybeat", "photos")

    # The window is the same for every region: the 7 days that end at a fixed
    # epoch timestamp.
    # NOTE(review): an older comment said this "sets it back to Dec 1st" -
    # confirm that the hard-coded timestamp is still the intended one.
    end_of_time = 1364571565
    start_of_time = end_of_time - 7 * 24 * 3600
    period_start = str(start_of_time)
    period_end = str(end_of_time)

    for region in regions:
        res = pi.rangeQuery(region, [period_start, period_end])
        for r in res:
            # Read both values before printing so that a photo with missing
            # location data produces no partial output line.
            try:
                location = r["location"]
                latitude = location["latitude"]
                longitude = location["longitude"]
            except (KeyError, TypeError):
                # Missing key, or "location" is not a mapping (e.g. None).
                continue
            print("%s , %s" % (latitude, longitude))
def run():
    """Print the coordinates of photos stored for a fixed one-week window.

    The bounding box configured in ``InstagramConfig`` is split with
    ``divideRegions(25, 25)``; every resulting region is queried in the
    ``photos`` collection of ``tmp_citybeat`` and each hit is printed as
    ``<latitude> , <longitude>``.  Photos without usable location data are
    skipped.
    """
    coordinates = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    huge_region = Region(coordinates)
    alarm_region_size = 25
    regions = huge_region.divideRegions(alarm_region_size, alarm_region_size)
    # Filtering through huge_region.filterRegions(regions, test=True) is
    # currently disabled, so every region is scanned.
    print('all regions %d' % len(regions))

    pi = PhotoInterface('tmp_citybeat', 'photos')

    # The window is the same for every region: the 7 days that end at a fixed
    # epoch timestamp.
    # NOTE(review): an older comment said this "sets it back to Dec 1st" -
    # confirm that the hard-coded timestamp is still the intended one.
    end_of_time = 1364571565
    start_of_time = end_of_time - 7 * 24 * 3600
    period_start = str(start_of_time)
    period_end = str(end_of_time)

    for region in regions:
        res = pi.rangeQuery(region, [period_start, period_end])
        for r in res:
            # Read both values before printing so that a photo with missing
            # location data produces no partial output line.
            try:
                location = r['location']
                latitude = location['latitude']
                longitude = location['longitude']
            except (KeyError, TypeError):
                # Missing key, or 'location' is not a mapping (e.g. None).
                continue
            print('%s , %s' % (latitude, longitude))
def save_mogo(res, mid_lat, mid_lng):
    """Tag each photo with a midpoint and store it via PhotoInterface.

    Args:
        res: iterable of mutable photo mappings; each must have an ``id`` key.
        mid_lat: value written to every photo under ``mid_lat``.
        mid_lng: value written to every photo under ``mid_lng``.

    Every photo is modified in place and then passed to ``saveDocument``.
    Progress is logged at WARNING level.
    """
    store = PhotoInterface()
    for photo in res:
        type_message = "type = "+str(type(photo))
        logging.warning(type_message)

        photo['mid_lat'] = mid_lat
        photo['mid_lng'] = mid_lng
        # Instagram's internal id doubles as the document key so that
        # duplicates are filtered out.
        photo['_id'] = photo['id']

        logging.warning('inserting photo to mongodb')
        store.saveDocument(photo)

        # NOTE(review): assumed to run once per photo (inside the loop).
        stored_message = "r = "+str(photo)
        logging.warning(stored_message)
def _getFiftenMiniutesPhotos(self):
    """Load this region's photos from the 15 minutes before ``self.cur_time``.

    Queries ``PhotoInterface("citybeat", "photos")`` (presumably database and
    collection - verify against PhotoInterface) for the window
    ``[self.cur_time - 900, self.cur_time]``, orders the hits by
    ``created_time`` and passes them through ``processAsPeopleCount``.

    Side effects:
        self.photos: the value returned by ``processAsPeopleCount``.
        self.current_value: the length of that value.
    """
    window_seconds = 15 * 60
    pi = PhotoInterface("citybeat", "photos")
    cursor = pi.rangeQuery(
        self.region,
        (str(self.cur_time - window_seconds), str(self.cur_time)),
    )
    # sorted() accepts any iterable, so the cursor needs no manual copy into
    # a list first.
    ordered = sorted(cursor, key=lambda k: k["created_time"])
    photos = processAsPeopleCount(ordered)
    self.current_value = len(photos)
    self.photos = photos
def _getFiftenMiniutesPhotos(self):
    """Load this region's photos from the 15 minutes before ``self.cur_time``.

    Queries ``PhotoInterface('tmp_citybeat', 'photos')`` (presumably database
    and collection - verify against PhotoInterface) for the window
    ``[self.cur_time - 900, self.cur_time]``, prints how many photos were
    found, orders them by ``created_time`` and passes them through
    ``processAsPeopleCount``.

    Side effects:
        self.photos: the value returned by ``processAsPeopleCount``.
        self.current_value: the length of that value.
    """
    window_seconds = 15 * 60
    pi = PhotoInterface('tmp_citybeat', 'photos')
    cursor = pi.rangeQuery(
        self.region,
        (str(self.cur_time - window_seconds), str(self.cur_time)),
    )
    # Materialise the cursor once; the count below needs a real sequence.
    photos = list(cursor)
    # NOTE(review): assumed to report the total once, after all photos are
    # read.  Output matches the former two-item print statement.
    print('%s %d' % ('in fiften minutes there are ', len(photos)))
    ordered = sorted(photos, key=lambda k: k['created_time'])
    photos = processAsPeopleCount(ordered)
    self.current_value = len(photos)
    self.photos = photos
def _getFiftenMiniutesPhotos(self):
    """Load this region's photos from the 15 minutes before ``self.cur_time``.

    Queries ``PhotoInterface('citybeat', 'photos_no_duplicate')`` (presumably
    database and collection - verify against PhotoInterface) for the window
    ``[self.cur_time - 900, self.cur_time]``, orders the hits by
    ``created_time`` and passes them through ``processAsPeopleCount``.

    Side effects:
        self.photos: the value returned by ``processAsPeopleCount``.
        self.current_value: the length of that value.
    """
    window_seconds = 15 * 60
    pi = PhotoInterface('citybeat', 'photos_no_duplicate')
    cursor = pi.rangeQuery(
        self.region,
        (str(self.cur_time - window_seconds), str(self.cur_time)),
    )
    # sorted() accepts any iterable, so the cursor needs no manual copy into
    # a list first.
    ordered = sorted(cursor, key=lambda k: k['created_time'])
    photos = processAsPeopleCount(ordered)
    self.current_value = len(photos)
    self.photos = photos
def run():
    """Read the configured photo bounding box and create a PhotoInterface.

    Both results are only bound to local names; nothing is returned.
    """
    bounding_box = [
        InstagramConfig.photo_min_lat,
        InstagramConfig.photo_min_lng,
        InstagramConfig.photo_max_lat,
        InstagramConfig.photo_max_lng,
    ]
    photo_interface = PhotoInterface()
from utility.config import InstagramConfig
from utility.photo_interface import PhotoInterface

# Count the photos stored for one fixed bounding box during the 14 days that
# end at a fixed epoch timestamp, and print that count.
end = 1365644367
start = end - 14*3600*24
region = {'min_lat': 40.75419436,
          'max_lng': -73.978088200000002,
          'min_lng': -73.986094480000006,
          'max_lat': 40.759499640000001}

pi = PhotoInterface()
pi.setDB('citybeat_production')
pi.setCollection('photos')
cursor = pi.rangeQuery(region, (str(start), str(end)))

print('here')

# enumerate() keeps the running total; cnt stays 0 for an empty cursor.
# NOTE(review): the total is assumed to be printed once, after the loop.
cnt = 0
for cnt, p in enumerate(cursor, 1):
    pass
print(cnt)
from utility.config import InstagramConfig
from utility.photo_interface import PhotoInterface

# Count the photos stored for one fixed bounding box during the 14 days that
# end at a fixed epoch timestamp, and print that count.
end = 1365644367
start = end - 14 * 3600 * 24
region = {
    'min_lat': 40.75419436,
    'max_lng': -73.978088200000002,
    'min_lng': -73.986094480000006,
    'max_lat': 40.759499640000001,
}

pi = PhotoInterface()
pi.setDB('citybeat_production')
pi.setCollection('photos')
cursor = pi.rangeQuery(region, (str(start), str(end)))

print('here')

# enumerate() keeps the running total; cnt stays 0 for an empty cursor.
# NOTE(review): the total is assumed to be printed once, after the loop.
cnt = 0
for cnt, p in enumerate(cursor, 1):
    pass
print(cnt)
def __init__(self):
    """Create a TweetInterface and a PhotoInterface and keep them on self."""
    # Both interfaces are constructed without arguments.
    self._tweet_interface = TweetInterface()
    self._photo_interface = PhotoInterface()
class Stats(object):
    """Collect volume statistics about recently stored tweets and photos.

    Every time window is anchored at ``getCurrentStampUTC()`` and measured in
    seconds.  Counts come from ``rangeQuery(...).count()`` on the tweet and
    photo interfaces.
    """

    def __init__(self):
        # Data-access objects shared by the queries below.
        self._tweet_interface = TweetInterface()
        self._photo_interface = PhotoInterface()

    def getTweetAndPhotoStats(self):
        """Build the complete statistics document.

        Returns:
            dict with the keys ``photo_basic_count``, ``tweet_basic_count``,
            ``created_time`` (current UTC stamp as a string),
            ``tweet_top_mentions``, ``most_popular_tweet`` and
            ``tweet_vs_retweet``.
        """
        photo_basic_count = {}
        photo_basic_count['last_minute'] = self._getCurrentCountStats('photos')
        photo_basic_count['last_24_hour'] = self._get24HoursCountStats('photos')

        tweet_basic_count = {}
        tweet_basic_count['last_minute'] = self._getCurrentCountStats('tweets')
        tweet_basic_count['last_24_hour'] = self._get24HoursCountStats('tweets')

        most_popular_tweet, tweet_vs_retweet = self._extractMostPopularTweet()

        stats = {}
        stats['photo_basic_count'] = photo_basic_count
        stats['tweet_basic_count'] = tweet_basic_count
        stats['created_time'] = str(getCurrentStampUTC())
        stats['tweet_top_mentions'] = self._extractTweetTopMentions()
        stats['most_popular_tweet'] = most_popular_tweet
        stats['tweet_vs_retweet'] = tweet_vs_retweet
        return stats

    def _getCurrentCountStats(self, type):
        """Return ``{'count': ..., 'delta': ...}`` for the latest minute.

        Args:
            type: ``'photos'`` or ``'tweets'``.
        """
        assert type in ['photos', 'tweets']
        if type == 'photos':
            count, delta = self._extractPhotoCount()
        else:
            count, delta = self._extractTweetCount()
        return {'count': count, 'delta': delta}

    def _get24HoursCountStats(self, type):
        """Return hourly counts for the last 24 hours and for a week earlier.

        Args:
            type: ``'photos'`` or ``'tweets'``.

        Returns:
            dict with ``current_count`` and ``last_week_count``, each a list
            of 24 hourly counts.
        """
        assert type in ['photos', 'tweets']
        stats = {}
        stats['current_count'] = self._extract24HoursCountsStats(type=type)
        stats['last_week_count'] = self._extract24HoursCountsStats(
            past_week=True, type=type)
        return stats

    def _countWithDelta(self, interface, latency):
        """Count one minute of documents and compare it with a baseline.

        The measured minute ends ``latency`` seconds before now.  The baseline
        is the per-minute average over the 20 minutes immediately before that
        minute.

        Args:
            interface: object whose ``rangeQuery(period=[begin, end])`` result
                offers ``count()``.
            latency: seconds to skip at the recent end of the window.

        Returns:
            ``[current_count, delta]``.  ``delta`` is the relative change
            against the baseline, or ``stats_config.NO_BASE_LINE`` when the
            baseline is zero.
        """
        now = int(getCurrentStampUTC())
        window_end = now - latency
        window_start = window_end - 60
        current_count = interface.rangeQuery(
            period=[window_start, window_end]).count()
        baseline_count = interface.rangeQuery(
            period=[window_start - 60 * 20, window_start]).count() / 20.0
        if baseline_count == 0.0:
            return [current_count, stats_config.NO_BASE_LINE]
        return [current_count,
                (current_count - baseline_count) / baseline_count]

    def _extractTweetCount(self):
        """Return ``[count, delta]`` for tweets, allowing 5 seconds latency."""
        return self._countWithDelta(self._tweet_interface, 5)

    def _extractPhotoCount(self):
        """Return ``[count, delta]`` for photos, skipping the last 4 minutes."""
        return self._countWithDelta(self._photo_interface, 4 * 60)

    def _extract24HoursCountsStats(self, past_week=False, type='tweets'):
        """Return 24 hourly document counts, most recent hour first.

        Args:
            past_week: when true, every hour is shifted back by 7 * 24 hours.
            type: ``'tweets'`` selects the tweet interface; any other value
                selects the photo interface.
        """
        now = int(getCurrentStampUTC())
        offset = 7 * 24 if past_week else 0
        if type == 'tweets':
            interface = self._tweet_interface
        else:
            interface = self._photo_interface

        count_during_past_24_hours = []
        for hour in range(24):
            end_time = now - 3600 * (hour + offset)
            begin_time = end_time - 3600
            count_during_past_24_hours.append(
                interface.rangeQuery(period=[begin_time, end_time]).count())
        return count_during_past_24_hours

    @staticmethod
    def _rankMentions(texts, k):
        """Return the ``k`` most frequent ``@mentions`` found in ``texts``.

        Args:
            texts: iterable of tweet texts.
            k: maximum number of entries to return.

        Returns:
            list of ``{'user_name': ..., 'count': ...}`` dicts ordered by
            descending count.
        """
        # An '@' only starts a mention at the beginning of the text or after a
        # character outside [a-zA-Z0-9-_.], so the '@' inside an e-mail address
        # is skipped.  The name must start with a letter and have at least two
        # characters.
        twitter_username_re = re.compile(
            r'(?<=^|(?<=[^a-zA-Z0-9-_\.]))@([A-Za-z]+[A-Za-z0-9_-]+)')
        users = {}
        for text in texts:
            for mention in twitter_username_re.findall(text):
                users[mention] = users.get(mention, 0) + 1
        ranked = sorted(users.items(), key=operator.itemgetter(1),
                        reverse=True)
        return [{'user_name': name, 'count': count}
                for name, count in ranked[:max(k, 0)]]

    def _extractTweetTopMentions(self, k=10):
        """Return the ``k`` most mentioned users of the last 60 minutes.

        Args:
            k: maximum number of users to return (default 10).

        Returns:
            list of ``{'user_name': ..., 'count': ...}`` dicts ordered by
            descending mention count.
        """
        now = int(getCurrentStampUTC())
        time_span = 60 * 60
        end_time = now
        begin_time = end_time - time_span
        cur = self._tweet_interface.rangeQuery(period=[begin_time, end_time],
                                               fields=['text'])
        return self._rankMentions((tweet['text'] for tweet in cur), k)

    @staticmethod
    def _summarizeTweets(tweets):
        """Find the most repeated tweet text and the single/repeated split.

        Args:
            tweets: iterable of mappings with ``text`` and
                ``user.screen_name``.

        Returns:
            ``[most_popular_tweet, tweets_count]``.  ``most_popular_tweet``
            has the keys ``user_name``, ``text`` and ``count``; with no tweets
            these are ``''``, ``''`` and ``-1``.  ``tweets_count`` has the keys
            ``tweet_percentage`` and ``retweet_percentage``; both are ``0.0``
            with no tweets.
        """
        occurrences = {}
        most_popular_tweet_text = ''
        max_retweet_count = -1
        user_name = ''
        for tweet in tweets:
            text = tweet['text']
            count = occurrences.get(text, 0) + 1
            occurrences[text] = count
            if count > max_retweet_count:
                max_retweet_count = count
                most_popular_tweet_text = text
                user_name = tweet['user']['screen_name']

        # A text seen once counts as a single tweet; every occurrence of a
        # text seen more than once counts as a retweet.
        single_tweet_count = 0
        retweet_count = 0
        for value in occurrences.values():
            if value == 1:
                single_tweet_count += 1
            else:
                retweet_count += value

        most_popular_tweet = {
            'user_name': user_name,
            'text': most_popular_tweet_text,
            'count': max_retweet_count,
        }

        total = single_tweet_count + retweet_count
        tweets_count = {}
        if total:
            tweets_count['tweet_percentage'] = 1.0 * single_tweet_count / total
            tweets_count['retweet_percentage'] = 1.0 * retweet_count / total
        else:
            # No tweets in the window: report zero shares rather than
            # dividing by zero.
            tweets_count['tweet_percentage'] = 0.0
            tweets_count['retweet_percentage'] = 0.0
        return [most_popular_tweet, tweets_count]

    def _extractMostPopularTweet(self):
        """Summarize the tweets of the last 60 minutes.

        Reads from the collection named by
        ``TwitterConfig.extended_tweet_collection``.

        Returns:
            ``[most_popular_tweet, tweets_count]`` as produced by
            ``_summarizeTweets``.
        """
        ti = TweetInterface(collection=TwitterConfig.extended_tweet_collection)
        now = int(getCurrentStampUTC())
        time_span = 60 * 60
        end_time = now
        begin_time = end_time - time_span
        cursor = ti.rangeQuery(period=[begin_time, end_time],
                               fields=['text', 'user.screen_name'])
        return self._summarizeTweets(cursor)