def update_group_posts(self, crawler, gid):
    active_posts = list(Post.objects.filter(closed=False, group=gid))
    # Subtract 10 seconds: storing post.date fetched from VK in the database
    # occasionally introduces a rounding/conversion error of 1-2 seconds.
    timestamp = time.mktime(get_earliest_post_time(active_posts).timetuple()) - 10

    try:
        vk_posts = list(crawler.get_posts_from_group("-" + gid, timestamp))
    except FailedRequestError:
        print "failed to fetch posts for group " + str(gid)
        return

    # Index the fetched VK posts by their string id.
    found_posts_ids = {}
    for p in vk_posts:
        found_posts_ids[str(p['id'])] = p

    # Cleansing: posts that are no longer present on VK are treated as probable
    # spam and removed; the rest are paired with their fresh VK counterpart.
    posts_to_update = []
    for p in active_posts:
        if p.pid not in found_posts_ids:
            print "Deleting probably spam post " + p.pid
            p.delete()
        else:
            posts_to_update.append((p, found_posts_ids[p.pid]))

    try:
        comments_and_likes = crawler.get_comments_and_likes_for_posts(
            [vk_post for (_, vk_post) in posts_to_update], "-" + gid)
    except FailedRequestError:
        print "failed to fetch comments and likes for posts of group " + str(gid)
        return

    for (db_post, vk_post) in posts_to_update:
        comments = comments_and_likes[vk_post['id']]['comments']
        self.update_post(db_post, vk_post, comments)
        self.update_user_activity_for_post(
            db_post, comments, comments_and_likes[vk_post['id']]['likes'])
def scan_auditory_activity(self, crawler, active_posts, gid):
    uids = {}
    active_posts_ids = [p.pid for p in active_posts]
    # Subtract 10 seconds to compensate for the occasional 1-2 second
    # rounding/conversion error on stored post dates (see update_group_posts).
    min_time = time.mktime(get_earliest_post_time(active_posts).timetuple()) - 10
    vk_posts = list(crawler.get_posts_from_group("-" + gid, min_time))
    active_vk_posts = [p for p in vk_posts if str(p['id']) in active_posts_ids]
    comments_and_likes = crawler.get_comments_and_likes_for_posts(active_vk_posts, "-" + gid).values()

    # Collect the ids of every user who commented on or liked an active post.
    comments_user_ids = [str(c['uid']) for entry in comments_and_likes for c in entry['comments']]
    likes_user_ids = [str(uid) for entry in comments_and_likes for uid in entry['likes']]
    total_ids = comments_user_ids + likes_user_ids

    # Count how many actions (comments + likes) each user performed.
    for uid in total_ids:
        if uid not in uids:
            uids[uid] = 0
        uids[uid] += 1

    # users_1: users with at least one action; users_3: users with three or more.
    result = {'users_1': 0, 'users_3': 0}
    for uid in uids:
        if uids[uid] >= 3:
            result['users_3'] += 1
        result['users_1'] += 1

    try:
        self.analyze_demogeo(gid,
                             list(crawler.get_profiles(list(uids.keys()))),
                             'active', crawler)
    except (FailedRequestError, DatabaseError) as e:
        print "failed to analyze demogeo for active audience of group " + str(gid) + ": " + str(e)
    return result
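
# A minimal standalone sketch (an illustration, not part of the class above and
# not referenced elsewhere): the per-user activity tally and the users_1/users_3
# summary from scan_auditory_activity expressed with collections.Counter. It
# assumes the same input shape as the values returned by
# crawler.get_comments_and_likes_for_posts(...): dicts with a 'comments' list of
# {'uid': ...} entries and a 'likes' list of user ids.
def count_active_users(comments_and_likes):
    from collections import Counter
    counts = Counter()
    for entry in comments_and_likes:
        counts.update(str(c['uid']) for c in entry['comments'])
        counts.update(str(uid) for uid in entry['likes'])
    # users_1 is the number of distinct users with any activity;
    # users_3 counts users with three or more actions.
    users_3 = sum(1 for n in counts.values() if n >= 3)
    return {'users_1': len(counts), 'users_3': users_3}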