def save_results(self, scrape_result: MediaLikersScraping, mysql_helper: MySQLHelper):
    """Store scraped media together with their taggees and like/comment interactions.

    For every item in ``scrape_result.media`` this builds one ``MediaRecord``,
    one ``TaggeeRecord`` per entry in ``media.taggees``, one
    ``MediaLikeRecord`` per entry in ``media.likers`` and one
    ``MediaCommentRecord`` per entry in ``media.comments``. Likes and comments
    share a single batch. Each non-empty batch is passed to
    ``mysql_helper.insert_on_duplicate_update`` using one cursor obtained from
    ``mysql_helper.get_cursor()``.

    Returns:
        Tuple ``(media_cnt, taggees_cnt, operations_cnt)``. Each element is the
        value returned by the helper for that batch, or 0 when the batch was
        empty (in which case the helper is not called for it).
    """
    media_rows = []
    taggee_rows = []
    interaction_rows = []
    db_cursor = mysql_helper.get_cursor()

    for item in scrape_result.media:
        owner = item.owner
        # Scrape identity that prefixes every record derived from this item.
        stamp = (scrape_result.scrape_id, scrape_result.scrape_ts)

        media_rows.append(MediaRecord(
            *stamp, item.id, item.media_type, item.taken_at_timestamp,
            owner.username, owner.user_id, item.display_url,
            item.comments_amount, item.likes_amount, len(item.taggees),
        ))

        for tagged in item.taggees:
            taggee_rows.append(TaggeeRecord(
                *stamp, item.id, item.media_type, owner.user_id, owner.username,
                item.taken_at_timestamp, tagged.user_id, tagged.username,
            ))

        # Within one media item, likes are queued before comments.
        interaction_rows.extend(
            MediaLikeRecord(*stamp, item.id, owner.user_id, owner.username,
                            liker.user_id, liker.username)
            for liker in item.likers
        )
        interaction_rows.extend(
            MediaCommentRecord(*stamp, item.id, owner.user_id, owner.username,
                               commenter.user_id, commenter.username)
            for commenter in item.comments
        )

    # Empty batches are never sent to the helper; their count stays 0.
    media_cnt = (
        mysql_helper.insert_on_duplicate_update(self.MEDIA_TABLE, media_rows, db_cursor)
        if media_rows else 0
    )
    taggees_cnt = (
        mysql_helper.insert_on_duplicate_update(self.TAGGEES_TABLE, taggee_rows, db_cursor)
        if taggee_rows else 0
    )
    operations_cnt = (
        mysql_helper.insert_on_duplicate_update(self.MEDIA_INTERACTIONS_TABLE,
                                                interaction_rows, db_cursor)
        if interaction_rows else 0
    )
    return media_cnt, taggees_cnt, operations_cnt
    def save_results(self, scrape_result: StoryScraping,
                     mysql_helper: MySQLHelper):
        """Store scraped stories and the viewers recorded for each story.

        Builds one ``StoryRecord`` per item in ``scrape_result.stories`` and
        one ``ViewerRecord`` per entry in ``story.viewers``. Stories are
        written with ``mysql_helper.insert_ignore`` and viewers with
        ``mysql_helper.insert_on_duplicate_update``, both on a single cursor
        from ``mysql_helper.get_cursor()``. Both calls are made even when the
        corresponding batch is empty.

        Returns:
            Tuple ``(story_cnt, viewer_cnt)`` holding the values returned by
            the two helper calls, in that order.
        """
        db_cursor = mysql_helper.get_cursor()
        stories_batch = []
        viewers_batch = []

        for item in scrape_result.stories:
            # No tz argument is given, so these are naive local-time datetimes.
            taken_at = datetime.fromtimestamp(item.taken_at_timestamp)
            expires_at = datetime.fromtimestamp(item.expiring_at_timestamp)
            stories_batch.append(StoryRecord(
                item.story_id, item.story_owner_id,
                item.story_owner_user_name, item.display_url,
                taken_at, expires_at,
            ))

            viewers_batch.extend(
                ViewerRecord(scrape_result.scrape_id, scrape_result.scrape_ts,
                             item.story_id, item.story_owner_id,
                             item.story_owner_user_name, item.story_view_count,
                             watcher.user_id, watcher.username, watcher.rank)
                for watcher in item.viewers
            )

        story_cnt = mysql_helper.insert_ignore(
            self.STORIES_TABLE, stories_batch, db_cursor)
        viewer_cnt = mysql_helper.insert_on_duplicate_update(
            self.VIEWERS_TABLE, viewers_batch, db_cursor)
        return story_cnt, viewer_cnt
Exemple #3
0
    def update_agg_followers(self, mysql: MySQLHelper, cursor: Cursor,
                             follows: List[UserFollows],
                             analyzed: (List[dict], Dict[str, InstaUser]),
                             users: Dict[int, InstaUser],
                             unfollowers: Set[InstaUser], scrape_ts: datetime):
        """Write one ``FollowRecord`` per analyzed relation to the follows table.

        The source user is ``follows[0].user``, so an empty ``follows`` list
        raises ``IndexError``. Each element of ``analyzed`` is indexed with the
        keys ``'dst_id'``, ``'src_follows'`` and ``'dst_follows'``; the
        destination user is looked up as ``users[entry['dst_id']]``.
        ``scrape_ts`` is passed in four positional slots of ``FollowRecord``;
        the final slot is ``scrape_ts`` when the destination user is in
        ``unfollowers`` and ``None`` otherwise. The batch is sent to
        ``mysql.insert_on_duplicate_update`` on the supplied cursor, even when
        it is empty.

        NOTE(review): the ``analyzed`` annotation is a tuple literal, but the
        body iterates it as a sequence of mappings - confirm the intended type.
        """
        self.logger.info("Inserting following records into agg table...")
        source = follows[0].user
        rows = []

        for relation in analyzed:
            target = users[relation['dst_id']]
            if target in unfollowers:
                unfollowed_at = scrape_ts
            else:
                unfollowed_at = None

            row = FollowRecord(
                source.user_id, source.username,
                target.user_id, target.username,
                relation['src_follows'], scrape_ts, scrape_ts,
                relation['dst_follows'], scrape_ts, scrape_ts,
                unfollowed_at,
            )
            rows.append(row)

        mysql.insert_on_duplicate_update(self.FOLLOWS_TABLE, rows, cursor)
        self.logger.info("done insert follows to agg table")
Exemple #4
0
 def persist_user(self, mysql: MySQLHelper, cursor: Cursor, user: InstaUser,
                  scrape_ts: datetime):
     """Write a single user to the user-info table.

     The user is converted with ``InstaUserRecord.from_insta_user(scrape_ts,
     user)`` and sent as a one-element batch to
     ``mysql.insert_on_duplicate_update`` on the supplied cursor. Nothing is
     returned.
     """
     self.logger.debug("persisting user to mysql...")
     batch = [InstaUserRecord.from_insta_user(scrape_ts, user)]
     mysql.insert_on_duplicate_update(self.USER_INFO_TABLE, batch, cursor)