def main(self):
    """Scrape story viewers and persist the result to the local MySQL database.

    Opens a ``mysql-insta-local`` connection, runs
    ``StoryViewersScraper.scrape_viewers()``, hands the result to
    ``self.save_results`` and commits. The connection is closed even when
    scraping or saving raises; in that case nothing is committed by this
    method.
    """
    mysql = MySQLHelper('mysql-insta-local')
    try:
        scraper = StoryViewersScraper()
        scrape_result = scraper.scrape_viewers()
        self.save_results(scrape_result, mysql)
        mysql.commit()
    finally:
        # Always release the connection, including on scrape/save failures.
        mysql.close()
def main(self,
         user: InstaUser,
         scraper: Optional[MediaScraper] = None,
         scrape_likers: bool = False,
         scrape_comments: bool = False,
         likers_scrape_threshold: Optional[int] = None,
         media_max_likers_amount: Optional[int] = None,
         max_media_limit: Optional[int] = None):
    """Scrape a user's media and persist the result to the local MySQL database.

    :param user: User whose media is scraped. When ``user.from_full_profile``
        is falsy the user is re-scraped by username first.
    :param scraper: Scraper to use; a new ``MediaScraper`` is created when omitted.
    :param scrape_likers: Forwarded to ``scraper.scrape_media``.
    :param scrape_comments: Forwarded to ``scraper.scrape_media``.
    :param likers_scrape_threshold: Forwarded to ``scraper.scrape_media``.
    :param media_max_likers_amount: Forwarded to ``scraper.scrape_media``.
    :param max_media_limit: Forwarded to ``scraper.scrape_media``.
    :return: None. Private users not followed by the viewer are skipped with
        a warning and nothing is written.
    """
    scraper = scraper or MediaScraper()
    if not user.from_full_profile:
        user = scraper.scrape_user(user.username)

    if user.is_private and not user.followed_by_viewer:
        self.logger.warning("user is private and not followed by viewer. skipping scraping...")
        return

    mysql = MySQLHelper('mysql-insta-local')
    try:
        scrape_result = scraper.scrape_media(user, scrape_likers, scrape_comments,
                                             likers_scrape_threshold,
                                             media_max_likers_amount,
                                             max_media_limit)
        self.save_results(scrape_result, mysql)
        mysql.commit()
    finally:
        # Always release the connection, including on scrape/save failures.
        mysql.close()
def main():
    """Replay unfollow updates found in the user-follows log against MySQL.

    Reads ``/opt/InstaProfiler/logs/user-follows.log`` and, for every record
    matched by ``LOG_RECORD_REGEX`` whose sixth captured field contains
    ``UPDATE follows``, rebuilds an ``UPDATE follows`` statement from the
    timestamp, source user and user list extracted from that field. Records
    listing 8 or more users are only reported, not replayed. All rebuilt
    statements are then executed on the ``mysql-insta-local`` connection and
    committed together.

    :raises AttributeError: if a matching record does not fit
        ``UNFOLLOW_PARAMS_REGEX`` / ``UNFOLLOW_USERS_REGEX`` (``search`` returns
        ``None``). This happens before any statement is executed.
    """
    max_users_per_record = 8
    ts_fields = ('year', 'month', 'day', 'hour', 'minute', 'second', 'frac')

    with open('/opt/InstaProfiler/logs/user-follows.log') as fp:
        txt = fp.read()

    all_queries = []
    unfollow_users_distinct = set()
    for match in LOG_RECORD_REGEX.findall(txt):
        message = match[5]
        if 'UPDATE follows' not in message:
            continue

        unfollow_params = UNFOLLOW_PARAMS_REGEX.search(message)
        # The seven captured fields map positionally onto datetime's
        # (year, month, day, hour, minute, second, microsecond) arguments.
        unfollow_ts = datetime(*(int(unfollow_params.group(name)) for name in ts_fields))
        src_user = unfollow_params.group('src_user')
        unfollow_users = UNFOLLOW_USERS_REGEX.search(message).group('users').split(', ')

        if len(unfollow_users) >= max_users_per_record:
            print("Too much users", len(unfollow_users))
            continue

        unfollow_users_distinct.update(u.strip("'") for u in unfollow_users)
        # NOTE(review): the IN (...) list is spliced into the SQL text exactly as
        # it appeared in the log; only the timestamp and source user are bound
        # as parameters. Acceptable only while the log file is trusted.
        query = "UPDATE follows set dst_follows=0, dst_unfollows_latest_timestamp=? where src_user_name=? and dst_user_id in ({users})".format(
            users=','.join(unfollow_users))
        params = (unfollow_ts, src_user)
        print(query)
        all_queries.append((query, params))

    print(','.join(unfollow_users_distinct))

    odbc_helper = MySQLHelper('mysql-insta-local')
    try:
        cursor = odbc_helper.get_cursor()
        try:
            for query, params in all_queries:
                odbc_helper.execute(query, params, cursor)
            odbc_helper.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if one of the statements fails.
        odbc_helper.close()
def main(self,
         user: Union[InstaUser, str] = DEFAULT_USER_NAME,
         only_mutual: bool = False,
         scrape_follows: bool = True,
         scrape_followers: bool = True,
         max_follow_amount: Optional[int] = None,
         scraper: Optional[UserFollowsScraper] = None):
    """Scrape a user's follows/followers and persist the changes to MySQL.

    :param user: User whose follows are parsed. A string is treated as a
        username and resolved through ``scraper.scrape_user``.
    :param only_mutual: Forwarded to ``UserFollowsAnalyzer.analyze_follows``.
        If set to True, only people that are both followers and follows are
        saved. Useful when the source user has many followers, so that only
        "relevant" people are kept.
    :param scrape_follows: Whether to scrape the accounts the user follows.
    :param scrape_followers: Whether to scrape the user's followers.
    :param max_follow_amount: If given, each side (follows / followers) is
        scraped only when its amount does not exceed this value; otherwise
        that side is skipped with a warning.
    :param scraper: Scraper to use; a new ``UserFollowsScraper`` is created
        when omitted.
    :raises UserDoesNotExist: if the user could not be resolved.
    :return: None
    """
    scraper = scraper or UserFollowsScraper()
    if isinstance(user, str):
        user = scraper.scrape_user(user)
    if user is None:
        raise UserDoesNotExist()

    if max_follow_amount is not None:
        if scrape_followers and user.followed_by_amount > max_follow_amount:
            self.logger.warning(
                "user is followed by too many people (followed by %d, max %d), skipping followers...",
                user.followed_by_amount, max_follow_amount)
            scrape_followers = False
        if scrape_follows and user.follows_amount > max_follow_amount:
            self.logger.warning(
                "user follows too many people (follows %d, max %d), skipping follows...",
                user.follows_amount, max_follow_amount)
            scrape_follows = False

    mysql = MySQLHelper('mysql-insta-local')
    try:
        cursor = mysql.get_cursor()
        try:
            if user.is_private and not user.followed_by_viewer:
                self.logger.warning(
                    "user is private and not followed by viewer. skipping scraping..."
                )
                # No scrape happened; the user row is still stamped with "now".
                scrape_ts = datetime.now()
            else:
                scrape_ts = self._scrape_and_store_follows(
                    mysql, cursor, scraper, user, only_mutual,
                    scrape_follows, scrape_followers)

            # Update user info
            self.persist_user(mysql, cursor, user, scrape_ts)
            mysql.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, including on scrape/persist failures.
        mysql.close()
    self.logger.info("Done UserFollowsScraper main")


def _scrape_and_store_follows(self, mysql, cursor, scraper, user, only_mutual,
                              scrape_follows, scrape_followers):
    """Scrape follows for ``user``, diff against stored state and write changes.

    Does not commit; the caller owns the transaction.

    :return: the scrape timestamp reported by the scraper.
    """
    follow_scrape = scraper.parse_user_follows([user], scrape_follows, scrape_followers)
    follows, scrape_ts = follow_scrape.follows, follow_scrape.scrape_ts
    current_follows = self.get_current_follows(mysql, user.username, cursor)
    analyzed, users = UserFollowsAnalyzer.analyze_follows(follows, only_mutual)

    # Initialised up front so the insert step below can always reference it,
    # even when the unfollow detection is skipped.
    src_has_unfollowed = set()

    # Update unfollowers
    if scrape_follows:
        # No followers are scraped so they could all be considered as unfollowed
        # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
        # confirm both diffs belong under this guard.
        if current_follows is not None:
            dst_has_unfollowed = current_follows.followers.difference(follows[0].followers)
            src_has_unfollowed = current_follows.follows.difference(follows[0].follows)
        else:
            dst_has_unfollowed = set()
        if dst_has_unfollowed:
            self.update_agg_dst_unfollowers(mysql, cursor, dst_has_unfollowed,
                                            user.username, scrape_ts)
        if src_has_unfollowed:
            self.handle_unfollowers(mysql, cursor, src_has_unfollowed, user, scrape_ts)

    # Insert new records
    if sum(len(x.follows) for x in follows) > 0:
        self.update_agg_followers(mysql, cursor, follows, analyzed, users,
                                  src_has_unfollowed, scrape_ts)
        if current_follows is None:
            new_follows = follows[0].follows
        else:
            new_follows = follows[0].follows.difference(current_follows.follows)
        if len(new_follows) > 0:
            self.insert_raw_followers(mysql, cursor, new_follows, user, scrape_ts)

    return scrape_ts