def like_loop(self,
              what: str,
              base_link: str,
              amount: int,
              users_validated: bool) -> Interactions:
    """Open and interact with posts on the current page until ``amount`` likes.

    Posts are taken from the page's ``<main>`` element one at a time: the
    first post whose link was not handled yet is scrolled to, clicked,
    passed to ``interact_with_post`` and then left again by pressing back
    to ``base_link``.  When every post on screen was already handled the
    page is scrolled three times and the posts are fetched again.

    :param what: label of the running activity, only used in log messages
    :param base_link: link to go back to after a post was interacted with
    :param amount: number of likes to reach before stopping
    :param users_validated: forwarded unchanged to ``interact_with_post``
    :returns: the interactions accumulated by this call
    """
    try_again = 0
    sc_rolled = 0
    scroll_nap = 1.5
    # only ever used for membership tests, so a set is enough
    already_interacted_links = set()
    interactions = Interactions()
    likes = 0

    try:
        while likes < amount:
            if self.aborting or (self.until_time
                                 and datetime.now() > self.until_time):
                break

            if self.jumps.check_likes():
                self.logger.warning(
                    "Like quotient reached its peak, leaving Like By {} activity"
                    .format(what))
                self.quotient_breach = True
                # reset jump counter after a breach report
                self.jumps.likes = 0
                break

            if sc_rolled > 100:
                try_again += 1
                if try_again > 2:
                    self.logger.info("'{}' possibly has less images than "
                                     "desired ({}), found: {}".format(
                                         what, amount,
                                         len(already_interacted_links)))
                    break
                delay_random = random.randint(400, 600)
                self.logger.info(
                    "Scrolled too much. Sleeping {} minutes and {} seconds".
                    format(int(delay_random / 60), delay_random % 60))
                sleep(delay_random)
                sc_rolled = 0

            main_elem = self.browser.find_element_by_tag_name("main")
            posts = nf_get_all_posts_on_element(main_elem)

            # Interact with links
            for post in posts:
                link = post.get_attribute("href")
                if link in already_interacted_links:
                    continue

                sleep(1)
                nf_scroll_into_view(self, post)
                sleep(1)
                nf_click_center_of_element(self, post, link)
                msg, post_interactions = interact_with_post(
                    self, link, users_validated)
                interactions += post_interactions
                if post_interactions.liked_img > 0:
                    likes += 1
                    self.logger.info("[{}] - Like [{}/{}]".format(
                        what, likes, amount))
                if msg == "block on likes":
                    raise SoftBlockedException(msg)

                sleep(1)
                nf_find_and_press_back(self, base_link)
                already_interacted_links.add(link)
                # one post per pass: the post list is fetched again on the
                # next iteration of the while loop
                break
            else:
                # For loop ended means all posts in screen has been interacted with
                # will scroll the screen a bit and reload
                for _ in range(3):
                    self.browser.execute_script(JS.SCROLL_SCREEN)
                    self.quota_supervisor.add_server_call()
                    sc_rolled += 1
                    sleep(scroll_nap)
    except SoftBlockedException:
        sleep_while_blocked(self)
    except Exception as err:
        self.logger.error("Unexpected Exception: {}".format(err))

    # Returned after the try statement instead of from a ``finally`` clause:
    # a ``return`` inside ``finally`` would also discard KeyboardInterrupt
    # and SystemExit.
    return interactions
def follow_user_follow(
        self,
        follow: str,
        usernames: List[str],
        amount: int = 10,
        randomize: bool = False
):
    """Follow users taken from the ``follow`` list of every given username.

    For each username the user page and then its ``follow`` page are
    opened.  Listed users are clicked one at a time, validated with
    ``nf_validate_user_call`` and followed with ``follow_user``; a followed
    user may additionally be handed to ``like_by_users``.

    :param follow: which list to walk, ``"followers"`` or ``"followings"``
    :param usernames: users whose list is walked
    :param amount: number of follows wanted per username
    :param randomize: when the bottom of the list is reached, go back to the
        top and retry with a higher chance instead of stopping
    :returns: ``self``
    :raises ValueError: if ``follow`` is not one of the accepted values
    """
    if self.aborting:
        return self

    allowed = {"followers", "followings"}
    if follow not in allowed:
        raise ValueError(
            "nf_follow_user_follow: follow must be one of %r." % allowed)

    self.logger.info("Starting to follow user {}".format(follow))

    for index, username in enumerate(usernames):
        state = {
            'liked_img': 0,
            'already_liked': 0,
            'inap_img': 0,
            'commented': 0,
            'followed': 0,
            'not_valid_users': 0,
        }

        self.logger.info("User [{}/{}]".format(index + 1, len(usernames)))
        self.logger.info("--> {}".format(username.encode("utf-8")))

        nf_go_to_user_page(self, username)
        sleep(1)

        # TODO: get follow count
        follow_count = 10
        actual_amount = min(amount, follow_count)

        self.logger.info("About to go to {} page".format(follow))
        nf_go_to_follow_page(self, follow, username)
        sleep(2)

        # both links only depend on the username, build them once
        user_link = "https://www.instagram.com/{}".format(username)
        follow_link = "https://www.instagram.com/{}/{}".format(
            username, follow)

        sc_rolled = 0
        scroll_nap = 1.5
        already_interacted_links = set()
        random_chance = 50

        while state['followed'] < actual_amount:
            if self.quotient_breach:
                self.logger.warning(
                    "--> Follow quotient reached its peak!"
                    "\t~leaving Follow-User-Follow_ activity\n"
                )
                break

            if sc_rolled > 100:
                self.logger.info(
                    "Scrolled too much! ~ sleeping 10 minutes")
                sleep(600)
                sc_rolled = 0

            users = nf_get_all_users_on_element(self)

            # Interact with links instead of just storing them
            for user in users:
                link = user.get_attribute("href")
                if link in already_interacted_links:
                    continue

                msg = ""
                try:
                    # read the name once, before the click below is issued
                    user_text = user.text

                    self.logger.info("about to scroll to user")
                    sleep(1)
                    nf_scroll_into_view(self, user)
                    self.logger.info("about to click to user")
                    sleep(1)
                    nf_click_center_of_element(self, user)
                    sleep(2)

                    valid = False
                    # The random draw is its own condition.  It used to sit
                    # inside the follow_restriction() call, where
                    # ``self.logger and <draw>`` was passed as the logger
                    # argument and the draw never filtered anybody.
                    if (
                            user_text not in self.dont_include
                            and not follow_restriction(
                                "read", user_text, self.follow_times,
                                self.logger)
                            and random.randint(0, 100) <= random_chance
                    ):
                        valid, details = nf_validate_user_call(
                            self, user_text)
                        self.logger.info(
                            "Valid User: {}, details: {}".format(
                                valid, details))

                    if valid:
                        self.logger.info("about to follow user")
                        follow_state, msg = follow_user(
                            self.browser,
                            "profile",
                            self.username,
                            user_text,
                            None,
                            self.blacklist,
                            self.logger,
                            self.logfolder,
                        )
                        if follow_state is True:
                            state['followed'] += 1
                            self.logger.info("user followed")
                        else:
                            self.logger.info("--> Not following")
                            sleep(1)

                        if random.randint(
                                0, 100) <= self.user_interact_percentage:
                            self.logger.info(
                                "--> User gonna be interacted: '{}'".format(
                                    user_text
                                )
                            )
                            # disable re-validating user in like_by_users
                            like_by_users(
                                self,
                                [user_text],
                                None,
                                True,
                            )
                    else:
                        state["not_valid_users"] += 1
                finally:
                    # always try to get back to the list page, even when
                    # the interaction above raised
                    sleep(5)
                    nf_find_and_press_back(self, follow_link)
                    sleep(3)
                    if check_if_in_correct_page(self, user_link):
                        nf_go_to_follow_page(self, follow, username)

                already_interacted_links.add(link)
                if msg == "block on follow":
                    pass  # TODO deal with block on follow
                # one user per pass, the list is fetched again afterwards
                break
            else:
                # For loop ended means all users in screen has been interacted with
                scrolled_to_bottom = self.browser.execute_script(
                    "return window.scrollMaxY == window.scrollY"
                )
                if scrolled_to_bottom and randomize and random_chance < 100:
                    random_chance += 25
                    self.browser.execute_script(
                        "window.scrollTo(0, 0);"
                    )
                    update_activity(self.browser, state=None)
                    sc_rolled += 1
                    sleep(scroll_nap)
                elif scrolled_to_bottom:
                    # already followed all possibles users
                    break

                # will scroll the screen a bit and reload
                for _ in range(3):
                    self.browser.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight);"
                    )
                    update_activity(self.browser, state=None)
                    sc_rolled += 1
                    sleep(scroll_nap)

        sleep(4)

        self.logger.info("User [{}/{}]".format(index + 1, len(usernames)))
        self.logger.info("Liked: {}".format(state['liked_img']))
        self.logger.info("Already Liked: {}".format(state['already_liked']))
        self.logger.info("Commented: {}".format(state['commented']))
        self.logger.info("Followed: {}".format(state['followed']))
        self.logger.info("Inappropriate: {}".format(state['inap_img']))
        self.logger.info(
            "Not valid users: {}\n".format(state['not_valid_users']))

        self.liked_img += state['liked_img']
        self.already_liked += state['already_liked']
        self.commented += state['commented']
        self.followed += state['followed']
        self.inap_img += state['inap_img']
        self.not_valid_users += state['not_valid_users']

    return self
def db_store_comments(self, posts: List[Post], post_link: str):
    """Save the comments of the open post to the database, then go back.

    Opens the comments page of the post, presses "Load more comments" at
    most ten times and stores one ``Comment`` row per listed comment and
    per entry of ``posts``.  A ``SQLAlchemyError`` rolls the session back
    and is re-raised.  In every case the session is committed and the
    browser is sent back to ``post_link``.
    """
    load_more_xpath = '//span[@aria-label="Load more comments"]'

    try:
        buttons = self.browser.find_elements_by_xpath(
            '//article//div[2]/div[1]//a[contains(@href,"comments")]')

        if not buttons:
            self.logger.error("No comments found")
        else:
            opener = buttons[0]
            nf_scroll_into_view(self, opener)
            nf_click_center_of_element(self, opener)
            sleep(2)

            comments_link = post_link + 'comments/'
            on_comments_page = check_if_in_correct_page(self, comments_link)
            if not on_comments_page:
                self.logger.error(
                    "Failed to go to comments page, navigating there")
                # TODO: retry to get there naturally
                web_address_navigator(self.browser, comments_link)

            # expand the comment list, ten presses at most
            load_more = self.browser.find_elements_by_xpath(load_more_xpath)
            for attempt in range(1, 11):
                if not load_more:
                    break
                self.logger.info(
                    "Loading comments ({}/10)...".format(attempt))
                nf_scroll_into_view(self, load_more[0])
                self.browser.execute_script(
                    "arguments[0].click();", load_more[0])
                load_more = self.browser.find_elements_by_xpath(
                    load_more_xpath)

            comment_nodes = self.browser.find_elements_by_xpath(
                '/html/body/div[1]/section/main/div/ul/ul[@class="Mr508"]')
            for node in comment_nodes:
                box = node.find_element_by_xpath('.//div[@class="C4VMK"]')
                author_name = box.find_element_by_xpath('.//h3/div/a').text
                raw_text = box.find_element_by_xpath('.//span').text
                text, _ = deform_emojis(raw_text)
                raw_date = box.find_element_by_xpath(
                    './/time').get_attribute('datetime')
                # the last character is cut off before parsing
                posted_at = datetime.fromisoformat(raw_date[:-1])

                author = db_get_or_create_user(self, author_name)
                self.db.session.add(author)
                self.db.session.commit()

                for post in posts:
                    row = Comment(
                        date_posted=posted_at,
                        text=text,
                        user=author,
                        post=post,
                    )
                    self.db.session.add(row)
                    self.db.session.commit()
    except SQLAlchemyError:
        self.db.session.rollback()
        raise
    finally:
        self.db.session.commit()
        nf_find_and_press_back(self, post_link)
def like_by_users(
        self,
        usernames: List[str],
        amount: int = None,
        users_validated: bool = False
):
    """Likes some amounts of images for each usernames

    For every username the profile is opened, optionally validated with
    ``nf_validate_user_call``, and its posts are opened one at a time and
    passed to ``nf_interact_with_post`` until ``amount`` images were liked.

    :param usernames: users to process; ``None`` is treated as no users
    :param amount: likes wanted per user, falls back to
        ``self.user_interact_amount`` when falsy
    :param users_validated: skip validation; with exactly one username the
        profile is reached with ``nf_go_from_post_to_profile``
    :returns: ``self``
    """
    if self.aborting:
        return self

    amount = amount or self.user_interact_amount
    usernames = usernames or []
    self.quotient_breach = False

    for index, username in enumerate(usernames):
        if self.quotient_breach:
            break

        state = {
            'liked_img': 0,
            'already_liked': 0,
            'inap_img': 0,
            'commented': 0,
            'followed': 0,
            'not_valid_users': 0,
        }

        self.logger.info(
            "Username [{}/{}]".format(index + 1, len(usernames))
        )
        self.logger.info("--> {}".format(username.encode("utf-8")))

        if len(usernames) == 1 and users_validated:
            nf_go_from_post_to_profile(self, username)
        else:
            nf_go_to_user_page(self, username)

        if not users_validated:
            validation, details = nf_validate_user_call(self, username)
            if not validation:
                self.logger.info(
                    "--> Not a valid user: {}".format(details)
                )
                # Count the user on the instance right away: the per-user
                # ``state`` is only added to the totals at the end of the
                # loop body, which ``continue`` skips.
                self.not_valid_users += 1
                continue

        profile_link = "https://www.instagram.com/{}/".format(username)

        try_again = 0
        sc_rolled = 0
        scroll_nap = 1.5
        already_interacted_links = set()

        while state['liked_img'] < amount:
            if self.jumps["consequent"]["likes"] >= self.jumps["limit"]["likes"]:
                self.logger.warning(
                    "--> Like quotient reached its peak!\t~leaving "
                    "Like-By-Users activity\n"
                )
                self.quotient_breach = True
                # reset jump counter after a breach report
                self.jumps["consequent"]["likes"] = 0
                break

            if sc_rolled > 100:
                try_again += 1
                if try_again > 2:
                    # you can try again as much as you want by changing this number
                    self.logger.info(
                        "'{}' user POSSIBLY has less valid images than "
                        "desired:{} found:{}...".format(
                            username, amount, len(already_interacted_links))
                    )
                    break
                self.logger.info(
                    "Scrolled too much! ~ sleeping 10 minutes")
                sleep(600)
                sc_rolled = 0

            main_elem = self.browser.find_element_by_tag_name("main")
            posts = nf_get_all_posts_on_element(main_elem)

            # Interact with links instead of just storing them
            for post in posts:
                link = post.get_attribute("href")
                if link in already_interacted_links:
                    continue

                self.logger.info("about to scroll to post")
                sleep(1)
                nf_scroll_into_view(self, post)
                self.logger.info("about to click to post")
                sleep(1)
                nf_click_center_of_element(self, post)
                success, msg, state = nf_interact_with_post(
                    self,
                    link,
                    amount,
                    state,
                    users_validated,
                )
                self.logger.info(
                    "Returned from liking, should still be in post page")
                sleep(5)
                nf_find_and_press_back(self, profile_link)
                already_interacted_links.add(link)
                if success:
                    break
                if msg == "block on likes":
                    # TODO deal with block on likes
                    break
            else:
                # For loop ended means all posts in screen has been interacted with
                # will scroll the screen a bit and reload
                for _ in range(3):
                    self.browser.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight);"
                    )
                    update_activity(self.browser, state=None)
                    sc_rolled += 1
                    sleep(scroll_nap)

        sleep(4)

        self.logger.info("Username [{}/{}]".format(index + 1, len(usernames)))
        self.logger.info("--> {} ended".format(username.encode("utf-8")))
        self.logger.info("Liked: {}".format(state['liked_img']))
        self.logger.info("Already Liked: {}".format(state['already_liked']))
        self.logger.info("Commented: {}".format(state['commented']))
        self.logger.info("Followed: {}".format(state['followed']))
        self.logger.info("Inappropriate: {}".format(state['inap_img']))
        self.logger.info(
            "Not valid users: {}\n".format(state['not_valid_users']))

        self.liked_img += state['liked_img']
        self.already_liked += state['already_liked']
        self.commented += state['commented']
        self.followed += state['followed']
        self.inap_img += state['inap_img']
        self.not_valid_users += state['not_valid_users']

    return self
def like_by_tags(
        self,
        tags: List[str] = None,
        amount: int = 50,
        skip_top_posts: bool = True,
        use_smart_hashtags: bool = False,
        use_smart_location_hashtags: bool = False,
):
    """Likes (default) 50 images per given tag

    For every tag the tag page is opened and its posts are opened one at a
    time and passed to ``nf_interact_with_post`` until the wanted number of
    images was liked.

    :param tags: tags to process, with or without a leading ``#``;
        ``None`` is treated as no tags
    :param amount: likes wanted per tag; lowered for a single tag when the
        page reports fewer posts
    :param skip_top_posts: only reported in the log by this function
    :param use_smart_hashtags: use ``self.smart_hashtags`` when not empty
    :param use_smart_location_hashtags: use ``self.smart_location_hashtags``
        when not empty and smart hashtags were not used
    :returns: ``self``
    """
    if self.aborting:
        return self

    # if smart hashtag is enabled
    if use_smart_hashtags is True and self.smart_hashtags != []:
        self.logger.info("Using smart hashtags")
        tags = self.smart_hashtags
    elif use_smart_location_hashtags is True and self.smart_location_hashtags != []:
        self.logger.info("Using smart location hashtags")
        tags = self.smart_location_hashtags

    # deletes white spaces in tags; the None default is handled before
    # iterating so calling without tags is a no-op instead of a TypeError
    tags = [tag.strip() for tag in (tags or [])]
    self.quotient_breach = False

    for index, tag in enumerate(tags):
        if self.quotient_breach:
            break

        state = {
            'liked_img': 0,
            'already_liked': 0,
            'inap_img': 0,
            'commented': 0,
            'followed': 0,
            'not_valid_users': 0,
        }

        self.logger.info("Tag [{}/{}]".format(index + 1, len(tags)))
        self.logger.info("--> {}".format(tag.encode("utf-8")))

        tag = tag[1:] if tag[:1] == "#" else tag
        tag_link = "https://www.instagram.com/explore/tags/{}/".format(tag)

        nf_go_to_tag_page(self, tag)

        # get amount of post with this hashtag
        try:
            possible_posts = self.browser.execute_script(
                "return window._sharedData.entry_data."
                "TagPage[0].graphql.hashtag.edge_hashtag_to_media.count"
            )
        except WebDriverException:
            try:
                possible_posts = self.browser.find_element_by_xpath(
                    read_xpath("get_links_for_tag", "possible_post")
                ).text
                if possible_posts:
                    possible_posts = format_number(possible_posts)
                else:
                    self.logger.info(
                        "Failed to get the amount of possible posts in '{}' tag "
                        "~empty string".format(tag)
                    )
                    possible_posts = None
            except NoSuchElementException:
                self.logger.info(
                    "Failed to get the amount of possible posts in {} tag".format(tag)
                )
                possible_posts = None

        self.logger.info(
            "desired amount: {} | top posts [{}] | possible posts: "
            "{}".format(
                amount,
                "enabled" if not skip_top_posts else "disabled",
                possible_posts,
            )
        )

        # Clamp into a per-tag variable: lowering ``amount`` itself would
        # carry the limit of a small tag over to every following tag.
        tag_amount = amount
        if possible_posts is not None:
            tag_amount = possible_posts if amount > possible_posts else amount
        # sometimes pages do not have the correct amount of posts as it is
        # written there, it may be cos of some posts is deleted but still keeps
        # counted for the tag

        sleep(1)

        try_again = 0
        sc_rolled = 0
        scroll_nap = 1.5
        already_interacted_links = set()

        while state['liked_img'] < tag_amount:
            if sc_rolled > 100:
                try_again += 1
                if try_again > 2:
                    self.logger.info(
                        "'{}' tag POSSIBLY has less images than "
                        "desired:{} found:{}...".format(
                            tag, tag_amount, len(already_interacted_links)
                        )
                    )
                    break
                self.logger.info(
                    "Scrolled too much! ~ sleeping 10 minutes")
                sleep(600)
                sc_rolled = 0

            main_elem = self.browser.find_element_by_tag_name("main")
            posts = nf_get_all_posts_on_element(main_elem)

            # Interact with links instead of just storing them
            for post in posts:
                link = post.get_attribute("href")
                if link in already_interacted_links:
                    continue

                self.logger.info("about to scroll to post")
                sleep(1)
                nf_scroll_into_view(self, post)
                self.logger.info("about to click to post")
                sleep(1)
                nf_click_center_of_element(self, post)
                success, msg, state = nf_interact_with_post(
                    self,
                    link,
                    tag_amount,
                    state,
                )
                self.logger.info(
                    "Returned from liking, should still be in post page")
                sleep(2)
                nf_find_and_press_back(self, tag_link)
                already_interacted_links.add(link)
                if success:
                    break
                if msg == "block on likes":
                    # TODO deal with block on likes
                    break
            else:
                # For loop ended means all posts in screen has been interacted with
                # will scroll the screen a bit and reload
                for _ in range(3):
                    self.browser.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight);"
                    )
                    update_activity(self.browser, state=None)
                    sc_rolled += 1
                    sleep(scroll_nap)

        sleep(2)

        self.logger.info("Tag [{}/{}]".format(index + 1, len(tags)))
        self.logger.info("--> {} ended".format(tag.encode("utf-8")))
        self.logger.info("Liked: {}".format(state['liked_img']))
        self.logger.info("Already Liked: {}".format(state['already_liked']))
        self.logger.info("Commented: {}".format(state['commented']))
        self.logger.info("Followed: {}".format(state['followed']))
        self.logger.info("Inappropriate: {}".format(state['inap_img']))
        self.logger.info(
            "Not valid users: {}\n".format(state['not_valid_users']))

        self.liked_img += state['liked_img']
        self.already_liked += state['already_liked']
        self.commented += state['commented']
        self.followed += state['followed']
        self.inap_img += state['inap_img']
        self.not_valid_users += state['not_valid_users']

    return self
def follow_user_follow(self,
                       relation: str,
                       usernames: List[str],
                       amount: int = 10,
                       randomize: bool = False,
                       random_chance: int = 50):
    """
    Follows 'amount' users of 'relation' ("following" or "followers") list of each user in usernames

    An unknown 'relation' is logged and replaced by "followers". Each
    listed user is opened, validated, followed and, depending on the
    settings, liked through like_loop; afterwards the bot returns to the
    relation page.

    :param relation: what list to use, "following" or "followers"
    :param usernames: list of usernames to follow relations of
    :param amount: amount of users to follow for each user in 'usernames'
    :param randomize: if the bot will include a random factor to choose who to follow
    or follow the first 'amount' of usernames on the list
    :param random_chance: chance a user will be followed if using 'randomize'
    :returns: self
    """
    if self.aborting:
        return self

    valid = {"followers", "following"}
    if relation not in valid:
        self.logger.info(
            '{} is not a valid relation, using "followers"'.format(
                relation))
        relation = "followers"

    self.logger.info("Starting to follow users {}".format(relation))

    # for each username
    for index, username in enumerate(usernames):
        # if aborting or quota was breached or its past time according to settings break the loop
        if self.aborting or self.quotient_breach or (
                self.until_time and datetime.now() > self.until_time):
            break

        # per-username counters, added to self.interactions at the end of the iteration
        interactions = Interactions()

        self.logger.info("Follow User {} [{}/{}]: {} - started".format(
            relation, index + 1, len(usernames), username))

        user_link = "https://www.instagram.com/{}".format(username)
        follow_link = "https://www.instagram.com/{}/{}".format(
            username, relation)

        # navigate to user page
        if not check_if_in_correct_page(self, user_link):
            nf_go_to_user_page(self, username)
            sleep(1)

        # get followers & following counts and change amount if less than desired
        followers_count, following_count = get_relationship_counts(
            self, username)
        follow_count = following_count if relation == "following" else followers_count
        # a falsy count (e.g. None) is treated as zero
        follow_count = follow_count if follow_count else 0
        actual_amount = amount
        if follow_count < amount:
            actual_amount = follow_count

        # go to relation page
        nf_go_to_follow_page(self, relation, username)
        sleep(2)

        # follow users
        sc_rolled = 0
        scroll_nap = 1.5
        already_interacted_links = []
        while interactions.followed in range(actual_amount):
            # if aborting or quota was breached or its past time according to settings break the loop
            if self.aborting or (self.until_time and datetime.now() > self.until_time):
                break
            # if quotient was breached break the loop
            if self.jumps.check_follows():
                self.logger.warning(
                    "Follow quotient reached its peak, leaving Follow User {} activity"
                    .format(relation))
                # reset jump counter before breaking the loop
                self.jumps.follows = 0
                self.quotient_breach = True
                break

            # if scrolled too much sleep for 5-10 minutes
            if sc_rolled > 100:
                delay_random = random.randint(300, 600)
                self.logger.info(
                    "Scrolled too much, sleeping {} minutes and {} seconds"
                    .format(int(delay_random / 60), delay_random % 60))
                sleep(delay_random)
                sc_rolled = 0

            # get loaded usernames
            users = nf_get_all_users_on_element(self)
            # if no users were grabbed try to go back and load the relation page again
            # NOTE(review): this loop only ends once at least one user is returned
            while len(users) == 0:
                nf_find_and_press_back(self, user_link)
                in_user_page = check_if_in_correct_page(self, user_link)
                if not in_user_page:
                    nf_go_to_user_page(self, username)
                nf_go_to_follow_page(self, relation, username)
                # get loaded usernames
                users = nf_get_all_users_on_element(self)
                # If after rechecking we are in the correct page there still no are users
                # the bot is most surely soft blocked from seeing relations, that block doesnt last long usually.
                # sleep for 5-10 minutes
                if len(users) == 0:
                    delay_random = random.randint(300, 600)
                    self.logger.info(
                        "Soft block on see followers, "
                        "sleeping {} minutes and {} seconds".format(
                            int(delay_random / 60), delay_random % 60))
                    sleep(delay_random)

            self.logger.debug("Grabbed {} usernames".format(len(users)))

            # first one in the list is un-clickable by bad design on browser instagram, its behind the top bar
            for user in users[1:]:
                link = user.get_attribute("href")
                # try to follow first not already interacted user
                if link not in already_interacted_links:
                    msg = ""
                    try:
                        # text is read once, before the click below
                        user_text = user.text
                        user_link2 = "https://www.instagram.com/{}".format(
                            user_text)
                        self.logger.info("Followed [{}/{}]".format(
                            interactions.followed, actual_amount))
                        # Go to user page
                        self.logger.info(
                            "Trying user {}".format(user_text))
                        nf_scroll_into_view(self, user)
                        sleep(1)
                        nf_click_center_of_element(self, user, user_link2)
                        sleep(2)
                        # validate user
                        # the random draw is applied whether or not 'randomize' is set
                        valid = False
                        if (user_text not in self.settings.dont_include
                                and not is_follow_restricted(self, user_text)
                                and random.randint(0, 100) <= random_chance):
                            valid, details = nf_validate_user_call(
                                self, user_text,
                                self.quota_supervisor.FOLLOW)
                            self.logger.info(
                                "Valid User: {}, details: {}".format(
                                    valid, details))
                        # follow user
                        if valid:
                            follow_state, msg = follow_user(
                                self, "profile", user_text)
                            if follow_state is True:
                                interactions.followed += 1
                            elif msg == "already followed":
                                interactions.already_followed += 1
                            elif msg == "jumped":
                                # will break the loop after certain consecutive jumps
                                self.jumps.follows += 1
                            # interact with user
                            if (self.settings.do_like and random.randint(0, 100)
                                    <= self.settings.user_interact_percentage
                                ):
                                self.logger.info(
                                    "Interacting with user '{}'".format(
                                        user_text))
                                if not check_if_in_correct_page(
                                        self, user_link2):
                                    nf_go_from_post_to_profile(
                                        self, user_text)
                                interactions += like_loop(
                                    self,
                                    "Interact with user '{}'".format(
                                        user_text), user_link2,
                                    self.settings.user_interact_amount, True)
                        else:
                            interactions.not_valid_users += 1
                    except Exception as e:
                        # any failure on a single user is logged and the loop goes on
                        self.logger.error(e)
                    finally:
                        # go back to relation page and start the loop again
                        sleep(1)
                        nf_find_and_press_back(self, follow_link)
                        in_follow_page = check_if_in_correct_page(
                            self, follow_link)
                        if not in_follow_page:
                            in_user_page = check_if_in_correct_page(
                                self, user_link)
                            if not in_user_page:
                                nf_go_to_user_page(self, username)
                            nf_go_to_follow_page(self, relation, username)

                    already_interacted_links.append(link)
                    if msg == "block on follow":
                        # raise SoftBlockedException(msg)
                        pass  # TODO: deal with block on follow
                    # one user per pass: the user list is grabbed again by the while loop
                    break
            else:
                # For loop ended means all users in screen has been interacted with
                scrolled_to_bottom = self.browser.execute_script(
                    JS.SCROLLED_TO_BOTTOM)
                # even if we are at the bottom if we were using randomize some users were ignored
                # so the bot can go back and look again with a higher random chance to follow the users
                if scrolled_to_bottom and randomize and random_chance < 100:
                    # the raised chance is kept for the remaining usernames as well
                    random_chance += 25
                    self.browser.execute_script(JS.SCROLL_TO_TOP)
                    self.quota_supervisor.add_server_call()
                    sc_rolled += 1
                    sleep(scroll_nap)
                elif scrolled_to_bottom:
                    # already followed all possibles users
                    break
                # if not at the bottom of the list
                # will scroll the screen a bit and look again
                for i in range(3):
                    self.browser.execute_script(JS.SCROLL_SCREEN)
                    self.quota_supervisor.add_server_call()
                    sc_rolled += 1
                    sleep(scroll_nap)

        sleep(3)
        self.logger.info("Follow User {} [{}/{}] - ended".format(
            relation, index + 1, len(usernames)))
        self.logger.info(str(interactions))
        self.interactions += interactions

    return self
def get_follow(
        self,
        username: str,
        follow: str
) -> Set[str]:
    """Return the names found in the followers/following list of a user.

    Names already stored on ``self.instauser`` are returned directly when
    their number equals the count reported by ``get_relationship_counts``.
    Otherwise the relation page is opened and scrolled until its bottom is
    reached, and every name not stored yet is passed to
    ``add_follow_times``.

    :param username: user whose list is read
    :param follow: ``"followers"`` or ``"following"``
    :returns: set of followers or following of given username
    :raises ValueError: if ``follow`` is not one of the accepted values
    """
    valid = {"followers", "following"}
    if follow not in valid:
        raise ValueError("get_follow: follow must be one of %r." % valid)

    user_link = "https://www.instagram.com/{}/".format(username)
    if not check_if_in_correct_page(self, user_link):
        if self.username == username:
            go_to_bot_user_page(self)
        else:
            nf_go_to_user_page(self, username)

    if follow == 'followers':
        query = self.instauser.followers.all()
    else:
        query = self.instauser.following.all()
    usernames = {instauser.username for instauser in query}

    followers_count, following_count = get_relationship_counts(self, username)
    count = followers_count if follow == 'followers' else following_count
    if count == len(usernames):
        # stored names already match the reported count, nothing to scrape
        return usernames

    sleep(2)
    nf_go_to_follow_page(self, follow, username)
    sleep(2)

    sc_rolled = 0
    scroll_nap = 1.5
    # a set: only membership and the final union are needed
    seen_usernames = set()

    while True:
        if self.aborting or (self.until_time
                             and datetime.now() > self.until_time):
            break

        if sc_rolled > 100:
            delay_random = random.randint(400, 600)
            self.logger.info(
                "Scrolled too much, sleeping {} minutes and {} seconds".
                format(int(delay_random / 60), delay_random % 60))
            sleep(delay_random)
            sc_rolled = 0

        users = nf_get_all_users_on_element(self)
        # nothing listed: go back, reopen the relation page and retry
        while len(users) == 0:
            nf_find_and_press_back(self, user_link)
            in_user_page = check_if_in_correct_page(self, user_link)
            if not in_user_page:
                nf_go_to_user_page(self, username)
            nf_go_to_follow_page(self, follow, username)
            users = nf_get_all_users_on_element(self)
            if len(users) == 0:
                delay_random = random.randint(200, 300)
                self.logger.info(
                    "Soft block on see {}, "
                    "sleeping {} minutes and {} seconds".format(
                        follow, int(delay_random / 60), delay_random % 60))
                # sleep for the announced time (a fixed 300 seconds was
                # used before, whatever the message said)
                sleep(delay_random)

        # the first entry of the list is skipped
        for user in users[1:]:
            user_text = user.text
            if user_text not in seen_usernames:
                seen_usernames.add(user_text)
                if user_text not in usernames:
                    add_follow_times(self, user_text)

        scrolled_to_bottom = self.browser.execute_script(
            JS.SCROLLED_TO_BOTTOM)
        if scrolled_to_bottom:
            # already saved all possibles users
            break

        # will scroll the screen a bit and grab usernames again
        for _ in range(3):
            self.browser.execute_script(JS.SCROLL_SCREEN)
            self.quota_supervisor.add_server_call()
            sc_rolled += 1
            sleep(scroll_nap)

    usernames = usernames.union(seen_usernames)
    self.logger.info("Grabbed {} {} names".format(len(usernames), follow))
    return usernames
def nf_validate_user_call(self, username: str,
                          post_link: str = None) -> Tuple[bool, str]:
    """Checks if user can be liked according to declared settings

    Also stores post data in database if appropriate

    The own account, names in ``self.ignore_users`` and names listed in
    ``<logfolder>blacklist.csv`` are rejected right away.  When no filter
    setting is active the user is accepted without visiting the profile.
    Otherwise the relationship, post count, private, profile picture,
    business and bio filters are applied in that order.

    :param username: user to check
    :param post_link: when given, the profile is reached from the open post
        with ``nf_go_from_post_to_profile``; otherwise the current url is
        remembered.  Either way the browser is sent back to that link when
        the checks are done.
    :returns: valid, reason
    """
    followers_count = None
    following_count = None
    number_of_posts = None

    if username == self.username:
        reason = "---> Username '{}' is yours!\t~skipping user\n".format(
            self.username)
        return False, reason

    if username in self.ignore_users:
        reason = ("---> '{}' is in the `ignore_users` list\t~skipping "
                  "user\n".format(username))
        return False, reason

    blacklist_file = "{}blacklist.csv".format(self.logfolder)
    if os.path.isfile(blacklist_file):
        with open(blacklist_file, "rt") as f:
            reader = csv.reader(f, delimiter=",")
            for row in reader:
                # any field of any row may hold the name
                if username in row:
                    return (
                        False,
                        "---> {} is in blacklist ~skipping "
                        "user\n".format(username),
                    )

    potency_ratio = self.potency_ratio
    delimit_by_numbers = self.delimit_by_numbers
    max_followers = self.max_followers
    max_following = self.max_following
    min_followers = self.min_followers
    min_following = self.min_following
    min_posts = self.min_posts
    max_posts = self.max_posts
    skip_private = self.skip_private
    skip_private_percentage = self.skip_private_percentage
    skip_no_profile_pic = self.skip_no_profile_pic
    skip_no_profile_pic_percentage = self.skip_no_profile_pic_percentage
    skip_business = self.skip_business
    skip_non_business = self.skip_non_business
    skip_business_percentage = self.skip_business_percentage
    skip_business_categories = self.skip_business_categories
    dont_skip_business_categories = self.dont_skip_business_categories
    skip_bio_keyword = self.skip_bio_keyword

    if not any([
            potency_ratio, delimit_by_numbers, max_followers, max_following,
            min_followers, min_following, min_posts, max_posts, skip_private,
            skip_private_percentage, skip_no_profile_pic,
            skip_no_profile_pic_percentage, skip_business, skip_non_business,
            skip_business_percentage, skip_business_categories,
            skip_bio_keyword
    ]):
        # Nothing to check, skip going to user page and then back for nothing
        return True, "Valid user"

    try:
        if post_link:
            nf_go_from_post_to_profile(self, username)
        else:
            post_link = get_current_url(self.browser)

        self.logger.info("about to start checking user page")

        # Checks the potential of target user by relationship status in order
        # to delimit actions within the desired boundary
        if (potency_ratio or delimit_by_numbers and
                (max_followers or max_following or min_followers
                 or min_following)):

            relationship_ratio = None
            reverse_relationship = False

            # get followers & following counts
            self.logger.info("About to get relationship counts")
            followers_count, following_count = get_relationship_counts(
                self.browser, username, self.logger)

            # a negative ratio means following/followers is compared
            if potency_ratio and potency_ratio < 0:
                potency_ratio *= -1
                reverse_relationship = True

            # division by zero is bad
            followers_count = 1 if followers_count == 0 else followers_count
            following_count = 1 if following_count == 0 else following_count

            if followers_count and following_count:
                relationship_ratio = (
                    float(followers_count) / float(following_count)
                    if not reverse_relationship else
                    float(following_count) / float(followers_count))

            # One placeholder per argument: with the username placeholder
            # missing, every value was printed under the wrong label.
            self.logger.info(
                "User: '{}' |> followers: {} |> following: {} |> relationship "
                "ratio: {}".format(
                    username,
                    followers_count if followers_count else "unknown",
                    following_count if following_count else "unknown",
                    truncate_float(relationship_ratio, 2)
                    if relationship_ratio else "unknown",
                ))

            if followers_count or following_count:
                if potency_ratio and not delimit_by_numbers:
                    if relationship_ratio and relationship_ratio < potency_ratio:
                        reason = (
                            "'{}' is not a {} with the relationship ratio of {} "
                            "~skipping user\n".format(
                                username,
                                "potential user" if not reverse_relationship
                                else "massive follower",
                                truncate_float(relationship_ratio, 2),
                            ))
                        return False, reason

                elif self.delimit_by_numbers:
                    if followers_count:
                        if max_followers:
                            if followers_count > max_followers:
                                reason = (
                                    "User '{}'s followers count exceeds maximum "
                                    "limit ~skipping user\n".format(username))
                                return False, reason

                        if min_followers:
                            if followers_count < min_followers:
                                reason = (
                                    "User '{}'s followers count is less than "
                                    "minimum limit ~skipping user\n".format(
                                        username))
                                return False, reason

                    if following_count:
                        if max_following:
                            if following_count > max_following:
                                reason = (
                                    "User '{}'s following count exceeds maximum "
                                    "limit ~skipping user\n".format(username))
                                return False, reason

                        if min_following:
                            if following_count < min_following:
                                reason = (
                                    "User '{}'s following count is less than "
                                    "minimum limit ~skipping user\n".format(
                                        username))
                                return False, reason

                    if potency_ratio:
                        if relationship_ratio and relationship_ratio < potency_ratio:
                            reason = (
                                "'{}' is not a {} with the relationship ratio of "
                                "{} ~skipping user\n".format(
                                    username,
                                    "potential user"
                                    if not reverse_relationship else "massive "
                                    "follower",
                                    truncate_float(relationship_ratio, 2),
                                ))
                            return False, reason

        if min_posts or max_posts:
            # if you are interested in relationship number of posts boundaries
            try:
                number_of_posts = getUserData(
                    "graphql.user.edge_owner_to_timeline_media.count",
                    self.browser)
            except WebDriverException:
                self.logger.error("~cannot get number of posts for username")
                reason = "---> Sorry, couldn't check for number of posts of " "username\n"
                return False, reason

            if max_posts:
                if number_of_posts > max_posts:
                    reason = (
                        "Number of posts ({}) of '{}' exceeds the maximum limit "
                        "given {}\n".format(number_of_posts, username,
                                            max_posts))
                    return False, reason
            if min_posts:
                if number_of_posts < min_posts:
                    reason = (
                        "Number of posts ({}) of '{}' is less than the minimum "
                        "limit given {}\n".format(number_of_posts, username,
                                                  min_posts))
                    return False, reason

        # Skip users

        # skip private
        if skip_private:
            try:
                self.browser.find_element_by_xpath(
                    "//*[contains(text(), 'This Account is Private')]")
                is_private = True
            except NoSuchElementException:
                is_private = False
            if is_private and (random.randint(0, 100) <=
                               skip_private_percentage):
                return False, "{} is private account, by default skip\n".format(
                    username)

        # skip no profile pic
        if skip_no_profile_pic:
            try:
                profile_pic = getUserData("graphql.user.profile_pic_url",
                                          self.browser)
            except WebDriverException:
                self.logger.error("~cannot get the post profile pic url")
                return False, "---> Sorry, couldn't get if user profile pic url\n"
            if (profile_pic in default_profile_pic_instagram
                    or str(profile_pic).find(
                        "11906329_960233084022564_1448528159_a.jpg") > 0) and (
                            random.randint(
                                0, 100) <= skip_no_profile_pic_percentage):
                return False, "{} has default instagram profile picture\n".format(
                    username)

        # skip business
        if skip_business or skip_non_business:
            # if is business account skip under conditions
            try:
                is_business_account = getUserData(
                    "graphql.user.is_business_account", self.browser)
            except WebDriverException:
                self.logger.error(
                    "~cannot get if user has business account active")
                return (
                    False,
                    "---> Sorry, couldn't get if user has business "
                    "account active\n",
                )

            if skip_non_business and not is_business_account:
                return (
                    False,
                    "---> Skipping non business because skip_non_business set to True",
                )

            if is_business_account:
                try:
                    category = getUserData(
                        "graphql.user.business_category_name", self.browser)
                except WebDriverException:
                    self.logger.error("~cannot get category name for user")
                    return False, "---> Sorry, couldn't get category name for " "user\n"

                if len(skip_business_categories) == 0:
                    # skip if not in dont_include
                    if category not in dont_skip_business_categories:
                        if len(dont_skip_business_categories) == 0 and (
                                random.randint(
                                    0, 100) <= skip_business_percentage):
                            return False, "'{}' has a business account\n".format(
                                username)
                        else:
                            return (
                                False,
                                ("'{}' has a business account in the "
                                 "undesired category of '{}'\n".format(
                                     username, category)),
                            )
                else:
                    if category in skip_business_categories:
                        return (
                            False,
                            ("'{}' has a business account in the "
                             "undesired category of '{}'\n".format(
                                 username, category)),
                        )

        if len(skip_bio_keyword) != 0:
            # if contain stop words then skip
            try:
                profile_bio = getUserData("graphql.user.biography",
                                          self.browser)
            except WebDriverException:
                self.logger.error("~cannot get user bio")
                return False, "---> Sorry, couldn't get get user bio " "account active\n"

            for bio_keyword in skip_bio_keyword:
                if bio_keyword.lower() in profile_bio.lower():
                    return (
                        False,
                        "{} has a bio keyword of {}, by default skip\n".format(
                            username, bio_keyword),
                    )

        # if everything is ok
        return True, "Valid user"

    except NoSuchElementException:
        return False, "Unable to locate element"
    finally:
        # runs for every return above that happens inside the try block
        if self.store_in_database:
            try:
                user = db_get_or_create_user(self, username)
                self.db.session.add(user)
                user.date_checked = datetime.now()
                if followers_count:
                    user.followers_count = followers_count
                if following_count:
                    user.following_count = following_count
                if number_of_posts:
                    user.posts_count = number_of_posts
            except SQLAlchemyError:
                self.db.session.rollback()
            finally:
                self.db.session.commit()

        if post_link:
            nf_find_and_press_back(self, post_link)