session.set_quota_supervisor(enabled=True, sleep_after=['server_calls'], sleepyhead=True, stochastic_flow=True, notify_me=True, peak_server_calls_hourly=150, peak_server_calls_daily=3000) # session.set_quota_supervisor(enabled=True, sleep_after=['server_calls'], sleepyhead=True, stochastic_flow=True, notify_me=True, # peak_server_calls_daily=152 ) print('[INFO]: Logging in..') try: with smart_run(session): #get follower, following count followers_count, following_count = get_relationship_counts( session.browser, targetAccount, session.logger) amount = getPercentageCount(int(followers_count), percentage) print('[INFO] Going to extract ' + str(amount) + ' followers out of ' + str(followers_count)) followers_list = session.grab_followers(username=targetAccount, amount=amount, live_match=False, store_locally=True) print('[INFO]: Followers grabbed successfully. Saving to file..') result = '' for el in followers_list: result = result + el + '\n' with open("./" + str(targetAccount) + "_followers_new2.txt", "w") as text_file: text_file.write(result)
def OnePass(username, password, username1, amountToQuickscan, amountToLookup,
            iCnt, iMax):
    """Scan posts tagged with `username1` and store statistics for each one.

    Logs in as `username`, quick-scans tagged links for `username1`, drops
    the links returned by `getFreshTaggedLinks`, and for at most
    `amountToLookup` of the remaining links stores post and poster statistics
    through `storeRecord` and `storeTaggedActivitytoDB`.

    The session is always ended before this function returns or raises.

    :param username: account used to log in
    :param password: password for `username`
    :param username1: account whose tagged posts are scanned
    :param amountToQuickscan: amount of links requested from the quick scan
    :param amountToLookup: maximum number of links examined in detail
    :param iCnt: progress counter, only used in log output
    :param iMax: progress total, only used in log output
    :returns: False when the relationship counts of a poster could not be
        fetched (treated as running out of page pulls), True otherwise
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"

    # start the scan
    session = InstaPy(username=username, password=password)
    session.login()
    try:
        prepare_my_database(session.logger)
        randomize = False
        media = MEDIA_PHOTO
        taggedImages = True
        links = []
        try:
            # build list of links to ignore from the data base: those recently
            # queried (less than 20 hours old) and those posted more than 72
            # hours ago.
            # `or []` guards against a falsy result — presumably the helpers
            # can return None/False on failure; TODO confirm.
            ignore_links = set(getFreshTaggedLinks(username1) or [])
            fresh_links = get_links_for_username(
                session.browser, session.username, username1,
                amountToQuickscan, session.logger, session.logfolder,
                randomize, media, taggedImages) or []
            # De-duplicate while keeping the quick-scan order, so that the
            # truncation below is deterministic.
            links = [link for link in dict.fromkeys(fresh_links)
                     if link not in ignore_links]
            session.logger.info(
                "Links quickscanned: {}".format(len(fresh_links)))
            session.logger.info(
                "Links to ignore: {}".format(len(ignore_links)))
            links = links[:amountToLookup]
            session.logger.info("Links to lookup: {}".format(len(links)))
        except NoSuchElementException:
            session.logger.error("Element not found, skipping this username")

        # for each link in the list, follow it and grab lots of statistics.
        # This consumes instagram resources.
        # then follow the posting username and get their statistics.
        # This consumes instagram resources.
        sleep(1.2)
        for position, posted_link in enumerate(links, start=1):
            sleep(3.0)
            session.logger.info(
                "Processing link:{} {}/{}:{}/{} totalLinks: {}/{}".format(
                    posted_link, iCnt, iMax, username, username1, position,
                    len(links)))
            try:
                try:
                    (inappropriate, posted_by_username,
                     posted_link_likes_count, posted_link_comments_count,
                     posted_link_datetime_str, posted_link_location_name,
                     posted_link_image_text, is_video, reason,
                     scope) = check_link2(
                         session.browser,
                         posted_link,
                         session.dont_like,
                         session.mandatory_words,
                         session.mandatory_language,
                         session.is_mandatory_character,
                         session.mandatory_character,
                         session.check_character_set,
                         session.ignore_if_contains,
                         session.logger,
                     )
                except Exception as err:
                    # A link that cannot be checked is skipped, not fatal.
                    session.logger.warning(
                        "Could not check link {}: {}".format(posted_link, err))
                    inappropriate = True

                if not inappropriate:
                    sleep(1.5)
                    try:
                        (posted_by_followers_count,
                         posted_by_following_count) = get_relationship_counts(
                             session.browser, posted_by_username,
                             session.logger)
                        # posted_by_posts_count = get_number_of_posts(session.browser)
                        posted_by_posts_count = None
                    except Exception:
                        # if you can't get follower count, then you're
                        # probably out of daily page pulls.
                        # Might as well quit!
                        session.logger.warning(
                            "Might be out of daily page pulls. Closing session.")
                        return False  # the `finally` below ends the session

                    session.logger.info(
                        "-Posted on: {} by: {} posts: {} followers: {} following: {}"
                        .format(posted_link_datetime_str, posted_by_username,
                                posted_by_posts_count,
                                posted_by_followers_count,
                                posted_by_following_count))

                    # Naive UTC timestamp, comparable with the parsed post
                    # time which is naive as well.
                    last_checked_datetime = datetime.now(
                        timezone.utc).replace(tzinfo=None)
                    posted_link_datetime = datetime.strptime(
                        posted_link_datetime_str, timestamp_format)

                    storeRecord(username1, last_checked_datetime, posted_link,
                                posted_link_datetime,
                                posted_link_location_name,
                                posted_link_likes_count,
                                posted_link_comments_count,
                                posted_by_username, posted_by_posts_count,
                                posted_by_followers_count,
                                posted_by_following_count)
                    storeTaggedActivitytoDB(
                        session.logger, username1, last_checked_datetime,
                        posted_link, posted_link_datetime,
                        posted_link_location_name, posted_link_likes_count,
                        posted_link_comments_count, posted_by_username,
                        posted_by_posts_count, posted_by_followers_count,
                        posted_by_following_count)
                # Pause after every processed link to keep the request rate low.
                sleep(1.0)
            except NoSuchElementException as err:
                session.logger.error("Invalid Page: {}".format(err))
        return True
    finally:
        # Single exit point for the browser session: runs on normal return,
        # on the early `return False`, and when an exception propagates.
        session.end()
def nf_validate_user_call(self, username: str, post_link: str = None) -> Tuple[bool, str]:
    """Checks if user can be liked according to declared settings

    Also stores post data in database if appropriate.

    The checks run in this order: own username, `ignore_users`, blacklist
    CSV, relationship counts / potency ratio, number of posts, private
    account, default profile picture, business account rules, bio keywords.
    When none of the profile-based settings is set the function returns
    before visiting the profile page.

    :param username: the user to validate
    :param post_link: link of the post the browser is expected to return to;
        when omitted the current browser URL is used instead
    :returns: valid, reason
    """
    followers_count = None
    following_count = None
    number_of_posts = None

    if username == self.username:
        reason = "---> Username '{}' is yours!\t~skipping user\n".format(
            self.username)
        return False, reason

    if username in self.ignore_users:
        reason = ("---> '{}' is in the `ignore_users` list\t~skipping "
                  "user\n".format(username))
        return False, reason

    if _nf_is_blacklisted("{}blacklist.csv".format(self.logfolder), username):
        return (
            False,
            "---> {} is in blacklist ~skipping "
            "user\n".format(username),
        )

    potency_ratio = self.potency_ratio
    delimit_by_numbers = self.delimit_by_numbers
    max_followers = self.max_followers
    max_following = self.max_following
    min_followers = self.min_followers
    min_following = self.min_following
    min_posts = self.min_posts
    max_posts = self.max_posts
    skip_private = self.skip_private
    skip_private_percentage = self.skip_private_percentage
    skip_no_profile_pic = self.skip_no_profile_pic
    skip_no_profile_pic_percentage = self.skip_no_profile_pic_percentage
    skip_business = self.skip_business
    skip_non_business = self.skip_non_business
    skip_business_percentage = self.skip_business_percentage
    skip_business_categories = self.skip_business_categories
    dont_skip_business_categories = self.dont_skip_business_categories
    skip_bio_keyword = self.skip_bio_keyword

    if not any((
            potency_ratio, delimit_by_numbers, max_followers, max_following,
            min_followers, min_following, min_posts, max_posts, skip_private,
            skip_private_percentage, skip_no_profile_pic,
            skip_no_profile_pic_percentage, skip_business, skip_non_business,
            skip_business_percentage, skip_business_categories,
            skip_bio_keyword,
    )):
        # Nothing to check, skip going to user page and then back for nothing
        return True, "Valid user"

    try:
        if post_link:
            nf_go_from_post_to_profile(self, username)
        else:
            post_link = get_current_url(self.browser)

        self.logger.info("about to start checking user page")

        # Checks the potential of target user by relationship status in order
        # to delimit actions within the desired boundary
        if potency_ratio or (delimit_by_numbers and
                             (max_followers or max_following or
                              min_followers or min_following)):
            relationship_ratio = None
            reverse_relationship = False

            # get followers & following counts
            self.logger.info("About to get relationship counts")
            followers_count, following_count = get_relationship_counts(
                self.browser, username, self.logger)

            # A negative potency ratio selects the inverse relationship
            # (following / followers) and is compared by magnitude.
            if potency_ratio and potency_ratio < 0:
                potency_ratio *= -1
                reverse_relationship = True

            # division by zero is bad
            followers_count = 1 if followers_count == 0 else followers_count
            following_count = 1 if following_count == 0 else following_count

            if followers_count and following_count:
                if reverse_relationship:
                    relationship_ratio = (float(following_count) /
                                          float(followers_count))
                else:
                    relationship_ratio = (float(followers_count) /
                                          float(following_count))

            # One placeholder per argument; a placeholder-less username slot
            # would shift every following value by one position.
            self.logger.info(
                "User: '{}' |> followers: {} |> following: {} |> relationship "
                "ratio: {}".format(
                    username,
                    followers_count if followers_count else "unknown",
                    following_count if following_count else "unknown",
                    truncate_float(relationship_ratio, 2)
                    if relationship_ratio else "unknown",
                ))

            if followers_count or following_count:
                if delimit_by_numbers:
                    if followers_count:
                        if max_followers and followers_count > max_followers:
                            reason = (
                                "User '{}'s followers count exceeds maximum "
                                "limit ~skipping user\n".format(username))
                            return False, reason
                        if min_followers and followers_count < min_followers:
                            reason = (
                                "User '{}'s followers count is less than "
                                "minimum limit ~skipping user\n".format(
                                    username))
                            return False, reason
                    if following_count:
                        if max_following and following_count > max_following:
                            reason = (
                                "User '{}'s following count exceeds maximum "
                                "limit ~skipping user\n".format(username))
                            return False, reason
                        if min_following and following_count < min_following:
                            reason = (
                                "User '{}'s following count is less than "
                                "minimum limit ~skipping user\n".format(
                                    username))
                            return False, reason

                # The ratio test applies with or without numeric limits; with
                # limits it runs after them.
                if (potency_ratio and relationship_ratio and
                        relationship_ratio < potency_ratio):
                    reason = (
                        "'{}' is not a {} with the relationship ratio of {} "
                        "~skipping user\n".format(
                            username,
                            "potential user"
                            if not reverse_relationship else "massive follower",
                            truncate_float(relationship_ratio, 2),
                        ))
                    return False, reason

        if min_posts or max_posts:
            # if you are interested in relationship number of posts boundaries
            try:
                number_of_posts = getUserData(
                    "graphql.user.edge_owner_to_timeline_media.count",
                    self.browser)
            except WebDriverException:
                self.logger.error("~cannot get number of posts for username")
                reason = ("---> Sorry, couldn't check for number of posts of "
                          "username\n")
                return False, reason

            if max_posts and number_of_posts > max_posts:
                reason = (
                    "Number of posts ({}) of '{}' exceeds the maximum limit "
                    "given {}\n".format(number_of_posts, username, max_posts))
                return False, reason
            if min_posts and number_of_posts < min_posts:
                reason = (
                    "Number of posts ({}) of '{}' is less than the minimum "
                    "limit given {}\n".format(number_of_posts, username,
                                              min_posts))
                return False, reason

        # Skip users

        # skip private
        if skip_private:
            try:
                self.browser.find_element_by_xpath(
                    "//*[contains(text(), 'This Account is Private')]")
                is_private = True
            except NoSuchElementException:
                is_private = False
            if is_private and (random.randint(0, 100) <=
                               skip_private_percentage):
                return False, "{} is private account, by default skip\n".format(
                    username)

        # skip no profile pic
        if skip_no_profile_pic:
            try:
                profile_pic = getUserData("graphql.user.profile_pic_url",
                                          self.browser)
            except WebDriverException:
                self.logger.error("~cannot get the post profile pic url")
                return False, "---> Sorry, couldn't get if user profile pic url\n"
            if (profile_pic in default_profile_pic_instagram
                    or str(profile_pic).find(
                        "11906329_960233084022564_1448528159_a.jpg") > 0) and (
                            random.randint(0, 100) <=
                            skip_no_profile_pic_percentage):
                return False, "{} has default instagram profile picture\n".format(
                    username)

        # skip business
        if skip_business or skip_non_business:
            # if is business account skip under conditions
            try:
                is_business_account = getUserData(
                    "graphql.user.is_business_account", self.browser)
            except WebDriverException:
                self.logger.error(
                    "~cannot get if user has business account active")
                return (
                    False,
                    "---> Sorry, couldn't get if user has business "
                    "account active\n",
                )

            if skip_non_business and not is_business_account:
                return (
                    False,
                    "---> Skipping non business because skip_non_business set to True",
                )

            if is_business_account:
                try:
                    category = getUserData(
                        "graphql.user.business_category_name", self.browser)
                except WebDriverException:
                    self.logger.error("~cannot get category name for user")
                    return False, ("---> Sorry, couldn't get category name for "
                                   "user\n")

                if not skip_business_categories:
                    if not dont_skip_business_categories:
                        # No category lists at all: the percentage alone
                        # decides, so a failed draw keeps the user instead of
                        # falling through to an unconditional skip.
                        if random.randint(0, 100) <= skip_business_percentage:
                            return False, "'{}' has a business account\n".format(
                                username)
                    elif category not in dont_skip_business_categories:
                        return (
                            False,
                            ("'{}' has a business account in the "
                             "undesired category of '{}'\n".format(
                                 username, category)),
                        )
                elif category in skip_business_categories:
                    return (
                        False,
                        ("'{}' has a business account in the "
                         "undesired category of '{}'\n".format(
                             username, category)),
                    )

        if skip_bio_keyword:
            # if contain stop words then skip
            try:
                profile_bio = getUserData("graphql.user.biography",
                                          self.browser)
            except WebDriverException:
                self.logger.error("~cannot get user bio")
                return False, ("---> Sorry, couldn't get get user bio "
                               "account active\n")

            # Lower-case once; a missing biography is treated as empty.
            bio_lowered = (profile_bio or "").lower()
            for bio_keyword in skip_bio_keyword:
                if bio_keyword.lower() in bio_lowered:
                    return (
                        False,
                        "{} has a bio keyword of {}, by default skip\n".format(
                            username, bio_keyword),
                    )

        # if everything is ok
        return True, "Valid user"

    except NoSuchElementException:
        return False, "Unable to locate element"

    finally:
        # Runs on every exit from the `try` above, including the early
        # `return`s: persist what was learned, then navigate back.
        if self.store_in_database:
            _nf_store_user_stats(self, username, followers_count,
                                 following_count, number_of_posts)

        if post_link:
            nf_find_and_press_back(self, post_link)


def _nf_is_blacklisted(blacklist_file: str, username: str) -> bool:
    """Tell whether `username` equals any field of any row in the CSV file."""
    if not os.path.isfile(blacklist_file):
        return False
    with open(blacklist_file, "rt") as f:
        return any(username in row for row in csv.reader(f, delimiter=","))


def _nf_store_user_stats(self, username, followers_count, following_count,
                         number_of_posts) -> None:
    """Save the check date and any truthy counts for `username` to the database."""
    try:
        user = db_get_or_create_user(self, username)
        self.db.session.add(user)
        user.date_checked = datetime.now()
        if followers_count:
            user.followers_count = followers_count
        if following_count:
            user.following_count = following_count
        if number_of_posts:
            user.posts_count = number_of_posts
        # Commit inside the `try` so that a failing commit is rolled back
        # instead of escaping from the caller's `finally` clause.
        self.db.session.commit()
    except SQLAlchemyError as err:
        self.db.session.rollback()
        self.logger.warning(
            "Could not store data of '{}': {}".format(username, err))
def gatherAndStoreLinkData(session, username1, posted_link):
    """Given a posted_link, gather its info and update the database.

    Checks the link with `check_link2`, records the session activity, and for
    an appropriate link fetches the relationship counts of the poster and
    stores everything through `storeRecord` and `storeTaggedActivitytoDB`.
    A link reported as "Unavailable Page" is deleted from the database.

    :param session: logged-in session providing `browser`, `logger` and the
        filter settings passed to `check_link2`
    :param username1: account the tagged link belongs to
    :param posted_link: link of the post to examine
    :returns: False when the relationship counts of the poster could not be
        fetched (treated as running out of page pulls), True otherwise
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"

    # Defaults used when `check_link2` fails before returning anything.
    inappropriate = True
    reason = None

    # session.logger.info("Processing link: {}".format(posted_link))
    try:
        sleep(1.0)
        try:
            (inappropriate, posted_by_username, posted_link_likes_count,
             posted_link_comments_count, posted_link_datetime_str,
             posted_link_location_name, posted_link_image_text, is_video,
             reason, scope) = check_link2(
                 session.browser,
                 posted_link,
                 session.dont_like,
                 session.mandatory_words,
                 session.mandatory_language,
                 session.is_mandatory_character,
                 session.mandatory_character,
                 session.check_character_set,
                 session.ignore_if_contains,
                 session.logger,
             )
            session.logger.info("-Posted on: {} by: {} ".format(
                posted_link_datetime_str, posted_by_username))
        except Exception as err:
            # A link that cannot be checked is skipped, not fatal.
            session.logger.warning(
                "Could not check link {}: {}".format(posted_link, err))
            inappropriate = True

        # NOTE(review): recorded after every check attempt, successful or
        # not — confirm this matches the intended accounting.
        updateSessionActivityToDB(session)

        if not inappropriate:
            sleep(1.0)
            try:
                (posted_by_followers_count,
                 posted_by_following_count) = get_relationship_counts(
                     session.browser, posted_by_username, session.logger)
                # posted_by_posts_count = get_number_of_posts(session.browser)
                posted_by_posts_count = None
            except Exception:
                session.logger.warning(
                    "Might be out of daily page pulls. Closing session.")
                return False

            session.logger.info(
                "-Posts: {} followers: {} following: {}".format(
                    posted_by_posts_count, posted_by_followers_count,
                    posted_by_following_count))

            # Naive UTC timestamp, matching the naive parsed post time.
            last_checked_datetime = datetime.now(
                timezone.utc).replace(tzinfo=None)
            posted_link_datetime = datetime.strptime(posted_link_datetime_str,
                                                     timestamp_format)

            storeRecord(username1, last_checked_datetime, posted_link,
                        posted_link_datetime, posted_link_location_name,
                        posted_link_likes_count, posted_link_comments_count,
                        posted_by_username, posted_by_posts_count,
                        posted_by_followers_count, posted_by_following_count)
            storeTaggedActivitytoDB(
                session.logger, username1, last_checked_datetime, posted_link,
                posted_link_datetime, posted_link_location_name,
                posted_link_likes_count, posted_link_comments_count,
                posted_by_username, posted_by_posts_count,
                posted_by_followers_count, posted_by_following_count)

        # Checked independently of `inappropriate`: presumably an unavailable
        # page is reported as inappropriate by check_link2 — TODO confirm.
        if reason == "Unavailable Page":
            # delete link from data base
            deleteLinkFromDB(session, username1, posted_link)
        sleep(1.0)
    except NoSuchElementException as err:
        session.logger.error("Invalid Page: {}".format(err))
    return True
def delta_followers(session, user_name, max_amount, past_followers=None):
    """
    Given an instagram username and an optional list of past_followers,
    retrieves the list of new followers.

    Navigates to the profile of `user_name`, opens its followers dialog and
    collects users through `get_users_through_dialog`.

    :param session: session providing `browser`, `logger`, `username`,
        `blacklist`, `follow_times`, `jumps` and `logfolder`
    :param user_name: username whose followers are collected; surrounding
        whitespace is stripped
    :param max_amount: amount of followers requested from the dialog
    :param past_followers: optional list of already known followers,
        forwarded to `get_users_through_dialog`; defaults to an empty list
    :return: the list of users produced by `get_users_through_dialog`, or an
        empty list when the page is unavailable, the user has no followers,
        the followers link cannot be found or clicked, or an error occurs
    """
    if past_followers is None:
        past_followers = []

    session.quotient_breach = False

    try:
        user_name = user_name.strip()
        user_link = "https://www.instagram.com/{}/".format(user_name)
        web_address_navigator(session.browser, user_link)

        if not is_page_available(browser=session.browser,
                                 logger=session.logger):
            return []

        # check how many people are following this user.
        allfollowers, allfollowing = get_relationship_counts(
            browser=session.browser,
            username=user_name,
            logger=session.logger)

        # skip early for no followers
        if not allfollowers:
            session.logger.info("'{}' has no followers".format(user_name))
            return []

        if allfollowers < max_amount:
            session.logger.warning(
                "'{}' has less followers- {}, than the given amount of {}".
                format(user_name, allfollowers, max_amount))

        # locate element to user's followers
        try:
            followers_link = session.browser.find_elements_by_xpath(
                '//a[@href="/{}/followers/"]'.format(user_name))
            if not followers_link:
                session.logger.error("'{} is private'".format(user_name))
                return []
            click_element(session.browser, followers_link[0])
        except NoSuchElementException:
            session.logger.error(
                'Could not find followers\' link for {}'.format(user_name))
            return []
        except Exception as e:
            # `Exception` rather than `BaseException`, so KeyboardInterrupt
            # and SystemExit are not swallowed here.
            session.logger.error("`followers_link` error {}".format(str(e)))
            return []

        person_list, _ = get_users_through_dialog(
            browser=session.browser,
            login=session.username,
            user_name=user_name,
            amount=max_amount,
            users_count=allfollowers,
            randomize=False,
            dont_include=[],
            blacklist=session.blacklist,
            follow_times=session.follow_times,
            simulation={
                "enabled": False,
                "percentage": 100
            },
            channel="Follow",
            jumps=session.jumps,
            logger=session.logger,
            logfolder=session.logfolder,
            past_followers=past_followers,
            wait_seconds=10,
        )

    except (TypeError, RuntimeWarning) as err:
        session.logger.error('Sorry, an error occurred: {}'.format(err))
        session.aborting = True
        return []

    return person_list