# Beispiel #1
# 0
def scrape_followers(driver, account, members):
    """Yield [username, follower_count, post_count] for up to `members` followers of `account`."""
    # Open the profile page and give it time to render.
    driver.get("https://www.instagram.com/{}/".format(account))
    sleep(4)
    driver.find_element_by_partial_link_text("follower").click()

    # Block until the followers modal is present.
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)

    raw_count = driver.find_element_by_xpath("//li[2]/a/span").text
    allfoll = int(raw_count.replace(",", ""))
    posts = driver.find_element_by_xpath(
        '//*[@id="react-root"]/section/main/div/header/section/ul/li[1]/span/span'
    ).text
    print(allfoll)
    print("Posts ", posts)

    # Instagram renders followers 12 at a time; CSS nth-child lets us
    # address each entry in the modal directly.
    follower_css = "ul div li:nth-child({}) a.notranslate"
    batch_start = 1
    while True:
        for idx in range(batch_start, batch_start + 12):
            if idx > members:
                return None
            name = waiter.find_element(driver, follower_css.format(idx)).text
            yield [name, allfoll, posts]
        # Scroll the last loaded entry into view to trigger the next batch.
        anchor = waiter.find_element(driver, follower_css.format(batch_start + 11))
        driver.execute_script("arguments[0].scrollIntoView();", anchor)
        batch_start += 12
# Beispiel #2
# 0
def get_targets(self):
    """Yield follower usernames for each account listed in alvo.txt.

    Reads target accounts (one per line) from 'alvo.txt', opens each
    profile's follower modal, and yields follower names while scrolling
    the modal in batches of 12.
    """
    # Target accounts: a list collected from the likers of other institutions.
    with open('alvo.txt') as f:
        alvo = [line.rstrip() for line in f]
        print(alvo)

    # Visit each account in turn.
    for account in alvo:
        self.driver.get("https://www.instagram.com/{}/".format(account))
        time.sleep(random.uniform(5, 7))

        # Follower count is hard-coded; the XPath lookup (commented out in
        # the original) was unreliable and raised ValueError.
        allfoll = 10

        self.driver.find_element_by_partial_link_text("follower").click()

        time.sleep(random.uniform(1.5, 2))
        trick_css = "ul div li:nth-child({}) a.notranslate"  # nth-child addresses each modal entry

        wait = WebDriverWait(self.driver, 20)

        done = False
        for group in itertools.count(start=1, step=12):
            for follower_index in range(group, group + 12):
                if follower_index > allfoll:
                    # BUG FIX ("TEM ERRO AQUI"): a bare `break` only left the
                    # inner loop, so the infinite itertools.count loop kept
                    # scrolling forever and the next account was never reached.
                    done = True
                    break
                yield waiter.find_element(
                    self.driver, trick_css.format(follower_index)).text
            if done:
                break
            last_follower = waiter.find_element(self.driver,
                                                trick_css.format(group + 11))
            self.driver.execute_script("arguments[0].scrollIntoView();",
                                       last_follower)
# Beispiel #3
# 0
 def __getuserfollowingcript(self, targetaccount):
     """Yield usernames that `targetaccount` follows, scrolling the modal in batches of 12.

     On any element-lookup failure the generator ends (returning -1 as the
     StopIteration value).
     """
     # The profile page we must open first.
     linkprofile = "https://www.instagram.com/" + targetaccount
     self.driver.get(linkprofile)
     time.sleep(round(random.uniform(4, 6), 2))
     # Open the "following" modal (third entry in the profile header list).
     self.driver.find_element_by_xpath(
         '//*[@id="react-root"]/section/main/div/header/section/ul/li[3]/a'
     ).click()
     time.sleep(round(random.uniform(4, 6), 2))
     waiter.find_element(self.driver, "//div[@role='dialog']", by=XPATH)
     following_css = "ul div li:nth-child({}) a.notranslate"
     for group in itertools.count(start=1, step=12):
         # A sleep is necessary so that Instagram doesn't block us.
         for following_index in range(group, group + 12):
             try:
                 current_following = waiter.find_element(
                     self.driver, following_css.format(following_index)
                 )
                 self.driver.execute_script(
                     "arguments[0].scrollIntoView(true);", current_following
                 )
                 time.sleep(0.1)
                 yield waiter.find_element(
                     self.driver, following_css.format(following_index)
                 ).text
             except Exception:
                 # BUG FIX: was a bare `except:` which also swallowed
                 # KeyboardInterrupt/SystemExit/GeneratorExit; narrowed
                 # to Exception so those still propagate.
                 return -1
     # Re-acquire the last entry and scroll it into view so Instagram
         # loads the next batch of 12.
         last_following = waiter.find_element(
             self.driver, following_css.format(following_index)
         )
         self.driver.execute_script(
             "arguments[0].scrollIntoView(true);", last_following
         )
         time.sleep(0.5)
# Beispiel #4
# 0
def list_following(self):
    """Yield up to `allfow` usernames from the already-open profile's "following" modal."""
    trick_css = "ul div li:nth-child({}) a.notranslate"  # nth-child addresses each modal entry

    # Every 20 entries we check who follows us back; the count is
    # hard-coded because the XPath lookup (commented out originally)
    # was unreliable.
    allfow = 20

    self.driver.find_element_by_partial_link_text("following").click()
    time.sleep(random.uniform(4, 6))

    for group1 in itertools.count(start=1, step=12):
        for following_index in range(group1, group1 + 12):
            if following_index > allfow:
                # BUG FIX: `return` ends the generator; the original `break`
                # only left the inner loop, so the infinite itertools.count
                # loop kept scrolling forever.
                return
            yield waiter.find_element(self.driver,
                                      trick_css.format(following_index)).text
        # Scroll the last loaded entry into view to trigger the next batch.
        last_following = waiter.find_element(self.driver,
                                             trick_css.format(group1 + 11))
        self.driver.execute_script("arguments[0].scrollIntoView();",
                                   last_following)
# Beispiel #5
# 0
def getWords(driver, list):
    """Scrape each account's bio, tokenize it, and build word-frequency data.

    Args:
        driver: Selenium web driver, already logged in.
        list: iterable of account usernames (NOTE: shadows the builtin
              `list`; name kept for backward compatibility).

    Returns:
        (words, index): `words` is a list of {"word", "count"} dicts sorted
        by count (descending) across all accounts; `index` holds one
        {"account", "wordsTotal", "words"} dict per scraped account.
    """
    index = []
    # BUG FIX: `words` was only created lazily via a fragile
    # `'words' not in locals()` check and raised NameError at the final
    # sorted() call when every account was skipped; initialize up front.
    words = []
    # Hoist the stopword list: stopwords.words() re-reads the corpus on
    # every call, so doing it once per word was needless repeated work.
    english_stopwords = set(stopwords.words('english'))

    for follower in list:
        url = 'https://www.instagram.com/' + follower + '/'
        driver.get(url)

        # Wait until the login-redirect URL has been replaced by the profile.
        WebDriverWait(driver, 20).until(
            EC.url_changes('https://www.instagram.com/accounts/login/?next=/' +
                           follower + '/'))

        waiter.find_element(driver, '//*[@class="-vDIg"]', by=XPATH)
        try:
            accountWords = re.split(
                '[^a-zA-Z]',
                driver.find_element_by_xpath('//*[@class="-vDIg"]/span').text)
            accountWords = [
                word.translate(str.maketrans('', '',
                                             string.punctuation)).lower()
                for word in accountWords
                if word != "" and word not in english_stopwords
            ]
        except NoSuchElementException:
            # Account has no bio span; skip it.
            continue

        accountWordsD = {
            "account": follower,
            "wordsTotal": len(accountWords),
            "words": []
        }
        for word in accountWords:
            # Per-account tally.
            newWordAccount = {"word": word, "count": 1}
            temp1 = findWord(accountWordsD, newWordAccount["word"])
            if temp1 == -1:
                accountWordsD["words"].append(newWordAccount)
            else:
                temp1["count"] += 1

            # Global tally across all accounts.
            newWord = {"word": word, "count": 1}
            temp = next((item for item in words if item["word"] == word), -1)
            if temp == -1:
                words.append(newWord)
            else:
                temp['count'] += 1

        index.append(accountWordsD)

    words = sorted(words, key=itemgetter('count'), reverse=True)

    print(words)
    print(index)

    return words, index
# Beispiel #6
# 0
def list_followers(driver, account):
    """Yield the username of every follower of `account`.

    Opens the profile, reads the follower count, opens the follower modal,
    and scrolls it in batches of 12 until every follower is yielded.
    """
    driver.get("https://www.instagram.com/{0}/".format(account))
    time.sleep(random.uniform(1.5, 2))

    # Follower count is the second <li> in the profile header.
    allfoll = int(driver.find_element_by_xpath("//li[2]/a/span").text)
    driver.find_element_by_partial_link_text("follower").click()

    time.sleep(random.uniform(1.5, 2))
    trick_css = "ul div li:nth-child({}) a.notranslate"  # nth-child addresses each modal entry

    wait = WebDriverWait(driver, 20)

    for group in itertools.count(start=1, step=12):
        for follower_index in range(group, group + 12):
            if follower_index > allfoll:
                # BUG FIX: `raise StopIteration` inside a generator becomes
                # RuntimeError under PEP 479 (Python 3.7+); `return` is the
                # correct way to end the generator.
                return
            yield waiter.find_element(driver,
                                      trick_css.format(follower_index)).text

        # Instagram loads followers 12 at a time. Re-acquire the last
        # element (it may have gone stale since the loop above) and scroll
        # it into view, forcing Instagram to load another 12.
        last_follower = waiter.find_element(driver,
                                            trick_css.format(group + 11))
        driver.execute_script("arguments[0].scrollIntoView();", last_follower)
def insta_scrape(driver, account):
    """Yield follower display names for each profile in `account`.

    Persists the collected profile links to href_list.csv after every
    follower, so progress survives a crash; scraping for a profile ends
    when an element lookup finally fails.
    """
    print('Started Scraping URLs')
    for profile in account:
        href_list = []
        driver.get("https://www.instagram.com/{0}/".format(profile))
        sleep(10)
        driver.find_element_by_partial_link_text("follower").click()
        waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)

        try:
            follower_css = "ul div li:nth-child({}) a.notranslate"
            for group in itertools.count(start=1, step=12):
                for follower_index in range(group, group + 12):
                    get_href = waiter.find_element(
                        driver, follower_css.format(follower_index)).text
                    name = get_href.replace('_', ' ')
                    href_list.append('/' + name + '/')
                    # Rewrite the CSV every iteration so a crash loses nothing.
                    data = {'Links': href_list}
                    df = pd.DataFrame(data)
                    df.to_csv('href_list.csv', index=False)
                    yield name
                # Scroll the last loaded entry into view to load 12 more.
                last_follower = waiter.find_element(
                    driver, follower_css.format(group + 11))
                driver.execute_script("arguments[0].scrollIntoView();",
                                      last_follower)
        except Exception:
            # BUG FIX: was a bare `except:` (also caught KeyboardInterrupt
            # and GeneratorExit); a lookup timeout is the expected exit here.
            print('Scraping Done')
# Beispiel #8
# 0
def getFollowing(account, driver):
    # Scrape and return the usernames shown in `account`'s followers dialog.
    # NOTE(review): despite the name, this opens the /followers/ URL and
    # reads the li[3] count — confirm which list (followers vs following)
    # is actually intended.
    # Side effect: stores the visited URL into a module-level `data` dict.
    data["url"] = 'https://www.instagram.com/' + account + '/followers/'
    driver.get(data["url"])
    driver.maximize_window()

    # Block until the login-redirect URL is replaced by the real page.
    WebDriverWait(driver, 20).until(
        EC.url_changes('https://www.instagram.com/accounts/login/?next=/' +
                       account + '/followers/'))

    # click on following
    followers_btn = driver.find_elements_by_class_name('g47SY')
    followers_btn[2].click()

    # Wait for the modal dialog to appear.
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)

    # find the following window
    dialog = driver.find_element_by_xpath('/html/body/div[3]/div/div/div[2]')

    # find number of following
    follamount = driver.find_element_by_xpath("//li[3]/a/span").text

    follamount = follamount.replace(',', "")

    # Abbreviated counts like "12.3k" get their 'k' suffix expanded.
    if 'k' in follamount:
        follamount = follamount[:-1]
        follamount = int(float(follamount)) * 1000

    else:
        follamount = int(float(follamount))

    elem = driver.find_element_by_xpath(
        '//*[@class="FPmhX notranslate _0imsa "]')

    time.sleep(2)
    # Jiggle the list up and down so Instagram starts lazy-loading entries.
    for i in range(0, 6):
        elem.send_keys(Keys.PAGE_UP)
        elem.send_keys(Keys.PAGE_UP)
        elem.send_keys(Keys.PAGE_DOWN)

    # scroll down the page until the rendered entry count matches the total
    num = driver.find_elements_by_xpath(
        '//*[@class="FPmhX notranslate _0imsa "]').__len__()
    # NOTE(review): if the rendered count never exactly reaches
    # `follamount` (e.g. rounded 'k' totals), this loop never terminates.
    while (int(follamount) != num):
        driver.execute_script(
            "arguments[0].scrollTop = arguments[0].scrollHeight", dialog)
        time.sleep(random.randint(500, 1000) / 1000)
        num = driver.find_elements_by_xpath(
            '//*[@class="FPmhX notranslate _0imsa "]').__len__()
        a = os.system('CLS')  # clear the console (Windows-only command)
        print("Extracting friends {} of {} ({}%)".format(
            num, follamount, round((num / follamount) * 100, 2)))

    # Parse the fully-loaded page once with BeautifulSoup and collect names.
    followers = BeautifulSoup(driver.page_source, features="lxml").find_all(
        "a", {'class': 'FPmhX notranslate _0imsa '})
    followers_arr = []
    for follower in followers:
        followers_arr.append(follower.get_text())

    return followers_arr
# Beispiel #9
# 0
def login(driver, username, password):
	"""Sign in to Instagram and block until the dashboard has rendered."""
	driver.get("https://www.instagram.com/accounts/login/")

	# Fill in both credential fields, then submit.
	fields = (("//div/input[@name='username']", username),
	          ("//div/input[@name='password']", password))
	for xpath, value in fields:
		waiter.find_write(driver, xpath, value, by=XPATH)
	waiter.find_element(driver, "//div/button[@type='submit']", by=XPATH).click()

	# The "Find People" icon only exists once the dashboard has loaded.
	waiter.find_element(driver, "//a/span[@aria-label='Find People']", by=XPATH)
def getFollowers(driver):
    """Scroll through ali.alaoui12's followers modal and return the parsed follower list."""
    driver.get("https://www.instagram.com/ali.alaoui12/")

    numFollowers = int(
        waiter.find_element(
            driver, "//li[2]/a/span",
            by=XPATH).text)  # li[2] refers to second li in structure
    print("You have " + str(numFollowers) + " followers")

    waiter.find_element(driver,
                        "//a[@href='/ali.alaoui12/followers/']",
                        by=XPATH).click()
    dialog = waiter.find_element(
        driver, "//div[@role='dialog']/div[2]/ul", by=XPATH
    )  # makes sure followers have loaded. select ul in div with role of dialog
    dialog.click()

    units = waiter.find_elements(driver,
                                 "//div[@role='dialog']/div[2]/ul/div/li",
                                 by=XPATH)
    numUnits = len(units)
    print("There are " + str(numUnits) + " followers per scroll.")

    scrollFinished = False
    sameCounter = 0  # stop once the list length is unchanged 5 times in a row
    # BUG FIX: oldLastFollower was referenced before assignment on the
    # first loop iteration (NameError); initialize it up front so the
    # comparison below is always defined.
    oldLastFollower = None

    while (scrollFinished is False and (numFollowers > 12)
           ):  # no need to scroll if numFollowers is 12 or less
        followers = waiter.find_elements(
            driver, "//div[@role='dialog']/div[2]/ul/div/li", by=XPATH)
        lastFollower = len(followers) - 1

        dialog.click()  # ensures dialog is focused
        try:
            driver.execute_script("arguments[0].scrollIntoView();",
                                  followers[lastFollower])
        except StaleElementReferenceException:  # element reloaded; take a breather
            time.sleep(3.5)
            continue

        if (lastFollower > 12 and lastFollower == oldLastFollower
            ):  # only stop when it has loaded all followers
            sameCounter = sameCounter + 1
            if (sameCounter == 5):
                scrollFinished = True  # to avoid any bugs

        oldLastFollower = lastFollower
        time.sleep(.5)

    units = waiter.find_elements(driver,
                                 "//div[@role='dialog']/div[2]/ul/div/li",
                                 by=XPATH)
    numUnits = len(units)
    print("To confirm, you have " + str(numUnits) + " followers.")

    followers = parseHTML()
    print(len(followers))
    return followers
# Beispiel #11
# 0
def getFollowg(driver, instaUsr):
    """Endlessly yield usernames from `instaUsr`'s "Following" modal (12 per scroll)."""
    driver.find_element_by_xpath(f"//a[@href='/{instaUsr}/following/']").click()
    print('> Catching usernames in "Following" list ...')
    sleep(2.5)
    slot_css = 'ul div li:nth-child({}) a.notranslate'
    batch = 1
    while True:
        for usrIndex in range(batch, batch + 12):
            yield find_element(driver, slot_css.format(usrIndex)).text
        # Scroll the last visible entry into view to load the next batch.
        anchor = find_element(driver, slot_css.format(usrIndex))
        driver.execute_script("arguments[0].scrollIntoView()", anchor)
        batch += 12
def login(driver):
    """Log in to Instagram with the hard-coded credentials and wait for the dashboard.

    Fill in the two placeholder constants below before use.
    """
    # BUG FIX: the original lines read `username = #insert username here`,
    # which is a SyntaxError (assignment with no value). Use empty-string
    # placeholders instead.
    username = ""  # insert username here
    password = ""  # insert password here

    # Load page
    driver.get("https://www.instagram.com/accounts/login/")

    # Login
    waiter.find_write(driver, "//input[@name='username']", username, by=XPATH)
    waiter.find_write(driver, "//input[@name='password']", password, by=XPATH)
    waiter.find_element(driver, "//div/button[@type='submit']", by=XPATH).click()

    # Wait for the user dashboard page to load
    waiter.find_element(driver, "//a/span[@aria-label='Find People']", by=XPATH)
def getFollowing(driver):
    """Scroll through ali.alaoui12's following modal and return the parsed list."""
    driver.get("https://www.instagram.com/ali.alaoui12/")
    # li[3] is the "following" entry in the profile header.
    numFollowing = int(
        waiter.find_element(driver, "//li[3]/a/span", by=XPATH).text)

    print("You are following " + str(numFollowing) + " people")

    waiter.find_element(driver,
                        "//a[@href='/ali.alaoui12/following/']",
                        by=XPATH).click()
    dialog = waiter.find_element(driver,
                                 "//div[@role='dialog']/div[2]/ul",
                                 by=XPATH)
    dialog.click()

    units = waiter.find_elements(driver,
                                 "//div[@role='dialog']/div[2]/ul/div/li",
                                 by=XPATH)
    numUnits = len(units)
    print("There are " + str(numUnits) + " followers per scroll.")

    scrollFinished = False
    sameCounter = 0  # stop once the list length is unchanged 5 times in a row
    # BUG FIX: oldLastFollowing was referenced before assignment on the
    # first loop iteration (NameError); initialize it up front.
    oldLastFollowing = None

    while (scrollFinished is False and (numFollowing > 12)
           ):  # no need to scroll if numFollowing is 12 or less
        following = waiter.find_elements(
            driver, "//div[@role='dialog']/div[2]/ul/div/li", by=XPATH)
        lastFollowing = len(following) - 1

        dialog.click()  # keep the dialog focused

        try:
            driver.execute_script("arguments[0].scrollIntoView();",
                                  following[lastFollowing])
        except StaleElementReferenceException:  # element reloaded; take a breather
            time.sleep(3.5)
            continue

        if (lastFollowing > 12 and lastFollowing == oldLastFollowing):
            sameCounter = sameCounter + 1
            if (sameCounter == 5):
                scrollFinished = True  # to avoid any bugs

        oldLastFollowing = lastFollowing
        time.sleep(.5)

    following = parseHTML()
    print(len(following))
    return following
def login(driver, user, pwd):
    """Authenticate on instagram.com and block until the home page appears."""
    driver.get("https://www.instagram.com/accounts/login/")

    # Credentials in, then submit.
    waiter.find_write(driver, "//div/input[@name='username']", user, by=XPATH)
    waiter.find_write(driver, "//div/input[@name='password']", pwd, by=XPATH)
    submit_xpath = "//div/button[@type='submit']"
    waiter.find_element(driver, submit_xpath, by=XPATH).click()

    # Home-page marker: the "Find People" icon in the nav bar. Blocking on
    # it ensures the login round-trip finished before we return.
    dashboard_marker = "//a/span[@aria-label='Find People']"
    waiter.find_element(driver, dashboard_marker, by=XPATH)
# Beispiel #15
# 0
def scrape(driver, account, type, max):
    '''
    Scrapes an instagram page follower or following modal.

    Parameters:
        driver: web driver
        account: instagram account of interest
        type: modal type (i.e. "follower" or "following")
              (shadows the builtin; name kept for caller compatibility)
        max: max count of followers/followed to scrape
             (shadows the builtin; name kept for caller compatibility)

    Returns:
        generator of users (either followers or followed)
    '''
    # load account page
    driver.get("https://www.instagram.com/{0}/".format(account))

    # open the requested modal and wait until it is interactable
    driver.find_element_by_partial_link_text(type).click()
    followers_modal = WebDriverWait(driver, 2).until(
        EC.element_to_be_clickable((By.XPATH, "//div[@class='isgrP']")))

    # scroll through the list, yielding one username per step
    follower_index = 1
    follower_css = "ul div li:nth-child({}) a.notranslate"  # nth-child addresses each modal entry
    # BUG FIX: the loop bound was `num_followers`, a name never defined
    # anywhere (NameError at runtime); the documented bound is `max`.
    while follower_index <= max:
        driver.execute_script(
            'arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;',
            followers_modal)

        yield waiter.find_element(driver,
                                  follower_css.format(follower_index)).text
        follower_index += 1
# Beispiel #16
# 0
def login(driver):
    """Sign in to danharoo.com with the (blank) credentials below and wait for the member page."""
    member_id = ""
    member_pw = ""

    driver.get("http://danharoo.com/member/login.html")

    # Fill in the login form and click the submit anchor.
    waiter.find_write(driver, "//*[@id='member_id']", member_id, by=XPATH)
    waiter.find_write(driver, "//*[@id='member_passwd']", member_pw, by=XPATH)
    driver.find_element_by_xpath(
        "/html/body/div[4]/div/div/form/div/div/fieldset/a").click()

    # This image only renders once the logged-in contents page has loaded.
    waiter.find_element(
        driver,
        "//*[@id='contents_main']/div[1]/div[1]/ul/li[11]/a/img",
        by=XPATH)
# Beispiel #17
# 0
def scrape(url: str, league: str, year: int, n_teams=20,
           driver_path: str = 'E:/USDE/scrapers/chromedriver.exe') -> None:
    """Scrape transfermarkt match and line-up URLs for a league season.

    Match URLs and line-up URLs are cached as pickle files
    ({league}_{year}_urls.pickle / {league}_{year}_lineup_urls.pickle) so
    the slow Selenium passes only run once. Finally prints home/away
    starting line-ups for the first two matches.

    Args:
        url: league fixtures page on transfermarkt.
        league: league identifier used in the cache file names.
        year: season year used in the cache file names.
        n_teams: number of teams in the league (two fixture tables each).
        driver_path: path to the ChromeDriver executable (new optional
            parameter; defaults to the previously hard-coded location).
    """
    driver = webdriver.Chrome(driver_path)
    try:
        # Pass 1: collect all match URLs (slow) unless already cached.
        if os.path.exists(f'{league}_{year}_urls.pickle'):
            with open(f'{league}_{year}_urls.pickle', 'rb') as f:
                list_url = pickle.load(f)
        else:
            print(f'Non esiste {league}_{year}_urls.pickle')
            driver.get(url)
            driver.implicitly_wait(3)
            list_url = []

            # Two fixture tables per team, starting at div index 2.
            for i in tqdm(range(2, (n_teams - 1) * 2 + 2)):
                table = driver.find_element_by_xpath(
                    f'//*[@id="main"]/div[10]/div[{i}]/div/table')
                for row in table.find_elements_by_css_selector('tr'):
                    if row.get_attribute('class') != 'bg_blau_20':
                        for td in row.find_elements_by_css_selector('td'):
                            if td.get_attribute('class') == 'zentriert hauptlink':
                                list_url.append(
                                    td.find_elements_by_css_selector('a')
                                    [0].get_attribute('href'))
            with open(f'{league}_{year}_urls.pickle', 'wb') as f:
                pickle.dump(list_url, f)
        print(f'Total matches: {len(list_url)}')

        # Pass 2: resolve each match page to its line-ups URL (also cached).
        if os.path.exists(f'{league}_{year}_lineup_urls.pickle'):
            with open(f'{league}_{year}_lineup_urls.pickle', 'rb') as f:
                lineup_url = pickle.load(f)
        else:
            lineup_url = []
            for u in tqdm(list_url):
                time.sleep(2)  # be polite to the server
                driver.get(u)
                lineup_url.append(
                    waiter.find_element(driver, '//*[@id="line-ups"]/a',
                                        by=XPATH).get_attribute('href'))
            with open(f'{league}_{year}_lineup_urls.pickle', 'wb') as f:
                pickle.dump(lineup_url, f)
        print(f'Total matches lineups: {len(lineup_url)}')

        # Print home/away starting XIs for the first two matches.
        for lu in lineup_url[:2]:
            driver.get(lu)
            print('Home Lineup:')
            for i in range(1, 12):
                a = driver.find_element_by_xpath(
                    f'//*[@id="main"]/div[12]/div[1]/div/div[2]/table/tbody/tr[{i}]/td[2]/table/tbody/tr[1]/td[2]/a'
                )
                print(
                    f"{a.text} {a.get_attribute('id')} {a.get_attribute('href')}")
            print('Away Lineup:')
            for i in range(1, 12):
                a = driver.find_element_by_xpath(
                    f'//*[@id="main"]/div[12]/div[2]/div/div[2]/table/tbody/tr[{i}]/td[2]/table/tbody/tr[1]/td[2]/a'
                )
                print(
                    f"{a.text} {a.get_attribute('id')} {a.get_attribute('href')}")
    finally:
        # BUG FIX: the browser was never closed, leaking a Chrome process
        # on every call.
        driver.quit()
# Beispiel #18
# 0
 def check_for_following(self):
     """Collect up to 100 unique profile hrefs from this user's followers dialog.

     Opens our own profile, clicks the followers link, scrolls the page
     four times harvesting anchor hrefs each pass, and returns the first
     100 unique hrefs seen.
     """
     driver = self.driver
     time.sleep(5)
     driver.get('https://www.instagram.com/' + self.username + '/')
     followers_string = "//a[@href='/" + self.username + "/followers/']"
     waiter.find_element(driver, followers_string, by=XPATH).click()
     time.sleep(random.randint(2, 4))
     user_hrefs = []
     all_hrefs = ""
     counter = 0
     while counter < 4:
         try:
             driver.execute_script(
                 "window.scrollTo(0, document.body.scrollHeight);")
             time.sleep(random.randint(4, 5))
             all_hrefs = driver.find_elements_by_tag_name('a')
             all_hrefs = [
                 elem.get_attribute('href') for elem in all_hrefs
                 if '.com/' in elem.get_attribute('href')
             ]
             all_user_hrefs = []
             # The first ~19 anchors are site chrome, not profile links.
             for ref in all_hrefs[19:]:
                 u = list(ref.split("/"))
                 if len(u) == 5:
                     # BUG FIX: the original used `or`, which made this
                     # condition always true; `and` is needed to exclude
                     # both the explore page and our own profile.
                     if (u[3] != 'explore') and (u[3] != self.username):
                         all_user_hrefs.append(u)
             [
                 user_hrefs.append(href) for href in all_user_hrefs
                 if href not in user_hrefs
             ]
         except Exception:
             # NOTE(review): `continue` skips the counter increment, so a
             # persistent failure would loop forever — confirm intent.
             continue
         counter += 1
     # Keep the first 100 unique hrefs, preserving discovery order.
     href = []
     counter = 0
     for h in all_hrefs:
         if counter < 100:
             if h not in href:
                 href.append(h)
                 counter += 1
         else:
             break
     return href
# Beispiel #19
# 0
    def log_in_username(self):
        """Return the text currently entered in the username field.

        Returns:
            str
        """
        field = waiter.find_element(self.driver, 'username', by=NAME)
        return field.get_attribute('value')
# Beispiel #20
# 0
    def log_in_password(self):
        """Return the text currently entered in the password field.

        Returns:
            str
        """
        field = waiter.find_element(self.driver, 'password', by=NAME)
        return field.get_attribute('value')
# Beispiel #21
# 0
    def log_in_link_click(self, wait_until_displayed=True):
        """Click the `log in` link on the log in page.

        Args:
            wait_until_displayed (bool): Block until all the log in fields
                                         are displayed

        Returns:
            None

        Raises:
            TimeoutException: Raised if the link isn't found, and if the
                              fields aren't displayed
        """
        link = waiter.find_element(self.driver, LOG_IN_LINK_XPATH, XPATH)
        link.click()

        if not wait_until_displayed:
            return
        Wait(self, 30).until(lambda obj: obj.log_in_fields_displayed)
 def Login(self):
     """Log in to Instagram, reusing saved cookies when possible.

     Returns:
         1  if this object is already logged in,
         -1 if the saved cookies already authenticate us, or on failure,
         0  on a successful fresh login (cookies are re-saved).
     """
     # No need to log in twice within the same session.
     if self.loggedin is True:
         print("Already Logged in")
         return 1
     try:
         # Open the login page and replay the saved cookies.
         self.driver.get("https://www.instagram.com/accounts/login")
         # BUG FIX: the pickle file handle was opened inline and never
         # closed; use a context manager.
         with open(self.cookiepath, "rb") as f:
             cookies = pickle.load(f)
         for cookie in cookies:
             if "expiry" in cookie:
                 del cookie["expiry"]
             self.driver.add_cookie(cookie)
         time.sleep(round(random.uniform(5, 7), 2))
         # If the cookies were still valid we got redirected home already.
         cururl = self.driver.current_url
         if cururl == "https://www.instagram.com/":
             print("Already Logged In no need To Login")
             return -1
         # Cookies did not log us in; fill in the form manually.
         waiter.find_write(self.driver, "//input", self.username, by=XPATH)
         time.sleep(round(random.uniform(2, 3), 2))
         waiter.find_write(self.driver,
                           "//div[3]/div/label/input",
                           self.password,
                           by=XPATH)
         time.sleep(round(random.uniform(2, 3), 2))
         waiter.find_element(self.driver, "//button/div", by=XPATH).click()
         time.sleep(round(random.uniform(4, 6), 2))
         # A redirect to the main page means the login succeeded.
         cururl = self.driver.current_url
         if cururl == "https://www.instagram.com/":
             print("Login success")
             # Persist the fresh session cookies for next time (with a
             # properly closed file handle).
             with open(self.cookiepath, "wb") as f:
                 pickle.dump(self.driver.get_cookies(), f)
             self.loggedin = True
             return 0
         # We didn't reach the main page, so the login failed.
         else:
             print("Error: Login Was not successful")
             return -1
     except Exception as err:
         print("Error: in Login")
         traceback.print_exc()
         print(str(err))
         return -1
# Beispiel #23
# 0
def scrape_followers(driver, account):
    """Yield the username of every follower of `account`."""
    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))

    sleep(2)
    driver.find_element_by_partial_link_text("follower").click()
    # Wait for the followers modal, then read the total follower count.
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)
    allfoll = int(driver.find_element_by_xpath("//li[2]/a/span").text)
    follower_css = "ul div li:nth-child({}) a.notranslate"
    for group in itertools.count(start=1, step=12):
        for follower_index in range(group, group + 12):
            if follower_index > allfoll:
                # BUG FIX: `raise StopIteration` inside a generator becomes
                # RuntimeError under PEP 479 (Python 3.7+); `return` is the
                # correct way to end the generator.
                return
            yield waiter.find_element(driver,
                                      follower_css.format(follower_index)).text
        # Scroll the last loaded entry into view to trigger the next batch.
        last_follower = waiter.find_element(driver,
                                            follower_css.format(group + 11))
        driver.execute_script("arguments[0].scrollIntoView();", last_follower)
# Beispiel #24
# 0
    def scrape_following(self, username):
        """Yield usernames that `username` follows; log and stop on any error."""
        browser = self.browser
        try:
            browser.get("https://www.instagram.com/" + username + "/")
            browser.find_element_by_partial_link_text('following').click()

            # Wait for the "following" modal dialog to appear.
            waiter.find_element(browser, "//div[@role='dialog']", by=XPATH)

            slot_css = "ul div li:nth-child({}) a.notranslate"
            idx = 0
            start = 1
            while True:
                for idx in range(start, start + 12):
                    yield waiter.find_element(
                        browser, slot_css.format(idx)).text
                # Scroll the final visible entry into view to load 12 more.
                anchor = waiter.find_element(browser, slot_css.format(idx))
                browser.execute_script("arguments[0].scrollIntoView();",
                                       anchor)
                start += 12

        except Exception as e:
            print("Error Code: ", e)
def scrape_followers(driver, account):
    """Endless generator of follower usernames for *account*.

    Opens the profile and the followers modal, then yields followers
    one-by-one forever, scrolling to force Instagram to load each next
    batch of 12. The caller is expected to limit the iteration (e.g. with
    itertools.islice); this generator never terminates on its own.
    """
    # Load account page.
    driver.get("https://www.instagram.com/{0}/".format(account))

    # Click the followers link, matched by its exact href rather than
    # partial link text.
    followers_link = "//a[@href='/" + account + "/followers/']"
    waiter.find_element(driver, followers_link, by=XPATH).click()

    # Wait for the followers modal to load.
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)

    # The modal stops at a "See All Suggestions" point if scrolled straight
    # to the bottom; repeatedly scrolling the last loaded entry into view
    # keeps forcing additional followers to load, 12 at a time.
    selector = "ul div li:nth-child({}) a.notranslate"
    batch = 1
    while True:
        idx = batch
        for idx in range(batch, batch + 12):
            yield waiter.find_element(driver, selector.format(idx)).text

        # Re-acquire the last element before scrolling: enough time may have
        # passed since the loop above that the old reference went stale.
        tail = waiter.find_element(driver, selector.format(idx))
        driver.execute_script("arguments[0].scrollIntoView();", tail)
        batch += 12
Beispiel #26
0
    def __getuserusedhashtag(self, targetaccount):
        """Collect the distinct hashtags used in *targetaccount*'s posts.

        Gathers up to 50 post URLs (retrying to maximise how many the lazy
        profile page yields), opens each post and extracts caption words
        that start with '#' and contain no further '#'.

        Returns the list of hashtags, or -1 on an unrecoverable error.
        """
        try:
            hashtags = []

            # Caption <span> of an opened post.
            post_xpath = (
                "/html/body/div[1]/section/main/div/div/article/div[2]"
                "/div[1]/ul/div/li/div/div/div[2]/span"
            )

            LikedPostsLinks = commenters_util.get_photo_urls_from_profile(
                self.driver,
                targetaccount,
                50,
                False,
            )
            # The lazy-loading profile does not always yield every link;
            # retry a few times and keep the largest result set.
            for _ in range(5):
                retry_links = commenters_util.get_photo_urls_from_profile(
                    self.driver,
                    targetaccount,
                    50,
                    False,
                )
                if len(retry_links) > len(LikedPostsLinks):
                    LikedPostsLinks = retry_links

            for url in LikedPostsLinks:
                # Up to two attempts per post.
                for _ in range(2):
                    try:
                        time.sleep(2)
                        self.driver.get(url)
                        time.sleep(3)
                        # waiter.find_element already waits for and returns
                        # the element; the original's second lookup of the
                        # same XPath was redundant and has been dropped.
                        ss = waiter.find_element(
                            self.driver,
                            post_xpath,
                            by=XPATH,
                        )
                        for word in ss.text.split():
                            if (
                                word[0] == "#"
                                and word not in hashtags
                                and "#" not in word[1:]
                            ):
                                hashtags.append(word)
                        break
                    except Exception:
                        # Narrowed from a bare except; still best-effort.
                        print("Error Try Again")
            return hashtags
        except Exception:
            print("some error in hashtag")
            return -1
Beispiel #27
0
def login(driver, username, password):
    """Log into Instagram with the given credentials.

    Fills the username and password fields, submits the form, then waits a
    fixed 5 seconds for the post-login page to load.
    """
    # Load the login page.
    driver.get("https://www.instagram.com/accounts/login/")

    # Fill both credential fields, identified by their input name.
    credentials = (
        ("//div/label/input[@name='username']", username),
        ("//div/label/input[@name='password']", password),
    )
    for field_xpath, value in credentials:
        waiter.find_write(driver, field_xpath, value, by=XPATH)

    # Submit the form.
    waiter.find_element(driver, "//div/button[@type='submit']",
                        by=XPATH).click()

    # Wait for the page to load. Increase from 5 if internet is slow.
    time.sleep(5)
    print("login complete.\n")
Beispiel #28
0
 def __getFollowx(self, onPerform):
     """Generator over usernames in this account's follow list.

     *onPerform* selects which profile link is opened (e.g. 'following').
     Stores the list's advertised size in ``self.totalUserFollowx`` and
     shows a tqdm progress bar while batches of 12 entries are scrolled
     into view and yielded one-by-one.
     """
     browser = self.browser
     browser.get(f'https://www.instagram.com/{self.__username}/')

     if onPerform == 'following':
         Util.clearScreen()
         print(f'> Watching "{self.__username}" profile.\n')

     # Locate the followers/following link by its exact href and read the
     # total count from its <span>.
     link = browser.find_element_by_xpath(
         f'//a[@href=\'/{self.__username}/{onPerform}/\']')
     self.totalUserFollowx = int(
         link.find_element_by_tag_name('span').text)
     link.click()

     print(f'> Collecting users in "{onPerform.capitalize()}" list ...')
     slot = 'ul div li:nth-child({}) a.notranslate'
     progress = tqdm(range(1, self.totalUserFollowx, 12),
                     ncols=65,
                     leave=False)
     for batch in progress:
         pos = batch
         for pos in range(batch, batch + 12):
             yield find_element(self.browser,
                                slot.format(pos)).text
         # Scroll the last listed user into view to load the next batch.
         tail = find_element(self.browser, slot.format(pos))
         self.browser.execute_script("arguments[0].scrollIntoView()",
                                     tail)
Beispiel #29
0
 def gethashtagAccountquality(self):
     """Pick one hashtag with an unknown post count from the DB, scrape the
     count from Instagram, and write it back.

     Returns "Success" after updating, 2 when no hashtag is pending, and
     -1 on scraping/DB failure. Each DB operation is retried up to 5
     times, re-initialising the connection between attempts.
     """
     try:
         Failed = True
         count = 0
         for _ in range(5):
             try:
                 Sqlquery = "SELECT Hashtag FROM HashtagAccount WHERE numpost IS NULL order by Rand() Limit 1"
                 self.mycursor.execute(Sqlquery)
                 # Iterate the cursor; `db` keeps the (single) fetched row.
                 for db in self.mycursor:
                     count += 1
                 if count == 0:
                     # Nothing left to process.
                     return 2
                 Failed = False
                 break
             except Exception:
                 # Connection likely dropped; re-init and retry.
                 # (Narrowed from a bare except.)
                 time.sleep(1)
                 self.__init_db()
                 time.sleep(1)
         if not Failed:
             hashtag = db[0]
             # Strip the leading '#' for the explore-tags URL.
             url = "https://www.instagram.com/explore/tags/" + hashtag[1:]
             self.driver.get(url)
             time.sleep(5)
             ss = waiter.find_element(
                 self.driver,
                 "/html/body/div[1]/section/main/header/div[2]/div[1]/div[2]/span/span",
                 by=XPATH,
             )
             # Strip thousands separators before parsing the post count.
             num = int(ss.text.replace(",", "", 20))
             for _ in range(5):
                 try:
                     Sqldata = [num, hashtag]
                     Sqlquery = (
                         "Update HashtagAccount SET numpost = %s WHERE Hashtag = %s"
                     )
                     self.mycursor.execute(Sqlquery, Sqldata)
                     self.mydb.commit()
                     return "Success"
                 except Exception:
                     time.sleep(1)
                     self.__init_db()
                     time.sleep(1)
         else:
             print("Error in Hashtag Scraping")
             return -1
     except Exception:
         print("Error in Hashtag Scraping")
         return -1
Beispiel #30
0
    def follow_users_from_user(self, user):
        """Open *user*'s followers modal and follow the users found there.

        Runs 12 rounds; each round scrolls the modal container twice to
        load more rows, then clicks up to 5 visible follow buttons,
        ignoring any that are missing or not clickable.
        """
        driver = self.driver
        driver.get("https://www.instagram.com/" + user + "/?hl=pt-br")
        time.sleep(5)
        waiter.find_element(driver,
                            "//a[@href='/" + user + "/followers/']",
                            by=XPATH).click()
        time.sleep(5)
        for _ in range(12):
            # Scroll the followers list container twice so Instagram loads
            # another batch of rows.
            fBody = driver.find_element_by_xpath("//div[@class='isgrP']")
            for _ in range(2):
                driver.execute_script(
                    'arguments[0].scrollTop = arguments[0].scrollTop + arguments[0].offsetHeight;',
                    fBody)
                time.sleep(1)

            fList = driver.find_elements_by_xpath("//div[@class='isgrP']//li")
            print("Dando scroll em {} usuários.".format(len(fList)))
            print("Fim do scroll.")

            # Click up to 5 follow buttons; skip attempts where the button
            # is absent or not clickable. (Narrowed from a bare except.)
            for _ in range(5):
                try:
                    follow_button = driver.find_element_by_css_selector(
                        'button.sqdOP.L3NKy.y3zKF')
                    follow_button.click()
                    print("[ Usuário seguido! ]")
                    time.sleep(2)
                except Exception:
                    continue