Exemple #1
0
    def cache(key, value = False, expiry = False):
        """Read or write one entry of the `cache` table.

        Called with only `key`, the stored value is looked up. Called with a
        `value`, the entry is inserted or replaced with an expiry timestamp
        of now + `expiry` minutes.

        Args:
            key: Cache key.
            value: Value to store; leave as False to perform a read.
            expiry: Lifetime in minutes, used only when writing.

        Returns:
            The stored value on a read hit. None on a miss, when the entry
            has no expiry or has expired, after a write, and on any error
            (the error is printed).
        """
        try:
            # Identity check so that falsy payloads such as 0 are still
            # treated as values to store rather than as a read request.
            if value is False:
                row = DB.selectOne("SELECT * FROM cache WHERE key = ?;", (key,))

                # A miss, or an entry without expiry, is not served.
                if row is None or not row['expiry']:
                    return None

                # Timestamps are written with str(datetime), which omits the
                # fractional part when the microsecond field is zero.
                expiry_text = row['expiry']
                if '.' in expiry_text:
                    expiry_format = "%Y-%m-%d %H:%M:%S.%f"
                else:
                    expiry_format = "%Y-%m-%d %H:%M:%S"

                # Expired entries must not be returned.
                if datetime.strptime(expiry_text, expiry_format) < datetime.now():
                    return None

                return row['value']

            DB.execute("INSERT OR REPLACE INTO cache(key, value, expiry) VALUES(?, ?, ?);", (
                    key, value, str(datetime.now() + timedelta(minutes=expiry)),
                )
            )
        except Exception as e:
            # Best-effort cache: report the problem and behave like a miss.
            print(e)
            return None
Exemple #2
0
    def run(self):
        """Scrape the followers of each configured account and follow them.

        For every name in self.template['screen_names']:

        1. Unless Utils.cache reports the account as already scraped, its
           followers page is opened and scrolled to the bottom, inserting
           each listed follower into the `followers` table, and the account
           is then flagged as scraped in the cache.
        2. Up to self.template['amount'] rows with followed = 0 are read
           back and each profile is opened, its "Follow" button clicked if
           present, and the row marked as followed.

        Between follows the method sleeps a random whole number of seconds
        between 60% and 100% of self.template['sleep_delay'].
        """
        followedCount = 0
        for accountName in self.template['screen_names']:
            #Check if followers have been cached
            followersCache = Utils.cache(accountName + '_scraped')
            # The flag is written below as the integer 1, so compare as text
            # to accept both 1 and '1' coming back from the cache.
            if str(followersCache) != '1':
                #Retrieve followers
                self.browser.get('https://twitter.com/' + accountName +
                                 '/followers')

                #Grab all followers
                while True:
                    #Wait for page load and get links
                    WebDriverWait(self.browser, 5).until(
                        EC.presence_of_element_located((
                            By.XPATH,
                            "/html/body/div[1]/div[1]/div[1]/div[2]/main/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/section/div[1]/div[1]"
                        )))
                    followerLinks = self.browser.find_elements_by_xpath(
                        '/html/body/div[1]/div[1]/div[1]/div[2]/main/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]/section/div[1]/div[1]/div'
                    )

                    for followerLink in followerLinks:
                        try:
                            # Look the profile link up once and reuse it for
                            # both the user name and the stored URL.
                            userLink = followerLink.find_element_by_css_selector(
                                'a').get_attribute('href')
                            linkText = followerLink.text
                            DB.execute(
                                '''INSERT OR IGNORE INTO 
                                followers(user_name, user_link, followed, followed_at, parent_account) VALUES(?, ?, ?, ?, ?);''',
                                (
                                    userLink.replace(
                                        'https://twitter.com/', ''),
                                    userLink,
                                    ('Following' in linkText
                                     or 'Pending' in linkText),
                                    '',
                                    accountName,
                                ))
                        except Exception:
                            # Exception (not a bare except) so Ctrl-C still
                            # interrupts the scrape.
                            print('Ignoring...')  #Some load without data

                    #Handle infinite scroll (https://dev.to/mr_h/python-selenium-infinite-scrolling-3o12)
                    last_height = self.browser.execute_script(
                        "return document.body.scrollHeight"
                    )  # Get scroll height
                    self.browser.execute_script(
                        "window.scrollTo(0, document.body.scrollHeight);"
                    )  #Scroll to bottom
                    time.sleep(5)  # Wait to load page

                    # Compare the scroll height after loading with the one
                    # measured before scrolling.
                    new_height = self.browser.execute_script(
                        "return document.body.scrollHeight")
                    if new_height == last_height:
                        # Nothing new was loaded: stop scraping this account
                        break

                Utils.cache(accountName + '_scraped', 1, 43200)

            #Get updated list from db
            followers = DB.selectAll(
                "SELECT * FROM followers WHERE parent_account = ? and followed = 0 limit ?;",
                (accountName, self.template['amount']))

            #Loop through followers and follow
            for follower in followers:

                #Load user page
                print("Following " + follower['user_name'])
                self.browser.get(follower['user_link'])
                WebDriverWait(self.browser, 10).until(
                    EC.element_to_be_clickable(
                        (By.XPATH,
                         '//span[text()="@' + follower['user_name'] + '"]')))

                #Follow user
                try:
                    followBtn = self.browser.find_element_by_xpath(
                        '//span[text()="Follow"]')
                    followBtn.click()
                except Exception:
                    print('User already followed')

                #Update user as followed
                DB.execute(
                    '''UPDATE followers SET 
                    followed = ?,
                    followed_at = ? 
                    WHERE id = ?;''', (1, str(datetime.now()), follower['id']))

                followedCount += 1

                print("Taking a short rest...")
                # randrange needs integer bounds (floats are rejected by
                # newer Python versions) and a non-empty range.
                sleepDelay = self.template['sleep_delay']
                minRest = int(sleepDelay * 0.6)
                maxRest = max(int(sleepDelay), minRest + 1)
                time.sleep(randrange(minRest, maxRest))

        #Final output
        print("Followed " + str(followedCount) + " people")
    def run(self):
        """Unfollow previously followed accounts that did not follow back.

        Loads every `followers` row whose followed_at date is more than five
        days old and whose followed_back is still NULL, opens each profile
        and then either:

        * records followed_back = 1 when a "Follows you" badge is present, or
        * clicks the "Following"/"Pending" button, confirms the dialog and
          records followed_back = 0.

        An error while processing a profile is printed and the row is left
        unchanged, so it is selected again on the next run. Between profiles
        the method sleeps a random whole number of seconds between 60% and
        100% of self.template['sleep_delay'].
        """
        unfollowedCount = 0

        #Get recently followed
        recentlyFollowed = DB.selectAll(
            "select * from `followers` where date(`followed_at`) < date('now', '-5 days') and `followed_back` is NULL;",
            ())

        #Check if followers have followed back
        for followedUser in recentlyFollowed:
            self.browser.get(followedUser['user_link'])

            #Get following back status
            try:
                # Wait until the follow-state button is usable before
                # inspecting the page.
                WebDriverWait(self.browser, 5).until(
                    EC.element_to_be_clickable(
                        (By.XPATH,
                         "//span[text()='Following' or text()='Pending']")))
                followsBack = self.browser.find_elements_by_xpath(
                    '//span[text()="Follows you"]')
                if followsBack:
                    DB.execute(
                        '''UPDATE followers SET 
                        followed_back = ?
                        WHERE id = ?;''', (1, followedUser['id']))
                else:
                    #Click unfollow
                    self.browser.find_element_by_xpath(
                        '//span[text()="Following" or text()="Pending"]'
                    ).click()

                    #Click confirm
                    WebDriverWait(self.browser, 7).until(
                        EC.element_to_be_clickable(
                            (By.XPATH,
                             "//span[text()='Discard' or text()='Unfollow']")))
                    self.browser.find_element_by_xpath(
                        '//span[text()="Discard" or text()="Unfollow"]').click(
                        )

                    #Mark as not followed back
                    DB.execute(
                        '''UPDATE followers SET 
                        followed_back = ?
                        WHERE id = ?;''', (0, followedUser['id']))
                    unfollowedCount += 1
                    print('Stopped following ' + followedUser['user_name'])
            except Exception as e:
                # Best effort: report the failure and move on to the next
                # profile without touching the row.
                print(e)
                print('Error occured checking follow status of ' +
                      followedUser['user_name'])

            print("Taking a short rest...")
            # randrange needs integer bounds (floats are rejected by newer
            # Python versions) and a non-empty range.
            sleepDelay = self.template['sleep_delay']
            minRest = int(sleepDelay * 0.6)
            maxRest = max(int(sleepDelay), minRest + 1)
            time.sleep(randrange(minRest, maxRest))

        #Final output
        print("Unfollowed " + str(unfollowedCount) + " people")