Ejemplo n.º 1
0
 def like_media(self, media_id, media_code, username):
     """
     Like a post by sending a POST request to the web "likes" endpoint
     through the shared session.
     :param media_id: id of the post, used to build the request url
     :param media_code: code of the post, used only in log messages
     :param username: owner of the post, used only in log messages
     :return: 1 when the server answers with status 200, otherwise None
     (non-200 answers and exceptions are logged, not raised)
     """
     like_template = 'https://www.instagram.com/web/likes/{}/like/'
     endpoint = like_template.format(media_id)
     try:
         response = self.session.post(endpoint)
         status = response.status_code
         if status != 200:
             insta_logger.error(
                 'Failed to like user {} post code #{},'
                 'status code: {}'.format(username, media_code, status))
             return None
         insta_logger.info(
             'User {} post code #{} was liked'.format(username, media_code))
         return 1
     except Exception as error:
         # Best effort: any failure is logged and reported as "not liked".
         insta_logger.error(
             'Exception raised while liking user {} post code #{}'
             'Exception: {}'.format(username, media_code, error))
     return None
Ejemplo n.º 2
0
    def check_like(self, media_code, username):
        """
        Tell whether the post has already been liked by the logged in
        viewer, based on the JSON returned for the post page ('?__a=1').
        :param media_code: code of the post, used to build the request url
        :param username: owner of the post, used only in log messages
        :return: 1 if the response marks the post as liked by the viewer,
        otherwise None (missing keys and exceptions are logged)
        """
        page_url = 'https://www.instagram.com/p/{}/?__a=1'.format(media_code)
        try:
            payload = self.session.get(page_url).json()
            try:
                shortcode_media = payload['graphql']['shortcode_media']
                if shortcode_media['viewer_has_liked']:
                    return 1
            except KeyError:
                # The response does not have the expected structure.
                insta_logger.error(
                    'Key error while checking if media is liked by user {},'
                    ' media code{}'.format(username, media_code))
        except Exception as error:
            insta_logger.error('Exception raised while checking'
                               'if media is liked by user {}, media code: {}'
                               'Exception: {}'.format(
                                   username, media_code, error))
        return None
Ejemplo n.º 3
0
def load_ignore_list():
    """
    Get names of users from file to be excluded from liking list.

    The file 'ignore_list.txt' is looked up in the directory of this module
    and is read from that same location, independent of the current working
    directory. Everything after the first 'Ignore list:' marker is treated
    as a comma separated list of account names; surrounding whitespace is
    stripped and blank entries (e.g. from a trailing comma) are dropped.
    :return: list of users to be ignored, empty list if the marker is
    missing from the file or the file is not found
    """
    marker = 'Ignore list:'
    dir_name = os.path.dirname(os.path.abspath(__file__))
    ignore_list_path = os.path.join(dir_name, 'ignore_list.txt')

    if not os.path.exists(ignore_list_path):
        print('No ignore list found')
        insta_logger.error('No ignore list found')
        return []

    # Open the very path that was checked above; a bare file name would be
    # resolved against the current working directory instead.
    with open(ignore_list_path, 'r') as ignore_list_file:
        full_text = ignore_list_file.read()

    _, found_marker, list_raw = full_text.partition(marker)
    if not found_marker:
        print('"Ignore list.txt" was edited incorrectly. '
              'Can\'t create ignore list.'
              ' Please see description.')
        insta_logger.error('Ignore list file incorrectly edited')
        return []

    insta_logger.info('Ignore list extracted')
    print('Ignore list extracted')
    stripped_names = (account.strip() for account in list_raw.split(','))
    return [name for name in stripped_names if name]
Ejemplo n.º 4
0
    def login(self, timeout=10, attempts=3):
        """
        Log in to the account through the Selenium driver (self.driver).

        Each attempt opens self.login_page, waits until the username input
        is present, types the credentials, clicks the submit button and,
        after a fixed 5 second pause, checks whether the browser ended up
        on self.main_url.
        :param timeout: number of seconds to wait for the login form
        :param attempts: number of attempts to login, default is 3
        :return: 1 if login was successful; when all attempts fail the
        failure is logged and the process is terminated via sys.exit()
        """
        # All three form controls share the same absolute path prefix.
        form_xpath = ('/html/body/span/section/main/div/article/div/'
                      'div[1]/div/form')
        login_xpath = form_xpath + '/div[1]/input'
        password_xpath = form_xpath + '/div[2]/input'
        button_xpath = form_xpath + '/span/button'

        for attempt_no in range(1, attempts + 1):
            print('-Login with Selenium, attempt {} of {}'.format(
                attempt_no, attempts))
            try:
                self.driver.get(self.login_page)
                waiter = WebDriverWait(self.driver, timeout=timeout)
                waiter.until(EC.presence_of_element_located(
                    (By.XPATH, login_xpath)))
                self.driver.find_element(By.XPATH, login_xpath).send_keys(
                    self.user_login)
                self.driver.find_element(By.XPATH, password_xpath).send_keys(
                    self.user_password)
                self.driver.find_element(By.XPATH, button_xpath).click()
                # Give the page time to redirect after submitting the form.
                time.sleep(5)
                if self.driver.current_url != self.main_url:
                    print('--Unable to login with given credentials, '
                          'attempt {}'.format(attempt_no))
                    insta_logger.error(
                        'Selenium: login fail, wrong credentials,'
                        ' attempt #{}'.format(attempt_no))
                    continue
                print('--Login successful')
                insta_logger.info('Selenium: login successful')
                return 1
            except Exception as error:
                insta_logger.error(
                    'Selenium: login error, Exception: {}, attempt {}'.format(
                        error, attempt_no))
                print('--Failed. Exception raised.')

        # Every attempt failed: record it and stop the program.
        insta_logger.critical('Selenium: unable to login.'
                              'Shutting down')
        insta_logger.info('--------------STOP---------------')
        print('Unable to login. Refer to log file. '
              'Shutting down')
        sys.exit()
Ejemplo n.º 5
0
 def logout(self):
     """
     Log out of the account by requesting self.logout_url through the
     shared session, then log whether the server answered with status 200.
     Exceptions are logged and not propagated.
     :return: None
     """
     try:
         response = self.session.get(self.logout_url)
         time.sleep(3)
         status = response.status_code
         if status != 200:
             insta_logger.error(
                 'Failed logout, status code: {}'.format(status))
         else:
             insta_logger.info('Successful logout')
     except Exception as error:
         insta_logger.error('Failed logout, exception: {}'.format(error))
Ejemplo n.º 6
0
 def login(self, attempts=3):
     """
     Login to instagram account with 'requests'.

     Each attempt fetches self.login_url to obtain the 'csrftoken' cookie,
     sends it back as the 'X-CSRFToken' header, posts the credentials and
     then loads self.main_url; the login counts as successful when the
     account login name appears in that page. On success the CSRF header
     is refreshed from the login response and self.logged_in is set.

     The password is never written to the log; failure messages contain
     only the status code, the login name and the 1-based attempt number.
     :param attempts: number of attempts to login, default is 3
     :return: 1 if login successful; when all attempts fail the failure
     is logged and the process is terminated via sys.exit()
     """
     print('-Trying to login to account to use public api')
     for attempt in range(attempts):
         attempt_no = attempt + 1
         try:
             print('-Login attempt {} of {}'.format(attempt_no, attempts))
             # The first request is only needed for the CSRF cookie.
             self.session.get(self.login_url)
             csrf_token = self.session.cookies['csrftoken']
             self.session.headers.update({'X-CSRFToken': csrf_token})
             time.sleep(3)
             login_to_acc = self.session.post(
                 self.login_url,
                 data={
                     'username': self.user_login,
                     'password': self.user_password,
                 },
                 allow_redirects=True)
             time.sleep(3)
             status = login_to_acc.status_code
             if status == 200:
                 main_page = self.session.get(self.main_url)
                 if self.user_login in main_page.text:
                     print('--Successful login')
                     insta_logger.info('Requests: Successful login')
                     self.session.headers.update(
                         {'X-CSRFToken': login_to_acc.cookies['csrftoken']})
                     self.logged_in = True
                     return 1
                 print('--Login failed, wrong credentials')
                 insta_logger.error(
                     'Requests: Login failed (status code: {}), '
                     'credentials: login: {}, attempt: {}'.format(
                         status, self.user_login, attempt_no))
             else:
                 print('--Login failed, status code: {}'.format(status))
                 insta_logger.error(
                     'Requests: login failed (status code: {}), '
                     'credentials: login: {}, attempt: {}'.format(
                         status, self.user_login, attempt_no))
         except Exception as e:
             insta_logger.error(
                 'Requests: login error, Exception: {}, attempt {}'.format(
                     e, attempt_no))
             print('--Failed. Exception raised.')
     # Every attempt failed: record it and stop the program.
     insta_logger.critical('Requests: unable to login.' 'Shutting down')
     insta_logger.info('--------------STOP---------------')
     print('Unable to login. Refer to log file. ' 'Shutting down')
     sys.exit()
Ejemplo n.º 7
0
 def get_recent_media_feed(self, url):
     """
     Gather data about user recent posts: posts codes and posts ids.

     The JSON feed is requested by adding '?__a=1' to the user url; further
     pages are requested by appending '&max_id=<end_cursor>' taken from the
     previous page. Paging stops once self.pagination + 1 pages have been
     read, when the feed reports no next page, or as soon as
     self.posts_to_check posts have been collected.

     Users listed in self.ignore_list are skipped before any request is
     sent.
     :param url: instagram user url
     :return: dictionary with username as key and a set of
     (media code, media id) tuples as value; None if the user is in the
     ignore list, a page has no media, the response lacks the expected
     keys, or an exception was raised (all of these are logged)
     """
     main_url = urljoin(url, '?__a=1')
     username = urlparse(url)[2].strip('/')
     media_data = {username: set()}
     try:
         # The ignore list does not depend on the response, so check it
         # once up front instead of after every page request.
         if username in self.ignore_list:
             insta_logger.info(
                 'User {} is in ignore list'.format(username))
             return None
         collected = media_data[username]
         step = 0
         end_cursor = None
         while step != self.pagination + 1:
             if end_cursor:
                 feed_url = '{}&max_id={}'.format(main_url, end_cursor)
             else:
                 feed_url = main_url
             json_data = self.session.get(feed_url).json()
             try:
                 user_media = json_data['user']['media']
                 feed_data = user_media['nodes']
                 if not feed_data:
                     insta_logger.info(
                         'User {} has no media data, ignored'.format(
                             username))
                     return None
                 for media in feed_data:
                     collected.add((media['code'], media['id']))
                     # Stop as soon as enough posts were gathered.
                     if len(collected) >= self.posts_to_check:
                         insta_logger.info(
                             'User {} media data extracted,'
                             ' total media {}'.format(
                                 username, len(collected)))
                         return media_data
                 step += 1
                 page_info = user_media['page_info']
                 if not page_info['has_next_page']:
                     break
                 end_cursor = page_info['end_cursor']
                 # Short random pause before requesting the next page.
                 time.sleep(1 * random.random())
             except KeyError:
                 insta_logger.error(
                     'Key error while getting media feed, user {}'.format(
                         username))
                 return None
         insta_logger.info(
             'User {} media data extracted, total media {}'.format(
                 username, len(collected)))
         return media_data
     except Exception as e:
         insta_logger.error('Exception raised while getting feed data,'
                            'Exeption: {}'.format(e))
     return None
0
 def crawl_folowing_links(self, timeout=15, attempts=3):
     """
     Crawl links of followed accounts from the web page using Selenium.

     Each attempt opens self.user_page, reads the total number of
     followings from the "following" link, clicks that link and keeps
     scrolling to the bottom of the page until the number of list entries
     (elements with class '_cx1ua') equals that total. If several
     consecutive scrolls bring no new entries the attempt is abandoned
     instead of scrolling forever, and the next attempt starts.
     On success the collected hrefs are stored in self.followings_list.
     :param timeout: Number of seconds before timing out
     :param attempts: number of attempts to get followings links,
      default is 3
     :return: 1 if links extracted from web page successfully; when all
     attempts fail the failure is logged and the process is terminated
     via sys.exit()
     """
     # Number of consecutive scrolls without new entries after which an
     # attempt is given up.
     max_stalled_scrolls = 5
     for attempt in range(attempts):
         print('-Trying to get followings, attempt {} of {}'.format(
             attempt + 1, attempts))
         try:
             self.driver.get(self.user_page)
             time.sleep(3)
             WebDriverWait(
                 self.driver, timeout=timeout).until(
                 EC.presence_of_element_located((
                     By.CLASS_NAME,
                     '_s53mj')))
             total_following_web_elem = self.driver.find_element(
                 By.CSS_SELECTOR,
                 'a[href*="following"] > span')
             total_following = int(total_following_web_elem.text)
             folowing_button = self.driver.find_element(
                 By.CSS_SELECTOR, 'a[href*="following"]')
             print('--Total following to extract: {}'.format(
                 total_following))
             folowing_button.click()
             time.sleep(3)
             current_total = len(
                 self.driver.find_elements(By.CLASS_NAME, '_cx1ua'))
             stalled_scrolls = 0
             while current_total != total_following:
                 self.driver.execute_script('window.scrollTo(0, document'
                                            '.body.scrollHeight);')
                 time.sleep(2)
                 previous_total = current_total
                 current_total = len(
                     self.driver.find_elements(By.CLASS_NAME, '_cx1ua'))
                 sys.stdout.write('\r--Total followings'
                                  ' extracted: {}'.format(current_total))
                 sys.stdout.flush()
                 if current_total != previous_total:
                     stalled_scrolls = 0
                     continue
                 stalled_scrolls += 1
                 if stalled_scrolls >= max_stalled_scrolls:
                     # The list stopped growing; the count check below
                     # reports this attempt as failed.
                     insta_logger.error(
                         'Selenium: followings list stopped growing at '
                         '{} of {}'.format(current_total, total_following))
                     break
             sys.stdout.write('\n')
             links_web_elem = self.driver.find_elements(By.CLASS_NAME,
                                                        '_cx1ua')
             links = [link.find_element(By.TAG_NAME, 'a').get_attribute(
                 'href') for link in links_web_elem]
             if len(links) == total_following:
                 insta_logger.info(
                     'Selenium: Followings links extracted successfully')
                 print('--Followings extracted successfully')
                 self.followings_list = links
                 return 1
             insta_logger.info(
                 'Selenium: failed to extract '
                 'Followings links, attempt #{}'.format(attempt + 1))
             print('--Failed')
         except Exception as e:
             insta_logger.error('Selenium: crawl_following exception '
                                'raised. Exception: {}, attempt #{}'.format(
                                    e, attempt + 1))
             print('--Failed')
     # Every attempt failed: record it and stop the program.
     insta_logger.critical(
         'Selenium: unable to get followings. Shutting down')
     insta_logger.info(
         '--------------STOP---------------')
     print(
         '\nUnable to get followings. Refer to log file. Shutting'
         ' down')
     sys.exit()