def web_adress_navigator(browser, link):
    """Navigate *browser* to *link* only when it is not already there.

    Compares the browser's current URL with the target and performs the
    navigation on mismatch, then waits for the page's viewport element.

    Raises:
        PageNotFound404: when the loaded page reports a "not found" title.
    """
    try:
        current_url = browser.current_url
    except WebDriverException:
        # Some driver states make .current_url fail; ask the page directly.
        try:
            current_url = browser.execute_script("return window.location.href")
        except WebDriverException:
            current_url = None

    if current_url is None or current_url != link:
        browser.get(link)
        if check_page_title_notfound(browser):
            InstaLogger.logger().error("Failed to get page " + link)
            raise PageNotFound404("Failed to get page " + link)
        # Wait until the layout container exists so callers can scrape safely.
        WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.ID, "viewport")))
def extract_users_from_comments(self, comments):
    """Collect commenter usernames and (optionally) their comment texts.

    Returns:
        tuple: (user_commented_list, user_comments) where the first holds
        every commenter's username and the second holds dicts with 'user'
        and 'comment' keys -- all comments when Settings.output_comments
        is on, otherwise only the first one (which carries the caption).
    """
    user_commented_list = []
    user_comments = []
    for comm in comments:
        try:
            user_commented = comm.find_element_by_tag_name(
                'a').get_attribute("href").split('/')
            user_commented_list.append(user_commented[3])
        except Exception:
            InstaLogger.logger().error("ERROR something went wrong getting user_commented")
            # BUGFIX: without a username for THIS comment, skip it; the
            # original fell through and reused the previous iteration's name
            continue
        # first comment has to be loaded every time to get the caption and tag from post
        if Settings.output_comments is True or len(user_comments) < 1:
            try:
                comment_text = comm.find_element_by_css_selector(
                    'h2 + span, h3 + span').text
                user_comment = {'user': user_commented[3], 'comment': comment_text}
                InstaLogger.logger().info(user_comment)
                InstaLogger.logger().info(user_commented[3] + " -- " + comment_text)
                user_comments.append(user_comment)
            except Exception:
                InstaLogger.logger().error("ERROR something went wrong getting comment")
    InstaLogger.logger().info(str(len(user_commented_list)) + " comments.")
    return user_commented_list, user_comments
def extract_comments(self):
    """Return (comment_elements, comment_count) for the current post.

    The first <li> of the comment list is the caption, hence the count is
    len - 1 (and -1 when no <ul> exists at all).
    """
    comments = []
    try:
        if self.post.find_elements_by_tag_name('ul'):
            comment_list = self.post.find_element_by_tag_name('ul')
            comments = comment_list.find_elements_by_tag_name('li')
    except Exception as e:
        # BUGFIX: the original's bare `except:` after `except BaseException`
        # was unreachable dead code; a single handler covers both messages
        InstaLogger.logger().error("Error - getting comments")
        InstaLogger.logger().error(e)
    return comments, int(len(comments) - 1)
def extract_post_likers(browser, post, postlink, likes):
    """Open the post's likers dialog and scroll until *likes* usernames
    are collected or scrolling stalls more than 10 times.

    Returns:
        list: liker usernames; empty when Settings.scrape_posts_likers is off.
    """
    user_liked_list = []
    if Settings.scrape_posts_likers is False:
        return user_liked_list

    InstaLogger.logger().info("GETTING LIKERS FROM POST")
    likers_xpath = "//li[@class='wo9IH']//a[contains(@class, 'FPmhX')]"
    try:
        # open the "liked by" dialog
        post.find_element_by_xpath("//a[@class='zV_Nj']").click()
        likers_list = post.find_elements_by_xpath(likers_xpath)
        tried_catch_likers = 0
        while len(likers_list) < likes:
            likers_list_before = len(likers_list)
            InstaLogger.logger().info(
                "found likers: " + str(len(likers_list)) + " should be "
                + str(likes) + " -- scroll for more")
            try:
                div_likebox_elem = browser.find_element_by_xpath(
                    "//div[contains(@class, 'wwxN2')]")
                browser.execute_script(
                    "arguments[0].scrollTop = arguments[0].scrollHeight",
                    div_likebox_elem)
            except Exception as e:
                tried_catch_likers += 1
                # BUGFIX: the original concatenated the exception object to a
                # str ("..." + e) which raised TypeError inside the handler
                print("error on scrolling - next try (tried: "
                      + str(tried_catch_likers) + ") Message:" + str(e))
            sleep(Settings.sleep_time_between_post_scroll)
            likers_list = post.find_elements_by_xpath(likers_xpath)
            if likers_list_before == len(likers_list):
                # no progress after the scroll: count it and back off longer
                tried_catch_likers += 1
                print("error on scrolling - next try (tried: "
                      + str(tried_catch_likers) + ")")
                sleep(Settings.sleep_time_between_post_scroll * 1.5)
            if tried_catch_likers > 10:
                print("exit scrolling likers")
                break
        likers_list = post.find_elements_by_xpath(likers_xpath)
        for liker in likers_list:
            user_like = liker.get_attribute("href").split('/')
            user_liked_list.append(user_like[3])
        InstaLogger.logger().info('likers: ' + str(len(user_liked_list)))
    except Exception as e:
        InstaLogger.logger().error("Error - getting post likers")
        InstaLogger.logger().error(e)
    return user_liked_list
def extract_caption(self, user_comments, username):
    """Return the post caption.

    The caption is the text of the first comment when that comment was
    written by the post author *username*; otherwise an empty string.
    """
    caption = ''
    if user_comments:
        first_entry = user_comments[0]
        if first_entry['user'] == username:
            caption = first_entry['comment']
    InstaLogger.logger().info(f"caption: {caption}")
    return caption
def extract_username(self):
    """Return the post author's username, or '' when it cannot be read."""
    username = ''
    try:
        username = self.post.find_element_by_class_name(
            'e1e1d').find_element_by_tag_name('a').text
    except Exception:
        # BUGFIX: narrowed from a bare `except:` (was hiding SystemExit etc.)
        InstaLogger.logger().error("ERROR - getting Post infos (username) ")
    return username
def extract_date(self):
    """Return the post's datetime attribute string, or '' on failure."""
    date = ''
    try:
        date = self.post.find_element_by_xpath(
            '//a/time').get_attribute("datetime")
        InstaLogger.logger().info("Post date: " + str(date))
    except Exception:
        # BUGFIX: narrowed from a bare `except:`
        InstaLogger.logger().error("ERROR - getting Post Date ")
    return date
def extract_information(browser, username, limit_amount):
    """Get all the information for the given username.

    Navigates to the profile, scrapes the profile info and (when enabled
    and the profile is public) its posts.

    Returns:
        tuple: (ig_user, user_commented_list); commenters are sorted by
        comment frequency with consecutive duplicates and the profile
        owner removed.

    Raises:
        NoInstaProfilePageFound: when the profile page cannot be loaded.
    """
    InstaLogger.logger().info('Extracting information from ' + username)
    try:
        user_link = "https://www.instagram.com/{}/".format(username)
        web_adress_navigator(browser, user_link)
    except PageNotFound404 as e:
        raise NoInstaProfilePageFound(e)

    ig_user = InstagramUser(browser, username)
    ig_user.get_user_info()
    if limit_amount < 1:
        limit_amount = 999999
    num_of_posts_to_do = min(limit_amount, ig_user.num_of_posts['count'])

    post_infos = []
    user_commented_total_list = []
    if Settings.scrape_posts_infos is True and ig_user.isprivate is False:
        post_infos, user_commented_total_list = quick_post_extract(
            browser, num_of_posts_to_do)

    ig_user.posts = post_infos
    ig_user.scraped = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    InstaLogger.logger().info("User " + username + " has " +
                              str(len(user_commented_total_list)) + " comments.")

    # sorts the list by frequencies, so users who comment the most are at the top
    import collections
    from operator import itemgetter
    counter = collections.Counter(user_commented_total_list)
    com = sorted(counter.most_common(), key=itemgetter(1, 0), reverse=True)
    com = map(lambda x: [x[0]] * x[1], com)
    user_commented_total_list = [item for sublist in com for item in sublist]

    # remove duplicates preserving order (that's why not using set())
    user_commented_list = []
    last = ''
    for commenter in user_commented_total_list:
        if username.lower() != commenter and last != commenter:
            user_commented_list.append(commenter)
            last = commenter
    return ig_user, user_commented_list
def extract_post_mentions(self):
    """Return usernames mentioned in the post (empty when Settings.mentions is off)."""
    mentions = []
    if Settings.mentions is False:
        return mentions
    mention_elems = self.post.find_elements_by_class_name('JYWcJ')  # perhaps JYWcJ
    for elem in mention_elems:
        parts = elem.get_attribute("href").split('/')
        mentions.append(parts[3])
    InstaLogger.logger().info(f"mentions: {str(len(mentions))}")
    return mentions
def extract_post_caption(user_comments, username):
    """Return (caption, tags).

    The caption is the first comment's text when it was posted by
    *username*; tags are the '#hashtag' tokens found in that caption.
    """
    tags = []
    caption = ''
    try:
        if len(user_comments) > 0:
            user_commented = user_comments[0]
            if username == user_commented['user']:
                caption = user_commented['comment']
        InstaLogger.logger().info("caption:" + caption)
        tags = findall(r'#[A-Za-z0-9]*', caption)
    except Exception:
        # BUGFIX: narrowed from a bare `except:`
        InstaLogger.logger().error("getting caption")
    return caption, tags
def extract_post_mentions(browser, post):
    """Return usernames mentioned in *post*; empty when mention scraping
    is disabled via Settings.mentions."""
    mentions = []
    if Settings.mentions is True:
        try:
            mention_list = post.find_elements_by_class_name('xUdfV')  # perhaps JYWcJ
            for mention in mention_list:
                user_mention = mention.get_attribute("href").split('/')
                mentions.append(user_mention[3])
            InstaLogger.logger().info(str(len(mentions)) + "mentions")
        except Exception:
            # BUGFIX: narrowed from a bare `except:`
            InstaLogger.logger().error("getting mentions")
    return mentions
def extract_comments(self):
    """Load every comment of the post and dump them to a TSV file.

    Repeatedly clicks the "load more comments" button, then writes
    (author, post, commenter, comment) rows to '<username>/<post_code>.txt'
    (assumes the '<username>' directory already exists -- TODO confirm).

    Returns:
        tuple: (comments, len(comments) - 1); note the element list is
        never filled here, so the returned count is always -1.
    """
    print("extracting comments")
    comments = []
    try:
        # keep clicking the button until it disappears; the final
        # find_element raises and drops us out of the loop
        while self.browser.find_element_by_class_name('dCJp8'):
            more_button = self.browser.find_element_by_class_name('dCJp8')
            InstaLogger.logger().info("clicking button for loading more comments")
            self.browser.execute_script("arguments[0].click();", more_button)
            sleep(Settings.sleep_time_between_comment_loading)
    except BaseException as e:
        # BUGFIX: the original's extra bare `except:` after this handler
        # was unreachable dead code and has been removed
        InstaLogger.logger().error(e)
    print("Loaded all comments.")
    try:
        if self.browser.find_element_by_class_name('C4VMK'):
            cmnts = self.browser.find_elements_by_class_name('C4VMK')
            cmnt_text = []
            for cmnt in cmnts:
                c = cmnt.find_elements_by_tag_name('span')[-1].get_attribute('innerHTML')
                u = cmnt.find_element_by_class_name(
                    '_6lAjh').find_element_by_tag_name('a').get_attribute('innerHTML')
                cmnt_text.append((u, c))
            # save comments in a file
            post_name = self.postlink.split("/")[-2]
            print("saving to file: " + self.username + "/" + post_name + ".txt")
            with open(self.username + "/" + post_name + ".txt", 'w+') as f:
                f.write("author\tpostlink\tcommenter\tcomment\n")
                for t in cmnt_text:
                    f.write(self.username + "\t")
                    f.write(post_name + "\t")
                    f.write('\t'.join(str(s) for s in t) + '\n')
    except Exception:
        InstaLogger.logger().error("Error - getting comments")
    return comments, int(len(comments) - 1)
def init_chromedriver(chrome_options, capabilities):
    """Start a Chrome webdriver and verify the chromedriver version.

    Raises:
        WebDriverException: when chromedriver is missing at
            Settings.chromedriver_location.
        Exception: when the driver version is below
            Settings.chromedriver_min_version.
    """
    chromedriver_location = Settings.chromedriver_location
    try:
        browser = webdriver.Chrome(chromedriver_location,
                                   desired_capabilities=capabilities,
                                   chrome_options=chrome_options)
    except WebDriverException as exc:
        InstaLogger.logger().error('ensure chromedriver is installed at {}'.format(
            Settings.chromedriver_location))
        raise exc
    # BUGFIX: guard against an unparsable version string (re.match -> None
    # previously raised AttributeError); also compute the float only once
    matches = re.match(r'^(\d+\.\d+)',
                       browser.capabilities['chrome']['chromedriverVersion'])
    driver_version = float(matches.groups()[0]) if matches else 0.0
    if driver_version < Settings.chromedriver_min_version:
        InstaLogger.logger().error('chromedriver {} is not supported, expects {}+'.format(
            driver_version, Settings.chromedriver_min_version))
        browser.close()
        raise Exception('wrong chromedriver version')
    return browser
def extract_user_posts(browser, num_of_posts_to_do):
    """Scrape up to *num_of_posts_to_do* posts of the current profile.

    Builds an InstagramPost per collected link.

    Returns:
        tuple: (post_infos, user_commented_total_list) -- the second item
        holds every username that commented on any scraped post.
    """
    links2, preview_imgs = get_num_posts(browser, num_of_posts_to_do)
    post_infos = []
    user_commented_total_list = []
    for counter, postlink in enumerate(links2, start=1):
        InstaLogger.logger().info(f"\n {counter} / {len(links2)}")
        try:
            instagram_post = InstagramPost(browser, postlink)
            instagram_post.extract_post_info()
            location = {
                'location_url': instagram_post.location_url,
                'location_name': instagram_post.location_name,
                'location_id': instagram_post.location_id,
                'latitude': instagram_post.lat,
                'longitude': instagram_post.lng,
            }
            post_infos.append({
                'caption': instagram_post.caption,
                'location': location,
                'imgs': instagram_post.imgs,
                'imgdesc': instagram_post.imgdesc,
                'preview_img': preview_imgs.get(instagram_post.postlink, None),
                'date': instagram_post.date,
                'tags': instagram_post.tags,
                'likes': {
                    'count': instagram_post.likes,
                    'list': instagram_post.user_liked_list
                },
                'views': instagram_post.views,
                'url': instagram_post.postlink,
                'comments': {
                    'count': instagram_post.commentscount,
                    'list': instagram_post.user_comments
                },
                'mentions': instagram_post.mentions
            })
            user_commented_total_list += instagram_post.user_commented_list
        except NoSuchElementException as err:
            # BUGFIX: log `postlink`, not `instagram_post.postlink` --
            # `instagram_post` is unbound when the constructor itself raised
            InstaLogger.logger().error("Could not get information from post: " + postlink)
            InstaLogger.logger().error(err)
    return post_infos, user_commented_total_list
def __init__(self, browser, postlink):
    """Open *postlink* in *browser* and grab the post's root element."""
    self.browser = browser
    self.postlink = postlink
    try:
        InstaLogger.logger().info("Scraping Post Link: " + self.postlink)
        web_adress_navigator(self.browser, self.postlink)
    except PageNotFound404 as e:
        raise NoInstaPostPageFound(e)
    except NoSuchElementException as err:
        InstaLogger.logger().error(
            "Could not get information from post: " + self.postlink)
        InstaLogger.logger().error(err)
    # root container every extract_* method scrapes from
    self.post = self.browser.find_element_by_class_name('ltEKP')
def extract_image_data(self):
    """Return (img_tags, imgs, imgdesc): the post's <img> elements plus
    their src URLs and alt texts, in page order."""
    img_tags = self.post.find_elements_by_class_name('FFVAD')
    InstaLogger.logger().info("number of images: " + str(len(img_tags)))
    imgs = []
    imgdesc = []
    for tag in img_tags:
        imgs.append(tag.get_attribute('src'))
        imgdesc.append(tag.get_attribute('alt'))
        InstaLogger.logger().info(f"post image: {imgs[-1]}")
        InstaLogger.logger().info(f"alt text: {imgdesc[-1]}")
    return img_tags, imgs, imgdesc
def extract_post_mentions(browser, post):
    """Return usernames mentioned in *post*; empty when Settings.mentions is off."""
    mentions = []
    if Settings.mentions is False:
        return mentions
    try:
        mention_elems = post.find_elements_by_class_name('JYWcJ')  # perhaps JYWcJ
        for elem in mention_elems:
            parts = elem.get_attribute("href").split('/')
            mentions.append(parts[3])
        InstaLogger.logger().info("mentions: " + str(len(mentions)))
    except Exception as err:
        InstaLogger.logger().error("Error - getting mentions")
        InstaLogger.logger().error(err)
    return mentions
def _extract_post_info(proxy_browser, post_link):
    """Scrape a single post and return its info dict, or None on error."""
    try:
        caption, location_url, location_name, location_id, lat, lng, imgs, \
            img_desc, tags, likes, comments_count, date, user_commented_list, \
            user_comments, mentions, user_liked_post, views = extract_post_info(
                proxy_browser, post_link)
        location = {
            'location_url': location_url,
            'location_name': location_name,
            'location_id': location_id,
            'latitude': lat,
            'longitude': lng,
        }
        return {
            'caption': caption,
            'location': location,
            'imgs': imgs,
            'imgdesc': img_desc,
            'date': date,
            'tags': tags,
            'likes': {
                'count': likes,
                'list': user_liked_post
            },
            'views': views,
            'url': post_link,
            'comments': {
                'count': comments_count,
                'list': user_comments
            },
            'mentions': mentions
        }
    except NoSuchElementException as err:
        InstaLogger.logger().error("Could not get information from post: " + post_link)
        InstaLogger.logger().error(err)
    except Exception as ex:
        InstaLogger.logger().error("Could not get information from post: " + post_link)
        # BUGFIX: record the unexpected exception instead of dropping it
        InstaLogger.logger().error(ex)
    return None
def get_user_info(self):
    """Get the basic user info from the profile screen.

    Populates isprivate, alias, bio, bio_url, profile_image and the
    post/follower/following counters on self; optionally also the
    follower list when Settings.scrape_follower is enabled and the
    profile is public.
    """
    self.isprivate = self._is_user_private()
    self.alias = self._user_alias()
    self.bio = self._user_bio()
    self.bio_url = self._user_bio_url()
    self.profile_image = self._user_profile_image()
    infos = self.container.find_elements_by_class_name('Y8-fY')
    if infos:
        # counter order on the page: posts, followers, following
        self.num_of_posts = {'count': extract_exact_info(infos[0])}
        self.following = {'count': extract_exact_info(infos[2])}
        self.followers = {'count': extract_exact_info(infos[1])}
        # BUGFIX: `is True` instead of the non-idiomatic `== True`
        if Settings.scrape_follower is True and not self.isprivate:
            self.followers['list'] = extract_followers(
                self.browser, self.username)
    InstaLogger.logger().info("Alias name: " + self.alias)
    InstaLogger.logger().info("Bio: " + self.bio)
    InstaLogger.logger().info("Url: " + self.bio_url)
    InstaLogger.logger().info("Posts: " + str(self.num_of_posts))
    InstaLogger.logger().info("Follower: " + str(self.followers['count']))
    InstaLogger.logger().info("Following: " + str(self.following))
    InstaLogger.logger().info("Is private: " + str(self.isprivate))
def quick_post_extract(browser, num_of_posts_to_do):
    """Extract post metadata directly from Instagram's React state.

    Scrolls the feed and reads the `combinedPosts` array out of the React
    internal instance, avoiding a page visit per post.

    Returns:
        tuple: (post_infos, []) -- commenter extraction is not done here.
    """
    body_elem = browser.find_element_by_tag_name('body')
    previouslen = 0
    breaking = 0
    post_infos = []
    posts_set = set()
    posts_set_len = 0
    # hoisted: the script is loop-invariant (the original rebuilt it each pass)
    JSGetPostsFromReact = """
        var feed = document.getElementsByTagName('article')[0];
        var __reactInternalInstanceKey = Object.keys(feed).filter(k=>k.startsWith('__reactInternalInstance'))[0]
        var posts = feed[__reactInternalInstanceKey].return.stateNode.state.combinedPosts
        return posts;
    """
    while posts_set_len < num_of_posts_to_do:
        posts_json = browser.execute_script(JSGetPostsFromReact)
        for post_json in posts_json:
            # TODO: Convert to InstagramPost
            # instagram_post = InstagramPost.from_react_json(post_json)
            post_code = post_json['code']
            if post_code in posts_set:
                continue
            posts_set.add(post_code)
            location = {}
            if post_json.get('location'):
                loc_id = post_json['location']['id']
                loc_slug = post_json['location']['slug']
                location = {
                    'location_url': f"https://www.instagram.com/explore/locations/{loc_id}/{loc_slug}/",
                    'location_name': post_json['location']['name'],
                    'location_id': loc_id,
                    'latitude': post_json['location']['lat'],
                    'longitude': post_json['location']['lng'],
                }
            num_comments = post_json['numComments']
            num_likes = post_json.get('numLikes') or post_json.get(
                'numPreviewLikes', -1)
            post_infos.append({
                'caption': post_json.get('caption'),
                'location': location,
                'imgs': [],
                'imgdesc': [],
                'preview_img': post_json['thumbnailResources'],
                'date': post_json['postedAt'],
                'tags': [],
                'likes': {
                    'count': num_likes,
                    'list': []
                },
                'views': post_json.get('videoViews', -1),
                'url': f"https://www.instagram.com/p/{post_code}/",
                'comments': {
                    'count': num_comments,
                    'list': []
                },
                'mentions': []
            })
        body_elem.send_keys(Keys.END)
        sleep(Settings.sleep_time_between_post_scroll)
        posts_set_len = len(posts_set)
        # remove below part to never break the scrolling script before
        # reaching the num_of_posts
        if posts_set_len == previouslen:
            breaking += 1
            InstaLogger.logger().info(
                f"breaking in {4 - breaking}...\nIf you believe this is only caused by slow internet, increase sleep time 'sleep_time_between_post_scroll' in settings.py"
            )
        else:
            breaking = 0
        if breaking > 3:
            InstaLogger.logger().info(
                "Not getting any more posts, ending scrolling")
            sleep(2)
            break
        previouslen = len(post_infos)
    return post_infos, []
def get_num_posts(browser, num_of_posts_to_do):
    """Get all posts from user.

    Scrolls the profile grid until *num_of_posts_to_do* unique '/p/'
    links are collected or scrolling stalls four times in a row.

    Returns:
        tuple: (links2, preview_imgs) -- links2 is the list of unique post
        links; preview_imgs maps a post link to its thumbnail img src.
    """
    links2 = []
    preview_imgs = {}
    try:
        body_elem = browser.find_element_by_tag_name('body')
        previouslen = 0
        breaking = 0
        InstaLogger.logger().info(
            f"number of posts to do: {num_of_posts_to_do}")
        num_of_posts_to_scroll = 12 * math.ceil(num_of_posts_to_do / 12)
        InstaLogger.logger().info(
            f"Getting first {num_of_posts_to_scroll} posts but checking {num_of_posts_to_do} posts only, if you want to change this limit, change limit_amount value in crawl_profile.py\n"
        )
        while len(links2) < num_of_posts_to_do:
            main_divs = browser.find_elements_by_tag_name('main')
            links_elems = [
                div.find_elements_by_tag_name('a') for div in main_divs
            ]
            links = sum(
                [[link_elem.get_attribute('href') for link_elem in elems]
                 for elems in links_elems], [])
            for elems in links_elems:
                for link_elem in elems:
                    href = link_elem.get_attribute('href')
                    if "/p/" in href:
                        # BUGFIX: guard the img lookup (as the sibling
                        # extract_user_posts does) so one missing thumbnail
                        # does not abort the whole scroll
                        try:
                            img = link_elem.find_element_by_tag_name('img')
                            preview_imgs[href] = img.get_attribute('src')
                        except NoSuchElementException:
                            continue
            for link in links:
                if "/p/" in link and len(links2) < num_of_posts_to_do:
                    links2.append(link)
            links2 = list(set(links2))
            InstaLogger.logger().info(
                f"Scrolling profile {len(links2)} / {num_of_posts_to_scroll}")
            body_elem.send_keys(Keys.END)
            sleep(Settings.sleep_time_between_post_scroll)
            # remove below part to never break the scrolling script before
            # reaching the num_of_posts
            if len(links2) == previouslen:
                breaking += 1
                InstaLogger.logger().info(
                    f"breaking in {4 - breaking}...\nIf you believe this is only caused by slow internet, increase sleep time 'sleep_time_between_post_scroll' in settings.py"
                )
            else:
                breaking = 0
            if breaking > 3:
                InstaLogger.logger().info(
                    "Not getting any more posts, ending scrolling")
                sleep(2)
                break
            previouslen = len(links2)
    except NoSuchElementException:
        InstaLogger.logger().error('Something went terribly wrong')
    return links2, preview_imgs
def extract_followers(browser, username):
    """Open *username*'s follower dialog and harvest follower usernames.

    Works in batches: reads 12 entries from the dialog, deletes them from
    the DOM (so the list stays short and keeps loading), and repeats until
    no fresh batch of 24 children appears within 10 seconds.

    Returns a list of follower usernames (duplicates are possible).
    Raises NoInstaProfilePageFound when the profile page cannot be loaded.
    """
    InstaLogger.logger().info('Extracting follower from ' + username)
    try:
        user_link = "https://www.instagram.com/{}".format(username)
        web_adress_navigator(browser, user_link)
    except PageNotFound404 as e:
        raise NoInstaProfilePageFound(e)
    sleep(5)
    followers = []
    # find number of followers: the second <li> in the profile header
    # holds the follower counter; clicking it opens the dialog
    elem = browser.find_element_by_xpath(
        "//div[@id='react-root']//header[@class='vtbgv ']//ul[@class='k9GMp ']/child::li[2]/a/span"
    )
    elem.click()
    sleep(15)
    # remove suggestion list and load 24 list elements after this
    browser.execute_script(
        "document.getElementsByClassName('isgrP')[0].scrollTo(0,500)")
    sleep(10)
    elems = browser.find_elements_by_xpath(
        "//body//div[@class='PZuss']//a[@class='FPmhX notranslate _0imsa ']")
    # consume the first visible batch of 12 names ...
    for i in range(12):
        val = elems[i].get_attribute('innerHTML')
        followers.append(val)
    # ... then drop them from the DOM so the dialog loads the next batch
    for i in range(12):
        browser.execute_script(
            "document.getElementsByClassName('PZuss')[0].children[0].remove()")
    isDone = False
    while 1:
        try:
            start = time()
            # scroll the dialog to its bottom to trigger loading
            browser.execute_script(
                "document.getElementsByClassName('isgrP')[0].scrollTo(0,document.getElementsByClassName('isgrP')[0].scrollHeight)"
            )
            # busy-wait until a full batch of 24 children is present,
            # or give up after ~10s (no new followers arriving)
            while 1:
                try:
                    if int(
                            browser.execute_script(
                                "return document.getElementsByClassName('PZuss')[0].children.length"
                            )) == 24:
                        break
                except (KeyboardInterrupt, SystemExit):
                    # f.close()
                    raise
                except:
                    # transient JS/DOM errors while the list re-renders;
                    # NOTE(review): bare except kept as-is -- intentional
                    # best-effort retry loop
                    continue
                if time() - start > 10:
                    isDone = True
                    break
            if isDone:
                break
            elems = browser.find_elements_by_xpath(
                "//body//div[@class='PZuss']//a[@class='FPmhX notranslate _0imsa ']"
            )
            # read the first 12 of the fresh batch, then delete them again
            list_segment = ""
            for i in range(12):
                val = elems[i].get_attribute('innerHTML')
                list_segment += (val + '\n')
                followers.append(val)
            for i in range(12):
                browser.execute_script(
                    "document.getElementsByClassName('PZuss')[0].children[0].remove()"
                )
            InstaLogger.logger().info(time() - start)
        except (KeyboardInterrupt, SystemExit):
            # f.close()
            raise
        except:
            # retry the whole scroll/read cycle on any other failure
            continue
    # finally collect whatever is still left in the dialog
    list_segment = ""
    elems = browser.find_elements_by_xpath(
        "//body//div[@class='PZuss']//a[@class='FPmhX notranslate _0imsa ']")
    for i in range(len(elems)):
        val = elems[i].get_attribute('innerHTML')
        list_segment += (val + '\n')
        followers.append(val)
    return followers
def extract_user_posts(browser, num_of_posts_to_do):
    """Get all posts from user.

    Scrolls the profile to collect up to *num_of_posts_to_do* post links
    (plus preview image srcs), then scrapes every post via
    extract_post_info.

    Returns:
        tuple: (post_infos, user_commented_total_list).
    """
    links = []
    links2 = []        # all unique post links collected so far
    preview_imgs = {}  # post link -> preview image src
    try:
        body_elem = browser.find_element_by_tag_name('body')
        previouslen = 0
        breaking = 0
        print("number of posts to do: ", num_of_posts_to_do)
        num_of_posts_to_scroll = 12 * math.ceil(num_of_posts_to_do / 12)
        print(
            "Getting first", num_of_posts_to_scroll, "posts but checking ",
            num_of_posts_to_do,
            " posts only, if you want to change this limit, change limit_amount value in crawl_profile.py\n"
        )
        while len(links2) < num_of_posts_to_do:
            main_divs = browser.find_elements_by_tag_name('main')
            links_elems = [
                div.find_elements_by_tag_name('a') for div in main_divs
            ]
            links = sum(
                [[link_elem.get_attribute('href') for link_elem in elems]
                 for elems in links_elems], [])
            for elems in links_elems:
                for link_elem in elems:
                    href = link_elem.get_attribute('href')
                    try:
                        if "/p/" in href:
                            try:
                                img = link_elem.find_element_by_tag_name('img')
                                preview_imgs[href] = img.get_attribute('src')
                            except NoSuchElementException:
                                print("img exception 132")
                                continue
                    except Exception as err:
                        print(err)
            for link in links:
                if "/p/" in link and len(links2) < num_of_posts_to_do:
                    links2.append(link)
            links2 = list(set(links2))
            print("Scrolling profile ", len(links2), "/", num_of_posts_to_scroll)
            body_elem.send_keys(Keys.END)
            sleep(Settings.sleep_time_between_post_scroll)
            # remove below part to never break the scrolling script before
            # reaching the num_of_posts
            if len(links2) == previouslen:
                breaking += 1
                print(
                    "breaking in ", 4 - breaking,
                    "...\nIf you believe this is only caused by slow internet, increase sleep time 'sleep_time_between_post_scroll' in settings.py"
                )
            else:
                breaking = 0
            if breaking > 3:
                print("Not getting any more posts, ending scrolling")
                sleep(2)
                break
            previouslen = len(links2)
    except NoSuchElementException:
        InstaLogger.logger().error('Something went terribly wrong')

    post_infos = []
    # all usernames that commented on any post of this user
    user_commented_total_list = []
    counter = 1
    for postlink in links2:
        print("\n", counter, "/", len(links2))
        counter += 1
        try:
            caption, location_url, location_name, location_id, lat, lng, imgs, \
                imgdesc, tags, likes, commentscount, date, user_commented_list, \
                user_comments, mentions, user_liked_post, views = extract_post_info(
                    browser, postlink)
            location = {
                'location_url': location_url,
                'location_name': location_name,
                'location_id': location_id,
                'latitude': lat,
                'longitude': lng,
            }
            post_infos.append({
                'caption': caption,
                'location': location,
                'imgs': imgs,
                'imgdesc': imgdesc,
                'preview_img': preview_imgs.get(postlink, None),
                'date': date,
                'tags': tags,
                'likes': {
                    'count': likes,
                    'list': user_liked_post
                },
                'views': views,
                'url': postlink,
                'comments': {
                    'count': commentscount,
                    'list': user_comments
                },
                'mentions': mentions
            })
            user_commented_total_list = user_commented_total_list + user_commented_list
        except NoSuchElementException as err:
            InstaLogger.logger().error(
                "Could not get information from post: " + postlink)
            InstaLogger.logger().error(err)
        except Exception as ex:
            # BUGFIX: narrowed the bare `except:` and log the exception detail
            InstaLogger.logger().error(
                "Could not get information from post: " + postlink)
            InstaLogger.logger().error(ex)
    return post_infos, user_commented_total_list
def get_user_info(browser, username):
    """Get the basic user info from the profile screen.

    Returns:
        dict: alias, username, bio, prof_img, num_of_posts, followers,
        following, bio_url and isprivate; each field falls back to an
        empty default when its page element is missing.
    """
    num_of_posts = 0
    followers = {'count': 0}
    following = {'count': 0}
    prof_img = ""
    bio = ""
    bio_url = ""
    alias = ""
    container = browser.find_element_by_class_name('v9tJq')
    isprivate = False
    try:
        # the 'Nd_Rl' banner only exists on private profiles
        if container.find_element_by_class_name('Nd_Rl'):
            isprivate = True
    except Exception:
        isprivate = False
    try:
        alias = container.find_element_by_class_name(
            '-vDIg').find_element_by_tag_name('h1').text
    except Exception:
        InstaLogger.logger().info("alias is empty")
    try:
        bio = container.find_element_by_class_name('-vDIg') \
            .find_element_by_tag_name('span').text
    except Exception:
        InstaLogger.logger().info("Bio is empty")
    try:
        bio_url = container.find_element_by_class_name('yLUwa').text
    except Exception:
        InstaLogger.logger().info("Bio Url is empty")
    try:
        img_container = browser.find_element_by_class_name('RR-M-')
        prof_img = img_container.find_element_by_tag_name('img').get_attribute('src')
    except Exception:
        InstaLogger.logger().info("image is empty")
    try:
        # counter order on the page: posts, followers, following
        infos = container.find_elements_by_class_name('Y8-fY')
        try:
            num_of_posts = extract_exact_info(infos[0])
        except Exception:
            InstaLogger.logger().error("Number of Posts empty")
        try:
            following = {'count': extract_exact_info(infos[2])}
        except Exception:
            InstaLogger.logger().error("Following is empty")
        try:
            followers = {'count': extract_exact_info(infos[1])}
            try:
                # BUGFIX: `is True` / truthiness instead of `== True`
                if Settings.scrape_follower is True:
                    if isprivate:
                        InstaLogger.logger().info(
                            "Cannot get Follower List - private account")
                    else:
                        followers['list'] = extract_followers(browser, username)
            except Exception as exception:
                # Output unexpected Exceptions.
                print("Unexpected error:", sys.exc_info()[0])
                print(exception)
                InstaLogger.logger().error("Cannot get Follower List")
        except Exception:
            InstaLogger.logger().error("Follower is empty")
    except Exception:
        InstaLogger.logger().error("Infos (Following, Abo, Posts) is empty")
    information = {
        'alias': alias,
        'username': username,
        'bio': bio,
        'prof_img': prof_img,
        'num_of_posts': num_of_posts,
        'followers': followers,
        'following': following,
        'bio_url': bio_url,
        'isprivate': isprivate,
    }
    InstaLogger.logger().info("alias name: " + information['alias'])
    InstaLogger.logger().info("bio: " + information['bio'])
    InstaLogger.logger().info("url: " + information['bio_url'])
    InstaLogger.logger().info("Posts: " + str(information['num_of_posts']))
    InstaLogger.logger().info("Follower: " + str(information['followers']['count']))
    InstaLogger.logger().info("Following: " + str(information['following']))
    InstaLogger.logger().info("isPrivate: " + str(information['isprivate']))
    return information
def extract_information(browser, username, limit_amount):
    """Get all the information for the given username (dict variant).

    Returns:
        tuple: (information, user_commented_list); commenters are sorted
        by frequency with duplicates and the profile owner removed.

    Raises:
        NoInstaProfilePageFound: when the profile page cannot be loaded.

    Note: terminates the interpreter via quit() when the profile info
    cannot be read at all (original behavior preserved).
    """
    InstaLogger.logger().info('Extracting information from ' + username)
    isprivate = False
    try:
        user_link = "https://www.instagram.com/{}/".format(username)
        web_adress_navigator(browser, user_link)
    except PageNotFound404 as e:
        raise NoInstaProfilePageFound(e)

    num_of_posts_to_do = 999999
    alias_name = ''
    bio = ''
    prof_img = ''
    num_of_posts = 0
    followers = 0
    following = 0
    bio_url = ''
    try:
        alias_name, bio, prof_img, num_of_posts, followers, following, \
            bio_url, isprivate = get_user_info(browser)
        if limit_amount < 1:
            limit_amount = 999999
        num_of_posts_to_do = min(limit_amount, num_of_posts)
    except Exception:
        InstaLogger.logger().error("Couldn't get user profile. - Terminating")
        quit()

    post_infos = []
    user_commented_total_list = []
    if Settings.scrap_posts_infos is True and isprivate is False:
        try:
            post_infos, user_commented_total_list = extract_user_posts(
                browser, num_of_posts_to_do)
        except Exception:
            # BUGFIX: narrowed from a bare `except:`
            InstaLogger.logger().error("Couldn't get user posts.")

    information = {
        'alias': alias_name,
        'username': username,
        'bio': bio,
        'prof_img': prof_img,
        'num_of_posts': num_of_posts,
        'followers': followers,
        'following': following,
        'bio_url': bio_url,
        'isprivate': isprivate,
        'scrapped': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        'posts': post_infos,
    }
    InstaLogger.logger().info("User " + username + " has " +
                              str(len(user_commented_total_list)) + " comments.")

    # sorts the list by frequencies, so users who comment the most are at the top
    import collections
    from operator import itemgetter
    counter = collections.Counter(user_commented_total_list)
    com = sorted(counter.most_common(), key=itemgetter(1, 0), reverse=True)
    com = map(lambda x: [x[0]] * x[1], com)
    user_commented_total_list = [item for sublist in com for item in sublist]

    # remove duplicates preserving order (that's why not using set())
    user_commented_list = []
    last = ''
    for commenter in user_commented_total_list:
        if username.lower() != commenter and last != commenter:
            user_commented_list.append(commenter)
            last = commenter
    return information, user_commented_list
def get_user_info(browser):
    """Get the basic user info from the profile screen (tuple variant).

    Returns:
        tuple: (alias_name, bio, prof_img, num_of_posts, followers,
        following, bio_url, isprivate); each field falls back to an empty
        default when its page element is missing.
    """
    num_of_posts = 0
    followers = 0
    following = 0
    prof_img = ""
    bio = ""
    bio_url = ""
    alias_name = ""
    container = browser.find_element_by_class_name('v9tJq')
    isprivate = False
    try:
        # counter order on the page: posts, followers, following
        infos = container.find_elements_by_class_name('Y8-fY')
        num_of_posts = extract_exact_info(infos[0])
        followers = extract_exact_info(infos[1])
        following = extract_exact_info(infos[2])
    except Exception:
        # BUGFIX: all bare `except:` handlers narrowed to Exception
        InstaLogger.logger().error("Infos (Follower, Abo, Posts) is empty")
        infos = ""
    try:
        alias_name = container.find_element_by_class_name(
            '-vDIg').find_element_by_tag_name('h1').text
    except Exception:
        InstaLogger.logger().info("alias is empty")
    try:
        bio = container.find_element_by_class_name('-vDIg') \
            .find_element_by_tag_name('span').text
    except Exception:
        InstaLogger.logger().info("Bio is empty")
    try:
        bio_url = container.find_element_by_class_name('yLUwa').text
    except Exception:
        InstaLogger.logger().info("Bio Url is empty")
    try:
        img_container = browser.find_element_by_class_name('RR-M-')
        prof_img = img_container.find_element_by_tag_name('img').get_attribute('src')
    except Exception:
        InstaLogger.logger().info("image is empty")
    try:
        # the 'Nd_Rl' banner only exists on private profiles
        if container.find_element_by_class_name('Nd_Rl'):
            isprivate = True
    except Exception:
        isprivate = False
    InstaLogger.logger().info("alias name: " + alias_name)
    InstaLogger.logger().info("bio: " + bio)
    InstaLogger.logger().info("url: " + bio_url)
    InstaLogger.logger().info("Posts: " + str(num_of_posts))
    InstaLogger.logger().info("Follower: " + str(followers))
    InstaLogger.logger().info("Following: " + str(following))
    InstaLogger.logger().info("isPrivate: " + str(isprivate))
    return alias_name, bio, prof_img, num_of_posts, followers, following, bio_url, isprivate
def extract_post_comments(browser, post):
    """Expand and scrape the comments of *post* (older variant).

    NOTE(review): a second, newer `extract_post_comments` is defined later
    in this file with the same signature; being defined later, it shadows
    this one at import time, so this version is effectively dead code.

    Returns:
        Tuple ``(user_comments, user_commented_list, comment_count)`` where
        `user_comments` is a list of {'user', 'comment'} dicts,
        `user_commented_list` the commenting usernames, and `comment_count`
        is ``len(comments) - 1`` (the first <li> is usually the caption;
        -1 when nothing was found).
    """
    # if more than 22 comment elements, use the second to see
    # how much comments, else count the li's
    # first element is the text, second either the first comment
    # or the button to display all the comments
    comments = []
    user_commented_list = []
    user_comments = []
    try:
        if post.find_elements_by_tag_name('ul'):
            comment_list = post.find_element_by_tag_name('ul')
            comments = comment_list.find_elements_by_tag_name('li')
            if len(comments) > 1:
                # load hidden comments: keep clicking the "load more" /
                # "view all" control (second <li>) until it disappears
                tried_catch_comments = 0
                while (comments[1].text.lower() == 'load more comments'
                       or comments[1].text.lower().startswith('view all')):
                    try:
                        if comments[1].find_element_by_tag_name('button'):
                            print("click button for loading more")
                            comments[1].find_element_by_tag_name(
                                'button').click()
                        elif comments[1].find_element_by_tag_name('a'):
                            print("click a for loading more")
                            comments[1].find_element_by_tag_name('a').click()
                        sleep(Settings.sleep_time_between_comment_loading)
                    except:
                        # best-effort retry; give up after 10 failed clicks
                        print("error on clicking - next try (tried: " +
                              str(tried_catch_comments) + ") comments:" +
                              str(len(comments)) + ")")
                        tried_catch_comments = tried_catch_comments + 1
                        if tried_catch_comments > 10:
                            print("exit getting comments")
                            break
                    # refresh the <li> list so the while condition sees the
                    # newly loaded comments
                    sleep(Settings.sleep_time_between_comment_loading)
                    comment_list = post.find_element_by_tag_name('ul')
                    comments = comment_list.find_elements_by_tag_name('li')
                    # adding who commented into user_commented_list
                InstaLogger.logger().info("found comments: " + str(len(comments)))
            else:
                print("found comment: 1")
            for comm in comments:
                try:
                    # username is the 4th path segment of the commenter link
                    user_commented = comm.find_element_by_tag_name(
                        'a').get_attribute("href").split('/')
                    user_commented_list.append(user_commented[3])
                except:
                    # NOTE(review): if this fails, `user_commented` keeps its
                    # value from the previous iteration (or is unbound on the
                    # first one) and the dict below may credit the wrong user
                    InstaLogger.logger().error(
                        "ERROR something went wrong getting user_commented")
                # first comment has to be loaded everytime to get the caption and tag from post
                if (Settings.output_comments is True or len(user_comments) < 1):
                    user_comment = {}
                    try:
                        user_comment = {
                            'user': user_commented[3],
                            'comment': comm.find_element_by_tag_name('span').text
                        }
                        InstaLogger.logger().info(
                            user_commented[3] + " -- " +
                            comm.find_element_by_tag_name('span').text)
                        user_comments.append(user_comment)
                    except:
                        InstaLogger.logger().error(
                            "ERROR something went wrong getting comment")
            InstaLogger.logger().info(str(len(user_commented_list)) + " comments.")
    except BaseException as e:
        # broad catch: a scrape failure must not abort the whole post
        InstaLogger.logger().error(e)
    except:
        # NOTE(review): unreachable — the BaseException handler above
        # already catches everything
        InstaLogger.logger().error("Error - getting comments")
    return user_comments, user_commented_list, int(len(comments) - 1)
def _likes_count_to_int(likes_text):
    """Parse a like-count label ('1,234', '1.5k', '2m') into an int.

    Returns -1 when the text cannot be parsed, mirroring the caller's
    previous fallback value.
    """
    text = likes_text.strip().lower().replace(',', '')
    try:
        if text.endswith('k'):
            # BUGFIX: the old code stripped '.' first and then did
            # replace('k', '00'), so '10k' became 1000 instead of 10000.
            return int(float(text[:-1]) * 1000)
        if text.endswith('m'):
            return int(float(text[:-1]) * 1000000)
        # A remaining '.' is treated as a thousands separator, as before.
        return int(text.replace('.', ''))
    except (ValueError, TypeError):
        return -1


def extract_post_info(browser, postlink):
    """Get the information from the current post.

    Navigates to *postlink* (if not already there) and scrapes caption,
    location, date, image, likes, comments, mentions and likers.

    Args:
        browser: selenium WebDriver.
        postlink: URL of the post to scrape.

    Returns:
        Tuple ``(caption, location_url, location_name, location_id, lat,
        lng, img, tags, likes, commentscount, date, user_commented_list,
        user_comments, mentions, user_liked_list)``.

    Raises:
        NoInstaPostPageFound: when the post page returns a 404.
    """
    try:
        InstaLogger.logger().info("Scrapping Post Link: " + postlink)
        web_adress_navigator(browser, postlink)
    except PageNotFound404 as e:
        raise NoInstaPostPageFound(e)
    except NoSuchElementException as err:
        InstaLogger.logger().error("Could not get information from post: " + postlink)
        InstaLogger.logger().error(err)
        pass
    post = browser.find_element_by_class_name('ltEKP')
    date = ''
    # Get caption
    caption = ''
    username = ''
    try:
        username = post.find_element_by_class_name('e1e1d').text
    except Exception:
        InstaLogger.logger().error("ERROR - getting Post infos (username) ")
    # Get location details
    location_url = ''
    location_name = ''
    location_id = 0
    lat = ''
    lng = ''
    imgs = []
    img = ''
    try:
        # Location url and name
        location_div = post.find_element_by_class_name(
            'M30cS').find_elements_by_tag_name('a')
        if location_div:
            location_url = location_div[0].get_attribute('href')
            location_name = location_div[0].text
            # Longitude and latitude.
            # BUGFIX: str.strip(prefix) removes a *character set*, not a
            # prefix; extract the id by splitting on the known path instead
            # (same result for well-formed location URLs).
            location_id = location_url.split(
                '/explore/locations/')[-1].split('/')[0]
            url = 'https://www.instagram.com/explore/locations/' + location_id + '/?__a=1'
            response = requests.get(url)
            data = response.json()
            lat = data['graphql']['location']['lat']
            lng = data['graphql']['location']['lng']
            InstaLogger.logger().info("location_id: " + location_id)
            InstaLogger.logger().info("location_url: " + location_url)
            InstaLogger.logger().info("location_name: " + location_name)
            # BUGFIX: lat/lng are numbers in the JSON; concatenating them to
            # a str raised TypeError, which the old bare except turned into
            # a spurious "perhaps not set" log entry.
            InstaLogger.logger().info("lat: " + str(lat))
            InstaLogger.logger().info("lng: " + str(lng))
    except Exception:
        InstaLogger.logger().error("getting Location Infos (perhaps not set)")
    try:
        date = post.find_element_by_xpath('//a/time').get_attribute("datetime")
        InstaLogger.logger().info("Post date: " + str(date))
    except Exception:
        InstaLogger.logger().error("ERROR - getting Post Date ")
    try:
        imgs = post.find_elements_by_tag_name('img')
        # presumably imgs[0] is the poster's avatar and imgs[1] the actual
        # post image — TODO confirm against current markup
        if len(imgs) >= 2:
            img = imgs[1].get_attribute('src')
        else:
            img = imgs[0].get_attribute('src')
        InstaLogger.logger().info("post image: " + img)
    except Exception:
        InstaLogger.logger().error("ERROR - Post Image ")
    likes = 0
    try:
        likes_element = post.find_elements_by_xpath(
            '//article/div[2]/section[2]/div/div/a/span')
        if len(likes_element) > 1:
            likes = str(likes_element[1].text)
        else:
            likes = str(likes_element[0].text)
        likes = _likes_count_to_int(likes)
        InstaLogger.logger().info("post likes: " + str(likes))
    except Exception as err:
        InstaLogger.logger().error("ERROR - Getting Post Likes")
        InstaLogger.logger().error(err)
    # if likes is not known, it would cause errors to convert empty string to int
    try:
        likes = int(likes)
    except Exception as err:
        InstaLogger.logger().error(
            "ERROR - Extracting number of likes failed. Saving likes as -1")
        InstaLogger.logger().error(err)
        likes = -1
    user_comments = []
    user_commented_list = []
    user_liked_list = []
    mentions = []
    tags = []
    caption = ''
    commentscount = 0
    try:
        user_comments, user_commented_list, commentscount = extract_post_comments(
            browser, post)
    except Exception:
        InstaLogger.logger().error(
            "ERROR - getting Post comments function trying")
    try:
        caption, tags = extract_post_caption(user_comments, username)
        # delete first comment because its the caption of the user posted
        if len(caption) > 0:
            user_comments.pop(0)
    except Exception:
        InstaLogger.logger().error(
            "ERROR - getting Post caption/tags function")
    try:
        mentions = extract_post_mentions(browser, post)
    except Exception:
        InstaLogger.logger().error("ERROR - getting Post Mentions function")
    try:
        user_liked_list = extract_post_likers(browser, post, postlink, likes)
    except Exception:
        InstaLogger.logger().error("ERROR - getting Post Likers function")
    return caption, location_url, location_name, location_id, lat, lng, img, tags, int(
        likes
    ), commentscount, date, user_commented_list, user_comments, mentions, user_liked_list
def extract_post_likers(browser, post, postlink, likes):
    """Collect the usernames of everyone who liked *post*.

    Opens the post's "liked by" overlay and scrolls it until `likes`
    distinct usernames were seen or scrolling stalls repeatedly.

    Args:
        browser: selenium WebDriver.
        post: WebElement of the post article.
        postlink: canonical URL of the post (used to build the liked_by URL).
        likes: expected number of likers; used as the scroll target.

    Returns:
        List of usernames; may be shorter than `likes` when scrolling gives
        up, and empty when Settings.scrape_posts_likers is disabled.
    """
    user_liked_list = []
    # BUGFIX: this was the placeholder string "******", which is not valid
    # XPath and made every likers query below raise. Restored from the
    # selector preserved in the comments of this function — TODO confirm
    # these class names still match Instagram's current markup.
    xpath_identifier_user = "//li[@class='wo9IH']//a[contains(@class, 'FPmhX')]"
    if (Settings.scrape_posts_likers is False):
        return user_liked_list
    InstaLogger.logger().info("GETTING LIKERS FROM POST")
    postlink = postlink + "liked_by/"
    tried_catch_likers = 0
    likers_list_before = 0
    try:
        # Open the likers overlay via a JS click (more reliable than .click()).
        # post.find_element_by_xpath("//a[contains(@class, 'zV_Nj')]").click()
        elementToClick = post.find_element_by_xpath(
            "//a[contains(@class, 'zV_Nj')]")
        browser.execute_script("arguments[0].click();", elementToClick)
        sleep(3)
        likers_list = post.find_elements_by_xpath(xpath_identifier_user)
        print("LÄNGE " + str(len(likers_list)) + "")
        while len(likers_list) < likes:
            InstaLogger.logger().info("new likers in actual view: " +
                                      str(len(likers_list)) + " - list: " +
                                      str(len(user_liked_list)) +
                                      " should be " + str(likes) +
                                      " -- scroll for more")
            try:
                # Scroll the last row of the overlay into view to trigger
                # lazy-loading of more likers.
                div_likebox_elem = browser.find_element_by_xpath(
                    "//div[contains(@class, 'i0EQd')]/div/div/div[last()]"
                )  # old:wwxN2
                browser.execute_script("arguments[0].scrollIntoView(true);",
                                       div_likebox_elem)
            except BaseException as e:
                # Fall back to scrolling the first row, then retry.
                tried_catch_likers = tried_catch_likers + 1
                div_likebox_elem = browser.find_element_by_xpath(
                    "//div[contains(@class, 'i0EQd')]/div/div/div[1]")
                browser.execute_script("arguments[0].scrollIntoView(true);",
                                       div_likebox_elem)
                # BUGFIX: concatenating the exception object itself raised
                # TypeError (str + Exception) and aborted the scrape; format
                # it explicitly with str(e).
                print("error on scrolling - next try (tried: " +
                      str(tried_catch_likers) + ") Message:" + str(e))
            sleep(Settings.sleep_time_between_post_scroll)
            likers_list = post.find_elements_by_xpath(xpath_identifier_user)
            for liker in likers_list:
                # username is the 4th path segment of the liker's profile URL
                user_like = liker.get_attribute("href").split('/')
                username_liked_post = user_like[3]
                if username_liked_post not in user_liked_list:
                    user_liked_list.append(username_liked_post)
            if (likers_list_before == len(user_liked_list)):
                # No new likers since the last pass: wait longer, nudge the
                # overlay again, and give up after 10 stalled attempts.
                tried_catch_likers = tried_catch_likers + 1
                print("error on scrolling - next try (tried: " +
                      str(tried_catch_likers) + ")")
                sleep(Settings.sleep_time_between_post_scroll * 1.5)
                div_likebox_elem = browser.find_element_by_xpath(
                    "//div[contains(@class, 'i0EQd')]/div/div/div[1]")
                browser.execute_script("arguments[0].scrollIntoView(true);",
                                       div_likebox_elem)
                if tried_catch_likers > 10:
                    InstaLogger.logger().error(
                        "exit scrolling likers " + str(tried_catch_likers) +
                        "x tries - liker list: " + str(len(user_liked_list)) +
                        " should be " + str(likes) + "")
                    break
            likers_list_before = len(user_liked_list)
        InstaLogger.logger().info('likers: ' + str(len(user_liked_list)))
    except BaseException as e:
        # broad catch: a likers failure must not abort the whole post scrape
        InstaLogger.logger().error("Error - getting post likers")
        InstaLogger.logger().error(e)
    return user_liked_list
def extract_post_comments(browser, post):
    """Expand and scrape all comments of *post*.

    Repeatedly clicks the "load more comments" / "view all" control until
    every comment <li> is present, then extracts the commenting usernames
    and (depending on Settings.output_comments) the comment texts.

    Returns:
        Tuple ``(user_comments, user_commented_list, comment_count)`` where
        `user_comments` is a list of {'user', 'comment'} dicts,
        `user_commented_list` the commenting usernames, and `comment_count`
        is ``len(comments) - 1`` (the first <li> is usually the caption;
        -1 when nothing was found).
    """
    # if more than 22 comment elements, use the second to see
    # how much comments, else count the li's
    # first element is the text, second either the first comment
    # or the button to display all the comments
    # sometimes getting comments ends in a endless loop
    # therefore reduce the run
    comments_found_last_run = 0
    comments_run_same_length = 0
    comments = []
    user_commented_list = []
    user_comments = []
    try:
        if post.find_elements_by_tag_name('ul'):
            comment_list = post.find_element_by_tag_name('ul')
            comments = comment_list.find_elements_by_tag_name('li')
            if len(comments) > 1:
                # load hidden comments
                tried_catch_comments = 0
                while (comments[1].text.lower() == 'load more comments'
                       or comments[1].text.lower().startswith('view all')):
                    try:
                        if comments[1].find_element_by_tag_name('button'):
                            print("clicking button for loading more comments")
                            browser.execute_script(
                                "arguments[0].click();",
                                comments[1].find_element_by_tag_name('button'))
                        elif comments[1].find_element_by_tag_name('a'):
                            print("clicking a for loading more")
                            browser.execute_script(
                                "arguments[0].click();",
                                comments[1].find_element_by_tag_name('a'))
                        sleep(Settings.sleep_time_between_comment_loading)
                        comment_list = post.find_element_by_tag_name('ul')
                        comments = comment_list.find_elements_by_tag_name('li')
                        print("comments (loaded: " + str(len(comments)) +
                              "/lastrun: " + str(comments_found_last_run) +
                              ")")
                        if (comments_found_last_run == len(comments)):
                            comments_run_same_length = comments_run_same_length + 1
                            if comments_run_same_length > 10:
                                InstaLogger.logger().error(
                                    "exit getting comments: " +
                                    str(comments_run_same_length) +
                                    "x same length of comments, perhaps endless loop"
                                )
                                break
                        else:
                            # BUGFIX: previously assigned the wrong name
                            # ("comments_same_length"), so the stall counter
                            # never reset and the loop could abort after 10
                            # non-consecutive stalls.
                            comments_run_same_length = 0
                        comments_found_last_run = len(comments)
                    except Exception:
                        InstaLogger.logger().error(
                            "error clicking - next try (tried: " +
                            str(tried_catch_comments) + ") comments:" +
                            str(len(comments)) + ")")
                        tried_catch_comments = tried_catch_comments + 1
                        if tried_catch_comments > 10:
                            InstaLogger.logger().error(
                                "exit getting comments, " +
                                str(tried_catch_comments) +
                                "x tried to get comments")
                            break
                        sleep(Settings.sleep_time_between_comment_loading)
                InstaLogger.logger().info("found comments: " + str(len(comments)))
            else:
                print("found comment: 1")
            # adding who commented into user_commented_list
            for comm in comments:
                # BUGFIX: reset per iteration so a failed username lookup
                # cannot silently attribute this comment to the previous
                # commenter (user_commented used to keep its old value).
                user_commented = []
                try:
                    user_commented = comm.find_element_by_tag_name(
                        'a').get_attribute("href").split('/')
                    user_commented_list.append(user_commented[3])
                except Exception:
                    InstaLogger.logger().error(
                        "ERROR something went wrong getting user_commented")
                # first comment has to be loaded every time to get the
                # caption and tag from post
                if (Settings.output_comments is True or len(user_comments) < 1):
                    user_comment = {}
                    try:
                        user_comment = {
                            'user': user_commented[3],
                            'comment': comm.find_element_by_css_selector(
                                'h2 + span, h3 + span').text
                        }
                        print(user_comment)
                        InstaLogger.logger().info(
                            user_commented[3] + " -- " +
                            comm.find_element_by_css_selector(
                                'h2 + span, h3 + span').text)
                        user_comments.append(user_comment)
                    except Exception:
                        InstaLogger.logger().error(
                            "ERROR something went wrong getting comment")
            InstaLogger.logger().info(str(len(user_commented_list)) + " comments.")
    except BaseException as e:
        # Broad catch: a comment-scrape failure must not abort the whole
        # post. (An unreachable bare `except:` that followed this handler
        # was removed — BaseException already catches everything.)
        InstaLogger.logger().error(e)
    return user_comments, user_commented_list, int(len(comments) - 1)