Example #1
0
    def parse_thread(self, response):
        """Scrape one Page of a Thread and schedule the follow-up requests.

        Yields, in this order:

        * a ``scrapy.Request`` for every commenter profile link found on the
          Page (handled by ``parse_user_profile``);
        * one dict per comment block in which ``collect_bitcoins`` finds at
          least one address, with the keys ``comment_url``,
          ``bitcoin_addresses`` and ``comment_text``;
        * a ``scrapy.Request`` for the last link matched inside the page
          navigation, if there is one (handled by ``parse_thread`` again).

        Args:
            response: The Scrapy response for the current Thread Page.
        """
        # Navigate to every Commenter's Profile page
        for href in response.css('.poster_info b a::attr(href)').extract():
            logging.debug("Page %s, going to User %s", response.url, href)
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones, which scrapy.Request would otherwise reject.
            yield scrapy.Request(
                response.urljoin(href), callback=self.parse_user_profile)

        # Search each comment for a valid Bitcoin address.
        # Both selectors are *class* selectors: the leading dot on
        # '.windowbg2' is required, otherwise the selector looks for a
        # <windowbg2> element and never matches anything.
        for comment in itertools.chain(
                response.css('.windowbg').extract(),
                response.css('.windowbg2').extract()):
            # collect_bitcoins is given bytes, so encode the extracted markup
            valid_addresses = collect_bitcoins(comment.encode())
            if valid_addresses:
                logging.debug("Page %s, yielding a Comment", response.url)
                yield {
                    "comment_url": response.url,
                    "bitcoin_addresses": valid_addresses,
                    "comment_text": comment
                }

        # Navigate to the next Page in the Thread: the last href matched in
        # the navigation block is the one that gets followed.
        next_page = response.css('.prevnext .navPages ::attr(href)').extract()
        if next_page:
            logging.debug("Page %s, going to Page %s",
                          response.url, next_page[-1])
            yield scrapy.Request(
                response.urljoin(next_page[-1]), callback=self.parse_thread)
Example #2
0
 def parse_user_profile(self, response):
     """Yield the Bitcoin addresses found on a User's Profile page.

     The raw response body is handed to ``bitcoin_helper.collect_bitcoins``.
     When that returns a non-empty collection, a single dict is yielded with
     the keys ``user_id``, ``Profile URL`` and ``bitcoin_addresses``;
     otherwise nothing is yielded.
     """
     found = bitcoin_helper.collect_bitcoins(response.body)
     if len(found) == 0:
         # No addresses on this profile: nothing to report.
         return

     # The user id is taken from the first text node matched by this
     # selector (second cell of a first-child row under a .windowbg element).
     id_selector = '.windowbg tr:nth-child(1) td:nth-child(2)::text'
     profile_owner = response.css(id_selector).extract_first()

     yield {
         "user_id": profile_owner,
         "Profile URL": response.url,
         "bitcoin_addresses": found
     }
Example #3
0
 def parse_user_profile(self, response):
     """Scrape valid Bitcoin addresses from a User's Profile page.

     Passes the response body to ``collect_bitcoins``. If any addresses come
     back, logs a debug message and yields one dict with the keys
     ``user_id``, ``profile_url`` and ``bitcoin_addresses``. Yields nothing
     when no address is found.
     """
     addresses = collect_bitcoins(response.body)
     if len(addresses) == 0:
         # Profile contains no valid addresses; skip it.
         return

     # First text node of the second cell in a first-child row under
     # a .windowbg element is used as the user id.
     id_cells = response.css(
         '.windowbg tr:nth-child(1) td:nth-child(2)::text')
     owner = id_cells.extract_first()

     message = "User {}, yielding Bitcoins".format(response.url)
     logging.debug(message)

     yield {
         "user_id": owner,
         "profile_url": response.url,
         "bitcoin_addresses": addresses
     }
Example #4
0
    def parse_page(self, response):
        """Search every comment on this Page and yield those with Bitcoins.

        Each comment block (elements with class ``windowbg`` or
        ``windowbg2``) is passed to ``collect_bitcoins``. For every block
        with at least one address, the block is parsed with
        ``parse_comment`` and one dict is yielded with the keys
        ``username``, ``bitcoin_addresses``, ``profile_url``, ``date``,
        ``comment`` and ``comment_url``.

        Args:
            response: The Scrapy response for the Page being scraped.
        """
        # Both selectors are *class* selectors: without the leading dot,
        # 'windowbg2' would look for a <windowbg2> element and match nothing,
        # silently skipping those comment blocks.
        for comment in itertools.chain(
                response.css('.windowbg').extract(),
                response.css('.windowbg2').extract()):
            # collect_bitcoins is given bytes, so encode the extracted markup
            valid_addresses = collect_bitcoins(comment.encode())
            if not valid_addresses:
                continue

            logging.debug("Page %s, yielding Bitcoins", response.url)
            parsed = parse_comment(comment)

            # Build the item under its own name instead of rebinding the
            # loop variable, so `comment` always refers to the raw markup.
            yield {
                "username": parsed["username"],
                "bitcoin_addresses": valid_addresses,
                "profile_url": parsed["profile_url"],
                "date": parsed["date"],
                "comment": parsed["comment"],
                "comment_url": response.url
            }