Ejemplo n.º 1
0
    def page_url(self, tv_listing):
        """
        Search imdb for a TV listing and return the URL of the top result.

        The search page whose address is built by ``self.finder_url(tv_listing)``
        is downloaded first. When that page has no "findList" table, the search
        is repeated with ``titles=True`` and the table is read from the second
        page instead.

        :param tv_listing: the listing to search for; handed unchanged to
            ``self.finder_url``
        :return: ``self.url`` joined with the href of the first link found in
            the first cell of the first "findList" row, for example
            http://www.imdb.com/title/tt0898266/?ref_=fn_tt_tt_1
        """
        search_url = self.finder_url(tv_listing)
        with contextlib.closing(urllib.urlopen(search_url)) as search_response:
            scraper = ScrapeHelper(search_response.read())

            if not scraper.is_table_exists("findList"):
                # The plain search produced no results table: fall back to the
                # titles search. The first response stays open until the outer
                # ``with`` exits.
                titles_url = self.finder_url(tv_listing, titles=True)
                with contextlib.closing(
                        urllib.urlopen(titles_url)) as titles_response:
                    scraper = ScrapeHelper(titles_response.read())
                    result_rows = scraper.find_table_by_class("findList")
            else:
                result_rows = scraper.find_table_by_class("findList")

        # Only the first row matters: its first cell holds the link to the
        # listing page, stored as a path that is appended to the site base URL.
        first_cell = result_rows[0].find('td')
        first_link = first_cell.find('a')
        listing_path = first_link.get('href')
        return self.url + listing_path