Esempio n. 1
0
    def get_title(self):
        """Return the display title for this object, fetching it on first use.

        A title already stored on the instance is returned unchanged.
        Otherwise the page at ``self.url`` is opened in a worker thread and
        the wait for it is capped at ten seconds. On success ``self.title``
        and ``self.langue`` are set from the fetched page and the instance
        is saved. On any failure a placeholder error title is stored and
        saved instead, so later calls on this instance return the
        placeholder without refetching.

        Returns:
            The cached, freshly fetched, or placeholder title.
        """
        if self.title is not None:
            return self.title

        site = format_site_from_url(self.url)

        try:
            fetched = []
            failures = []

            def process_website():
                # Runs in the worker thread: hand back either the opened
                # browser or the exception that stopped it.
                try:
                    browser = Browser()
                    browser.set_handle_robots(False)
                    browser.open(self.url, timeout=9.00)
                    fetched.append(browser)
                except Exception as error:
                    failures.append(error)

            thread = threading.Thread(target=process_website)
            # Daemon thread: a fetch that outlives the join below must not
            # keep the interpreter alive at shutdown.
            thread.daemon = True
            thread.start()
            thread.join(timeout=10)
            if failures:
                # Surface the real cause instead of the generic message.
                raise failures[0]
            if not fetched:
                raise Exception("browser timedout or failed")
            browser = fetched[0]
            self.title = "[%s] %s" % (site.encode("Utf-8"), encoding_sucks(clean_title(browser.title())).lower().capitalize())
            self.langue = get_langue_from_html(browser.response().get_data())
            self.save()
            return self.title
        except Exception as e:
            # Best effort: report the problem and cache a placeholder title.
            # Parenthesised form prints the same text with or without
            # print_function.
            print("Error: fail on %s: %s" % (self.url, e))
            self.title = "[%s] Error: couldn't fetch the title" % site
            self.save()
            return self.title
Esempio n. 2
0
    def get_title(self):
        """Return the display title for this object, fetching it on first use.

        A title already stored on the instance is returned unchanged.
        Otherwise the page at ``self.url`` is opened and the title is built
        as ``"[<site>] <page title>"``, where ``<site>`` is the URL's host
        without a ``www.`` prefix and without its last dot-separated label,
        each remaining label capitalised. The result is stored in
        ``self.title`` and saved, so the page is fetched only once. If
        opening the URL raises ``URLError``, a placeholder error title is
        stored and saved instead.

        Returns:
            The cached, freshly fetched, or placeholder title.
        """
        if self.title is not None:
            return self.title

        b = Browser()
        # Third "/"-separated piece of the URL is the host part
        # ("scheme://host/...").
        host = self.url.split("/")[2].replace("www.", "")
        site = ".".join(label.capitalize() for label in host.split(".")[:-1])
        try:
            b.open(self.url)
            # Remember the fetched title; previously only the error
            # placeholder was cached, so every call hit the network again.
            self.title = "[%s] %s" % (site.encode("Utf-8"), encoding_sucks(b.title()))
        except URLError:
            self.title = "[%s] Error: couldn't fetch the title" % site
        self.save()
        return self.title