Ejemplo n.º 1
0
    def get_title(self):
        """Return the display title, fetching and caching it on first use.

        If ``self.title`` is already set it is returned as-is. Otherwise the
        page at ``self.url`` is loaded in a worker thread. On success the
        title is stored as ``"[<site>] <cleaned page title>"``,
        ``self.langue`` is set from the page body, and the object is saved.

        On any failure (fetch error, timeout, or an error while building the
        title) the problem is printed, a placeholder error title is stored
        and saved, and that placeholder is returned. Because the placeholder
        is stored in ``self.title``, later calls return it without fetching
        again.
        """
        if self.title is not None:
            return self.title

        site = format_site_from_url(self.url)

        try:
            # The worker reports back through this dict: "browser" holds the
            # loaded browser on success, "error" the exception it raised.
            outcome = {}

            def fetch_page():
                try:
                    browser = Browser()
                    browser.set_handle_robots(False)
                    browser.open(self.url, timeout=9.00)
                except Exception as error:
                    # Hand the failure to the caller instead of letting it
                    # die inside the thread, so the real cause gets logged.
                    outcome["error"] = error
                else:
                    outcome["browser"] = browser

            worker = threading.Thread(target=fetch_page)
            worker.start()
            # Upper bound on how long the caller waits, regardless of how
            # long the fetch itself takes.
            worker.join(timeout=10)

            if "error" in outcome:
                raise outcome["error"]
            if "browser" not in outcome:
                # The worker neither finished nor failed within the wait.
                raise Exception("browser timed out")

            browser = outcome["browser"]
            page_title = encoding_sucks(clean_title(browser.title()))
            # NOTE(review): the success path encodes `site` to UTF-8 while the
            # error path below uses it unencoded -- confirm this is intended.
            self.title = "[%s] %s" % (site.encode("Utf-8"), page_title.lower().capitalize())
            self.langue = get_langue_from_html(browser.response().get_data())
            self.save()
            return self.title
        except Exception as e:
            # Best effort: never propagate, fall back to a placeholder title.
            print("Error: fail on %s: %s" % (self.url, e))
            self.title = "[%s] Error: couldn't fetch the title" % site
            self.save()
            return self.title
Ejemplo n.º 2
0
    def get_langue(self):
        """Return the page language, detecting and caching it on first use.

        If ``self.langue`` is already set it is returned as-is. Otherwise the
        page at ``self.url`` is downloaded and its body is passed to
        ``get_langue_from_html``; the result is stored in ``self.langue`` and
        the object is saved. If the download raises ``URLError`` an empty
        string is stored and saved instead. Other exceptions propagate.
        """
        if self.langue is not None:
            return self.langue

        try:
            response = urlopen(self.url)
            try:
                html = response.read()
            finally:
                # Always release the connection, even if reading fails.
                response.close()
            detected = get_langue_from_html(html)
        except URLError:
            # Unreachable page: remember "no language" rather than retrying.
            detected = ""

        self.langue = detected
        self.save()
        return detected