# This excerpt assumes the surrounding spider module provides getHttp,
# retrieveContents, log, SEV_EXC, exceptionAsStr, ebooks.FORMATS,
# _g_manybooks_url and _find_book_index.
import urllib
from BeautifulSoup import BeautifulSoup  # old 1.x/2.x API: feed()/first()/fetch()

def _spider_book_info(url, letter):
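    # Scrape a single manybooks.net book page: pull out the title, the
    # optional subtitle/author/language code, the hidden book id and the
    # available download formats.  Returns a tuple or None on failure.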
    try:
        html = getHttp(url, handleException = False)
        soup = BeautifulSoup()
        soup.feed(html)
        h1 = soup.first("h1")
        if h1 is None:
            return None

        title = retrieveContents(h1).decode("iso-8859-1")

        subtitle = None
        author = None
        code = None

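        # Metadata is laid out as parallel "title-label"/"title-data" span
        # pairs; a field is found by locating its label and using the same
        # index into the data list.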
        labels = [retrieveContents(x) for x in soup.fetch("span", {"class": "title-label"})]
        data = soup.fetch("span", {"class": "title-data"})
        try:
            index = labels.index("Subtitle")
            subtitle = retrieveContents(data[index]).decode("iso-8859-1")
        except ValueError:
            pass

        try:
            index = labels.index("Author")
            author = retrieveContents(data[index].first("a")).decode("iso-8859-1")
        except ValueError:
            pass

        try:
            index = labels.index("Language")
            # href looks like "/language.php?code=XX&...": the code starts at
            # offset 19 (len("/language.php?code=")) and runs to the next "&".
            href = str(data[index].first("a", {"href": "/language.php?code=%"})["href"])
            end = href.find("&", 19)
            if -1 == end:
                end = len(href)
            code = href[19:end].decode("iso-8859-1")
        except ValueError:
            pass

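        # The hidden "tid" input presumably carries the site's unique book id.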
        tid = soup.first("input", {"type": "hidden", "name": "tid"})
        assert tid is not None
        book_id = tid["value"].decode("iso-8859-1")

        print (u"%s: \"%s\"" % (author, title)).encode("iso-8859-1", "ignore")

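        # Download formats are <option> elements of the "book" select; keep
        # only the formats the ebooks module knows about.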
        sel = soup.first("select", {"name": "book"})
        assert sel is not None
        opts = sel.fetch("option")
        formats = []
        for opt in opts:
            try:
                format = retrieveContents(opt).split()[0]
                if format not in ebooks.FORMATS:
                    continue

                val = opt["value"]
                formats.append((format, val))

            except Exception, ex:
                log(SEV_EXC, exceptionAsStr(ex))
        formats.sort()
        return (url, title, subtitle, author, book_id, code, formats)
    except Exception, ex:
        # getHttp() is called with handleException=False, so any error
        # propagates here; log it and treat the book as unavailable.
        log(SEV_EXC, exceptionAsStr(ex))
        return None

    def _parse_letter_page(self, letter, html, index):
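        # Method of the spider class (class definition is outside this excerpt).
        # Parses one listing page for 'letter'; returns a tuple of
        # (finished, count or resume index, number of books spidered).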
        self._check_finish()
        soup = BeautifulSoup()
        soup.feed(html)
        div = soup.first("div", {"class": "sidebar-module"})
        assert div is not None
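        # The total record count for this letter is the third whitespace-
        # separated word of this sidebar text node.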
        count = int(retrieveContents(div.contents[2]).split()[2])
        offset = 0
        self._lock.acquire()
        try:
            if count <= self._data[letter][0]:
                print 'Letter "%s" is up to date (%d records).' % (letter, self._data[letter][0])
                return True, count, 0
            offset = self._offsets[letter]
        finally:
            self._lock.release()

        spidered = 0
        div = soup.first("div", {"class": "titleList"})
        assert div is not None
        as = div.fetch("a")
        urls = []
        for a in as:
            url = _g_manybooks_url + urllib.quote(a["href"])
            urls.append(url)

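        # Walk the listing: books we already know advance the resume index;
        # new ones are spidered and batch-flushed under the lock.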
        for url in urls:
            self._check_finish()
            i = -1
            self._lock.acquire()
            try:
                books = self._data[letter][1]
                i = _find_book_index(books, url, index)
            finally:
                self._lock.release()

            if -1 != i:
                index = i + 1
            else:
                book = _spider_book_info(url, letter)
                if book is not None:
                    spidered += 1
                    self._lock.acquire()
                    try:
                        self._fresh_books.append((letter, index + offset, book))
                        if len(self._fresh_books) == self.flush_after:
                            self._flush_books()
                        offset += 1
                        self._offsets[letter] = offset
                        if self._data[letter][0] + offset == count:
                            return True, count, spidered
                    finally:
                        self._lock.release()
        return (index + offset == count), index, spidered