Example #1
class Cbdb(Source):

    NAMESPACES={
        'x':"http://www.w3.org/1999/xhtml"
    }

    '''
    List of platforms this plugin works on. For example: ['windows', 'osx', 'linux']
    '''
    supported_platforms = ['windows', 'osx', 'linux']

    BASE_URL = 'http://www.cbdb.cz/'
    BASE_DETAIL_URL = 'http://www.cbdb.cz/kniha-'

    '''
    The name of this plugin. You must set it to something other than 'Trivial Plugin' for it to work.
    '''
    name = 'cbdb'

    '''
    The version of this plugin as a 3-tuple (major, minor, revision)
    '''
    version = (1, 0, 4)

    '''
    A short string describing what this plugin does
    '''
    description = u'Download metadata and cover from cbdb.cz'

    '''
    The author of this plugin
    '''
    author = u'MarDuke [email protected]'

    '''
    When more than one plugin exists for a filetype, the plugins are run in order of decreasing priority i.e. plugins with higher priority will be run first. The highest possible priority is sys.maxint. Default priority is 1.
    '''
    priority = 1

    '''
    The earliest version of calibre this plugin requires
    '''
    minimum_calibre_version = (1, 0, 0)

    '''
    If False, the user will not be able to disable this plugin. Use with care.
    '''
    can_be_disabled = True

    '''
    Set of capabilities supported by this plugin. Useful capabilities are: 'identify', 'cover'
    '''
    capabilities = frozenset(['identify', 'cover'])

    '''
    List of metadata fields that can potentially be downloaded by this plugin during the identify phase
    '''
    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate', 'comments', 'publisher', 'identifier:isbn', 'rating', 'identifier:cbdb', 'languages'])

    '''
    Set this to True if your plugin returns HTML formatted comments
    '''
    has_html_comments = False

    '''
    Setting this to True means that the browser object will add Accept-Encoding: gzip to all requests.
    This can speed up downloads, but make sure that the source actually supports gzip transfer encoding correctly first
    '''
    supports_gzip_transfer_encoding = False

    '''
    Cached cover URLs can sometimes be unreliable (i.e. the download could fail, or the returned image could be bogus).
    If that is often the case with this source, set this to False
    '''
    cached_cover_url_is_reliable = True

    '''
    A list of Option objects. They will be used to automatically construct the configuration widget for this plugin
    '''
    options = (
               # 'Maximum knih' = "Maximum books"; tooltip: "Maximum number of books
               # that will be examined to see whether they match the search parameters"
               Option('max_search', 'number', 25,
                      'Maximum knih',
                      'Maximum knih které se budou zkoumat jestli vyhovují hledaným parametrům'),

               # 'Maximum obálek' = "Maximum covers"; tooltip: "Maximum number of covers
               # that will be downloaded"
               Option('max_covers', 'number', 5,
                      'Maximum obálek',
                      'Maximum obálek které se budou stahovat'),

               # 'Pozice v sérii' = "Position in series"; tooltip: cbdb.cz does not give the
               # position within a series, it only lists the books of a series in the correct
               # order, so if one is missing the positions are shifted; this unreliable feature
               # can be disabled here, the series name itself is still shown.
               Option('serie_index', 'bool', True,
                      'Pozice v sérii',
                      'Cbdb neudává pozici v sérii, pouze vypisuje seznam knih v sérii ve správném pořadí, takže pokud některá např. chybí jsou pozice rozhozené, je zde možnost tuto nespolehlivou vlastnost vypnout. Stále se ovšem bude zobrazovat alespoň informace o názvu série'),
    )

    '''
    A string that is displayed at the top of the config widget for this plugin
    '''
    config_help_message = None

    '''
    If True this source can return multiple covers for a given query
    '''
    can_get_multiple_covers = True

    '''
    If set to True covers downloaded by this plugin are automatically trimmed.
    '''
    auto_trim_covers = False

    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30):
        '''
        Identify a book by its title/author/isbn/etc.
        If identifier(s) are specified and no match is found and this metadata source does not store all related identifiers (for example, all ISBNs of a book), this method should retry with just the title and author (assuming they were specified).
        If this metadata source also provides covers, the URL to the cover should be cached so that a subsequent call to the get covers API with the same ISBN/special identifier does not need to get the cover URL again. Use the caching API for this.
        Every Metadata object put into result_queue by this method must have a source_relevance attribute that is an integer indicating the order in which the results were returned by the metadata source for this query. This integer will be used by compare_identify_results(). If the order is unimportant, set it to zero for every result.
        Make sure that any cover/isbn mapping information is cached before the Metadata object is put into result_queue.
        Parameters:
            log – A log object, use it to output debugging information/errors
            result_queue – A result Queue, results should be put into it. Each result is a Metadata object
            abort – If abort.is_set() returns True, abort further processing and return as soon as possible
            title – The title of the book, can be None
            authors – A list of authors of the book, can be None
            identifiers – A dictionary of other identifiers, most commonly {'isbn':'1234...'}
            timeout – Timeout in seconds, no network request should hang for longer than timeout.
        Returns:
            None if no errors occurred, otherwise a unicode representation of the error suitable for showing to the user
        '''
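        # Implementation outline (a summary of the code below): build the search
        # query, download the search results page, detect whether cbdb.cz redirected
        # straight to a book detail page, otherwise collect the matches, pre-filter
        # them down to prefs['max_search'], and hand each candidate to a Worker
        # thread that puts Metadata results into result_queue.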

        self.log = Log(self.name, log)

        found = []
        xml = None
        detail_ident = None

        # test the previously found identifier first
        ident = identifiers.get(self.name, None)

        XPath = partial(etree.XPath, namespaces=self.NAMESPACES)
        entry = XPath('//div[@id="search_result_box_books"]//table[@class="search_graphic"][1]')
        detail_test = XPath('//td[@id="book_photo_box"]/img/@src')

        query = self.create_query(title=title, authors=authors,
                identifiers=identifiers)
        if not query:
            self.log('Insufficient metadata to construct query')
            return

        br = self.browser
        try:
            self.log('download page search %s'%query)
            raw = br.open(query, timeout=timeout).read().strip()

            def fixHtml(obj):
                # escape the bare ampersand of a malformed entity
                return obj.group().replace('&', '&amp;')

            raw = re.sub('&.{3}[^;]',  fixHtml,  raw)
            raw = raw.decode('utf-8', errors='replace')
            self.log.filelog(raw, 'D:\\tmp\\cbdb.html')
        except Exception as e:
            self.log.exception('Failed to make identify query: %r'%query)
            return as_unicode(e)

        try:
            parser = etree.HTMLParser(recover=True)
            clean = clean_ascii_chars(raw)
            feed = fromstring(clean, parser=parser)
#             for error in parser.error_log:
#                 self.log(error.message)

            entries = entry(feed)
            if len(entries) == 0:
                xml = feed
                detail_detect = detail_test(feed)[0]
                detail_ident = int(re.findall('\d+', detail_detect)[0])
                if ident is not None and detail_ident != ident:
                    found.append(ident)
            else:
                self.log('Found %i matches'%len(entries))
                #self.log('Matches %s'%(entries))
                act_authors = []
                for act in (authors or []):  # authors can be None
                    act_authors.append(act.split(" ")[-1])

#                ident_found = False
                tmp_entries = []
                for book_ref in entries:
                    title_tag = book_ref.xpath(".//div/a[starts-with(@href, 'kniha-')]", namespaces=self.NAMESPACES)
                    auths = [] #authors surnames
                    authors_tag = book_ref.xpath(".//a[@class='search_author_link']", namespaces=self.NAMESPACES)
                    for i in (authors_tag):
                        auths.append(i.text.split(" ")[-1])
                    add = (title_tag[0].get('href'), title_tag[0].text, auths)
                    tmp_entries.append(add)

                if len(tmp_entries) > self.prefs['max_search']:
                    tmp_entries.sort(key=self.prefilter_compare_gen(title=title, authors=act_authors))
                    tmp_entries = tmp_entries[:self.prefs['max_search']]

                for val in tmp_entries:
                    found.append(val[0])

        except Exception as e:
            self.log.exception('Failed to parse identify results')
            return as_unicode(e)

        if ident and found.count(ident) > 0:
            found.remove(ident)
            found.insert(0, ident)

        try:
            workers = []
            # if the search redirected to a detail page, push the already parsed xml to the worker; no need to download and parse it again
            if xml is not None:
                workers = [Worker(detail_ident, result_queue, br, log, 0, self, xml)]
            workers += [Worker(ident, result_queue, br, log, i, self, None) for i, ident in enumerate(found)]

            for w in workers:
                w.start()
                time.sleep(0.1)

            while not abort.is_set():
                a_worker_is_alive = False
                for w in workers:
                    w.join(0.2)
                    if abort.is_set():
                        break
                    if w.is_alive():
                        a_worker_is_alive = True
                if not a_worker_is_alive:
                    break
        except Exception as e:
            self.log.exception(e)

        return None

    def create_query(self, title=None, authors=None, identifiers={}):
        '''
        create url for HTTP request
        '''
        from urllib import urlencode
        q = ''
        if title:
            q += ' '.join(self.get_title_tokens(title))

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        if not q:
            return None
        return self.BASE_URL+'hledat?'+urlencode({
            'text':q
        })

    def get_cached_cover_url(self, identifiers):
        '''
        Return cached cover URL for the book identified by the identifiers dict or None if no such URL exists.
        Note that this method must only return validated URLs, i.e. not URLs that could result in a generic cover image or a not found error.
        '''
        url = None
        ident = identifiers.get(self.name, None)
        if ident is not None:
            url = self.cached_identifier_to_cover_url(ident)
        return url

    def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
        '''
        Download a cover and put it into result_queue. The parameters all have the same meaning as for identify(). Put (self, cover_data) into result_queue.
        This method should use cached cover URLs for efficiency whenever possible. When cached data is not present, most plugins simply call identify and use its results.
        If the parameter get_best_cover is True and this plugin can get multiple covers, it should only get the 'best' one.
        '''
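        # Implementation outline (a summary of the code below): use the cached cover
        # URL(s) when available; otherwise run identify() and take the cover of the
        # best-ranked result, then fetch the image(s) via download_multiple_covers().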
        self.log = Log(self.name, log)
        cached_urls = self.get_cached_cover_url(identifiers)
        if not title:
            return
        if not cached_urls:
            self.log('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title, authors, identifiers, timeout)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_urls = self.get_cached_cover_url(mi.identifiers)
                if cached_urls is not None:
                    break

        if cached_urls is None:
            log.info('No cover found')
            return
        self.log("Covers:%s"%cached_urls)
        if abort.is_set():
            return
        self.download_multiple_covers(title, authors, cached_urls, get_best_cover, timeout, result_queue, abort, log)

    def get_book_url(self, identifiers):
        '''
        Return a 3-tuple or None. The 3-tuple is of the form:
        (identifier_type, identifier_value, URL).
        The URL is the URL for the book identified by identifiers at this
        source. identifier_type, identifier_value specify the identifier
        corresponding to the URL.
        This URL must be browseable to by a human using a browser. It is meant
        to provide a clickable link for the user to easily visit the books page
        at this source.
        If no URL is found, return None. This method must be quick, and
        consistent, so only implement it if it is possible to construct the URL
        from a known scheme given identifiers.
        '''
        ident = identifiers.get(self.name, None)
        if ident:
            return (self.name, ident, "%skniha-%s"%(self.BASE_URL,ident))
        else:
            return None

    def get_book_url_name(self, idtype, idval, url):
        '''
        Return a human readable name from the return value of get_book_url().
        '''
        return self.name

    def identify_results_keygen(self, title=None, authors=None, identifiers={}):
        '''
        Return a function that is used to generate a key that can sort Metadata
        objects by their relevance given a search query (title, authors,
        identifiers).

        These keys are used to sort the results of a call to :meth:`identify`.

        For details on the default algorithm see
        :class:`InternalMetadataCompareKeyGen`. Re-implement this function in
        your plugin if the default algorithm is not suitable.
        '''
        def keygen(mi):
            return MetadataCompareKeyGen(mi, self, title, authors,
                identifiers)
        return keygen

    def prefilter_compare_gen(self, title=None, authors=None):
        '''
        Return a function used for pre-ordering when the search returns more
        results than we want to check. Filtering should find the most relevant
        results based on title and authors.
        '''
        def keygen(data):
            return PreFilterMetadataCompare(data, self, title, authors)
        return keygen
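
The identify() docstring above describes a queue-and-abort protocol between calibre and a metadata source. The snippet below is a minimal sketch of how a caller could drive that protocol outside calibre, assuming an already constructed plugin instance; the SimpleLog stand-in and the run_identify() helper are illustrative and not part of the plugin or of calibre's API.

from Queue import Queue, Empty
from threading import Event

class SimpleLog(object):
    # illustrative stand-in for the log object that calibre passes to identify()
    def __call__(self, *args):
        print ' '.join(unicode(a) for a in args)
    def exception(self, *args):
        self(*args)

def run_identify(plugin, title=None, authors=None):
    result_queue = Queue()
    abort = Event()   # calling abort.set() from another thread cancels the search
    error = plugin.identify(SimpleLog(), result_queue, abort,
                            title=title, authors=authors, timeout=30)
    if error:
        print error   # a unicode error message suitable for showing to the user
    results = []
    while True:
        try:
            results.append(result_queue.get_nowait())
        except Empty:
            break
    # each item is a Metadata object carrying a source_relevance attribute
    return results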
Example #2
class Worker(Thread):

    #string id
    ident = None

    #int id
    number = None

    def __init__(self, ident, result_queue, browser, log, relevance, plugin, xml, timeout=20):
        Thread.__init__(self)
        self.daemon = True
        self.ident, self.result_queue = ident, result_queue
        self.browser = browser.clone_browser()
        self.relevance = relevance
        self.plugin, self.timeout = plugin, timeout
        self.cover_url = self.isbn = None
        self.XPath = partial(etree.XPath, namespaces=plugin.NAMESPACES)
        self.xml = xml
        if xml is not None:
            self.number = int(ident)
        else:
            if ident.startswith('kniha-'):
                self.number = int(self.ident.split('-')[1])
            else:
                self.number = int(ident)

        self.log = Log("worker %i"%self.number, log)

    def initXPath(self):
        self.xpath_title = self.XPath('//*[@itemprop="name"]/text()')
        self.xpath_authors = self.XPath('//a[@itemprop="author"]/text()')
        self.xpath_comments = self.XPath('//p[@itemprop="about"]')
        self.xpath_rating = self.XPath('//div[@id="item_rating"]/text()')
        self.xpath_isbn = self.XPath('//span[@itemprop="isbn"]/text()')
        self.xpath_publisher = self.XPath('//div[@class="book_info_line"]/a[starts-with(@href, "nakladatelstvi-")]/text()')
        self.xpath_pub_date = self.XPath('//div[@class="book_info_line"]/a[starts-with(@href, "nakladatelstvi-")]/following-sibling::text()[1]')
        self.xpath_tags = self.XPath('//span[@itemprop="genre"]/text()')
        self.xpath_serie = self.XPath('//a[@href="?show=serie"]/text()')
        self.xpath_serie_index = self.XPath('//a[@href="?show=serie"]/preceding-sibling::text()')
        self.xpath_cover = self.XPath('//div[@id="book_covers_control"]/@onclick')

    def run(self):
        self.initXPath()

        if self.xml is not None:
            xml_detail = self.xml
        else:
            xml_detail = self.download_detail()

        if xml_detail is not None:
            try:
                result = self.parse(xml_detail)
                if result:
                    self.result_queue.put(result)
            except Exception as e:
                self.log.exception(e)
        else:
            self.log('Download metadata failed for: %r'%self.ident)

    def parse(self, xml_detail):
        title = self.parse_title(xml_detail)
        authors = self.parse_authors(xml_detail)
        comments = self.parse_comments(xml_detail)
        rating = self.parse_rating(xml_detail)
        isbn = self.parse_isbn(xml_detail)
        publisher, pub_year = self.parse_publisher(xml_detail)
        tags = self.parse_tags(xml_detail)
        serie, serie_index = self.parse_serie(xml_detail, title)
        cover = self.parse_cover(xml_detail)

        if title is not None and authors is not None:
            mi = Metadata(title, authors)
            mi.languages = {'ces'}
            mi.comments = as_unicode(comments)
            mi.identifiers = {self.plugin.name:str(self.number)}
            mi.rating = rating
            mi.tags = tags
            mi.publisher = publisher
            mi.pubdate = pub_year
            mi.isbn = isbn
            mi.series = serie
            mi.series_index = serie_index
            mi.cover_url = cover

            if cover:
                self.plugin.cache_identifier_to_cover_url(str(self.number), cover)

            return mi
        else:
            self.log('Result skipped because title or authors were not found')
            return None

    def parse_title(self, xml_detail):
        tmp = self.xpath_title(xml_detail)
        if len(tmp) > 0:
            res = unicode(tmp[0])
            self.log('Found title:%s'%res)
            return res
        else:
            self.log('Found title:None')
            return None

    def parse_authors(self, xml_detail):
        tmp = self.xpath_authors(xml_detail)
        if len(tmp) > 0:
            self.log('Found authors:%s'%tmp)
            auths = []
            for author in tmp:
                auths.append(unicode(author))

            return auths
        else:
            self.log('Found authors:None')
            return None

    def parse_comments(self, xml_detail):
        tmp = self.xpath_comments(xml_detail)

        if len(tmp) > 0:
            #result = "".join(tmp[0].text).strip()
            result = unicode(tmp[0].text).strip()
            self.log('Found comment:%s'%result)

            return result
        else:
            self.log('Found comment:None')
            return None

    def parse_rating(self, xml_detail):
        tmp = self.xpath_rating(xml_detail)
        if len(tmp) > 0:
            rating = float(int(tmp[0].replace('%','')) / 20)
            self.log('Found rating:%s'%rating)
            return rating
        else:
            self.log('Found rating:None')
            return None

    def parse_isbn(self, xml_detail):
        tmp = self.xpath_isbn(xml_detail)
        if len(tmp) > 0:
            self.log('Found ISBN:%s'%tmp[0].strip())
            return tmp[0].strip()
        else:
            self.log('Found ISBN:None')
            return None

    def parse_publisher(self, xml_detail):
        tmp = self.xpath_publisher(xml_detail)
        tmpDate = self.xpath_pub_date(xml_detail)
        if len(tmp) > 0:
            publisher = tmp[0]
            pubDate = self.prepare_date(int(re.search('(\d+)', tmpDate[0]).group(0)))
            self.log('Found publisher:%s'%publisher)
            self.log('Found pub date:%s'%pubDate)
            return [publisher, pubDate]

        self.log('Found publisher:None')
        self.log('Found pub date:None')
        return (None, None)

    def parse_tags(self, xml_detail):
        tmp = self.xpath_tags(xml_detail)
        if len(tmp) > 0:
            result = tmp
            self.log('Found tags:%s'%result)
            return result
        else:
            self.log('Found tags:None')
            return None

    def parse_serie(self, xml_detail, title):
        tmp = self.xpath_serie(xml_detail)
        if len(tmp) == 0:
            self.log('Found serie:None')
            return [None, None]
        index = 0
        if self.plugin.prefs['serie_index']:
            tmpIndex = self.xpath_serie_index(xml_detail)
            index = int(re.search('(\d+)', tmpIndex[0]).group(0))

        self.log('Found serie:%s[%i]'%(tmp[0],index))
        return [tmp[0], index]

    def parse_cover(self, xml_detail):
        tmp = self.xpath_cover(xml_detail)
        result = []
        if len(tmp) > 0:
            nums = re.findall('\d+', tmp[0])
            ident = int(nums[0])
            num_add = int(nums[1])
            result.append(self.plugin.BASE_URL + 'books/%i.jpg'%ident)
            cnt = num_add
            covers = self.plugin.prefs['max_covers']
            if covers:
                if cnt > covers:
                    cnt = covers
            for n in range(1,cnt):
                result.append(self.plugin.BASE_URL + 'books/%i_%i.jpg'%(ident, n))

        if len(result) > 0:
            self.log('Found covers:%s'%result)
        else:
            self.log('Found covers:None')
        return result

    def download_detail(self):
        query = self.plugin.BASE_DETAIL_URL + self.ident
        br = self.browser
        try:
            self.log('download page detail %s'%query)
            data = br.open(query, timeout=self.timeout).read().strip()
            parser = etree.HTMLParser(recover=True)
            clean = clean_ascii_chars(data)
            self.log.filelog(clean, 'D:\\tmp\\file' + self.ident +'.html')
            xml = fromstring(clean, parser=parser)
#             for error in parser.error_log:
#                 self.log(error.message)
            return xml
        except Exception as e:
            self.log.exception('Failed to make download : %r'%query)
            return None

    def prepare_date(self,year):
        from calibre.utils.date import utc_tz
        return datetime.datetime(year, 1, 1, tzinfo=utc_tz)
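
parse_cover() above derives the cover URLs from two numbers embedded in the onclick attribute of the cover-control div: the book id and the count of additional covers. The following standalone sketch reproduces that enumeration; the example onclick value and the keyword defaults are illustrative assumptions.

import re

def cover_urls(onclick_value, base_url='http://www.cbdb.cz/', max_covers=5):
    nums = re.findall(r'\d+', onclick_value)
    ident, num_add = int(nums[0]), int(nums[1])
    # the main cover, then up to max_covers - 1 numbered alternatives
    urls = [base_url + 'books/%i.jpg' % ident]
    cnt = min(num_add, max_covers) if max_covers else num_add
    for n in range(1, cnt):
        urls.append(base_url + 'books/%i_%i.jpg' % (ident, n))
    return urls

# e.g. cover_urls('...(12345, 3)...') -> the main cover plus two alternatives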
Example #3
class Worker(Thread):

    #string id
    ident = None

    #int id
    number = None

    def __init__(self, ident, result_queue, browser, log, relevance, plugin, xml, timeout=20):
        Thread.__init__(self)
        self.daemon = True
        self.ident, self.result_queue = ident, result_queue
        self.browser = browser.clone_browser()
        self.relevance = relevance
        self.plugin, self.timeout = plugin, timeout
        self.cover_url = self.isbn = None
        self.XPath = partial(etree.XPath, namespaces=plugin.NAMESPACES)
        self.xml = xml
        self.log = Log("worker %s"%ident, log)

    def initXPath(self):
        self.xpath_title = '//table[@class="detail_table"]//td[@class="detail_td_item_name" and text() = "Název:"]/following::td[1]/text()'
        self.xpath_authors = '//table[@class="detail_table"]//td[@class="detail_td_item_name" and text() = "Autor:"]/following::td[1]/a/text()'
        self.xpath_comments = '//table[@class="detail_table"]//td[@class="detail_td_item_name" and text() = "Další informace:"]/following::td[1]/text()'
        self.xpath_stars = '//input[@id="rating"]/@value'
        self.xpath_isbn = '//table[@class="detail_table"]//td[@class="detail_td_item_name" and text() = "ISBN:"]/following::td[1]/text()'
        self.xpath_publisher = '//table[@class="detail_table"]//td[@class="detail_td_item_name" and text() = "Nakladatel (rok vydání):"]/following::td[1]//text()'
        self.xpath_tags = '//table[@class="detail_table"]//td[@class="detail_td_item_name" and text() = "Žánry a lit. útvary:"]/following::td[1]/a/text()'
        self.xpath_edition = '//table[@class="detail_table"]//td[@class="detail_td_item_name" and text() = "Edice:"]/following::td[1]//text()'
        self.xpath_serie = '//table[@class="detail_table"]//td[@class="detail_td_item_name" and text() = "Série:"]/following::td[1]//text()'
        self.xpath_cover = '//td[@class="detail_td_item_value"]/img/@src'

    def run(self):
        self.initXPath()

        if self.xml is not None:
            xml_detail = self.xml
        else:
            xml_detail = self.download_detail()
        if xml_detail is not None:
            try:
                result = self.parse(xml_detail)
                if result:
                    self.result_queue.put(result)
            except Exception as e:
                self.log.exception(e)
        else:
            self.log.exception('Download metadata failed for: %s'%self.ident)

    def parse(self, xml_detail):
        title = self.parse_title(xml_detail)
        authors = self.parse_authors(xml_detail)
        comments = self.parse_comments(xml_detail)
        rating = self.parse_rating(xml_detail)
        isbn = self.parse_isbn(xml_detail)
        publisher, pub_year = self.parse_publisher(xml_detail)
        tags = self.parse_tags(xml_detail)
        serie, serie_index = self.parse_serie(xml_detail)
        cover = self.parse_cover(xml_detail)

        if title is not None and authors is not None:
            mi = Metadata(title, authors)
            mi.languages = {'ces'}
            mi.comments = as_unicode(comments)
            mi.identifiers = {self.plugin.name:self.ident}
            mi.rating = rating
            mi.tags = tags
            mi.publisher = publisher
            mi.pubdate = pub_year
            mi.isbn = isbn
            mi.series = serie
            mi.series_index = serie_index
            mi.cover_url = cover

            if cover:
                self.plugin.cache_identifier_to_cover_url(self.ident, cover)

            return mi
        else:
            return None

    def parse_title(self, xml_detail):
        tmp = xml_detail.xpath(self.xpath_title)
        if len(tmp) > 0:
            self.log('Found title:%s'%tmp[0].strip())
            return tmp[0].strip()
        else:
            self.log('Found title:None')
            return None

    def parse_authors(self, xml_detail):
        tmp = xml_detail.xpath(self.xpath_authors)
        if len(tmp) > 0:
            auths = []
            for a in tmp:
                self.log(a)
                parts = a.split(",")
                self.log(parts)
                auths.append("%s %s"%(parts[1].strip(),parts[0]))
            self.log('Found authors:%s'%auths)
            return auths
        else:
            self.log('Found authors:None')
            return None

    def parse_comments(self, xml_detail):
        tmp = xml_detail.xpath(self.xpath_comments)
        if len(tmp) > 0:
            result = "<br/>".join(tmp).strip()

            # pref values: 'Nepřidávat' = do not add, 'Na konec' = at the end,
            # 'Na začatek' = at the beginning
            if self.plugin.prefs['edition_to_comments'] != 'Nepřidávat':
                tmp = xml_detail.xpath(self.xpath_edition)
                if len(tmp) > 0:
                    num_tmp = int(re.search("\d+", tmp[1]).group())
                    add = "Edice: %s %d. díl"%(tmp[0], num_tmp)
                    if self.plugin.prefs['edition_to_comments'] == 'Na konec':
                        result = result + "<br/>" + add
                    elif self.plugin.prefs['edition_to_comments'] == 'Na začatek':
                        result = add + "<br/>" + result

            self.log('Found comment:%s'%result)
            return result
        else:
            self.log('Found comment:None')
            return None

    def parse_rating(self, xml_detail):
        tmp = xml_detail.xpath(self.xpath_stars)
        if len(tmp) > 0:
            rating = float(tmp[0])
            self.log('Found rating:%s'%rating)
            return rating+1
        else:
            self.log('Found rating:None')
            return None

    def parse_isbn(self, xml_detail):
        tmp = xml_detail.xpath(self.xpath_isbn)
        if len(tmp) > 0:
            self.log('Found ISBN:%s'%tmp[0])
            return tmp[0]
        else:
            self.log('Found ISBN:None')
            return None

    def parse_publisher(self, xml_detail):
        tmp = xml_detail.xpath(self.xpath_publisher)
        if len(tmp) > 0:
            pub = tmp[0]
            pubdt = int(tmp[1].strip()[1:-1])
            self.log('Found publisher:%s'%pub)
            self.log('Found pubdate:%s'%pubdt)
            return [pub, datetime.datetime(pubdt, 1, 1, tzinfo=utc_tz)]
        else:
            self.log('Found publisher:None')
            self.log('Found pubdate:None')
            return (None, None)

    def parse_tags(self, xml_detail):
        tags = []
        tags.extend(xml_detail.xpath(self.xpath_tags))
        if self.plugin.prefs['edition']:
            tmp = xml_detail.xpath(self.xpath_edition)
            if len(tmp) > 1:
                tags.append(self.plugin.prefs['edition_prefix'] + tmp[0])

        if len(tags) > 0:
            self.log('Found tags:%s'%tags)
            return tags
        else:
            self.log('Found tags:None')
            return None

    def parse_serie(self, xml_detail):
        tmp = xml_detail.xpath(self.xpath_serie)
        if len(tmp) > 1:
            serie = tmp[0]
            serie_index = int(tmp[1].split('-')[1].strip())
            self.log('Found serie:%s[%d]'%(serie, serie_index))
            return [serie, serie_index]
        else:
            self.log('Found serie:None')
            return [None, None]

    def parse_cover(self, xml_detail):
        tmp = xml_detail.xpath(self.xpath_cover)
        if len(tmp) > 0:
            cover = "%snew/%s"%(self.plugin.BASE_URL,tmp[0])
            self.log('Found covers:%s'%cover)
            return cover
        else:
            self.log('Found covers:None')

    def download_detail(self):
        query = "%snew/?mainpage=pub&subpage=detail&id=%s"%(self.plugin.BASE_URL, self.ident)
        br = self.browser
        try:
            self.log('download page detail %s'%query)
            data = br.open(query, timeout=self.timeout).read().strip()
            parser = etree.HTMLParser(recover=True)
            clean = clean_ascii_chars(data)
            xml = fromstring(clean,  parser=parser)
            self.log.filelog(clean, "\\tmp\\worker-%s.html"%self.ident)
            return xml
        except Exception as e:
            self.log.exception('Failed to make download : %r'%query)
            return None
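
Example #3 locates each value cell by the text of its label cell (class detail_td_item_name) rather than by itemprop attributes. Below is a minimal, self-contained sketch of that XPath pattern against a made-up HTML fragment; the fragment and its ISBN value are illustrative only.

from lxml import etree

html = '''
<table class="detail_table">
  <tr><td class="detail_td_item_name">ISBN:</td><td>978-80-0000-000-0</td></tr>
</table>
'''
doc = etree.fromstring(html, etree.HTMLParser(recover=True))
xpath = ('//table[@class="detail_table"]'
         '//td[@class="detail_td_item_name" and text() = "ISBN:"]'
         '/following::td[1]/text()')
print doc.xpath(xpath)   # ['978-80-0000-000-0']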