Example #1
0
 def process_metadata(self, idx, content, codec):
     # Dispatch on the MOBI EXTH record type `idx` and fold the raw record
     # payload `content` (bytes; `codec` is the declared text encoding)
     # into the book metadata object `self.mi`.
     if idx == 100:  # author record: append to the authors list
         if self.mi.is_null('authors'):
             self.mi.authors = []
         au = clean_xml_chars(self.decode(content).strip())
         self.mi.authors.append(au)
         # If the author already looks like "Last, First", reuse it
         # verbatim as the author sort string.
         if self.mi.is_null('author_sort') and re.match(r'\S+?\s*,\s+\S+', au.strip()):
             self.mi.author_sort = au.strip()
     elif idx == 101:  # publisher
         self.mi.publisher = clean_xml_chars(self.decode(content).strip())
         # Treat placeholder publisher names as absent
         if self.mi.publisher in {'Unknown', _('Unknown')}:
             self.mi.publisher = None
     elif idx == 103:  # description / comments
         self.mi.comments  = clean_xml_chars(self.decode(content).strip())
     elif idx == 104:  # ISBN; keep only if it validates
         raw = check_isbn(self.decode(content).strip().replace('-', ''))
         if raw:
             self.mi.isbn = raw
     elif idx == 105:  # subject/tags, semicolon separated; de-duplicated
         if not self.mi.tags:
             self.mi.tags = []
         self.mi.tags.extend([x.strip() for x in clean_xml_chars(self.decode(content)).split(';')])
         self.mi.tags = list(set(self.mi.tags))
     elif idx == 106:  # publication date; unparseable values are ignored
         try:
             self.mi.pubdate = parse_date(content, as_utc=False)
         except:
             pass
     elif idx == 108:  # contributor / book producer
         self.mi.book_producer = clean_xml_chars(self.decode(content).strip())
     elif idx == 112:  # dc:source set in some EBSP amazon samples
         try:
             content = content.decode(codec).strip()
             isig = 'urn:isbn:'
             if content.lower().startswith(isig):
                 # Source holds an ISBN URN; use it only if no ISBN yet
                 raw = check_isbn(content[len(isig):])
                 if raw and not self.mi.isbn:
                     self.mi.isbn = raw
             elif content.startswith('calibre:'):
                 # calibre book uuid is stored here by recent calibre
                 # releases
                 cid = content[len('calibre:'):]
                 if cid:
                     self.mi.application_id = self.mi.uuid = cid
         except:
             pass
     elif idx == 113:  # ASIN or other id
         try:
             self.uuid = content.decode('ascii')
             self.mi.set_identifier('mobi-asin', self.uuid)
         except:
             # Non-ASCII payload: record that no uuid is available
             self.uuid = None
     elif idx == 116:  # start-reading offset, big-endian 32-bit unsigned
         self.start_offset, = struct.unpack(b'>L', content)
     elif idx == 121:  # KF8 boundary record number; NULL_INDEX means none
         self.kf8_header, = struct.unpack(b'>L', content)
         if self.kf8_header == NULL_INDEX:
             self.kf8_header = None
Example #2
0
def get_metadata(stream, cover=True):
    """Read metadata (and optionally the cover) from a PDF stream.

    Runs pdfinfo in a forked worker against a temporary copy of the
    stream and assembles a MetaInformation object from the returned
    info dict. Raises RuntimeError when the worker fails and ValueError
    when no info dict comes back.
    """
    cdata = None
    with TemporaryDirectory("_pdf_metadata_read") as pdfpath:
        # Work on a private copy of the PDF inside the temp directory.
        stream.seek(0)
        with open(os.path.join(pdfpath, "src.pdf"), "wb") as dest:
            shutil.copyfileobj(stream, dest)
        try:
            res = fork_job("calibre.ebooks.metadata.pdf", "read_info", (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError("Failed to run pdfinfo")
        info = res["result"]
        # Relay any worker output so it is not silently lost.
        with open(res["stdout_stderr"], "rb") as logf:
            raw = logf.read().strip()
        if raw:
            prints(raw)
        if not info:
            raise ValueError("Could not read info dict from PDF")
        covpath = os.path.join(pdfpath, "cover.jpg")
        if cover and os.path.exists(covpath):
            with open(covpath, "rb") as covf:
                cdata = covf.read()

    title = info.get("Title", None)
    author = info.get("Author", None)
    authors = [_("Unknown")] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    creator = info.get("Creator", None)
    if creator:
        mi.book_producer = creator

    mi.tags = []
    keywords = info.get("Keywords", None)
    if keywords:
        mi.tags = [x.strip() for x in keywords.split(",")]
        # A keyword that validates as an ISBN becomes the book's ISBN
        # and is dropped from the tag list.
        isbn = [check_isbn(x) for x in mi.tags if check_isbn(x)]
        if isbn:
            mi.isbn = isbn = isbn[0]
        mi.tags = [x for x in mi.tags if check_isbn(x) != isbn]

    subject = info.get("Subject", None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ("jpeg", cdata)

    return mi
Example #3
0
    def create_query(self, log, title=None, authors=None, identifiers={}):
        # Prefer an ISBN lookup; otherwise fall back to a title/author
        # keyword search. Returns None when there is nothing to search on.
        isbn = check_isbn(identifiers.get('isbn', None))
        url = ''
        if isbn is not None:
            url = ('/search/search.nhn?serviceSm=advbook.basic&ic=service.summary'
                   '&isbn=' + isbn)
        elif title or authors:
            title_tokens = list(
                self.get_title_tokens(title,
                                      strip_joiners=False,
                                      strip_subtitle=True))
            author_tokens = self.get_author_tokens(authors,
                                                   only_first_author=True)
            # URL-quote each token, byte-encoding unicode first (py2).
            parts = []
            for t in list(title_tokens) + list(author_tokens):
                parts.append(quote(t.encode('utf-8') if isinstance(t, unicode) else t))
            url = '/search/search.nhn?sm=sta_hty.book' + '&query=' + '+'.join(parts)

        if not url:
            return None

        log.info('Search from %s' % (url))
        return NaverBook.BASE_URL + url
    def create_query(self, log, title=None, authors=None, identifiers={}):
        """Build the Ridibooks keyword-search URL from title/author tokens.

        Returns the absolute search URL, or None when neither a title nor
        authors are available. ISBN identifiers are not used: the search
        endpoint here only takes a `q` keyword parameter, and the
        previously computed `isbn` local was dead code (never referenced).
        """
        url = ''
        if title or authors:
            title_tokens = list(
                self.get_title_tokens(title,
                                      strip_joiners=False,
                                      strip_subtitle=True))
            author_tokens = self.get_author_tokens(authors,
                                                   only_first_author=True)

            # URL-quote each token, byte-encoding unicode first (py2).
            tokens = [
                quote(t.encode('utf-8') if isinstance(t, unicode) else t)
                for t in title_tokens
            ]
            tokens += [
                quote(t.encode('utf-8') if isinstance(t, unicode) else t)
                for t in author_tokens
            ]
            url = '/search/?q=' + '+'.join(tokens)

        if not url:
            return None

        log.info('Search from %s' % (url))
        return RidiBooks.BASE_URL + url
    def _create_query(self, log, title=None, authors=None, identifiers={}):
        """ Generates the search url to use to find the book """
        isbn = check_isbn(identifiers.get('isbn', None))
        q = []
        if isbn is not None:
            # do isbn search
            q.append('Isbn=' + isbn)

        if title or authors:
            # tokenize the author and title fields from the current metadata
            title_tokens = list(self.get_title_tokens(title,
                                strip_joiners=False, strip_subtitle=True))
            author_tokens = self.get_author_tokens(authors,
                    only_first_author=True)

            # sanitize the title and author info before sending
            title_tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t) for t in title_tokens]
            author_tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t) for t in author_tokens]

            # build the query from the tokens
            if len(title_tokens):
                q.append("Title={0}".format('+'.join(title_tokens)))
            if len(author_tokens):
                q.append("Author={0}".format('+'.join(author_tokens)))

        if not q:
            return None
        # BUG FIX: join the parameter list unconditionally. Previously the
        # join only happened inside the title/authors branch, so an
        # ISBN-only query left `q` as a list and the string concatenation
        # below raised a TypeError.
        q = '&'.join(q)
        if isinstance(q, unicode):
            q = q.encode('utf-8')
        return Shelfari.BASE_URL + '/search/books?' + q
Example #6
0
 def get_cached_cover_url(self, identifiers):
     # The cover URL can only be derived from a valid ISBN; any other
     # identifier yields no cached cover.
     isbn = check_isbn(identifiers.get('isbn', None))
     if isbn is None:
         return None
     return self.COVERURL % isbn
Example #7
0
    def identify(self, log, result_queue, abort, title=None, authors=None, identifiers=None, timeout=30):
        # Assemble a whitespace-joined keyword query from title tokens,
        # first-author tokens and a validated ISBN, fetch the search
        # results and push each parsed metadata result onto the queue.
        log.debug(u'Bookradar identification started ...')

        identifiers = identifiers or {}

        tokens = []
        if title:
            tokens.extend(self.get_title_tokens(title))
        if authors:
            tokens.extend(self.get_author_tokens(authors, only_first_author=True))

        isbn = check_isbn(identifiers.get('isbn', None))
        if isbn:
            tokens.append(isbn)

        search_str = ' '.join(tokens)
        url = self.url_pattern % search_str

        log.info(u'Searching for: %s' % search_str)

        try:
            response = requests.get(url, timeout=timeout)
        except requests.exceptions.RequestException as e:
            log.exception('Failed to get data from `%s`: %s' % (url, e.message))
            return as_unicode(e)

        if abort.is_set():
            return

        for result in self.parse_response(response, isbn_initial=isbn, log=log):
            self.clean_downloaded_metadata(result)
            result_queue.put(result)
Example #8
0
 def create_query2(self, log, title=None, authors=None, identifiers={}):
     ''' The edelweiss advanced search appears to be broken, use the keyword search instead, until it is fixed. '''
     from urllib import urlencode
     BASE_URL = 'http://edelweiss.abovethetreeline.com/CatalogOverview.aspx?'
     # Fixed form parameters expected by the keyword-search endpoint.
     params = {
         'group': 'search',
         'section': 'CatalogOverview',
         'searchType': 1,
         'searchOrgID': '',
         'searchCatalogID': '',
         'searchMailingID': '',
         'searchSelect': 1,
     }
     # A valid ISBN takes precedence over any title/author keywords.
     keywords = []
     isbn = check_isbn(identifiers.get('isbn', None))
     if isbn is not None:
         keywords.append(isbn)
     elif title or authors:
         keywords.extend(self.get_title_tokens(title))
         keywords.extend(self.get_author_tokens(authors, only_first_author=True))
     if not keywords:
         return None
     params['keywords'] = ' '.join(keywords).encode('utf-8')
     return BASE_URL + urlencode(params)
Example #9
0
    def get_all_details(self, log, metadata, abort, result_queue, identifiers, timeout):  # {{{
        # For every candidate Metadata object: fetch full book details,
        # skip candidates that do not carry the ISBN the caller asked
        # for, record isbn -> ozon_id and cover-url cache mappings, then
        # queue the cleaned result. Honors the abort event between items.
        req_isbn = identifiers.get("isbn", None)

        for mi in metadata:
            if abort.is_set():
                break
            try:
                ozon_id = mi.identifiers["ozon"]

                try:
                    self.get_book_details(log, mi, timeout)
                except:
                    # Detail fetch is best-effort; continue with what we have
                    log.exception("Failed to get details for metadata: %s" % mi.title)

                all_isbns = getattr(mi, "all_isbns", [])
                # Drop candidates whose ISBN list lacks the requested ISBN.
                if req_isbn and all_isbns and check_isbn(req_isbn) not in all_isbns:
                    log.debug("skipped, no requested ISBN %s found" % req_isbn)
                    continue

                for isbn in all_isbns:
                    self.cache_isbn_to_identifier(isbn, ozon_id)

                if mi.ozon_cover_url:
                    self.cache_identifier_to_cover_url(ozon_id, mi.ozon_cover_url)

                self.clean_downloaded_metadata(mi)
                result_queue.put(mi)
            except:
                # A missing 'ozon' identifier (KeyError above) or any
                # other error simply drops this candidate.
                log.exception("Failed to get details for metadata: %s" % mi.title)
Example #10
0
 def create_query2(self, log, title=None, authors=None, identifiers={}):
     ''' The edelweiss advanced search appears to be broken, use the keyword search instead, until it is fixed. '''
     from urllib import urlencode
     BASE_URL = 'http://edelweiss.abovethetreeline.com/CatalogOverview.aspx?'
     # Fixed form parameters for the keyword-search request.
     params = {
         'group': 'search',
         'section': 'CatalogOverview',
         'searchType': 1,
         'searchOrgID': '',
         'searchCatalogID': '',
         'searchMailingID': '',
         'searchSelect': 1,
     }
     isbn = check_isbn(identifiers.get('isbn', None))
     if isbn is not None:
         # A valid ISBN is the whole query.
         terms = [isbn]
     else:
         terms = []
         if title or authors:
             terms.extend(self.get_title_tokens(title))
             terms.extend(self.get_author_tokens(authors, only_first_author=True))
     if not terms:
         return None
     params['keywords'] = ' '.join(terms).encode('utf-8')
     return BASE_URL + urlencode(params)
Example #11
0
 def parse_new_details(self, root, mi, non_hero):
     # Extract language, publisher/pubdate and ISBN from the two-column
     # product-details table inside the `non_hero` element, writing the
     # values onto the metadata object `mi`.
     table = non_hero.xpath('descendant::table')[0]
     for tr in table.xpath('descendant::tr'):
         cells = tr.xpath('descendant::td')
         if len(cells) == 2:  # rows are (field name, value) pairs
             name = self.totext(cells[0])
             val = self.totext(cells[1])
             if not val:
                 continue
             if name in self.language_names:
                 # Try the site-specific language map first, then fall
                 # back to generic canonicalization.
                 ans = self.lang_map.get(val, None)
                 if not ans:
                     ans = canonicalize_lang(val)
                 if ans:
                     mi.language = ans
             elif name in self.publisher_names:
                 # Publisher is everything before the first ';' or '(' —
                 # presumably the value looks like "Publisher; edition (date)".
                 pub = val.partition(';')[0].partition('(')[0].strip()
                 if pub:
                     mi.publisher = pub
                 # The trailing parenthesised fragment is parsed as the
                 # publication date, after delocalizing the date string.
                 date = val.rpartition('(')[-1].replace(')', '').strip()
                 try:
                     from calibre.utils.date import parse_only_date
                     date = self.delocalize_datestr(date)
                     mi.pubdate = parse_only_date(date, assume_utc=True)
                 except:
                     self.log.exception('Failed to parse pubdate: %s' % val)
             elif name in {'ISBN', 'ISBN-10', 'ISBN-13'}:
                 # Keep the ISBN only when it validates
                 ans = check_isbn(val)
                 if ans:
                     self.isbn = mi.isbn = ans
Example #12
0
def _format_isbn(log, isbn):  # {{{
    """Hyphenate a Russian ISBN into its structural groups.

    The ISBN is first normalized via check_isbn(); when it matches the
    Russian publisher layout (group identifier 5) the groups are joined
    with hyphens, otherwise the unhyphenated normalized ISBN is returned
    and an error is logged. A non-validating input is passed through
    as whatever check_isbn() returned (falsy).
    """
    # for now only RUS ISBN are supported
    # http://ru.wikipedia.org/wiki/ISBN_российских_издательств
    isbn_pat = re.compile(r"""
        ^
        (\d{3})?            # match GS1 Prefix for ISBN13
        (5)                 # group identifier for Russian-speaking countries
        (                   # begin variable length for Publisher
            [01]\d{1}|      # 2x
            [2-6]\d{2}|     # 3x
            7\d{3}|         # 4x (starting with 7)
            8[0-4]\d{2}|    # 4x (starting with 8)
            9[2567]\d{2}|   # 4x (starting with 9)
            99[26]\d{1}|    # 4x (starting with 99)
            8[5-9]\d{3}|    # 5x (starting with 8)
            9[348]\d{3}|    # 5x (starting with 9)
            900\d{2}|       # 5x (starting with 900)
            91[0-8]\d{2}|   # 5x (starting with 91)
            90[1-9]\d{3}|   # 6x (starting with 90)
            919\d{3}|       # 6x (starting with 919)
            99[^26]\d{4}    # 7x (starting with 99)
        )                   # end variable length for Publisher
        (\d+)               # Title
        ([\dX])             # Check digit
        $
    """, re.VERBOSE)

    res = check_isbn(isbn)
    if res:
        m = isbn_pat.match(res)
        if m:
            res = '-'.join([g for g in m.groups() if g])
        else:
            # typo fix: message previously read 'Fow now'
            log.error('cannot format ISBN %s. For now only russian ISBNs are supported' % isbn)
    return res
Example #13
0
 def clean_downloaded_metadata(self, mi):
     # Titles and tags from the com/uk stores need case normalization;
     # authors and ISBN are normalized for every store.
     if mi.title and self.domain in ('com', 'uk'):
         mi.title = fixcase(mi.title)
     mi.authors = fixauthors(mi.authors)
     if self.domain in ('com', 'uk'):
         mi.tags = [fixcase(tag) for tag in mi.tags]
     mi.isbn = check_isbn(mi.isbn)
Example #14
0
    def get_all_details(self, log, metadata, abort, result_queue, identifiers, timeout, cachedPagesDict={}):  # {{{
        # For every candidate Metadata object: fetch full book details
        # (reusing a pre-fetched page from cachedPagesDict when one exists
        # for the ozon_id), skip candidates that do not carry the ISBN the
        # caller asked for, record isbn -> ozon_id and cover-url cache
        # mappings, then queue the cleaned result.
        # NOTE: the mutable default cachedPagesDict is only read here,
        # never mutated, so the shared-default pitfall does not bite.
        req_isbn = identifiers.get('isbn', None)

        for mi in metadata:
            if abort.is_set():
                break
            try:
                ozon_id = mi.identifiers['ozon']

                try:
                    self.get_book_details(log, mi, timeout, cachedPagesDict[ozon_id] if cachedPagesDict and ozon_id in cachedPagesDict else None)
                except:
                    # Detail fetch is best-effort; continue with what we have
                    log.exception(u'Failed to get details for metadata: %s' % mi.title)

                all_isbns = getattr(mi, 'all_isbns', [])
                # Drop candidates whose ISBN list lacks the requested ISBN.
                if req_isbn and all_isbns and check_isbn(req_isbn) not in all_isbns:
                    log.debug(u'skipped, no requested ISBN %s found' % req_isbn)
                    continue

                for isbn in all_isbns:
                    self.cache_isbn_to_identifier(isbn, ozon_id)

                if mi.ozon_cover_url:
                    self.cache_identifier_to_cover_url(ozon_id, mi.ozon_cover_url)

                self.clean_downloaded_metadata(mi)
                result_queue.put(mi)
            except:
                # A missing 'ozon' identifier (KeyError above) or any
                # other error simply drops this candidate.
                log.exception(u'Failed to get details for metadata: %s' % mi.title)
Example #15
0
 def create_query(self, log, title=None, authors=None, identifiers={}):
     # Build the edelweiss Browse URL. A valid ISBN is used on its own;
     # otherwise title tokens are used. Author names are deliberately
     # not sent because searching with them does not work on edelweiss.
     from urllib import urlencode
     BASE_URL = 'http://edelweiss.abovethetreeline.com/Browse.aspx?source=catalog&rg=4187&group=browse&pg=0&'
     params = {
         'browseType': 'title',
         'startIndex': 0,
         'savecook': 1,
         'sord': 20,
         'secSord': 20,
         'tertSord': 20,
     }
     keywords = []
     isbn = check_isbn(identifiers.get('isbn', None))
     if isbn is not None:
         keywords.append(isbn)
     elif title:
         keywords.extend(self.get_title_tokens(title))
     if not keywords:
         return None
     params['bsk'] = ' '.join(keywords).encode('utf-8')
     return BASE_URL + urlencode(params)
Example #16
0
    def create_query(self, title=None, authors=None, identifiers={}):  # {{{
        # isbndb accepts either an isbn index or a combined title+author
        # keyword index; returns None when neither is available.
        base_url = BASE_URL % self.isbndb_key
        isbn = check_isbn(identifiers.get('isbn', None))
        q = ''
        if isbn is not None:
            q = 'index1=isbn&value1=' + isbn
        elif title or authors:
            raw_tokens = list(self.get_title_tokens(title))
            raw_tokens += list(self.get_author_tokens(authors,
                                                      only_first_author=True))
            # URL-quote each token, byte-encoding unicode first (py2).
            quoted = [
                quote(t.encode('utf-8') if isinstance(t, unicode) else t)
                for t in raw_tokens
            ]
            q = 'index1=combined&value1=' + '+'.join(quoted)

        if not q:
            return None
        if isinstance(q, unicode):
            q = q.encode('utf-8')
        return base_url + q
Example #17
0
File: odt.py Project: sss/calibre
def get_metadata(stream, extract_cover=True):
    """Read metadata from an ODT document stream.

    Parses meta.xml from the zip container and maps the standard
    OpenDocument fields onto a MetaInformation object. When the custom
    'opf.metadata' user-defined field is 'true', additional opf.* fields
    override the standard ones. Cover-reading errors are ignored so they
    never prevent returning the rest of the metadata.
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    # `in` instead of the py3-removed dict.has_key(), consistent with the
    # .get() calls already used below.
    if 'title' in data:
        mi.title = data['title']
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata', '') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    # Unparseable index: fall back to the default
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except:
            pass  # Do not let an error reading the cover prevent reading other data

    return mi
Example #18
0
 def clean_downloaded_metadata(self, mi):
     # Results from the com/uk stores come back with inconsistent
     # casing; normalize title and tags there. Authors and ISBN are
     # normalized for every store.
     fix_case = self.domain in ('com', 'uk')
     if fix_case and mi.title:
         mi.title = fixcase(mi.title)
     mi.authors = fixauthors(mi.authors)
     if fix_case:
         mi.tags = list(map(fixcase, mi.tags))
     mi.isbn = check_isbn(mi.isbn)
Example #19
0
    def create_query(self,
                     log,
                     title=None,
                     authors=None,
                     identifiers={}):  # {{{
        # Google Books volumes-feed query: prefer an exact isbn: term,
        # otherwise combine intitle:/inauthor: keyword terms.
        from urllib import urlencode
        BASE_URL = 'https://books.google.com/books/feeds/volumes?'
        isbn = check_isbn(identifiers.get('isbn', None))
        q = ''
        if isbn is not None:
            q += 'isbn:' + isbn
        elif title or authors:

            def make_term(prefix, parts):
                return ' '.join('in' + prefix + ':' + p for p in parts)

            title_tokens = list(self.get_title_tokens(title))
            if title_tokens:
                q += make_term('title', title_tokens)
            author_tokens = self.get_author_tokens(authors,
                                                   only_first_author=True)
            if author_tokens:
                q += ('+' if q else '') + make_term('author', author_tokens)

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        if not q:
            return None
        return BASE_URL + urlencode({
            'q': q,
            'max-results': 20,
            'start-index': 1,
            'min-viewability': 'none',
        })
Example #20
0
 def create_query(self, log, title=None, authors=None, identifiers={}):
     # Keyword search against the edelweiss.plus listview endpoint; a
     # valid ISBN wins over title/author tokens. The '_' parameter is
     # the current timestamp.
     try:
         from urllib.parse import urlencode
     except ImportError:
         from urllib import urlencode
     import time
     BASE_URL = ('https://www.edelweiss.plus/GetTreelineControl.aspx?'
     'controlName=/uc/listviews/controls/ListView_data.ascx&itemID=0&resultType=32&dashboardType=8&itemType=1&dataType=products&keywordSearch&')
     keywords = []
     isbn = check_isbn(identifiers.get('isbn', None))
     if isbn is not None:
         keywords.append(isbn)
     elif title:
         keywords.extend(self.get_title_tokens(title))
         keywords.extend(self.get_author_tokens(authors, only_first_author=True))
     if not keywords:
         return None
     params = {
         'q': ' '.join(keywords).encode('utf-8'),
         '_': str(int(time.time())),
     }
     return BASE_URL + urlencode(params)
Example #21
0
    def identify(self, log, result_queue, abort, title=None, authors=None,
              identifiers={}, timeout=30):
        # Run the source-specific worker, poll until it finishes or the
        # caller aborts, then convert each raw answer to metadata and
        # queue it in relevance order.

        md = self.worker_class(self.browser, timeout)

        # Build the query dict from whatever pieces are available.
        d = {}
        idval = identifiers.get(self.idkey, None)
        isbn = identifiers.get('isbn', None)

        if idval: d['id'] = idval
        if isbn: d['isbn'] = isbn
        if title: d['title'] = title
        if authors: d['authors'] = authors

        md.query(d, maxresults = self.maxresults)

        # Poll the worker in 0.2s steps so an abort is noticed promptly.
        while not abort.is_set():
            md.join(0.2)
            if abort.is_set(): break
            if not md.is_alive(): break

        # Unconditional delay before returning -- presumably rate
        # limiting for the remote service; confirm against the plugin's
        # sleep_time definition.
        time.sleep(self.sleep_time)

        if not abort.is_set():
            for i in range(0,len(md.ans)):
                mi = self.data2mi(md.ans[i])
                mi.source_relevance = i                # Less means more relevant.
                mi.isbn = check_isbn(mi.isbn)

                result_queue.put(mi)
        return None
Example #22
0
    def create_query(self, log, title=None, authors=None, identifiers={}, domain=None):  # {{{
        """Build the Amazon search URL for the given metadata.

        Returns a (url, domain) tuple, or (None, None) when there is not
        enough metadata to search on.
        """
        from urllib import urlencode

        if domain is None:
            domain = self.domain

        # An ASIN identifier can pin both the item and the store domain.
        idomain, asin = self.get_domain_and_asin(identifiers)
        if idomain is not None:
            domain = idomain

        # See the amazon detailed search page to get all options
        q = {"search-alias": "aps", "unfiltered": "1"}

        if domain == "com":
            q["sort"] = "relevanceexprank"
        else:
            q["sort"] = "relevancerank"

        isbn = check_isbn(identifiers.get("isbn", None))

        # Precedence: ASIN > ISBN > title/author keyword fields.
        if asin is not None:
            q["field-keywords"] = asin
        elif isbn is not None:
            q["field-isbn"] = isbn
        else:
            # Only return book results
            q["search-alias"] = {"br": "digital-text", "nl": "aps"}.get(domain, "stripbooks")
            if title:
                title_tokens = list(self.get_title_tokens(title))
                if title_tokens:
                    q["field-title"] = " ".join(title_tokens)
            if authors:
                author_tokens = self.get_author_tokens(authors, only_first_author=True)
                if author_tokens:
                    q["field-author"] = " ".join(author_tokens)

        if not ("field-keywords" in q or "field-isbn" in q or ("field-title" in q)):
            # Insufficient metadata to make an identify query
            return None, None

        # magic parameter to enable Japanese Shift_JIS encoding.
        if domain == "jp":
            q["__mk_ja_JP"] = "カタカナ"
        if domain == "nl":
            q["__mk_nl_NL"] = "ÅMÅŽÕÑ"
            # Collapse the field-specific values into field-keywords for
            # the nl store.
            if "field-keywords" not in q:
                q["field-keywords"] = ""
            for f in "field-isbn field-title field-author".split():
                q["field-keywords"] += " " + q.pop(f, "")
            q["field-keywords"] = q["field-keywords"].strip()

        # Percent-encode the query in the charset of the target store.
        if domain == "jp":
            encode_to = "Shift_JIS"
        elif domain == "nl":
            encode_to = "utf-8"
        else:
            encode_to = "latin1"
        encoded_q = dict([(x.encode(encode_to, "ignore"), y.encode(encode_to, "ignore")) for x, y in q.iteritems()])
        url = "http://www.amazon.%s/s/?" % self.get_website_domain(domain) + urlencode(encoded_q)
        return url, domain
Example #23
0
    def create_query(self, log, title=None, authors=None, identifiers={}):  # {{{
        # Query the Google Books volumes feed, preferring an exact
        # isbn: term over intitle:/inauthor: keyword terms.
        from urllib import urlencode
        BASE_URL = 'https://books.google.com/books/feeds/volumes?'
        isbn = check_isbn(identifiers.get('isbn', None))
        q = ''
        if isbn is not None:
            q = 'isbn:' + isbn
        elif title or authors:

            def make_term(prefix, parts):
                return ' '.join('in' + prefix + ':' + p for p in parts)

            title_tokens = list(self.get_title_tokens(title))
            if title_tokens:
                q = make_term('title', title_tokens)
            author_tokens = self.get_author_tokens(authors,
                    only_first_author=True)
            if author_tokens:
                q += ('+' if q else '') + make_term('author', author_tokens)

        if isinstance(q, unicode):
            q = q.encode('utf-8')
        if not q:
            return None
        return BASE_URL + urlencode({
            'q': q,
            'max-results': 20,
            'start-index': 1,
            'min-viewability': 'none',
        })
Example #24
0
    def create_query(self, log, title=None, authors=None, identifiers={}):  # {{{
        from urllib import urlencode

        # The advanced-search form wants every condition/keyword slot
        # present in the request, even when empty; the real values go
        # into the isbn slot or the title/author keyword slots.
        BASE_URL = "http://edelweiss.abovethetreeline.com/CatalogOverview.aspx?"
        params = {"group": "search", "searchType": 999, "searchOrgID": "", "dateRange": 0, "isbn": ""}
        for slot in (0, 1, 2, 3, 4, 5, 6, 200, 201, 202, 204):
            params["condition%d" % slot] = 1
            params["keywords%d" % slot] = ""
        title_key, author_key = "keywords200", "keywords201"

        isbn = check_isbn(identifiers.get("isbn", None))
        have_query = False
        if isbn is not None:
            params["isbn"] = isbn
            have_query = True
        elif title or authors:
            t_tokens = list(self.get_title_tokens(title))
            if t_tokens:
                params[title_key] = " ".join(t_tokens)
                have_query = True
            a_tokens = self.get_author_tokens(authors, only_first_author=True)
            if a_tokens:
                params[author_key] = " ".join(a_tokens)
                have_query = True

        if not have_query:
            return None

        # urlencode needs byte strings under py2
        for key in (title_key, author_key, "isbn"):
            value = params[key]
            if isinstance(value, unicode):
                params[key] = value.encode("utf-8")

        return BASE_URL + urlencode(params)
Example #25
0
def _format_isbn(log, isbn):  # {{{
    """Hyphenate a Russian ISBN into GS1/group/publisher/title/check parts.

    *isbn* is first normalized with check_isbn(); if that fails the result
    (possibly None) is returned unchanged.  Valid ISBNs that do not match
    the Russian (group 5) publisher ranges are returned unhyphenated and
    an error is logged.
    """
    # for now only RUS ISBN are supported
    # http://ru.wikipedia.org/wiki/ISBN_российских_издательств
    isbn_pat = re.compile(r"""
        ^
        (\d{3})?            # match GS1 Prefix for ISBN13
        (5)                 # group identifier for Russian-speaking countries
        (                   # begin variable length for Publisher
            [01]\d{1}|      # 2x
            [2-6]\d{2}|     # 3x
            7\d{3}|         # 4x (starting with 7)
            8[0-4]\d{2}|    # 4x (starting with 8)
            9[2567]\d{2}|   # 4x (starting with 9)
            99[26]\d{1}|    # 4x (starting with 99)
            8[5-9]\d{3}|    # 5x (starting with 8)
            9[348]\d{3}|    # 5x (starting with 9)
            900\d{2}|       # 5x (starting with 900)
            91[0-8]\d{2}|   # 5x (starting with 91)
            90[1-9]\d{3}|   # 6x (starting with 90)
            919\d{3}|       # 6x (starting with 919)
            99[^26]\d{4}    # 7x (starting with 99)
        )                   # end variable length for Publisher
        (\d+)               # Title
        ([\dX])             # Check digit
        $
    """, re.VERBOSE)

    res = check_isbn(isbn)
    if res:
        m = isbn_pat.match(res)
        if m:
            res = '-'.join([g for g in m.groups() if g])
        else:
            # Fixed typo in the logged message: 'Fow now' -> 'For now'.
            log.error('cannot format ISBN %s. For now only russian ISBNs are supported' % isbn)
    return res
Example #26
0
    def get_all_details(self, log, metadata, abort, result_queue, identifiers,
                        timeout):  # {{{
        """Fetch full book details for each candidate and queue the results.

        Candidates whose ISBN list does not contain the requested ISBN are
        skipped; all others are cleaned and put on *result_queue*.  Failures
        for one candidate are logged and do not stop the loop.
        """
        req_isbn = identifiers.get('isbn', None)

        for mi in metadata:
            # Stop promptly if the caller aborted the identify job.
            if abort.is_set():
                break
            try:
                ozon_id = mi.identifiers['ozon']

                # Best-effort detail fetch: a failure here still lets the
                # candidate proceed with whatever metadata it already has.
                try:
                    self.get_book_details(log, mi, timeout)
                except:
                    log.exception(u'Failed to get details for metadata: %s' %
                                  mi.title)

                # mi.all_isbns may have been populated by get_book_details.
                all_isbns = getattr(mi, 'all_isbns', [])
                if req_isbn and all_isbns and check_isbn(
                        req_isbn) not in all_isbns:
                    log.debug(u'skipped, no requested ISBN %s found' %
                              req_isbn)
                    continue

                # Remember every ISBN -> ozon id mapping for later lookups.
                for isbn in all_isbns:
                    self.cache_isbn_to_identifier(isbn, ozon_id)

                if mi.ozon_cover_url:
                    self.cache_identifier_to_cover_url(ozon_id,
                                                       mi.ozon_cover_url)

                self.clean_downloaded_metadata(mi)
                result_queue.put(mi)
            except:
                log.exception(u'Failed to get details for metadata: %s' %
                              mi.title)
Example #27
0
 def test(mi):
     """Return True iff mi.isbn normalizes to the expected ISBN (isbn_)."""
     found = check_isbn(mi.isbn)
     if found and found == isbn_:
         return True
     prints('ISBN test failed. Expected: \'%s\' found \'%s\'' %
            (isbn_, found))
     return False
Example #28
0
	def create_query(self, log, title=None, authors=None, identifiers={}):
		"""Build an ISFDB search URL from ISBN, title and/or author.

		Returns an ISBN lookup URL when an ISBN is available, an advanced
		search URL otherwise, or None when there is nothing to search on.
		"""
		isbn = check_isbn(identifiers.get('isbn', None))
		if isbn is not None:
			return '%stype=ISBN&arg=%s' % (ISFDB.SEARCH_URL, isbn)

		# Initialize up front so the logging and the final checks are safe
		# when title or authors is absent (the original code raised a
		# NameError in that case).
		title_tokens = []
		author_tokens = []
		search_title = ''
		search_author = ''
		if title:
			title = title.replace('?', '')
			title_tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t)
				for t in self.get_title_tokens(title, strip_joiners=False, strip_subtitle=True)]
			search_title = '+'.join(title_tokens)
		if authors:
			author_tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t)
				for t in self.get_author_tokens(authors, only_first_author=True)]
			search_author = '+'.join(author_tokens)

		log.info("TITLE %s" % search_title)
		log.info("AUTHOR %s" % search_author)

		if not title_tokens and not author_tokens:
			return None
		elif title_tokens and author_tokens:
			# Currently the third term is unused; we could pass in a publisher.
			return '%sUSE_1=pub_title&OPERATOR_1=contains&TERM_1=%s&CONJUNCTION_1=AND' \
			'&USE_2=author_canonical&OPERATOR_2=contains&TERM_2=%s' \
			'&ORDERBY=pub_title&START=0&TYPE=Publication' % (ISFDB.ADV_SEARCH_URL, search_title, search_author)
		elif title_tokens:
			return '%sUSE_1=pub_title&OPERATOR_1=contains&TERM_1=%s' \
			'&ORDERBY=pub_title&START=0&TYPE=Publication' % (ISFDB.ADV_SEARCH_URL, search_title)
		elif author_tokens:
			return '%sUSE_1=author_canonical&OPERATOR_1=contains&TERM_1=%s' \
			'&ORDERBY=pub_title&START=0&TYPE=Publication' % (ISFDB.ADV_SEARCH_URL, search_author)
Example #29
0
 def get_book_url(self, identifiers):
     """Return ('aladin.co.kr', id, url) for these identifiers, preferring ISBN."""
     book_id = identifiers.get('aladin.co.kr', None)
     isbn = check_isbn(identifiers.get('isbn', None))
     if isbn:
         url = '%s/shop/wproduct.aspx?ISBN=%s' % (Aladin_co_kr.BASE_URL, isbn)
         return ('aladin.co.kr', book_id, url)
     if book_id:
         url = '%s/shop/wproduct.aspx?ItemId=%s' % (Aladin_co_kr.BASE_URL, book_id)
         return ('aladin.co.kr', book_id, url)
Example #30
0
 def accept(self):
     """Accept the dialog only after the entered ISBN validates."""
     isbn = unicode(self.line_edit.text())
     if check_isbn(isbn):
         QDialog.accept(self)
     else:
         return error_dialog(self, _('Invalid ISBN'),
                 _('The ISBN you entered is not valid. Try again.'),
                 show=True)
Example #31
0
def get_metadata(stream, extract_cover=True):
    """Parse ODF metadata (meta.xml) from *stream* into a MetaInformation.

    Standard ODF fields are mapped first; user-defined fields prefixed
    with 'opf.' override them.  The cover is read unless opf.nocover is
    set or *extract_cover* is False.
    """
    zin = zipfile.ZipFile(stream, 'r')
    odfs = odfmetaparser()
    parser = xml.sax.make_parser()
    parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    parser.setContentHandler(odfs)
    content = zin.read('meta.xml')
    parser.parse(StringIO(content))
    data = odfs.seenfields
    mi = MetaInformation(None, [])
    if 'title' in data:
        mi.title = data['title']
    # Prefer the document's initial-creator over the (later) creator field.
    if data.get('initial-creator', '').strip():
        mi.authors = string_to_authors(data['initial-creator'])
    elif 'creator' in data:
        mi.authors = string_to_authors(data['creator'])
    if 'description' in data:
        mi.comments = data['description']
    if 'language' in data:
        mi.language = data['language']
    if data.get('keywords', ''):
        mi.tags = [x.strip() for x in data['keywords'].split(',') if x.strip()]
    opfmeta = False  # we need this later for the cover
    opfnocover = False
    if data.get('opf.metadata','') == 'true':
        # custom metadata contains OPF information
        opfmeta = True
        if data.get('opf.titlesort', ''):
            mi.title_sort = data['opf.titlesort']
        if data.get('opf.authors', ''):
            mi.authors = string_to_authors(data['opf.authors'])
        if data.get('opf.authorsort', ''):
            mi.author_sort = data['opf.authorsort']
        if data.get('opf.isbn', ''):
            # Only accept the ISBN if it passes the checksum test.
            isbn = check_isbn(data['opf.isbn'])
            if isbn is not None:
                mi.isbn = isbn
        if data.get('opf.publisher', ''):
            mi.publisher = data['opf.publisher']
        if data.get('opf.pubdate', ''):
            mi.pubdate = parse_date(data['opf.pubdate'], assume_utc=True)
        if data.get('opf.series', ''):
            mi.series = data['opf.series']
            # A series index only makes sense when a series is present.
            if data.get('opf.seriesindex', ''):
                try:
                    mi.series_index = float(data['opf.seriesindex'])
                except ValueError:
                    # Unparsable index: fall back to the default 1.0.
                    mi.series_index = 1.0
        if data.get('opf.language', ''):
            cl = canonicalize_lang(data['opf.language'])
            if cl:
                mi.languages = [cl]
        opfnocover = data.get('opf.nocover', 'false') == 'true'
    if not opfnocover:
        try:
            read_cover(stream, zin, mi, opfmeta, extract_cover)
        except:
            pass  # Do not let an error reading the cover prevent reading other data

    return mi
Example #32
0
 def create_query(self, log, title=None, authors=None, identifiers={}):
     """Build a jd.com book-search URL from ISBN or title (+ optional authors).

     Returns None when there is nothing to search on.
     """
     try:
         from urllib.parse import urlencode
     except ImportError:
         from urllib import urlencode
     # Removed unused 'import time' present in the original.
     BASE_URL = 'https://search.jd.com/Search?'
     keywords = []
     isbn = check_isbn(identifiers.get('isbn', None))
     if isbn is not None:
         keywords.append(isbn)
     elif title:
         title_tokens = list(self.get_title_tokens(title))
         if title_tokens:
             keywords.extend(title_tokens)
         # Author tokens are only added when the user preference allows it.
         if self.prefs['add_authors']:
             author_tokens = self.get_author_tokens(authors,
                                                    only_first_author=True)
             if author_tokens:
                 keywords.extend(author_tokens)
     if not keywords:
         return None
     word = (' '.join(keywords)).encode('utf-8')
     # 'book': 'y' restricts results to books; 'wp' mirrors the keyword.
     params = {'keyword': word, 'enc': 'utf-8', 'wp': word, 'book': 'y'}
     return BASE_URL + urlencode(params)
Example #33
0
 def parse_new_details(self, root, mi, non_hero):
     """Parse the product-details table into *mi* (language, publisher/pubdate, ISBN)."""
     table = non_hero.xpath('descendant::table')[0]
     for tr in table.xpath('descendant::tr'):
         cells = tr.xpath('descendant::td')
         # Detail rows are (name, value) pairs of exactly two cells.
         if len(cells) == 2:
             name = self.totext(cells[0])
             val = self.totext(cells[1])
             if not val:
                 continue
             if name in self.language_names:
                 # Try the site-specific language map first, then the generic resolver.
                 ans = self.lang_map.get(val, None)
                 if not ans:
                     ans = canonicalize_lang(val)
                 if ans:
                     mi.language = ans
             elif name in self.publisher_names:
                 # Value appears to be 'Publisher; edition (date)' — split accordingly.
                 pub = val.partition(';')[0].partition('(')[0].strip()
                 if pub:
                     mi.publisher = pub
                 date = val.rpartition('(')[-1].replace(')', '').strip()
                 try:
                     from calibre.utils.date import parse_only_date
                     date = self.delocalize_datestr(date)
                     mi.pubdate = parse_only_date(date, assume_utc=True)
                 except:
                     self.log.exception('Failed to parse pubdate: %s' % val)
             elif name in {'ISBN', 'ISBN-10', 'ISBN-13'}:
                 ans = check_isbn(val)
                 if ans:
                     self.isbn = mi.isbn = ans
Example #34
0
 def create_query(self, log, title=None, authors=None, identifiers={}):
     """Build an Edelweiss+ keyword-search URL; None when nothing to search on."""
     try:
         from urllib.parse import urlencode
     except ImportError:
         from urllib import urlencode
     import time
     BASE_URL = (
         'https://www.edelweiss.plus/GetTreelineControl.aspx?'
         'controlName=/uc/listviews/controls/ListView_data.ascx&itemID=0&resultType=32&dashboardType=8&itemType=1&dataType=products&keywordSearch&'
     )
     terms = []
     isbn = check_isbn(identifiers.get('isbn', None))
     if isbn is not None:
         terms.append(isbn)
     elif title:
         terms.extend(self.get_title_tokens(title))
         terms.extend(self.get_author_tokens(authors,
                                             only_first_author=True))
     if not terms:
         return None
     # '_' is a cache-busting timestamp parameter.
     params = {
         'q': ' '.join(terms).encode('utf-8'),
         '_': type('')(int(time.time()))
     }
     return BASE_URL + urlencode(params)
Example #35
0
    def get_book_detail(self, br, metadata_url, mi, ovrdrv_id, log):
        """Scrape the OverDrive metadata page at *metadata_url* into *mi*.

        Returns False when the page cannot be fetched (HTTP 404) or parsed;
        otherwise fills pubdate, language, ISBN, tags and comments and
        returns None.
        """
        from html5_parser import parse
        from lxml import html
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.library.comments import sanitize_comments_html

        try:
            raw = br.open_novisit(metadata_url).read()
        except Exception as e:
            # A 404 just means no details exist; any other error propagates.
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                return False
            raise
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]

        try:
            root = parse(raw, maybe_xhtml=False, sanitize_names=True)
        except Exception:
            return False

        # The page exposes its fields via ASP.NET control ids.
        pub_date = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblPubDate']/text()")
        lang = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblLanguage']/text()")
        subjects = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblSubjects']/text()")
        ebook_isbn = root.xpath("//td/label[@id='ctl00_ContentPlaceHolder1_lblIdentifier']/text()")
        desc = root.xpath("//div/label[@id='ctl00_ContentPlaceHolder1_lblDescription']/ancestor::div[1]")

        if pub_date:
            from calibre.utils.date import parse_date
            try:
                mi.pubdate = parse_date(pub_date[0].strip())
            except:
                pass
        if lang:
            # Only a handful of languages are mapped; others are dropped.
            lang = lang[0].strip().lower()
            lang = {'english':'eng', 'french':'fra', 'german':'deu',
                    'spanish':'spa'}.get(lang, None)
            if lang:
                mi.language = lang

        if ebook_isbn:
            # print "ebook isbn is "+str(ebook_isbn[0])
            isbn = check_isbn(ebook_isbn[0].strip())
            if isbn:
                self.cache_isbn_to_identifier(isbn, ovrdrv_id)
                mi.isbn = isbn
        if subjects:
            mi.tags = [tag.strip() for tag in subjects[0].split(',')]

        if desc:
            desc = desc[0]
            desc = html.tostring(desc, method='html', encoding='unicode').strip()
            # remove all attributes from tags
            desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
            # Remove comments
            desc = re.sub(r'(?s)<!--.*?-->', '', desc)
            mi.comments = sanitize_comments_html(desc)

        return None
Example #36
0
 def clean_downloaded_metadata(self, mi):
     """Normalize title/author/tag case and the ISBN of a downloaded record."""
     # Re-case only English records (or unknown language on com/uk stores).
     recase = mi.language == "eng" or (mi.is_null("language") and self.domain in {"com", "uk"})
     if recase and mi.title:
         mi.title = fixcase(mi.title)
     mi.authors = fixauthors(mi.authors)
     if recase and mi.tags:
         mi.tags = [fixcase(t) for t in mi.tags]
     mi.isbn = check_isbn(mi.isbn)
Example #37
0
 def clean_downloaded_metadata(self, mi):
     """Normalize title/author/tag case and the ISBN of a downloaded record."""
     # Only English-language records are re-cased.
     recase = (mi.language == 'eng')
     if recase and mi.title:
         mi.title = fixcase(mi.title)
     mi.authors = fixauthors(mi.authors)
     if recase and mi.tags:
         mi.tags = [fixcase(t) for t in mi.tags]
     mi.isbn = check_isbn(mi.isbn)
Example #38
0
def _format_isbn(log, isbn):  # {{{
    """Return *isbn* normalized and hyphenated per isbn_pat, or unchanged if unmatched."""
    normalized = check_isbn(isbn)
    if not normalized:
        return normalized
    match = isbn_pat.match(normalized)
    if match is None:
        log.error('cannot format isbn %s'%isbn)
        return normalized
    return '-'.join(g for g in match.groups() if g)
Example #39
0
 def parse_isbn(self, pd):
     """Scan a product-details node for a valid ISBN in element tail text."""
     candidates = pd.xpath('descendant::*[starts-with(text(), "ISBN")]')
     if not candidates:
         candidates = pd.xpath('descendant::b[contains(text(), "ISBN:")]')
     # Walk from the last match backwards; return the first valid ISBN found.
     for node in reversed(candidates):
         if not node.tail:
             continue
         isbn = check_isbn(node.tail.strip())
         if isbn:
             return isbn
Example #40
0
 def accept(self):
     """Accept the dialog, refusing with an error popup when the ISBN is invalid."""
     isbn = unicode(self.line_edit.text())
     if check_isbn(isbn):
         QDialog.accept(self)
     else:
         return error_dialog(
             self,
             _('Invalid ISBN'),
             _('The ISBN you entered is not valid. Try again.'),
             show=True)
Example #41
0
 def parse_isbn(self, pd):
     """Find a valid ISBN in the tail text of ISBN-labelled elements under *pd*."""
     nodes = pd.xpath('descendant::*[starts-with(text(), "ISBN")]')
     if not nodes:
         nodes = pd.xpath('descendant::b[contains(text(), "ISBN:")]')
     for node in reversed(nodes):
         tail = node.tail
         if tail:
             candidate = check_isbn(tail.strip())
             if candidate:
                 return candidate
Example #42
0
    def create_query(self,
                     log,
                     title=None,
                     authors=None,
                     identifiers={}):  # {{{
        """Build a Douban API URL from an ISBN, a douban id, or title/author.

        Returns None when there is nothing to query on.
        """
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode
        SEARCH_URL = 'https://api.douban.com/v2/book/search?count=10&'
        ISBN_URL = 'https://api.douban.com/v2/book/isbn/'
        SUBJECT_URL = 'https://api.douban.com/v2/book/'

        isbn = check_isbn(identifiers.get('isbn', None))
        subject = identifiers.get('douban', None)
        if isbn is not None:
            q, t = isbn, 'isbn'
        elif subject is not None:
            q, t = subject, 'subject'
        else:
            q, t = '', None
            if title or authors:
                def build_term(prefix, parts):
                    return ' '.join(x for x in parts)

                title_tokens = list(self.get_title_tokens(title))
                if title_tokens:
                    q += build_term('title', title_tokens)
                author_tokens = list(
                    self.get_author_tokens(authors, only_first_author=True))
                if author_tokens:
                    q += ((' ' if q != '' else '') +
                          build_term('author', author_tokens))
                t = 'search'
        q = q.strip()
        # The API expects UTF-8 byte strings.
        if isinstance(q, type(u'')):
            q = q.encode('utf-8')
        if not q:
            return None
        if t == 'isbn':
            url = ISBN_URL + q
        elif t == 'subject':
            url = SUBJECT_URL + q
        else:
            url = SEARCH_URL + urlencode({'q': q})
        # isbn/subject URLs have no query string yet, so start with '?'.
        if self.DOUBAN_API_KEY and self.DOUBAN_API_KEY != '':
            sep = '?' if t in ('isbn', 'subject') else '&'
            url = url + sep + 'apikey=' + self.DOUBAN_API_KEY
        return url
Example #43
0
    def create_queries(self, log, title=None, authors=None, identifiers={}):
        """Return one SRU query: an ISBN search when possible, else title+author."""
        isbn = check_isbn(identifiers.get('isbn', None))
        if isbn is not None:
            return ['bath.isbn=%s' % (isbn)]
        return ['dc.title=%s and dc.author=%s' % (title, authors)]
Example #44
0
    def get_cached_cover_url(self, identifiers):
        """Return a cached cover URL keyed by databazeknih id, falling back to ISBN."""
        key = identifiers.get("databazeknih", None)
        if key is None:
            key = check_isbn(identifiers.get("isbn", None))
        return self.cached_identifier_to_cover_url(key)
Example #45
0
def _format_isbn(log, isbn):  # {{{
    """Normalize *isbn* and hyphenate it with the module-level isbn_pat."""
    normalized = check_isbn(isbn)
    if normalized:
        match = isbn_pat.match(normalized)
        if match:
            normalized = '-'.join(g for g in match.groups() if g)
        else:
            log.error('cannot format isbn %s' % isbn)
    return normalized
Example #46
0
 def clean_downloaded_metadata(self, mi):
     """Normalize case, authors and ISBN of a downloaded Metadata object."""
     # English records, or unknown-language records from com/uk, get re-cased.
     recase = (mi.language == 'eng'
               or (mi.is_null('language') and self.domain in {'com', 'uk'}))
     if recase and mi.title:
         mi.title = fixcase(mi.title)
     mi.authors = fixauthors(mi.authors)
     if recase and mi.tags:
         mi.tags = [fixcase(t) for t in mi.tags]
     mi.isbn = check_isbn(mi.isbn)
Example #47
0
def _parse_isbn(root, mi, ctx):
    """Set mi.isbn from //fb:publish-info/fb:isbn when it holds a valid ISBN."""
    isbn = ctx.XPath('normalize-space(//fb:publish-info/fb:isbn/text())')(root)
    if not isbn:
        return
    # Several comma-separated ISBNs are not allowed; keep only the first one.
    isbn = isbn.split(',', 1)[0]
    if check_isbn(isbn):
        mi.isbn = isbn
Example #48
0
    def create_query(self, log, title=None, authors=None, identifiers={},  # {{{
            domain=None):
        """Build an Amazon search URL for the given metadata.

        Returns (url, domain); (None, None) when there is not enough
        metadata to search on.
        """
        from urllib import urlencode
        if domain is None:
            domain = self.domain

        # An ASIN identifier may also pin the query to a specific domain.
        idomain, asin = self.get_domain_and_asin(identifiers)
        if idomain is not None:
            domain = idomain

        # See the amazon detailed search page to get all options
        q = {'search-alias': 'aps',
             'unfiltered': '1',
            }

        if domain == 'com':
            q['sort'] = 'relevanceexprank'
        else:
            q['sort'] = 'relevancerank'

        isbn = check_isbn(identifiers.get('isbn', None))

        # Prefer ASIN, then ISBN, then a title/author keyword search.
        if asin is not None:
            q['field-keywords'] = asin
        elif isbn is not None:
            q['field-isbn'] = isbn
        else:
            # Only return book results
            q['search-alias'] = 'digital-text' if domain == 'br' else 'stripbooks'
            if title:
                title_tokens = list(self.get_title_tokens(title))
                if title_tokens:
                    q['field-title'] = ' '.join(title_tokens)
            if authors:
                author_tokens = self.get_author_tokens(authors,
                        only_first_author=True)
                if author_tokens:
                    q['field-author'] = ' '.join(author_tokens)

        if not ('field-keywords' in q or 'field-isbn' in q or
                ('field-title' in q)):
            # Insufficient metadata to make an identify query
            return None, None

        # magic parameter to enable Japanese Shift_JIS encoding.
        if domain == 'jp':
            q['__mk_ja_JP'] = u'カタカナ'

        if domain == 'jp':
            encode_to = 'Shift_JIS'
        else:
            encode_to = 'latin1'
        # urlencode needs byte strings; encode with the per-domain charset.
        encoded_q = dict([(x.encode(encode_to, 'ignore'), y.encode(encode_to,
            'ignore')) for x, y in
            q.iteritems()])
        url = 'http://www.amazon.%s/s/?'%self.get_website_domain(domain) + urlencode(encoded_q)
        return url, domain
Example #49
0
def _parse_isbn(root, mi, ctx):
    """Extract the first ISBN from fb:publish-info and store it on mi if valid."""
    raw = ctx.XPath('normalize-space(//fb:publish-info/fb:isbn/text())')(root)
    if raw:
        # Only one ISBN is allowed here; truncate at the first comma if present.
        candidate = raw.partition(',')[0]
        if check_isbn(candidate):
            mi.isbn = candidate
Example #50
0
    def create_query(self, log, title=None, authors=None, identifiers={},  # {{{
            domain=None):
        """Build an Amazon search URL for the given metadata.

        Returns (url, domain); (None, None) when there is not enough
        metadata to search on.
        """
        from urllib import urlencode
        if domain is None:
            domain = self.domain

        # An ASIN identifier may also pin the query to a specific domain.
        idomain, asin = self.get_domain_and_asin(identifiers)
        if idomain is not None:
            domain = idomain

        # See the amazon detailed search page to get all options
        q = {'search-alias': 'aps',
             'unfiltered': '1',
            }

        if domain == 'com':
            q['sort'] = 'relevanceexprank'
        else:
            q['sort'] = 'relevancerank'

        isbn = check_isbn(identifiers.get('isbn', None))

        # Prefer ASIN, then ISBN, then a title/author keyword search.
        if asin is not None:
            q['field-keywords'] = asin
        elif isbn is not None:
            q['field-isbn'] = isbn
        else:
            # Only return book results
            q['search-alias'] = 'digital-text' if domain == 'br' else 'stripbooks'
            if title:
                title_tokens = list(self.get_title_tokens(title))
                if title_tokens:
                    q['field-title'] = ' '.join(title_tokens)
            if authors:
                author_tokens = self.get_author_tokens(authors,
                        only_first_author=True)
                if author_tokens:
                    q['field-author'] = ' '.join(author_tokens)

        if not ('field-keywords' in q or 'field-isbn' in q or
                ('field-title' in q)):
            # Insufficient metadata to make an identify query
            return None, None

        # magic parameter to enable Japanese Shift_JIS encoding.
        if domain == 'jp':
            q['__mk_ja_JP'] = u'カタカナ'

        if domain == 'jp':
            encode_to = 'Shift_JIS'
        else:
            encode_to = 'latin1'
        # urlencode needs byte strings; encode with the per-domain charset.
        encoded_q = dict([(x.encode(encode_to, 'ignore'), y.encode(encode_to,
            'ignore')) for x, y in
            q.iteritems()])
        url = 'http://www.amazon.%s/s/?'%self.get_website_domain(domain) + urlencode(encoded_q)
        return url, domain
Example #51
0
    def create_query(self, log, title=None, authors=None, identifiers={}):  # {{{
        """Build a Douban API URL from an ISBN, a douban id, or title/author.

        Returns None when there is nothing to query on.
        """
        try:
            from urllib.parse import urlencode
        except ImportError:
            from urllib import urlencode
        SEARCH_URL = "https://api.douban.com/v2/book/search?count=10&"
        ISBN_URL = "https://api.douban.com/v2/book/isbn/"
        SUBJECT_URL = "https://api.douban.com/v2/book/"

        q = ""
        t = None
        # Priority: ISBN, then douban subject id, then free-text search.
        isbn = check_isbn(identifiers.get("isbn", None))
        subject = identifiers.get("douban", None)
        if isbn is not None:
            q = isbn
            t = "isbn"
        elif subject is not None:
            q = subject
            t = "subject"
        elif title or authors:

            def build_term(prefix, parts):
                return " ".join(x for x in parts)

            title_tokens = list(self.get_title_tokens(title))
            if title_tokens:
                q += build_term("title", title_tokens)
            author_tokens = list(
                self.get_author_tokens(authors, only_first_author=True)
            )
            if author_tokens:
                q += (" " if q != "" else "") + build_term("author", author_tokens)
            t = "search"
        q = q.strip()
        # if isinstance(q, type("")):
        #    q = q.encode("utf-8")
        q = str(q)
        if not q:
            return None
        url = None
        if t == "isbn":
            url = ISBN_URL + q
        elif t == "subject":
            url = SUBJECT_URL + q
        else:
            url = SEARCH_URL + urlencode(
                {
                    "q": q,
                }
            )
        # isbn/subject URLs have no query string yet, hence '?' vs '&'.
        if self.prefs.get("apikey"):
            if t == "isbn" or t == "subject":
                url = url + "?apikey=" + self.prefs["apikey"]
            else:
                url = url + "&apikey=" + self.prefs["apikey"]
        return url
Example #52
0
 def clean_downloaded_metadata(self, mi):
     """Normalize case, authors and ISBN of a downloaded Metadata object."""
     # Re-case English records or records with no language set.
     recase = (
         mi.language == 'eng' or mi.is_null('language')
     )
     if recase and mi.title:
         mi.title = fixcase(mi.title)
     mi.authors = fixauthors(mi.authors)
     if recase and mi.tags:
         mi.tags = [fixcase(t) for t in mi.tags]
     mi.isbn = check_isbn(mi.isbn)
Example #53
0
	def get_cached_cover_url(self, identifiers):
		"""Return a cached cover URL for a databazeknih id, resolving via ISBN if needed."""
		databazeknih_id = identifiers.get(u'databazeknih', None)
		if databazeknih_id is None:
			isbn = check_isbn(identifiers.get(u'isbn', None))
			if isbn is not None:
				databazeknih_id = self.cached_isbn_to_identifier(isbn)
		if databazeknih_id is None:
			return None
		return self.cached_identifier_to_cover_url(databazeknih_id)
Example #54
0
 def get_cached_cover_url(self, identifiers):
     """Return a cached cover URL for a moly_hu id, resolving via ISBN if needed."""
     moly_id = identifiers.get('moly_hu', None)
     if moly_id is None:
         isbn = check_isbn(identifiers.get('isbn', None))
         if isbn is not None:
             moly_id = self.cached_isbn_to_identifier(isbn)
     if moly_id is None:
         return None
     return self.cached_identifier_to_cover_url(moly_id)
Example #55
0
 def clean_downloaded_metadata(self, mi):
     """Normalize a downloaded Metadata object before queueing it.

     Plugins call this from identify() before putting *mi* on
     result_queue; override with a source-specific algorithm if needed.
     """
     if mi.title:
         mi.title = fixcase(mi.title)
     mi.authors = fixauthors(mi.authors)
     mi.tags = [fixcase(t) for t in mi.tags]
     mi.isbn = check_isbn(mi.isbn)
Example #56
0
def isbn_test(isbn):
    """Return a predicate that checks a Metadata object's ISBN against *isbn*."""
    expected = check_isbn(isbn)

    def test(mi):
        got = check_isbn(mi.isbn)
        if got and got == expected:
            return True
        prints('ISBN test failed. Expected: \'%s\' found \'%s\''%(expected, got))
        return False

    return test
Example #57
0
 def clean_downloaded_metadata(self, mi):
     """Normalize case, authors, tags and ISBN of a downloaded record.

     Intended to be called from a plugin's identify() before the
     Metadata object goes onto result_queue; sources may substitute
     their own normalization.
     """
     if mi.title:
         mi.title = fixcase(mi.title)
     mi.authors = fixauthors(mi.authors)
     mi.tags = list(map(fixcase, mi.tags))
     mi.isbn = check_isbn(mi.isbn)
Example #58
0
 def paste_isbn(self):
     """Paste an ISBN from the clipboard, prompting via ISBNDialog if it is invalid."""
     text = unicode(QApplication.clipboard().text()).strip()
     if not (text and check_isbn(text)):
         dialog = ISBNDialog(self, text)
         if not dialog.exec_():
             return
         text = dialog.text()
         if not text:
             return
     current = self.current_val
     current['isbn'] = text
     self.current_val = current
Example #59
0
 def clean_downloaded_metadata(self, mi):
     """Normalize a downloaded Metadata object before queueing it.

     Called from a plugin's identify() before *mi* is put on
     result_queue; sources may use a custom algorithm instead.
     """
     # Re-case English records or records with no language set.
     recase = mi.language == "eng" or mi.is_null("language")
     if recase and mi.title:
         mi.title = fixcase(mi.title)
     mi.authors = fixauthors(mi.authors)
     if recase and mi.tags:
         mi.tags = [fixcase(t) for t in mi.tags]
     mi.isbn = check_isbn(mi.isbn)
Example #60
0
 def checkText(self, txt):
     """Color the ISBN line edit by validity and set a matching tooltip."""
     isbn = unicode(txt)
     if not isbn:
         col, extra = 'none', ''
     elif check_isbn(isbn) is not None:
         col, extra = 'rgba(0,255,0,20%)', _('This ISBN number is valid')
     else:
         col, extra = 'rgba(255,0,0,20%)', _('This ISBN number is invalid')
     self.line_edit.setToolTip(extra)
     self.line_edit.setStyleSheet('QLineEdit { background-color: %s }'%col)