Ejemplo n.º 1
0
def field_from_string(field, raw, field_metadata):
    ''' Parse the string raw to return an object that is suitable for calling
    set() on a Metadata object. '''
    dt = field_metadata['datatype']
    val = object
    if dt in {'int', 'float'}:
        val = int(raw) if dt == 'int' else float(raw)
    elif dt == 'rating':
        val = float(raw) * 2
    elif dt == 'datetime':
        from calibre.utils.date import parse_only_date
        val = parse_only_date(raw)
    elif dt == 'bool':
        if raw.lower() in {'true', 'yes', 'y'}:
            val = True
        elif raw.lower() in {'false', 'no', 'n'}:
            val = False
        else:
            raise ValueError('Unknown value for %s: %s'%(field, raw))
    elif dt == 'text':
        ism = field_metadata['is_multiple']
        if ism:
            val = [x.strip() for x in raw.split(ism['ui_to_list'])]
            if field == 'identifiers':
                val = {x.partition(':')[0]:x.partition(':')[-1] for x in val}
            elif field == 'languages':
                from calibre.utils.localization import canonicalize_lang
                val = [canonicalize_lang(x) for x in val]
                val = [x for x in val if x]
    if val is object:
        val = raw
    return val
Ejemplo n.º 2
0
def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values
    if 'meta' not in extensions:
        extensions.append('meta')
    md = create_markdown_object(extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    m = md.Meta
    for k, v in iteritems({'date':'pubdate', 'summary':'comments'}):
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            if not mf.get('is_multiple'):
                val = val[0]
            if k == 'series':
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                try:
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    continue
            if mf.get('datatype') == 'datetime':
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
Ejemplo n.º 3
0
def convert_markdown_with_metadata(txt, title='', extensions=DEFAULT_MD_EXTENSIONS):
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.utils.date import parse_only_date
    from calibre.db.write import get_series_values
    if 'meta' not in extensions:
        extensions.append('meta')
    md = create_markdown_object(extensions)
    html = md.convert(txt)
    mi = Metadata(title or _('Unknown'))
    m = md.Meta
    for k, v in {'date':'pubdate', 'summary':'comments'}.iteritems():
        if v not in m and k in m:
            m[v] = m.pop(k)
    for k in 'title authors series tags pubdate comments publisher rating'.split():
        val = m.get(k)
        if val:
            mf = mi.metadata_for_field(k)
            if not mf.get('is_multiple'):
                val = val[0]
            if k == 'series':
                val, si = get_series_values(val)
                mi.series_index = 1 if si is None else si
            if k == 'rating':
                try:
                    val = max(0, min(int(float(val)), 10))
                except Exception:
                    continue
            if mf.get('datatype') == 'datetime':
                try:
                    val = parse_only_date(val, assume_utc=False)
                except Exception:
                    continue
            setattr(mi, k, val)
    return mi, HTML_TEMPLATE % (mi.title, html)
Ejemplo n.º 4
0
 def parse_pubdate(self, pd):
     for x in reversed(pd.xpath(self.publisher_xpath)):
         if x.tail:
             ans = x.tail
             date = ans.rpartition('(')[-1].replace(')', '').strip()
             date = self.delocalize_datestr(date)
             return parse_only_date(date, assume_utc=True)
Ejemplo n.º 5
0
 def parse_new_details(self, root, mi, non_hero):
     table = non_hero.xpath('descendant::table')[0]
     for tr in table.xpath('descendant::tr'):
         cells = tr.xpath('descendant::td')
         if len(cells) == 2:
             name = self.totext(cells[0])
             val = self.totext(cells[1])
             if not val:
                 continue
             if name in self.language_names:
                 ans = self.lang_map.get(val, None)
                 if not ans:
                     ans = canonicalize_lang(val)
                 if ans:
                     mi.language = ans
             elif name in self.publisher_names:
                 pub = val.partition(';')[0].partition('(')[0].strip()
                 if pub:
                     mi.publisher = pub
                 date = val.rpartition('(')[-1].replace(')', '').strip()
                 try:
                     from calibre.utils.date import parse_only_date
                     date = self.delocalize_datestr(date)
                     mi.pubdate = parse_only_date(date, assume_utc=True)
                 except:
                     self.log.exception('Failed to parse pubdate: %s' % val)
             elif name in {'ISBN', 'ISBN-10', 'ISBN-13'}:
                 ans = check_isbn(val)
                 if ans:
                     self.isbn = mi.isbn = ans
Ejemplo n.º 6
0
 def parse_new_details(self, root, mi, non_hero):
     table = non_hero.xpath('descendant::table')[0]
     for tr in table.xpath('descendant::tr'):
         cells = tr.xpath('descendant::td')
         if len(cells) == 2:
             name = self.totext(cells[0])
             val = self.totext(cells[1])
             if not val:
                 continue
             if name in self.language_names:
                 ans = self.lang_map.get(val, None)
                 if not ans:
                     ans = canonicalize_lang(val)
                 if ans:
                     mi.language = ans
             elif name in self.publisher_names:
                 pub = val.partition(';')[0].partition('(')[0].strip()
                 if pub:
                     mi.publisher = pub
                 date = val.rpartition('(')[-1].replace(')', '').strip()
                 try:
                     from calibre.utils.date import parse_only_date
                     date = self.delocalize_datestr(date)
                     mi.pubdate = parse_only_date(date, assume_utc=True)
                 except:
                     self.log.exception('Failed to parse pubdate: %s' % val)
             elif name in {'ISBN', 'ISBN-10', 'ISBN-13'}:
                 ans = check_isbn(val)
                 if ans:
                     self.isbn = mi.isbn = ans
Ejemplo n.º 7
0
def field_from_string(field, raw, field_metadata):
    ''' Parse the string raw to return an object that is suitable for calling
    set() on a Metadata object. '''
    dt = field_metadata['datatype']
    val = object
    if dt in {'int', 'float'}:
        val = int(raw) if dt == 'int' else float(raw)
    elif dt == 'rating':
        val = float(raw) * 2
    elif dt == 'datetime':
        from calibre.utils.date import parse_only_date
        val = parse_only_date(raw)
    elif dt == 'bool':
        if raw.lower() in {'true', 'yes', 'y'}:
            val = True
        elif raw.lower() in {'false', 'no', 'n'}:
            val = False
        else:
            raise ValueError('Unknown value for %s: %s' % (field, raw))
    elif dt == 'text':
        ism = field_metadata['is_multiple']
        if ism:
            val = [x.strip() for x in raw.split(ism['ui_to_list'])]
            if field == 'identifiers':
                val = {x.partition(':')[0]: x.partition(':')[-1] for x in val}
            elif field == 'languages':
                from calibre.utils.localization import canonicalize_lang
                val = [canonicalize_lang(x) for x in val]
                val = [x for x in val if x]
    if val is object:
        val = raw
    return val
Ejemplo n.º 8
0
 def parse_pubdate(self, pd):
     for x in reversed(pd.xpath(self.publisher_xpath)):
         if x.tail:
             ans = x.tail
             date = ans.rpartition('(')[-1].replace(')', '').strip()
             date = self.delocalize_datestr(date)
             return parse_only_date(date, assume_utc=True)
Ejemplo n.º 9
0
def read_metadata_kfx(stream, read_cover=True):
    ' Read the metadata.kfx file that is found in the sdr book folder for KFX files '
    c = Container(stream.read())
    m = extract_metadata(c.decode())

    # dump_metadata(m)

    def has(x):
        return m[x] and m[x][0]

    def get(x, single=True):
        ans = m[x]
        if single:
            ans = clean_xml_chars(ans[0]) if ans else ''
        else:
            ans = [clean_xml_chars(y) for y in ans]
        return ans

    title = get('title') or _('Unknown')
    authors = get('authors', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(x):
        if tweaks['author_sort_copy_method'] != 'copy':
            m = auth_pat.match(x.strip())
            if m is not None:
                return m.group(2) + ' ' + m.group(1)
        return x

    mi = Metadata(title, [fix_author(x) for x in authors])
    if has('author'):
        mi.author_sort = get('author')
    if has('ASIN'):
        mi.set_identifier('mobi-asin', get('ASIN'))
    elif has('content_id'):
        mi.set_identifier('mobi-asin', get('content_id'))
    if has('languages'):
        langs = list(
            filter(None,
                   (canonicalize_lang(x) for x in get('languages', False))))
        if langs:
            mi.languages = langs
    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass
    if has('publisher') and get('publisher') != 'Unknown':
        mi.publisher = get('publisher')
    if read_cover and m[COVER_KEY]:
        try:
            data = base64.standard_b64decode(m[COVER_KEY])
            fmt, w, h = identify(bytes(data))
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi
Ejemplo n.º 10
0
def toPubdate(log, yearAsString):  # {{{
    res = None
    if yearAsString:
        try:
            res = parse_only_date(u"01.01." + yearAsString)
        except:
            log.error('cannot parse to date %s' % yearAsString)
    return res
Ejemplo n.º 11
0
def toPubdate(log, yearAsString): # {{{
    res = None
    if yearAsString:
        try:
            res = parse_only_date(u"01.01." + yearAsString)
        except:
            log.error('cannot parse to date %s'%yearAsString)
    return res
Ejemplo n.º 12
0
def toPubdate(log, yearAsString):  # {{{
    from calibre.utils.date import parse_only_date
    res = None
    if yearAsString:
        try:
            res = parse_only_date(u"01.01." + yearAsString)
        except:
            log.error('cannot parse to date %s' % yearAsString)
    return res
Ejemplo n.º 13
0
    def parse_pubdate(self, pd):
        for x in reversed(pd.xpath(self.publisher_xpath)):
            if x.tail:
                from calibre.utils.date import parse_only_date

                ans = x.tail
                date = ans.rpartition("(")[-1].replace(")", "").strip()
                date = self.delocalize_datestr(date)
                return parse_only_date(date, assume_utc=True)
Ejemplo n.º 14
0
def toPubdate(log, yearAsString):  # {{{
    from calibre.utils.date import parse_only_date
    res = None
    if yearAsString:
        try:
            res = parse_only_date(u"01.01." + yearAsString)
        except:
            log.error('cannot parse to date %s' % yearAsString)
    return res
Ejemplo n.º 15
0
Archivo: kfx.py Proyecto: AEliu/calibre
def read_metadata_kfx(stream, read_cover=True):
    ' Read the metadata.kfx file that is found in the sdr book folder for KFX files '
    c = Container(stream.read())
    m = extract_metadata(c.decode())
    # dump_metadata(m)

    def has(x):
        return m[x] and m[x][0]

    def get(x, single=True):
        ans = m[x]
        if single:
            ans = clean_xml_chars(ans[0]) if ans else ''
        else:
            ans = [clean_xml_chars(y) for y in ans]
        return ans

    title = get('title') or _('Unknown')
    authors = get('authors', False) or [_('Unknown')]
    auth_pat = re.compile(r'([^,]+?)\s*,\s+([^,]+)$')

    def fix_author(x):
        if tweaks['author_sort_copy_method'] != 'copy':
            m = auth_pat.match(x.strip())
            if m is not None:
                return m.group(2) + ' ' + m.group(1)
        return x

    mi = Metadata(title, [fix_author(x) for x in authors])
    if has('author'):
        mi.author_sort = get('author')
    if has('ASIN'):
        mi.set_identifier('mobi-asin', get('ASIN'))
    elif has('content_id'):
        mi.set_identifier('mobi-asin', get('content_id'))
    if has('languages'):
        langs = list(filter(None, (canonicalize_lang(x) for x in get('languages', False))))
        if langs:
            mi.languages = langs
    if has('issue_date'):
        try:
            mi.pubdate = parse_only_date(get('issue_date'))
        except Exception:
            pass
    if has('publisher') and get('publisher') != 'Unknown':
        mi.publisher = get('publisher')
    if read_cover and m[COVER_KEY]:
        try:
            data = base64.standard_b64decode(m[COVER_KEY])
            w, h, fmt = identify_data(data)
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w and h:
            mi.cover_data = (fmt, data)

    return mi
Ejemplo n.º 16
0
def read_metadata_kfx(stream, read_cover=True):
    " Read the metadata.kfx file that is found in the sdr book folder for KFX files "
    c = Container(stream.read())
    m = extract_metadata(c.decode())
    # dump_metadata(m)

    def has(x):
        return m[x] and m[x][0]

    def get(x, single=True):
        ans = m[x]
        if single:
            ans = clean_xml_chars(ans[0]) if ans else ""
        else:
            ans = [clean_xml_chars(y) for y in ans]
        return ans

    title = get("title") or _("Unknown")
    authors = get("authors", False) or [_("Unknown")]
    auth_pat = re.compile(r"([^,]+?)\s*,\s+([^,]+)$")

    def fix_author(x):
        if tweaks["author_sort_copy_method"] != "copy":
            m = auth_pat.match(x.strip())
            if m is not None:
                return m.group(2) + " " + m.group(1)
        return x

    mi = Metadata(title, [fix_author(x) for x in authors])
    if has("author"):
        mi.author_sort = get("author")
    if has("ASIN"):
        mi.set_identifier("mobi-asin", get("ASIN"))
    elif has("content_id"):
        mi.set_identifier("mobi-asin", get("content_id"))
    if has("languages"):
        langs = list(filter(None, (canonicalize_lang(x) for x in get("languages", False))))
        if langs:
            mi.languages = langs
    if has("issue_date"):
        try:
            mi.pubdate = parse_only_date(get("issue_date"))
        except Exception:
            pass
    if has("publisher") and get("publisher") != "Unknown":
        mi.publisher = get("publisher")
    if read_cover and m[COVER_KEY]:
        try:
            data = base64.standard_b64decode(m[COVER_KEY])
            fmt, w, h = identify(bytes(data))
        except Exception:
            w, h, fmt = 0, 0, None
        if fmt and w > -1 and h > -1:
            mi.cover_data = (fmt, data)

    return mi
Ejemplo n.º 17
0
def get_comic_book_info(d, mi, series_index='volume'):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    series = d.get('series', '')
    if series.strip():
        mi.series = series
        si = d.get(series_index, None)
        if si is None:
            si = d.get('issue' if series_index == 'volume' else 'volume', None)
        if si is not None:
            try:
                mi.series_index = float(si)
            except Exception:
                mi.series_index = 1
    if d.get('language', None):
        lang = canonicalize_lang(d.get('lang'))
        if lang:
            mi.languages = [lang]
    if d.get('rating', -1) > -1:
        mi.rating = d['rating']
    for x in ('title', 'publisher'):
        y = d.get(x, '').strip()
        if y:
            setattr(mi, x, y)
    tags = d.get('tags', [])
    if tags:
        mi.tags = tags
    authors = []
    for credit in d.get('credits', []):
        if credit.get('role',
                      '') in ('Writer', 'Artist', 'Cartoonist', 'Creator'):
            x = credit.get('person', '')
            if x:
                x = ' '.join(reversed(x.split(', ')))
                authors.append(x)
    if authors:
        mi.authors = authors
    comments = d.get('comments', '')
    if comments and comments.strip():
        mi.comments = comments.strip()
    pubm, puby = d.get('publicationMonth',
                       None), d.get('publicationYear', None)
    if puby is not None:
        from calibre.utils.date import parse_only_date
        from datetime import date
        try:
            dt = date(puby, 6 if pubm is None else pubm, 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            pass
Ejemplo n.º 18
0
    def parse_response(cls, response, isbn_initial, log):
        metadata_items = []

        page_soup = BeautifulSoup(response.text)

        for idx, candidate in enumerate(cls.find(page_soup, 'b-result'), 1):

            title = cls.find(candidate, 'b-result__name-wrap', True)
            author = map(
                unicode.strip,
                cls.find(candidate, 'b-result__author', True).split(','))
            comments = cls.find(candidate, 'b-result__desc__full',
                                True).replace(u'Скрыть', '').strip()
            isbn = cls.find(candidate, 'b-result__isbn',
                            True).split(':')[-1].split(',')[0].strip()

            log.info(u'Found candidate %s: %s' % (idx, title))

            publisher = None
            pubdate = None

            other_info = cls.find(candidate, 'b-result__years', True).strip()
            if other_info:
                for entry in other_info.split(';'):
                    k, v = entry.split(':', 1)
                    k = k.strip()
                    if k == u'Год':
                        pubdate = parse_only_date('1.1.%s' %
                                                  v.split(',')[0].strip())
                    elif k == u'Издательство':
                        publisher = v.strip()

            metadata_item = Metadata(title, author)
            metadata_item.isbn = isbn or isbn_initial

            if comments:
                metadata_item.comments = comments

            if publisher is not None:
                metadata_item.publisher = publisher

            if pubdate is not None:
                metadata_item.pubdate = pubdate

            metadata_items.append(metadata_item)

        return metadata_items
Ejemplo n.º 19
0
def get_comic_book_info(d, mi, series_index='volume'):
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    series = d.get('series', '')
    if series.strip():
        mi.series = series
        si = d.get(series_index, None)
        if si is None:
            si = d.get('issue' if series_index == 'volume' else 'volume', None)
        if si is not None:
            try:
                mi.series_index = float(si)
            except Exception:
                mi.series_index = 1
    if d.get('rating', -1) > -1:
        mi.rating = d['rating']
    for x in ('title', 'publisher'):
        y = d.get(x, '').strip()
        if y:
            setattr(mi, x, y)
    tags = d.get('tags', [])
    if tags:
        mi.tags = tags
    authors = []
    for credit in d.get('credits', []):
        if credit.get('role', '') in ('Writer', 'Artist', 'Cartoonist',
                'Creator'):
            x = credit.get('person', '')
            if x:
                x = ' '.join((reversed(x.split(', '))))
                authors.append(x)
    if authors:
        mi.authors = authors
    comments = d.get('comments', '')
    if comments and comments.strip():
        mi.comments = comments.strip()
    pubm, puby = d.get('publicationMonth', None), d.get('publicationYear', None)
    if puby is not None:
        from calibre.utils.date import parse_only_date
        from datetime import date
        try:
            dt = date(puby, 6 if pubm is None else pubm, 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except:
            pass
Ejemplo n.º 20
0
    def parse_response(cls, response, isbn_initial, log):
        metadata_items = []

        page_soup = BeautifulSoup(response.text)

        for idx, candidate in enumerate(cls.find(page_soup, 'b-result'), 1):

            title = cls.find(candidate, 'b-result__name-wrap', True)
            author = map(unicode.strip, cls.find(candidate, 'b-result__author', True).split(','))
            comments = cls.find(candidate, 'b-result__desc__full', True).replace(u'Скрыть', '').strip()
            isbn = cls.find(candidate, 'b-result__isbn', True).split(':')[-1].split(',')[0].strip()

            log.info(u'Found candidate %s: %s' % (idx, title))

            publisher = None
            pubdate = None

            other_info = cls.find(candidate, 'b-result__years', True).strip()
            if other_info:
                for entry in other_info.split(';'):
                    k, v = entry.split(':', 1)
                    k = k.strip()
                    if k == u'Год':
                        pubdate = parse_only_date('1.1.%s' % v.split(',')[0].strip())
                    elif k == u'Издательство':
                        publisher = v.strip()

            metadata_item = Metadata(title, author)
            metadata_item.isbn = isbn or isbn_initial

            if comments:
                metadata_item.comments = comments

            if publisher is not None:
                metadata_item.publisher = publisher

            if pubdate is not None:
                metadata_item.pubdate = pubdate

            metadata_items.append(metadata_item)

        return metadata_items
Ejemplo n.º 21
0
    def parse_pubdate(self, pd):
        matches = pd.xpath(self.publish_date_xpath)
        date = None
        if matches:
            date = self.totext(matches[0])

        if not matches:
            matches = pd.xpath(
                '//div[@class="show_info_left" and contains(text(), "出版时间")]/../div'
            )

            if len(matches) > 1:
                date = self.totext(matches[1])

        if date:
            from calibre.utils.date import parse_only_date

            date = date.replace('年', '/').replace('月', '/').replace('日', '') \
                .replace('-', '/').replace('出版时间', '').replace(':', '').strip()
            if date.endswith('/'):
                date = "%s1" % date

            date = self.delocalize_datestr(date)
            return parse_only_date(date, assume_utc=True)
Ejemplo n.º 22
0
def get_basic_data(browser, log, *skus):
    from calibre.utils.date import parse_only_date
    from mechanize import Request
    zeroes = ','.join('0' for sku in skus)
    data = {
        'skus': ','.join(skus),
        'drc': zeroes,
        'startPosition': '0',
        'sequence': '1',
        'selected': zeroes,
        'itemID': '0',
        'orderID': '0',
        'mailingID': '',
        'tContentWidth': '926',
        'originalOrder': ','.join(str(i) for i in range(len(skus))),
        'selectedOrderID': '0',
        'selectedSortColumn': '0',
        'listType': '1',
        'resultType': '32',
        'blockView': '1',
    }
    items_data_url = 'https://www.edelweiss.plus/GetTreelineControl.aspx?controlName=/uc/listviews/ListView_Title_Multi.ascx'
    req = Request(items_data_url, data)
    response = browser.open_novisit(req)
    raw = response.read()
    root = parse_html(raw)
    for item in root.xpath('//div[@data-priority]'):
        row = item.getparent().getparent()
        sku = item.get('id').split('-')[-1]
        isbns = [
            x.strip() for x in row.xpath(
                'descendant::*[contains(@class, "pev_sku")]/text()')[0].split(
                    ',') if check_isbn(x.strip())
        ]
        isbns.sort(key=len, reverse=True)
        try:
            tags = [
                x.strip() for x in astext(
                    row.xpath(
                        'descendant::*[contains(@class, "pev_categories")]')
                    [0]).split('/')
            ]
        except IndexError:
            tags = []
        rating = 0
        for bar in row.xpath(
                'descendant::*[contains(@class, "bgdColorCommunity")]/@style'):
            m = re.search('width: (\d+)px;.*max-width: (\d+)px', bar)
            if m is not None:
                rating = float(m.group(1)) / float(m.group(2))
                break
        try:
            pubdate = parse_only_date(astext(
                row.xpath('descendant::*[contains(@class, "pev_shipDate")]')
                [0]).split(':')[-1].split(u'\xa0')[-1].strip(),
                                      assume_utc=True)
        except Exception:
            log.exception('Error parsing published date')
            pubdate = None
        authors = []
        for x in [
                x.strip() for x in row.xpath(
                    'descendant::*[contains(@class, "pev_contributor")]/@title'
                )
        ]:
            authors.extend(a.strip() for a in x.split(','))
        entry = {
            'sku':
            sku,
            'cover':
            row.xpath('descendant::img/@src')[0].split('?')[0],
            'publisher':
            astext(
                row.xpath('descendant::*[contains(@class, "headerPublisher")]')
                [0]),
            'title':
            astext(row.xpath('descendant::*[@id="title_{}"]'.format(sku))[0]),
            'authors':
            authors,
            'isbns':
            isbns,
            'tags':
            tags,
            'pubdate':
            pubdate,
            'format':
            ' '.join(
                row.xpath(
                    'descendant::*[contains(@class, "pev_format")]/text()')).
            strip(),
            'rating':
            rating,
        }
        if entry['cover'].startswith('/'):
            entry['cover'] = None
        yield entry
Ejemplo n.º 23
0
def adapt_date(x):
    if isinstance(x, (unicode, bytes)):
        x = parse_only_date(x)
    if x is None or is_date_undefined(x):
        x = UNDEFINED_DATE
    return x
Ejemplo n.º 24
0
    def parse(self, raw):
        from calibre.ebooks.metadata.book.base import Metadata
        from calibre.utils.date import parse_only_date, UNDEFINED_DATE

        root = parse_html(raw)
        sku = CSSSelect("div.sku.attGroup")(root)[0]
        info = sku.getparent()
        top = info.getparent().getparent()
        banner = top.find("div")
        spans = banner.findall("span")
        title = ""
        for i, span in enumerate(spans):
            if i == 0 or "12pt" in span.get("style", ""):
                title += astext(span)
            else:
                break
        authors = [re.sub(r"\(.*\)", "", x).strip() for x in astext(spans[-1]).split(",")]
        mi = Metadata(title.strip(), authors)

        # Identifiers
        isbns = [check_isbn(x.strip()) for x in astext(sku).split(",")]
        for isbn in isbns:
            if isbn:
                self.plugin.cache_isbn_to_identifier(isbn, self.sku)
        isbns = sorted(isbns, key=lambda x: len(x) if x else 0, reverse=True)
        if isbns and isbns[0]:
            mi.isbn = isbns[0]
        mi.set_identifier("edelweiss", self.sku)

        # Tags
        bisac = CSSSelect("div.bisac.attGroup")(root)
        if bisac:
            bisac = astext(bisac[0])
            mi.tags = [x.strip() for x in bisac.split(",")]
            mi.tags = [t[1:].strip() if t.startswith("&") else t for t in mi.tags]

        # Publisher
        pub = CSSSelect("div.supplier.attGroup")(root)
        if pub:
            pub = astext(pub[0])
            mi.publisher = pub

        # Pubdate
        pub = CSSSelect("div.shipDate.attGroupItem")(root)
        if pub:
            pub = astext(pub[0])
            parts = pub.partition(":")[0::2]
            pub = parts[1] or parts[0]
            try:
                if ", Ship Date:" in pub:
                    pub = pub.partition(", Ship Date:")[0]
                q = parse_only_date(pub, assume_utc=True)
                if q.year != UNDEFINED_DATE:
                    mi.pubdate = q
            except:
                self.log.exception("Error parsing published date: %r" % pub)

        # Comments
        comm = ""
        general = CSSSelect("div#pd-general-overview-content")(root)
        if general:
            q = self.render_comments(general[0])
            if q != "<p>No title summary available. </p>":
                comm += q
        general = CSSSelect("div#pd-general-contributor-content")(root)
        if general:
            comm += self.render_comments(general[0])
        general = CSSSelect("div#pd-general-quotes-content")(root)
        if general:
            comm += self.render_comments(general[0])
        if comm:
            mi.comments = comm

        # Cover
        img = CSSSelect("img.title-image[src]")(root)
        if img:
            href = img[0].get("src").replace("jacket_covers/medium/", "jacket_covers/flyout/")
            self.plugin.cache_identifier_to_cover_url(self.sku, href)

        mi.has_cover = self.plugin.cached_identifier_to_cover_url(self.sku) is not None

        return mi
Ejemplo n.º 25
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    if isbytestring(name):
        name = name.decode(filesystem_encoding, "replace")
    name = name.rpartition(".")[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get("filename_pattern"))
    name = name.replace("_", " ")
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        try:
            mi.title = match.group("title")
        except IndexError:
            pass
        try:
            au = match.group("author")
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs["swap_author_names"] and mi.authors:

                    def swap(a):
                        if "," in a:
                            parts = a.split(",", 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return " ".join(parts)

                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group("series")
        except IndexError:
            pass
        try:
            si = match.group("series_index")
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group("isbn")
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group("publisher")
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group("published")
            if pubdate:
                from calibre.utils.date import parse_only_date

                mi.pubdate = parse_only_date(pubdate)
        except:
            pass

    if mi.is_null("title"):
        mi.title = name
    return mi
Ejemplo n.º 26
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    def swap(a):
                        if ',' in a:
                            parts = a.split(',', 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return ' '.join(parts)
                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except:
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
Ejemplo n.º 27
0
def _parse_pubdate(root, mi, ctx):
    year = ctx.XPath('number(//fb:publish-info/fb:year/text())')(root)
    if float.is_integer(year):
        # only year is available, so use 2nd of June
        mi.pubdate = parse_only_date(type(u'')(int(year)))
Ejemplo n.º 28
0
    def convert_comic_md_to_calibre_md(self, comic_metadata):
        '''
        Maps the entries in the comic_metadata to calibre metadata
        '''
        import unicodedata
        from calibre.ebooks.metadata import MetaInformation
        from calibre.utils.date import parse_only_date
        from datetime import date
        from calibre.utils.localization import calibre_langcode_to_name

        if self.comic_md_in_calibre_format:
            return

        # synonyms for artists
        WRITER = ['writer', 'plotter', 'scripter']
        PENCILLER = ['artist', 'penciller', 'penciler', 'breakdowns']
        INKER = ['inker', 'artist', 'finishes']
        COLORIST = ['colorist', 'colourist', 'colorer', 'colourer']
        LETTERER = ['letterer']
        COVER_ARTIST = ['cover', 'covers', 'coverartist', 'cover artist']
        EDITOR = ['editor']

        # start with a fresh calibre metadata
        mi = MetaInformation(None, None)
        co = comic_metadata

        # shorten some functions
        role = partial(get_role, credits=co.credits)
        update_field = partial(update_calibre_field, target=mi)

        # Get title, if no title, try to assign series infos
        if co.title:
            mi.title = co.title
        elif co.series:
            mi.title = co.series
            if co.issue:
                mi.title += " " + str(co.issue)
        else:
            mi.title = ""

        # tags
        if co.tags != [] and prefs['import_tags']:
            if prefs['overwrite_calibre_tags']:
                mi.tags = co.tags
            else:
                mi.tags = list(set(self.calibre_metadata.tags + co.tags))

        # simple metadata
        update_field("authors", role(WRITER))
        update_field("series", co.series)
        update_field("rating", co.criticalRating)
        update_field("publisher", co.publisher)
        # special cases
        if co.language:
            update_field("language", calibre_langcode_to_name(co.language))
        if co.comments:
            update_field("comments", co.comments.strip())
        # issue
        if co.issue:
            if isinstance(co.issue, unicode):
                mi.series_index = unicodedata.numeric(co.issue)
            else:
                mi.series_index = float(co.issue)
        # pub date
        puby = co.year
        pubm = co.month
        if puby is not None:
            try:
                dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
                dt = parse_only_date(str(dt))
                mi.pubdate = dt
            except:
                pass

        # custom columns
        custom_cols = self.db.field_metadata.custom_field_metadata()
        update_column = partial(update_custom_column, calibre_metadata=mi, custom_cols=custom_cols)
        # artists
        update_column(prefs['penciller_column'], role(PENCILLER))
        update_column(prefs['inker_column'], role(INKER))
        update_column(prefs['colorist_column'], role(COLORIST))
        update_column(prefs['letterer_column'], role(LETTERER))
        update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
        update_column(prefs['editor_column'], role(EDITOR))
        # others
        update_column(prefs['storyarc_column'], co.storyArc)
        update_column(prefs['characters_column'], co.characters)
        update_column(prefs['teams_column'], co.teams)
        update_column(prefs['locations_column'], co.locations)
        update_column(prefs['volume_column'], co.volume)
        update_column(prefs['genre_column'], co.genre)

        self.comic_md_in_calibre_format = mi
Ejemplo n.º 29
0
    def convert_comic_md_to_calibre_md(self, comic_metadata):
        '''
        Maps the entries in the comic_metadata to calibre metadata
        '''
        import unicodedata
        from calibre.ebooks.metadata import MetaInformation
        from calibre.utils.date import parse_only_date
        from datetime import date
        from calibre.utils.localization import calibre_langcode_to_name

        if self.comic_md_in_calibre_format:
            return

        # start with a fresh calibre metadata
        mi = MetaInformation(None, None)
        co = comic_metadata

        # shorten some functions
        role = partial(get_role, credits=co.credits)
        update_field = partial(update_calibre_field, target=mi)

        # Get title, if no title, try to assign series infos
        if co.title:
            mi.title = co.title
        elif co.series:
            mi.title = co.series
            if co.issue:
                mi.title += " " + str(co.issue)
        else:
            mi.title = ""

        # tags
        if co.tags != [] and prefs['import_tags']:
            if prefs['overwrite_calibre_tags']:
                mi.tags = co.tags
            else:
                mi.tags = list(set(self.calibre_metadata.tags + co.tags))

        # simple metadata
        update_field("authors", role(WRITER))
        update_field("series", co.series)
        update_field("rating", co.criticalRating)
        update_field("publisher", co.publisher)
        # special cases
        if co.language:
            update_field("language", calibre_langcode_to_name(co.language))
        if co.comments:
            update_field("comments", co.comments.strip())
        # issue
        if co.issue:
            try:
                if not python3 and isinstance(co.issue, unicode):
                    mi.series_index = unicodedata.numeric(co.issue)
                else:
                    mi.series_index = float(co.issue)
            except ValueError:
                pass
        # pub date
        puby = co.year
        pubm = co.month
        if puby is not None:
            try:
                dt = date(int(puby), 6 if pubm is None else int(pubm), 15)
                dt = parse_only_date(str(dt))
                mi.pubdate = dt
            except:
                pass

        # custom columns
        update_column = partial(
            update_custom_column,
            calibre_metadata=mi,
            custom_cols=self.db.field_metadata.custom_field_metadata())
        # artists
        update_column(prefs['penciller_column'], role(PENCILLER))
        update_column(prefs['inker_column'], role(INKER))
        update_column(prefs['colorist_column'], role(COLORIST))
        update_column(prefs['letterer_column'], role(LETTERER))
        update_column(prefs['cover_artist_column'], role(COVER_ARTIST))
        update_column(prefs['editor_column'], role(EDITOR))
        # others
        update_column(prefs['storyarc_column'], co.storyArc)
        update_column(prefs['characters_column'], co.characters)
        update_column(prefs['teams_column'], co.teams)
        update_column(prefs['locations_column'], co.locations)
        update_column(prefs['genre_column'], co.genre)
        ensure_int(co.issueCount, update_column, prefs['count_column'],
                   co.issueCount)
        ensure_int(co.volume, update_column, prefs['volume_column'], co.volume)
        if prefs['auto_count_pages']:
            update_column(prefs['pages_column'], self.count_pages())
        else:
            update_column(prefs['pages_column'], co.pageCount)
        if prefs['get_image_sizes']:
            update_column(prefs['image_size_column'], self.get_picture_size())
        update_column(prefs['comicvine_column'],
                      '<a href="{}">Comic Vine</a>'.format(co.webLink))
        update_column(prefs['manga_column'], co.manga)

        self.comic_md_in_calibre_format = mi
Ejemplo n.º 30
0
def adapt_date(x):
    if isinstance(x, (unicode, bytes)):
        x = parse_only_date(x)
    if x is None or is_date_undefined(x):
        x = UNDEFINED_DATE
    return x
Ejemplo n.º 31
0
def _parse_pubdate(root, mi, ctx):
    year = ctx.XPath('number(//fb:publish-info/fb:year/text())')(root)
    if float.is_integer(year):
        # only year is available, so use 2nd of June
        mi.pubdate = parse_only_date(unicode_type(int(year)))
Ejemplo n.º 32
0
    def parse(self, raw):
        from calibre.ebooks.metadata.book.base import Metadata
        from calibre.utils.date import parse_only_date, UNDEFINED_DATE
        from css_selectors import Select
        root = parse_html(raw)
        selector = Select(root)
        sku = next(selector('div.sku.attGroup'))
        info = sku.getparent()
        top = info.getparent().getparent()
        banner = top.find('div')
        spans = banner.findall('span')
        title = ''
        for i, span in enumerate(spans):
            if i == 0 or '12pt' in span.get('style', ''):
                title += astext(span)
            else:
                break
        authors = [
            re.sub(r'\(.*\)', '', x).strip()
            for x in astext(spans[-1]).split(',')
        ]
        mi = Metadata(title.strip(), authors)

        # Identifiers
        isbns = [check_isbn(x.strip()) for x in astext(sku).split(',')]
        for isbn in isbns:
            if isbn:
                self.plugin.cache_isbn_to_identifier(isbn, self.sku)
        isbns = sorted(isbns, key=lambda x: len(x) if x else 0, reverse=True)
        if isbns and isbns[0]:
            mi.isbn = isbns[0]
        mi.set_identifier('edelweiss', self.sku)

        # Tags
        bisac = tuple(selector('div.bisac.attGroup'))
        if bisac:
            bisac = astext(bisac[0])
            mi.tags = [x.strip() for x in bisac.split(',')]
            mi.tags = [
                t[1:].strip() if t.startswith('&') else t for t in mi.tags
            ]

        # Publisher
        pub = tuple(selector('div.supplier.attGroup'))
        if pub:
            pub = astext(pub[0])
            mi.publisher = pub

        # Pubdate
        pub = tuple(selector('div.shipDate.attGroupItem'))
        if pub:
            pub = astext(pub[0])
            parts = pub.partition(':')[0::2]
            pub = parts[1] or parts[0]
            try:
                if ', Ship Date:' in pub:
                    pub = pub.partition(', Ship Date:')[0]
                q = parse_only_date(pub, assume_utc=True)
                if q.year != UNDEFINED_DATE:
                    mi.pubdate = q
            except:
                self.log.exception('Error parsing published date: %r' % pub)

        # Comments
        comm = ''
        general = tuple(selector('div#pd-general-overview-content'))
        if general:
            q = self.render_comments(general[0])
            if q != '<p>No title summary available. </p>':
                comm += q
        general = tuple(selector('div#pd-general-contributor-content'))
        if general:
            comm += self.render_comments(general[0])
        general = tuple(selector('div#pd-general-quotes-content'))
        if general:
            comm += self.render_comments(general[0])
        if comm:
            mi.comments = comm

        # Cover
        img = tuple(selector('img.title-image[src]'))
        if img:
            href = img[0].get('src').replace('jacket_covers/medium/',
                                             'jacket_covers/flyout/')
            self.plugin.cache_identifier_to_cover_url(self.sku, href)

        mi.has_cover = self.plugin.cached_identifier_to_cover_url(
            self.sku) is not None

        return mi
Ejemplo n.º 33
0
def get_basic_data(browser, log, *skus):
    from calibre.utils.date import parse_only_date
    from mechanize import Request
    zeroes = ','.join('0' for sku in skus)
    data = {
            'skus': ','.join(skus),
            'drc': zeroes,
            'startPosition': '0',
            'sequence': '1',
            'selected': zeroes,
            'itemID': '0',
            'orderID': '0',
            'mailingID': '',
            'tContentWidth': '926',
            'originalOrder': ','.join(str(i) for i in range(len(skus))),
            'selectedOrderID': '0',
            'selectedSortColumn': '0',
            'listType': '1',
            'resultType': '32',
            'blockView': '1',
    }
    items_data_url = 'https://www.edelweiss.plus/GetTreelineControl.aspx?controlName=/uc/listviews/ListView_Title_Multi.ascx'
    req = Request(items_data_url, data)
    response = browser.open_novisit(req)
    raw = response.read()
    root = parse_html(raw)
    for item in root.xpath('//div[@data-priority]'):
        row = item.getparent().getparent()
        sku = item.get('id').split('-')[-1]
        isbns = [x.strip() for x in row.xpath('descendant::*[contains(@class, "pev_sku")]/text()')[0].split(',') if check_isbn(x.strip())]
        isbns.sort(key=len, reverse=True)
        try:
            tags = [x.strip() for x in astext(row.xpath('descendant::*[contains(@class, "pev_categories")]')[0]).split('/')]
        except IndexError:
            tags = []
        rating = 0
        for bar in row.xpath('descendant::*[contains(@class, "bgdColorCommunity")]/@style'):
            m = re.search(r'width: (\d+)px;.*max-width: (\d+)px', bar)
            if m is not None:
                rating = float(m.group(1)) / float(m.group(2))
                break
        try:
            pubdate = parse_only_date(astext(row.xpath('descendant::*[contains(@class, "pev_shipDate")]')[0]
                ).split(':')[-1].split(u'\xa0')[-1].strip(), assume_utc=True)
        except Exception:
            log.exception('Error parsing published date')
            pubdate = None
        authors = []
        for x in [x.strip() for x in row.xpath('descendant::*[contains(@class, "pev_contributor")]/@title')]:
            authors.extend(a.strip() for a in x.split(','))
        entry = {
                'sku': sku,
                'cover': row.xpath('descendant::img/@src')[0].split('?')[0],
                'publisher': astext(row.xpath('descendant::*[contains(@class, "headerPublisher")]')[0]),
                'title': astext(row.xpath('descendant::*[@id="title_{}"]'.format(sku))[0]),
                'authors': authors,
                'isbns': isbns,
                'tags': tags,
                'pubdate': pubdate,
                'format': ' '.join(row.xpath('descendant::*[contains(@class, "pev_format")]/text()')).strip(),
                'rating': rating,
        }
        if entry['cover'].startswith('/'):
            entry['cover'] = None
        yield entry
Ejemplo n.º 34
0
def metadata_from_filename(name, pat=None, fallback_pat=None):
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)
    if pat is None:
        pat = re.compile(prefs.get('filename_pattern'))
    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)
    if match is not None:
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            au = match.group('author')
            aus = string_to_authors(au)
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:

                    def swap(a):
                        if ',' in a:
                            parts = a.split(',', 1)
                        else:
                            parts = a.split(None, 1)
                        if len(parts) > 1:
                            t = parts[-1]
                            parts = parts[:-1]
                            parts.insert(0, t)
                        return ' '.join(parts)

                    mi.authors = [swap(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            si = match.group('series_index')
            mi.series_index = float(si)
        except (IndexError, ValueError, TypeError):
            pass
        try:
            si = match.group('isbn')
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except:
            pass
        try:
            comments = match.group('comments')
            mi.comments = comments
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
Ejemplo n.º 35
0
    def parse(self, raw):
        from calibre.ebooks.metadata.book.base import Metadata
        from calibre.utils.date import parse_only_date, UNDEFINED_DATE
        from css_selectors import Select
        root = parse_html(raw)
        selector = Select(root)
        sku = next(selector('div.sku.attGroup'))
        info = sku.getparent()
        top = info.getparent().getparent()
        banner = top.find('div')
        spans = banner.findall('span')
        title = ''
        for i, span in enumerate(spans):
            if i == 0 or '12pt' in span.get('style', ''):
                title += astext(span)
            else:
                break
        authors = [re.sub(r'\(.*\)', '', x).strip() for x in astext(spans[-1]).split(',')]
        mi = Metadata(title.strip(), authors)

        # Identifiers
        isbns = [check_isbn(x.strip()) for x in astext(sku).split(',')]
        for isbn in isbns:
            if isbn:
                self.plugin.cache_isbn_to_identifier(isbn, self.sku)
        isbns = sorted(isbns, key=lambda x:len(x) if x else 0, reverse=True)
        if isbns and isbns[0]:
            mi.isbn = isbns[0]
        mi.set_identifier('edelweiss', self.sku)

        # Tags
        bisac = tuple(selector('div.bisac.attGroup'))
        if bisac:
            bisac = astext(bisac[0])
            mi.tags = [x.strip() for x in bisac.split(',')]
            mi.tags = [t[1:].strip() if t.startswith('&') else t for t in mi.tags]

        # Publisher
        pub = tuple(selector('div.supplier.attGroup'))
        if pub:
            pub = astext(pub[0])
            mi.publisher = pub

        # Pubdate
        pub = tuple(selector('div.shipDate.attGroupItem'))
        if pub:
            pub = astext(pub[0])
            parts = pub.partition(':')[0::2]
            pub = parts[1] or parts[0]
            try:
                if ', Ship Date:' in pub:
                    pub = pub.partition(', Ship Date:')[0]
                q = parse_only_date(pub, assume_utc=True)
                if q.year != UNDEFINED_DATE:
                    mi.pubdate = q
            except:
                self.log.exception('Error parsing published date: %r'%pub)

        # Comments
        comm = ''
        general = tuple(selector('div#pd-general-overview-content'))
        if general:
            q = self.render_comments(general[0])
            if q != '<p>No title summary available. </p>':
                comm += q
        general = tuple(selector('div#pd-general-contributor-content'))
        if general:
            comm += self.render_comments(general[0])
        general = tuple(selector('div#pd-general-quotes-content'))
        if general:
            comm += self.render_comments(general[0])
        if comm:
            mi.comments = comm

        # Cover
        img = tuple(selector('img.title-image[src]'))
        if img:
            href = img[0].get('src').replace('jacket_covers/medium/',
                                             'jacket_covers/flyout/')
            self.plugin.cache_identifier_to_cover_url(self.sku, href)

        mi.has_cover = self.plugin.cached_identifier_to_cover_url(self.sku) is not None

        return mi