Exemple #1
0
def single_identify(title, authors, identifiers):
    '''
    Run one identify query and package everything the caller needs.

    :param title: Title passed through to identify()
    :param authors: Authors passed through to identify()
    :param identifiers: Identifiers passed through to identify()
    :return: 4-tuple of (OPF for every result, the has_cached_cover_url
             value of every result, dump_caches() output, log.dump()
             output)
    '''
    gui_log = GUILog()
    patch_plugins()
    matches = identify(
        gui_log, Event(), title=title, authors=authors,
        identifiers=identifiers)

    # Built in this order on purpose: OPF first, then cover flags, then
    # the caches and finally the log, so the log covers all of the above.
    opfs = [metadata_to_opf(match) for match in matches]
    cover_flags = [match.has_cached_cover_url for match in matches]
    caches = dump_caches()
    return opfs, cover_flags, caches, gui_log.dump()
Exemple #2
0
def single_identify(title, authors, identifiers):
    '''
    Perform a single metadata identify lookup.

    The three arguments are forwarded unchanged to identify() as keyword
    arguments, together with a fresh GUILog and a fresh Event.

    :return: ``(opf_list, cover_flag_list, caches, log_dump)`` where the
             two lists have one entry per result returned by identify()
    '''
    log = GUILog()
    patch_plugins()
    query = dict(title=title, authors=authors, identifiers=identifiers)
    found = identify(log, Event(), **query)

    as_opf = []
    for mi in found:
        as_opf.append(metadata_to_opf(mi))
    has_cover = []
    for mi in found:
        has_cover.append(mi.has_cached_cover_url)

    return as_opf, has_cover, dump_caches(), log.dump()
Exemple #3
0
def main(args=sys.argv):
    '''
    Command line entry point: identify a book and print the best match.

    Options are read from ``args`` via option_parser(). The first result
    returned by identify() is printed to stdout, either as OPF (when
    ``opts.opf`` is set) or as UTF-8 encoded text. When ``opts.cover`` is
    set a cover is downloaded and saved to that path.

    :param args: Argument list handed to the option parser
    :return: 0 on success
    :raises SystemExit: with status 1 when identify() returns no results
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    buf = BytesIO()
    log = create_log(buf)
    abort = Event()
    patch_plugins()

    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)

    identifiers = {}
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    allowed_plugins = frozenset(opts.allowed_plugin)
    results = identify(log,
                       abort,
                       title=opts.title,
                       authors=authors,
                       identifiers=identifiers,
                       timeout=int(opts.timeout),
                       # An empty set is passed as None
                       allowed_plugins=allowed_plugins or None)

    if not results:
        # The log text lives in buf (see buf.getvalue() below); printing
        # the log object itself would not show the collected messages.
        prints(buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    if opts.cover:
        cover = download_cover(log,
                               title=opts.title,
                               authors=authors,
                               identifiers=result.identifiers,
                               timeout=int(opts.timeout))
        if cover is None:
            # Never index into a missing cover. The message is suppressed
            # in OPF mode, as before.
            if not opts.opf:
                prints('No cover found', file=sys.stderr)
        else:
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover

    log = buf.getvalue()

    result = (metadata_to_opf(result)
              if opts.opf else unicode(result).encode('utf-8'))

    if opts.verbose:
        print(log, file=sys.stderr)

    print(result)
    if not opts.opf and opts.cover:
        prints('Cover               :', cf)

    return 0
Exemple #4
0
def main(args=sys.argv):
    '''
    Command line entry point: identify a book and print the best match.

    Options are read from ``args`` via option_parser(). Identifiers are
    collected from every ``opts.identifier`` entry of the form
    ``key:value``, with ``opts.isbn`` (if given) stored under ``'isbn'``.
    The first result returned by identify() is printed to stdout, either
    as OPF (when ``opts.opf`` is set) or as UTF-8 encoded text. When
    ``opts.cover`` is set a cover is downloaded and saved to that path.

    :param args: Argument list handed to the option parser
    :return: 0 on success
    :raises SystemExit: with a message for a malformed identifier, or with
                        status 1 when identify() returns no results
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    buf = BytesIO()
    log = create_log(buf)
    abort = Event()
    patch_plugins()

    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)

    identifiers = {}
    for idspec in opts.identifier:
        # partition() yields (key, sep, value); [::2] drops the separator
        k, v = idspec.partition(':')[::2]
        if not k or not v:
            raise SystemExit('Not a valid identifier: {}'.format(idspec))
        identifiers[k] = v
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    allowed_plugins = frozenset(opts.allowed_plugin)
    results = identify(log, abort, title=opts.title, authors=authors,
            identifiers=identifiers, timeout=int(opts.timeout),
            # An empty set is passed as None
            allowed_plugins=allowed_plugins or None)

    if not results:
        # The log text lives in buf (see buf.getvalue() below); printing
        # the log object itself would not show the collected messages.
        prints(buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    if opts.cover:
        cover = download_cover(log, title=opts.title, authors=authors,
                identifiers=result.identifiers, timeout=int(opts.timeout))
        if cover is None:
            # Never index into a missing cover. The message is suppressed
            # in OPF mode, as before.
            if not opts.opf:
                prints('No cover found', file=sys.stderr)
        else:
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover

    log = buf.getvalue()

    result = (metadata_to_opf(result) if opts.opf else
                    unicode_type(result).encode('utf-8'))

    if opts.verbose:
        print(log, file=sys.stderr)

    print(result)
    if not opts.opf and opts.cover:
        prints('Cover               :', cf)

    return 0
Exemple #5
0
def main(args=sys.argv):
    '''
    Command line entry point: look up a book and print the best match.

    Identifiers come from the ``key:value`` entries in ``opts.identifier``
    plus ``opts.isbn``. The first result from identify() is written to
    stdout as OPF when ``opts.opf`` is set, otherwise as text. When
    ``opts.cover`` is set, a cover is downloaded and saved to that path.

    :param args: Argument list handed to the option parser
    :return: 0 on success
    :raises SystemExit: for a malformed identifier, or with status 1 when
                        no results are found
    '''
    opts, args = option_parser().parse_args(args)

    log_buffer = BytesIO()
    log = create_log(log_buffer)
    abort = Event()
    patch_plugins()

    authors = string_to_authors(opts.authors) if opts.authors else []

    identifiers = {}
    for idspec in opts.identifier:
        key, _, value = idspec.partition(':')
        if not (key and value):
            raise SystemExit('Not a valid identifier: {}'.format(idspec))
        identifiers[key] = value
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    timeout = int(opts.timeout)
    # An empty plugin selection is passed on as None
    plugins = frozenset(opts.allowed_plugin) or None
    results = identify(
        log, abort, title=opts.title, authors=authors,
        identifiers=identifiers, timeout=timeout, allowed_plugins=plugins)

    if not results:
        prints(log_buffer.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    best = results[0]

    cover_path = None
    # results is known to be non-empty here, so only the option matters
    if opts.cover:
        cover = download_cover(
            log, title=opts.title, authors=authors,
            identifiers=best.identifiers, timeout=int(opts.timeout))
        if cover is not None:
            save_cover_data_to(cover[-1], opts.cover)
            best.cover = cover_path = opts.cover
        elif not opts.opf:
            prints('No cover found', file=sys.stderr)

    if opts.verbose:
        prints(log_buffer.getvalue(), file=sys.stderr)

    if opts.opf:
        # Use the binary layer of stdout when there is one
        raw_stdout = getattr(sys.stdout, 'buffer', sys.stdout)
        raw_stdout.write(metadata_to_opf(best))
        print()
    else:
        prints(str(best))
        if opts.cover:
            prints('Cover               :', cover_path)

    return 0
Exemple #6
0
def main(do_identify, covers, metadata, ensure_fields, tdir):
    '''
    Download metadata and/or covers for a batch of books.

    For every book the results are written into ``tdir``:
    ``<book_id>.mi`` (merged metadata as OPF), ``<book_id>.cover`` (cover
    data) and ``<book_id>.log`` (UTF-8 encoded log text for that book).

    :param do_identify: If true, run identify() for every book
    :param covers: If true, download a cover for every book
    :param metadata: Mapping of book id to raw OPF data for that book
    :param ensure_fields: Passed through to merge_result()
    :param tdir: Output directory, also used as the OPF base directory
    :return: ``(failed_ids, failed_covers, all_failed)``. The first two
             are sets of book ids. ``all_failed`` is True only if no
             metadata and no cover was obtained for any book.
    '''
    failed_ids = set()
    failed_covers = set()
    all_failed = True
    log = GUILog()
    patch_plugins()

    for book_id, mi in iteritems(metadata):
        mi = OPF(BytesIO(mi), basedir=tdir,
                 populate_spine=False).to_book_metadata()
        title, authors, identifiers = mi.title, mi.authors, mi.identifiers
        # The log is reused, so start every book with a clean slate
        log.clear()

        if do_identify:
            results = []
            try:
                results = identify(log,
                                   Event(),
                                   title=title,
                                   authors=authors,
                                   identifiers=identifiers)
            except Exception as err:
                # Best effort: a failing lookup must not abort the batch,
                # but record why it failed. KeyboardInterrupt/SystemExit
                # are deliberately not caught.
                log.error('Metadata download raised an error:', repr(err))
            if results:
                all_failed = False
                mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
                # Use the merged identifiers for the cover download below
                identifiers = mi.identifiers
                if not mi.is_null('rating'):
                    # set_metadata expects a rating out of 10
                    mi.rating *= 2
                with open(os.path.join(tdir, '%d.mi' % book_id), 'wb') as f:
                    f.write(metadata_to_opf(mi, default_lang='und'))
            else:
                log.error('Failed to download metadata for', title)
                failed_ids.add(book_id)

        if covers:
            cdata = download_cover(log,
                                   title=title,
                                   authors=authors,
                                   identifiers=identifiers)
            if cdata is None:
                failed_covers.add(book_id)
            else:
                with open(os.path.join(tdir, '%d.cover' % book_id), 'wb') as f:
                    f.write(cdata[-1])
                all_failed = False

        with open(os.path.join(tdir, '%d.log' % book_id), 'wb') as f:
            f.write(log.plain_text.encode('utf-8'))

    return failed_ids, failed_covers, all_failed
Exemple #7
0
def main(args=sys.argv):
    '''
    Command line entry point: identify a book and print the best match.

    Options are read from ``args`` via option_parser(). The first result
    returned by identify() is printed to stdout, either as OPF (when
    ``opts.opf`` is set) or as UTF-8 encoded text. When ``opts.cover`` is
    set a cover is downloaded and saved to that path.

    :param args: Argument list handed to the option parser
    :return: 0 on success
    :raises SystemExit: with status 1 when identify() returns no results
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)

    buf = BytesIO()
    log = create_log(buf)
    abort = Event()

    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)

    identifiers = {}
    if opts.isbn:
        identifiers['isbn'] = opts.isbn

    results = identify(log, abort, title=opts.title, authors=authors,
            identifiers=identifiers, timeout=int(opts.timeout))

    if not results:
        # The log text lives in buf (see buf.getvalue() below); printing
        # the log object itself would not show the collected messages.
        prints(buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]

    cf = None
    # results is known to be non-empty here, so only the option matters
    if opts.cover:
        cover = download_cover(log, title=opts.title, authors=authors,
                identifiers=result.identifiers, timeout=int(opts.timeout))
        if cover is None:
            prints('No cover found', file=sys.stderr)
        else:
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover

    log = buf.getvalue()

    result = (metadata_to_opf(result) if opts.opf else
                    unicode(result).encode('utf-8'))

    if opts.verbose:
        print (log, file=sys.stderr)

    print (result)
    if not opts.opf and opts.cover:
        prints('Cover               :', cf)

    return 0
Exemple #8
0
def main(do_identify, covers, metadata, ensure_fields, tdir):
    '''
    Download metadata and/or covers for a batch of books.

    For every book the results are written into ``tdir``:
    ``<book_id>.mi`` (merged metadata as OPF), ``<book_id>.cover`` (cover
    data) and ``<book_id>.log`` (UTF-8 encoded log text for that book).

    :param do_identify: If true, run identify() for every book
    :param covers: If true, download a cover for every book
    :param metadata: Mapping of book id to raw OPF data for that book
    :param ensure_fields: Passed through to merge_result()
    :param tdir: Output directory, also used as the OPF base directory
    :return: ``(failed_ids, failed_covers, all_failed)``. The first two
             are sets of book ids. ``all_failed`` is True only if no
             metadata and no cover was obtained for any book.
    '''
    failed_ids = set()
    failed_covers = set()
    all_failed = True
    log = GUILog()
    patch_plugins()

    # items() rather than iteritems() so the loop runs on Python 2 and 3
    for book_id, mi in metadata.items():
        mi = OPF(BytesIO(mi), basedir=tdir,
                populate_spine=False).to_book_metadata()
        title, authors, identifiers = mi.title, mi.authors, mi.identifiers
        # The log is reused, so start every book with a clean slate
        log.clear()

        if do_identify:
            results = []
            try:
                results = identify(log, Event(), title=title, authors=authors,
                    identifiers=identifiers)
            except Exception as err:
                # Best effort: a failing lookup must not abort the batch,
                # but record why it failed. KeyboardInterrupt/SystemExit
                # are deliberately not caught.
                log.error('Metadata download raised an error:', repr(err))
            if results:
                all_failed = False
                mi = merge_result(mi, results[0], ensure_fields=ensure_fields)
                # Use the merged identifiers for the cover download below
                identifiers = mi.identifiers
                if not mi.is_null('rating'):
                    # set_metadata expects a rating out of 10
                    mi.rating *= 2
                with open(os.path.join(tdir, '%d.mi'%book_id), 'wb') as f:
                    f.write(metadata_to_opf(mi, default_lang='und'))
            else:
                log.error('Failed to download metadata for', title)
                failed_ids.add(book_id)

        if covers:
            cdata = download_cover(log, title=title, authors=authors,
                    identifiers=identifiers)
            if cdata is None:
                failed_covers.add(book_id)
            else:
                with open(os.path.join(tdir, '%d.cover'%book_id), 'wb') as f:
                    f.write(cdata[-1])
                all_failed = False

        with open(os.path.join(tdir, '%d.log'%book_id), 'wb') as f:
            f.write(log.plain_text.encode('utf-8'))

    return failed_ids, failed_covers, all_failed
Exemple #9
0
def test_identify(tests):  # {{{
    '''
    Run identify() for every test case and check the returned results.

    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    :raises SystemExit: with status 1 when no result passes all test
                        functions, or when the most relevant (first)
                        result is not the first one that passes
    '''
    from calibre.ebooks.metadata.sources.identify import identify

    tdir, lf, log, abort = init_test('Full Identify')
    prints('Log saved to', lf)

    times = []

    for kwargs, test_funcs in tests:
        log('')
        log('#' * 80)
        log('### Running test with:', kwargs)
        log('#' * 80)
        prints('Running test with:', kwargs)
        start_time = time.time()
        results = identify(log, abort, **kwargs)
        times.append(time.time() - start_time)
        if not results:
            prints('identify failed to find any results')
            break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        for i, mi in enumerate(results):
            prints('*' * 30, 'Relevance:', i, '*' * 30)
            if mi.rating:
                mi.rating *= 2
            prints(mi)
            prints('\nCached cover URLs    :',
                   [x[0].name for x in get_cached_cover_urls(mi)])
            prints('*' * 75, '\n\n')

        # Results for which every test function returns a true value
        possibles = [mi for mi in results
                     if all(tfunc(mi) for tfunc in test_funcs)]

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            log.close()
            dump_log(lf)
            raise SystemExit(1)

        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        log('\n\n')

    # Guard against an empty test list, which would divide by zero
    if times:
        prints('Average time per query', sum(times) / len(times))

    prints('Full log is at:', lf)
Exemple #10
0
    def isbn_api_add(self, isbn):
        '''
        Add a book identified only by its ISBN and return a status message.

        An empty book record carrying the ISBN is imported into the
        database first. Metadata is then looked up with identify() and, if
        anything is found, the first result is stored on the new record.

        :param isbn: The ISBN as text. Every character other than the
                     digits 0-9 is discarded before validation.
        :return: A human readable message describing what happened
        :raises cherrypy.HTTPError: 404 if ``isbn`` contains no digits or
                                    does not have exactly 10 or 13 digits
        '''
        # Keep the ISBN as text: converting it to an int would drop leading
        # zeros and make valid ISBN-10s fail the length check.
        # NOTE(review): an ISBN-10 check digit of 'X' is still stripped
        # here, as it was before - confirm whether it should be supported.
        try:
            digits = re.sub(r'[^0-9]+', '', isbn)
        except TypeError:
            digits = ''
        if not digits:
            raise cherrypy.HTTPError(404, 'Invalid isbn %s is not a number: '%isbn)
        isbn = digits
        isbn_len = len(isbn)
        if isbn_len not in (10, 13):
            raise cherrypy.HTTPError(404, 'Invalid isbn {0} has a wrong length of {1}'.format(isbn, isbn_len))

        mi = MetaInformation(None)
        mi.isbn = isbn
        try:
            new_id = self.db.import_book(mi, [])
        except Exception:
            return 'could not add new book with isdb {0}'.format(isbn)

        # The metadata lookup runs synchronously, before this method returns
        result = ''
        try:
            buf = BytesIO()
            log = create_log(buf)

            results = identify(log, Event(), title=None, authors=[],
                    identifiers={'isbn': isbn}, timeout=30000)

            if not results:
                # The collected log text is in buf, not in the log object
                prints(buf.getvalue(), file=sys.stderr)
                prints('No results found', file=sys.stderr)
                return 'Could not find metadata for isbn {0}'.format(isbn)
            result = results[0]
            self.db.set_metadata(new_id, result)

            result = unicode(result).encode('utf-8')

        except Exception as e:
            return 'Error getting metadata {0}'.format(e)

        return 'Added new book with isbn {0} with new id {1} and metadata {2}'.format(isbn, new_id, result)

    # }}}
Exemple #11
0
def test_identify(tests):  # {{{
    '''
    Run identify() for every test case and check the returned results.

    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    :raises SystemExit: with status 1 when no result passes all test
                        functions, or when the most relevant (first)
                        result is not the first one that passes
    '''
    from calibre.ebooks.metadata.sources.identify import identify

    tdir, lf, log, abort = init_test('Full Identify')
    prints('Log saved to', lf)

    times = []

    for kwargs, test_funcs in tests:
        log('#'*80)
        log('### Running test with:', kwargs)
        log('#'*80)
        prints('Running test with:', kwargs)
        start_time = time.time()
        results = identify(log, abort, **kwargs)
        times.append(time.time() - start_time)
        if not results:
            prints('identify failed to find any results')
            break

        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')

        for i, mi in enumerate(results):
            prints('*'*30, 'Relevance:', i, '*'*30)
            prints(mi)
            prints('\nCached cover URLs    :',
                    [x[0].name for x in get_cached_cover_urls(mi)])
            prints('*'*75, '\n\n')

        # Results for which every test function returns a true value
        possibles = [mi for mi in results
                     if all(tfunc(mi) for tfunc in test_funcs)]

        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)

        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)

        log('\n\n')

    # Guard against an empty test list, which would divide by zero
    if times:
        prints('Average time per query', sum(times)/len(times))

    prints('Full log is at:', lf)