Example #1
 def __getitem__(self, book_id):
     with self.lock:
         if not hasattr(self, 'total_size'):
             self._load_index()
         self._invalidate_sizes()
         key = (self.group_id, book_id)
         entry = self.items.pop(key, None)
         if entry is None:
             return None, None
         if entry.thumbnail_size != self.thumbnail_size:
             try:
                 os.remove(entry.path)
             except EnvironmentError as err:
                 if getattr(err, 'errno', None) != errno.ENOENT:
                     self.log('Failed to remove cached thumbnail:', entry.path, as_unicode(err))
             self.total_size -= entry.size
             return None, None
         self.items[key] = entry
         try:
             with open(entry.path, 'rb') as f:
                 data = f.read()
         except EnvironmentError as err:
             self.log('Failed to read cached thumbnail:', entry.path, as_unicode(err))
             return None, None
         return data, entry.timestamp
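
The pop-and-reinsert of entry is the load-bearing trick here: assuming self.items is a collections.OrderedDict, re-adding a popped entry moves it to the most-recently-used end, which is what gives the cache its LRU eviction order. A minimal sketch of the idiom in isolation:

from collections import OrderedDict

items = OrderedDict([('a', 1), ('b', 2), ('c', 3)])

# Accessing 'a' moves it to the most-recently-used end.
value = items.pop('a')
items['a'] = value

# Eviction pops from the least-recently-used front.
items.popitem(last=False)   # removes 'b'
print(list(items))          # ['c', 'a']
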
Example #2
 def insert(self, book_id, timestamp, data):
     if self.max_size < len(data):
         return
     with self.lock:
         if not hasattr(self, 'total_size'):
             self._load_index()
         self._invalidate_sizes()
         ts = ('%.2f' % timestamp).replace('.00', '')
         path = '%s%s%s%s%d-%s-%d-%dx%d' % (
             self.group_id, os.sep, book_id % 100, os.sep,
             book_id, ts, len(data), self.thumbnail_size[0], self.thumbnail_size[1])
         path = os.path.join(self.location, path)
         key = (self.group_id, book_id)
         e = self.items.pop(key, None)
         self.total_size -= getattr(e, 'size', 0)
         try:
             with open(path, 'wb') as f:
                 f.write(data)
         except EnvironmentError as err:
             d = os.path.dirname(path)
             if not os.path.exists(d):
                 try:
                     os.makedirs(d)
                     with open(path, 'wb') as f:
                         f.write(data)
                 except EnvironmentError as err:
                     self.log('Failed to write cached thumbnail:', path, as_unicode(err))
                     return self._apply_size()
             else:
                 self.log('Failed to write cached thumbnail:', path, as_unicode(err))
                 return self._apply_size()
         self.items[key] = Entry(path, len(data), timestamp, self.thumbnail_size)
         self.total_size += len(data)
         self._apply_size()
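
Note how insert encodes everything the index needs (book id, timestamp, data size, thumbnail dimensions) into the file name itself, so the on-disk cache can be re-indexed just by walking the directory. A sketch of round-tripping one such name; the layout follows the format string above, but the decoder is hypothetical, not calibre's actual loader:

import os

def encode_name(group_id, book_id, timestamp, size, width, height):
    ts = ('%.2f' % timestamp).replace('.00', '')
    return '%s%s%s%s%d-%s-%d-%dx%d' % (
        group_id, os.sep, book_id % 100, os.sep,
        book_id, ts, size, width, height)

def decode_name(path):
    # Fields are book_id-ts-size-WxH, e.g. '42-1500000000.50-2048-64x80'
    book_id, ts, size, dims = os.path.basename(path).split('-')
    width, height = dims.split('x')
    return int(book_id), float(ts), int(size), (int(width), int(height))

name = encode_name('covers', 42, 1500000000.5, 2048, 64, 80)
print(decode_name(name))  # (42, 1500000000.5, 2048, (64, 80))
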
Example #3
    def run(self):
        if self.tdir is not None:
            try:
                self.extract()
            except Exception as err:
                import traceback

                traceback.print_exc()
                msg = as_unicode(err)
                self.found.emit(msg)
                return
            self.path = self.tdir

        root = os.path.abspath(self.path)
        try:
            self.walk(root)
        except:
            try:
                if isinstance(root, unicode):
                    root = root.encode(filesystem_encoding)
                self.walk(root)
            except Exception as err:
                import traceback

                traceback.print_exc()
                msg = as_unicode(err)
                self.found.emit(msg)
                return

        self.books = [formats for formats in self.books if formats]

        if not self.canceled:
            self.found.emit(self.books)
Example #4
def main(report_error=prints, report_action=prints):
    try:
        if time.time() - cache.mtime() < UPDATE_INTERVAL:
            report_action('Metadata sources cache was recently updated, not updating again')
            return
        try:
            report_action('Fetching metadata source hashes...')
            needed = update_needed()
        except Exception as e:
            report_error(
                'Failed to get metadata sources hashes with error: {}'.format(as_unicode(e)))
            return
        if not needed:
            cache.touch()
            return
        updated = {}
        for name, expected_hash in needed.iteritems():
            report_action('Updating metadata source {}...'.format(name))
            try:
                update_plugin(name, updated, expected_hash)
            except Exception as e:
                report_error('Failed to get plugin {} with error: {}'.format(
                    name, as_unicode(e)))
                break
        else:
            hashes = cache.get('hashes', {})
            for name in updated:
                hashes[name] = updated[name][1]
            with cache:
                cache['hashes'] = hashes
                for name in updated:
                    cache[name] = updated[name][0]
    finally:
        update_sources.worker = None
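
Note the else on the for loop: in Python it runs only when the loop completes without break, so the cache above is committed only if every plugin updated cleanly. The control flow in miniature:

for name in ('a', 'b', 'c'):
    if name == 'b':
        break  # skips the else clause below
else:
    print('not reached: the loop was broken out of')

for name in ('a', 'c'):
    pass
else:
    print('reached: the loop ran to completion')
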
Example #5
def daemonize():  # {{{
    try:
        pid = os.fork()
        if pid > 0:
            # exit first parent
            sys.exit(0)
    except OSError as e:
        raise SystemExit('fork #1 failed: %s' % as_unicode(e))

    # decouple from parent environment
    os.chdir("/")
    os.setsid()
    os.umask(0)

    # do second fork
    try:
        pid = os.fork()
        if pid > 0:
            # exit from second parent
            sys.exit(0)
    except OSError as e:
        raise SystemExit('fork #2 failed: %s' % as_unicode(e))

    # Redirect standard file descriptors.
    plugins['speedup'][0].detach(os.devnull)
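
This variant hands the final file-descriptor redirection to calibre's compiled speedup plugin. A pure-Python, POSIX-only sketch of the same double-fork pattern without that extension (Example #9 below shows calibre's own fallback doing essentially this):

import os
import sys

def daemonize_sketch():
    # First fork: the parent exits, detaching us from the shell.
    if os.fork() > 0:
        sys.exit(0)
    os.chdir('/')
    os.setsid()   # become session leader, drop the controlling terminal
    os.umask(0)
    # Second fork: a session leader could reacquire a terminal; its child cannot.
    if os.fork() > 0:
        sys.exit(0)
    # Redirect standard file descriptors to /dev/null.
    devnull_r = os.open(os.devnull, os.O_RDONLY)
    devnull_w = os.open(os.devnull, os.O_WRONLY)
    os.dup2(devnull_r, sys.stdin.fileno())
    os.dup2(devnull_w, sys.stdout.fileno())
    os.dup2(devnull_w, sys.stderr.fileno())
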
Example #6
File: add.py Project: AEliu/calibre
 def monitor_scan(self):
     self.scan_thread.join(0.05)
     if self.scan_thread.is_alive():
         self.do_one_signal.emit()
         return
     if self.scan_error is not None:
         error_dialog(self.pd, _('Cannot add books'), _(
             'Failed to add any books, click "Show details" for more information.'),
                      det_msg=self.scan_error, show=True)
         self.break_cycles()
         return
     if not self.file_groups:
         error_dialog(self.pd, _('Could not add'), _(
             'No ebook files were found in %s') % self.source, show=True)
         self.break_cycles()
         return
     self.pd.max = len(self.file_groups)
     self.pd.title = _('Reading metadata and adding to library (%d books)...') % self.pd.max
     self.pd.msg = ''
     self.pd.value = 0
     self.pool = Pool(name='AddBooks') if self.pool is None else self.pool
     if self.db is not None:
         if self.add_formats_to_existing:
             self.find_identical_books_data = self.db.data_for_find_identical_books()
         else:
             try:
                 self.pool.set_common_data(self.db.data_for_has_book())
             except Failure as err:
                 error_dialog(self.pd, _('Cannot add books'), _(
                 'Failed to add any books, click "Show details" for more information.'),
                 det_msg=as_unicode(err.failure_message) + '\n' + as_unicode(err.details), show=True)
                 self.pd.canceled = True
     self.groups_to_add = iter(self.file_groups)
     self.do_one = self.do_one_group
     self.do_one_signal.emit()
Example #7
def more_books(ctx, rd):
    """
    Get more results from the specified search-query, which must
    be specified as JSON in the request body.

    Optional: ?num=50&library_id=<default library>
    """
    db, library_id = get_library_data(ctx, rd.query)[:2]

    try:
        num = int(rd.query.get("num", DEFAULT_NUMBER_OF_BOOKS))
    except Exception:
        raise HTTPNotFound("Invalid number of books: %r" % rd.query.get("num"))
    try:
        search_query = load_json_file(rd.request_body_file)
        query, offset, sorts, orders = (
            search_query["query"],
            search_query["offset"],
            search_query["sort"],
            search_query["sort_order"],
        )
    except KeyError as err:
        raise HTTPBadRequest("Search query missing key: %s" % as_unicode(err))
    except Exception as err:
        raise HTTPBadRequest("Invalid query: %s" % as_unicode(err))
    ans = {}
    with db.safe_read_lock:
        ans["search_result"] = search_result(ctx, rd, db, query, num, offset, sorts, orders)
        mdata = ans["metadata"] = {}
        for book_id in ans["search_result"]["book_ids"]:
            data = book_as_json(db, book_id)
            if data is not None:
                mdata[book_id] = data

    return ans
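
A sketch of a client for this handler, assuming a content server on localhost port 8080 and an /interface-data/more-books route; both the host and the exact path are assumptions here, while the num parameter and the JSON keys come from the handler above:

import json
from urllib.request import Request, urlopen

url = 'http://localhost:8080/interface-data/more-books?num=50'
body = json.dumps({
    'query': 'tag:fiction',
    'offset': 50,
    'sort': 'timestamp',
    'sort_order': 'desc',
}).encode('utf-8')
req = Request(url, data=body, headers={'Content-Type': 'application/json'})
with urlopen(req) as resp:  # POST, since data is supplied
    ans = json.loads(resp.read().decode('utf-8'))
print(len(ans['metadata']), 'books of metadata received')
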
Example #8
    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=60):  # {{{
        from lxml import etree
        from calibre.ebooks.chardet import xml_to_unicode

        if not self.is_configured():
            return
        query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
        if not query:
            err = u'Insufficient metadata to construct query'
            log.error(err)
            return err

        try:
            raw = self.browser.open_novisit(query).read()

        except Exception as e:
            log.exception(u'Failed to make identify query: %r'%query)
            return as_unicode(e)

        try:
            parser = etree.XMLParser(recover=True, no_network=True)
            feed = etree.fromstring(xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0], parser=parser)
            entries = feed.xpath('//*[local-name()="SearchItems" or local-name()="ItemDetail"]')
            if entries:
                metadata = self.get_metadata(log, entries, title, authors, identifiers)
                self.get_all_details(log, metadata, abort, result_queue, identifiers, timeout)
        except Exception as e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)
Example #9
def daemonize():  # {{{
    try:
        pid = os.fork()
        if pid > 0:
            # exit first parent
            sys.exit(0)
    except OSError as e:
        raise SystemExit("fork #1 failed: %s" % as_unicode(e))

    # decouple from parent environment
    os.chdir("/")
    os.setsid()
    os.umask(0)

    # do second fork
    try:
        pid = os.fork()
        if pid > 0:
            # exit from second parent
            sys.exit(0)
    except OSError as e:
        raise SystemExit("fork #2 failed: %s" % as_unicode(e))

    # Redirect standard file descriptors.
    try:
        plugins["speedup"][0].detach(os.devnull)
    except AttributeError:  # people running from source without updated binaries
        si = os.open(os.devnull, os.O_RDONLY)
        so = os.open(os.devnull, os.O_WRONLY)
        se = os.open(os.devnull, os.O_WRONLY)
        os.dup2(si, sys.stdin.fileno())
        os.dup2(so, sys.stdout.fileno())
        os.dup2(se, sys.stderr.fileno())
Example #10
def more_books(ctx, rd):
    '''
    Get more results from the specified search-query, which must
    be specified as JSON in the request body.

    Optional: ?num=50&library_id=<default library>
    '''
    db, library_id = get_library_data(ctx, rd.query)[:2]

    try:
        num = int(rd.query.get('num', DEFAULT_NUMBER_OF_BOOKS))
    except Exception:
        raise HTTPNotFound('Invalid number of books: %r' % rd.query.get('num'))
    try:
        search_query = load_json_file(rd.request_body_file)
        query, offset, sorts, orders = search_query['query'], search_query['offset'], search_query['sort'], search_query['sort_order']
    except KeyError as err:
        raise HTTPBadRequest('Search query missing key: %s' % as_unicode(err))
    except Exception as err:
        raise HTTPBadRequest('Invalid query: %s' % as_unicode(err))
    ans = {}
    with db.safe_read_lock:
        ans['search_result'] = search_result(ctx, rd, db, query, num, offset, sorts, orders)
        mdata = ans['metadata'] = {}
        for book_id in ans['search_result']['book_ids']:
            data = book_as_json(db, book_id)
            if data is not None:
                mdata[book_id] = data

    return ans
Example #11
    def parse(self, xml_detail, xml_more_info):
        title = self.parse_title(xml_detail)
        authors = self.parse_authors(xml_detail)
        comments = self.parse_comments(xml_detail)
        rating = self.parse_rating(xml_detail)
        isbn = self.parse_isbn(xml_more_info)
        publisher = self.parse_publisher(xml_detail)
        tags = self.parse_tags(xml_detail, xml_more_info)
        serie, serie_index = self.parse_serie(xml_detail)
        pub_year = self.parse_pub_year(xml_detail, xml_more_info)
        cover = self.parse_cover(xml_detail)

        if title is not None and authors is not None:
            mi = Metadata(as_unicode(title), authors)
            mi.languages = {'ces'}
            mi.comments = as_unicode(comments)
            mi.identifiers = {self.plugin.name:self.ident}
            mi.rating = rating
            mi.tags = tags
            mi.publisher = publisher
            mi.pubdate = pub_year
            mi.isbn = isbn
            mi.series = serie
            mi.series_index = serie_index
            mi.cover_url = cover

            if cover:
                self.plugin.cache_identifier_to_cover_url(self.ident, cover)

            return mi
        else:
            self.log('Result skipped because title or authors were not found')
            return None
Example #12
    def download_parse(self, query, timeout):
#         self.downloads_count += 1
#         number = self.downloads_count
        br = self.browser
        try:
            self.log('download page search %s'%query)
            data = urllib.urlencode(query[1])
            raw = br.open(query[0],data,timeout=timeout).read().strip()
        except Exception as e:
            self.log.exception('Failed to make identify query: %r'%query)
            return as_unicode(e)

        try:
            parser = etree.HTMLParser(recover=True)
            clean = clean_ascii_chars(raw)

#             self.log.filelog(clean, "\\tmp\\test.html")
            feed = fromstring(clean, parser=parser)

#             if len(parser.error_log) > 0: #some errors while parsing
#                 self.log('some errors occurred while parsing the page:')
#                 self.log(parser.error_log)

            return feed
        except Exception as e:
            self.log.exception('Failed to parse identify results')
            return as_unicode(e)
Example #13
 def _doit(self, newdb):
     for i, x in enumerate(self.ids):
         try:
             self.do_one(i, x, newdb)
         except Exception as err:
             import traceback
             err = as_unicode(err)
             self.failed_books[x] = (err, as_unicode(traceback.format_exc()))
Example #14
 def _doit(self, newdb):
     for i, x in enumerate(self.ids):
         if self.was_canceled:
             self.left_after_cancel = len(self.ids) - i
             break
         try:
             self.do_one(i, x, newdb)
         except Exception as err:
             import traceback
             err = as_unicode(err)
             self.failed_books[x] = (err, as_unicode(traceback.format_exc()))
Example #15
 def fset(self, val):
     if self.type in ('checkbox', 'radio'):
         if val:
             self.qwe.setAttribute('checked', 'checked')
         else:
             self.qwe.removeAttribute('checked')
     elif self.type in ('text', 'password', 'hidden', 'email', 'search'):
         self.qwe.setAttribute('value', as_unicode(val))
     elif self.type in ('number', 'range'):
         self.qwe.setAttribute('value', '%d'%int(val))
     else:  # Unknown type, treat as text
         self.qwe.setAttribute('value', as_unicode(val))
Example #16
    def get_image_urls(self, title, author, log, abort, timeout):
        from calibre.utils.ipc.simple_worker import fork_job, WorkerError
        try:
            return fork_job('calibre.ebooks.metadata.sources.google_images',
                    'search', args=(title, author, self.prefs['size'], timeout), no_output=True, abort=abort, timeout=timeout)['result']
        except WorkerError as e:
            if e.orig_tb:
                log.error(e.orig_tb)
            log.exception('Searching google failed:' + as_unicode(e))
        except Exception as e:
            log.exception('Searching google failed:' + as_unicode(e))

        return []
Example #17
    def do_scan(self):
        self.reload_cache()

        if isworker:
            # Don't scan font files in worker processes, use whatever is
            # cached. Font files typically don't change frequently enough to
            # justify a rescan in a worker process.
            self.build_families()
            return

        cached_fonts = self.cached_fonts.copy()
        self.cached_fonts.clear()
        for folder in self.folders:
            if not os.path.isdir(folder):
                continue
            try:
                files = tuple(walk(folder))
            except EnvironmentError as e:
                if DEBUG:
                    prints('Failed to walk font folder:', folder,
                            as_unicode(e))
                continue
            for candidate in files:
                if (candidate.rpartition('.')[-1].lower() not in self.allowed_extensions
                        or not os.path.isfile(candidate)):
                    continue
                candidate = os.path.normcase(os.path.abspath(candidate))
                try:
                    s = os.stat(candidate)
                except EnvironmentError:
                    continue
                fileid = '{0}||{1}:{2}'.format(candidate, s.st_size, s.st_mtime)
                if fileid in cached_fonts:
                    # Use previously cached metadata, since the file size and
                    # last modified timestamp have not changed.
                    self.cached_fonts[fileid] = cached_fonts[fileid]
                    continue
                try:
                    self.read_font_metadata(candidate, fileid)
                except Exception as e:
                    if DEBUG:
                        prints('Failed to read metadata from font file:',
                                candidate, as_unicode(e))
                    continue

        if frozenset(cached_fonts) != frozenset(self.cached_fonts):
            # Write out the cache only if some font files have changed
            self.write_cache()

        self.build_families()
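
The fileid built above is a cheap change-detection key: path, size, and mtime packed into one string, so a font whose stat() info is unchanged is never reopened or re-parsed. The same idea in isolation (a sketch, not calibre's code):

import os

def file_cache_key(path):
    # If none of path, size, or mtime changed, any cached metadata
    # derived from the file's contents is still valid.
    st = os.stat(path)
    return '{0}||{1}:{2}'.format(path, st.st_size, st.st_mtime)
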
Example #18
    def identify(self, log, result_queue, abort, title=None, authors=None,
                 identifiers={}, timeout=90):  # {{{
        from calibre.ebooks.chardet import xml_to_unicode
        from HTMLParser import HTMLParser
        from lxml import etree, html

        if not self.is_configured():
            return
        query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
        if not query:
            err = u'Insufficient metadata to construct query'
            log.error(err)
            return err

        try:
            raw = self.browser.open_novisit(query).read()
        except Exception as e:
            log.exception(u'Failed to make identify query: %r' % query)
            return as_unicode(e)

        try:
            doc = html.fromstring(xml_to_unicode(raw, verbose=True)[0])
            entries_block = doc.xpath(u'//div[@class="bSearchResult"]')

            if entries_block:
                entries = doc.xpath(u'//div[contains(@itemprop, "itemListElement")]')
                # for entry in entries:
                #   log.debug('entries %s' % entree.tostring(entry))
                metadata = self.get_metadata(log, entries, title, authors, identifiers)
                self.get_all_details(log, metadata, abort, result_queue, identifiers, timeout)
            else:
                # Redirect page: trying to extract ozon_id from javascript data
                h = HTMLParser()
                entry_string = (h.unescape(unicode(etree.tostring(doc, pretty_print=True))))
                id_title_pat = re.compile(u'products":\[{"id":(\d{7}),"name":"([а-яА-Я :\-0-9]+)')
                # result containing ozon_id and entry_title
                entry_info = re.search(id_title_pat, entry_string)
                ozon_id = entry_info.group(1) if entry_info else None
                entry_title = entry_info.group(2) if entry_info else None

                if ozon_id:
                    metadata = self.to_metadata_for_single_entry(log, ozon_id, entry_title, authors)
                    identifiers['ozon'] = ozon_id
                    self.get_all_details(log, [metadata], abort, result_queue, identifiers, timeout, cachedPagesDict={})
                else:
                    log.error('No SearchResults in Ozon.ru response found')

        except Exception as e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)
Example #19
def cdb_run(ctx, rd, which, version):
    try:
        m = module_for_cmd(which)
    except ImportError:
        raise HTTPNotFound('No module named: {}'.format(which))
    if not getattr(m, 'readonly', False):
        ctx.check_for_write_access(rd)
    if getattr(m, 'version', 0) != int(version):
        raise HTTPNotFound(('The module {} is not available in version: {}.'
                            ' Make sure the version of calibre used for the'
                            ' server and calibredb match').format(which, version))
    db = get_library_data(ctx, rd, strict_library_id=True)[0]
    if ctx.restriction_for(rd, db):
        raise HTTPForbidden('Cannot use the command-line db interface with a user who has per library restrictions')
    raw = rd.read()
    ct = rd.inheaders.get('Content-Type', all=True)
    try:
        if MSGPACK_MIME in ct:
            args = msgpack_loads(raw)
        elif 'application/json' in ct:
            args = json_loads(raw)
        else:
            raise HTTPBadRequest('Only JSON or msgpack requests are supported')
    except Exception:
        raise HTTPBadRequest('args are not valid encoded data')
    if getattr(m, 'needs_srv_ctx', False):
        args = [ctx] + list(args)
    try:
        result = m.implementation(db, partial(ctx.notify_changes, db.backend.library_path), *args)
    except Exception as err:
        import traceback
        return {'err': as_unicode(err), 'tb': traceback.format_exc()}
    return {'result': result}
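
A sketch of calling this endpoint with a JSON argument list. The /cdb/cmd/{which}/{version} path shape and the empty argument list for a hypothetical command are assumptions read off the handler's signature, not a documented contract; the 'result'/'err' keys in the response are taken from the code above:

import json
from urllib.request import Request, urlopen

url = 'http://localhost:8080/cdb/cmd/list/0'  # hypothetical command and version
args = []  # positional args forwarded to m.implementation()
req = Request(url, data=json.dumps(args).encode('utf-8'),
              headers={'Content-Type': 'application/json',
                       'Accept': 'application/json'})
with urlopen(req) as resp:
    out = json.loads(resp.read().decode('utf-8'))
if 'err' in out:
    raise SystemExit(out['err'] + '\n' + out.get('tb', ''))
print(out['result'])
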
Example #20
def parse_uri(uri, parse_query=True):
    scheme, authority, path = parse_request_uri(uri)
    if b'#' in path:
        raise HTTPSimpleResponse(httplib.BAD_REQUEST, "Illegal #fragment in Request-URI.")

    if scheme:
        try:
            scheme = scheme.decode('ascii')
        except ValueError:
            raise HTTPSimpleResponse(httplib.BAD_REQUEST, 'Un-decodeable scheme')

    path, qs = path.partition(b'?')[::2]
    if parse_query:
        try:
            query = MultiDict.create_from_query_string(qs)
        except Exception:
            raise HTTPSimpleResponse(httplib.BAD_REQUEST, 'Unparseable query string')
    else:
        query = None

    try:
        path = '%2F'.join(unquote(x).decode('utf-8') for x in quoted_slash.split(path))
    except ValueError as e:
        raise HTTPSimpleResponse(httplib.BAD_REQUEST, as_unicode(e))
    path = tuple(filter(None, (x.replace('%2F', '/') for x in path.split('/'))))

    return scheme, path, query
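
The %2F juggling preserves the difference between a literal slash and an encoded one inside a path component: the raw path is split on quoted slashes first, each piece is percent-decoded separately, the pieces are rejoined with the literal text '%2F', and only then is the path split on real slashes. A self-contained sketch of the round trip, with quoted_slash assumed to be a precompiled pattern like the one below:

import re
from urllib.parse import unquote_to_bytes

quoted_slash = re.compile(b'%2[fF]')

def split_path(path):
    # Decode each piece separately so an encoded slash survives as a marker...
    pieces = [unquote_to_bytes(x).decode('utf-8')
              for x in quoted_slash.split(path)]
    joined = '%2F'.join(pieces)
    # ...then split on real slashes and restore literal ones per component.
    return tuple(x.replace('%2F', '/') for x in joined.split('/') if x)

print(split_path(b'/books/a%2Fb/cover.jpg'))  # ('books', 'a/b', 'cover.jpg')
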
Example #21
    def do_bind(self):
        # Get the correct address family for our host (allows IPv6 addresses)
        host, port = self.bind_address
        try:
            info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, socket.SOCK_STREAM, 0, socket.AI_PASSIVE)
        except socket.gaierror:
            if ":" in host:
                info = [(socket.AF_INET6, socket.SOCK_STREAM, 0, "", self.bind_address + (0, 0))]
            else:
                info = [(socket.AF_INET, socket.SOCK_STREAM, 0, "", self.bind_address)]

        self.socket = None
        msg = "No socket could be created"
        for res in info:
            af, socktype, proto, canonname, sa = res
            try:
                self.bind(af, socktype, proto)
            except socket.error as serr:
                msg = "%s -- (%s: %s)" % (msg, sa, as_unicode(serr))
                if self.socket:
                    self.socket.close()
                self.socket = None
                continue
            break
        if not self.socket:
            raise socket.error(msg)
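
This is the standard getaddrinfo dance for dual-stack servers: ask the resolver for every usable address family, try each in turn, and keep the first socket that binds. A minimal standalone version of the same pattern:

import socket

def bind_first(host, port):
    last_err = OSError('No socket could be created')
    for af, socktype, proto, _canonname, sa in socket.getaddrinfo(
            host, port, socket.AF_UNSPEC, socket.SOCK_STREAM, 0,
            socket.AI_PASSIVE):
        try:
            s = socket.socket(af, socktype, proto)
        except OSError as err:
            last_err = err
            continue
        try:
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            s.bind(sa)
        except OSError as err:
            s.close()
            last_err = err
            continue
        return s
    raise last_err

server = bind_first('localhost', 8080)
server.listen(5)
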
Example #22
    def download_cover(self, log, result_queue, abort, title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):  # {{{
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.debug('No cached cover found, running identify')
            rq = Queue()
            self.identify(log, rq, abort, title=title, authors=authors, identifiers=identifiers)
            if abort.is_set():
                return
            results = []
            while True:
                try:
                    results.append(rq.get_nowait())
                except Empty:
                    break
            results.sort(key=self.identify_results_keygen(title=title, authors=authors, identifiers=identifiers))
            for mi in results:
                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break

        if cached_url is None:
            log.info('No cover found')
            return

        if abort.is_set():
            return

        log.debug('Downloading cover from:', cached_url)
        try:
            cdata = self.browser.open_novisit(cached_url, timeout=timeout).read()
            if cdata:
                result_queue.put((self, cdata))
        except Exception as e:
            log.exception(u'Failed to download cover from: %s' % cached_url)
            return as_unicode(e)
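
The while True / get_nowait loop above is the usual idiom for draining a queue without blocking: keep popping until the queue signals Empty. In isolation (queue is the Python 3 name of the module imported as Queue above):

from queue import Empty, Queue

rq = Queue()
for n in range(3):
    rq.put(n)

results = []
while True:
    try:
        results.append(rq.get_nowait())
    except Empty:
        break
print(results)  # [0, 1, 2]
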
Example #23
    def books_prepared(self, view, job):
        self.bpd.hide()
        self.bpd = None
        if job.exception is not None:
            self.gui.device_job_exception(job)
            return
        paths = job.result
        ok_paths = [x for x in paths if isinstance(x, basestring)]
        failed_paths = [x for x in paths if isinstance(x, tuple)]
        if failed_paths:
            if not ok_paths:
                msg = _('Could not download files from the device')
                typ = error_dialog
            else:
                msg = _('Could not download some files from the device')
                typ = warning_dialog
            det_msg = [x[0]+ '\n    ' + as_unicode(x[1]) for x in failed_paths]
            det_msg = '\n\n'.join(det_msg)
            typ(self.gui, _('Could not download files'), msg, det_msg=det_msg,
                    show=True)

        if ok_paths:
            from calibre.gui2.add import Adder
            callback = partial(self._add_from_device_adder, on_card=None, model=view.model())
            Adder(ok_paths, db=self.gui.current_db, parent=self.gui, callback=callback, pool=self.gui.spare_pool())
Example #24
 def _write_order(self):
     if hasattr(self, 'items'):
         try:
             with open(os.path.join(self.location, 'order'), 'wb') as f:
                 f.write(cPickle.dumps(tuple(map(hash, self.items)), -1))
         except EnvironmentError as err:
             self.log('Failed to save thumbnail cache order:', as_unicode(err))
Example #25
def get_books(ctx, rd):
    '''
    Get books for the specified query

    Optional: ?library_id=<default library>&num=50&sort=timestamp.desc&search=''
    '''
    library_id, db, sorts, orders = get_basic_query_data(ctx, rd.query)
    try:
        num = int(rd.query.get('num', DEFAULT_NUMBER_OF_BOOKS))
    except Exception:
        raise HTTPNotFound('Invalid number of books: %r' % rd.query.get('num'))
    searchq = rd.query.get('search', '')
    db = get_library_data(ctx, rd.query)[0]
    ans = {}
    mdata = ans['metadata'] = {}
    with db.safe_read_lock:
        try:
            ans['search_result'] = search_result(ctx, rd, db, searchq, num, 0, ','.join(sorts), ','.join(orders))
        except ParseException as err:
            # This must not be translated as it is used by the front end to
            # detect invalid search expressions
            raise HTTPBadRequest('Invalid search expression: %s' % as_unicode(err))
        for book_id in ans['search_result']['book_ids']:
            data = book_as_json(db, book_id)
            if data is not None:
                mdata[book_id] = data
    return ans
Example #26
    def open(self, connected_device, library_uuid):
        self.dev = self._filesystem_cache = None
        try:
            self.dev = self.create_device(connected_device)
        except Exception as e:
            self.blacklisted_devices.add(connected_device)
            raise OpenFailed('Failed to open %s: Error: %s'%(
                    connected_device, as_unicode(e)))

        storage = sorted(self.dev.storage_info, key=operator.itemgetter('id'))
        storage = [x for x in storage if x.get('rw', False)]
        if not storage:
            self.blacklisted_devices.add(connected_device)
            raise OpenFailed('No storage found for device %s'%(connected_device,))
        snum = self.dev.serial_number
        if snum in self.prefs.get('blacklist', []):
            self.blacklisted_devices.add(connected_device)
            self.dev = None
            raise BlacklistedDevice(
                'The %s device has been blacklisted by the user'%(connected_device,))
        self._main_id = storage[0]['id']
        self._carda_id = self._cardb_id = None
        if len(storage) > 1:
            self._carda_id = storage[1]['id']
        if len(storage) > 2:
            self._cardb_id = storage[2]['id']
        self.current_friendly_name = self.dev.friendly_name
        if not self.current_friendly_name:
            self.current_friendly_name = self.dev.model_name or _('Unknown MTP device')
        self.current_serial_num = snum
        self.currently_connected_dev = connected_device
Example #27
    def start(self):
        self.is_running = False
        self.exception = None
        cherrypy.tree.mount(root=None, config=self.config)
        try:
            self.start_cherrypy()
        except Exception as e:
            self.exception = e
            import traceback
            traceback.print_exc()
            if callable(self.start_failure_callback):
                try:
                    self.start_failure_callback(as_unicode(e))
                except:
                    pass
            return

        try:
            self.is_running = True
            self.notify_listener()
            cherrypy.engine.block()
        except Exception as e:
            import traceback
            traceback.print_exc()
            self.exception = e
        finally:
            self.is_running = False
            self.notify_listener()
Example #28
    def parse(self, xml_detail):
        title = self.parse_title(xml_detail)
        authors = self.parse_authors(xml_detail)
        comments = self.parse_comments(xml_detail)
        rating = self.parse_rating(xml_detail)
        isbn = self.parse_isbn(xml_detail)
        publisher = self.parse_publisher(xml_detail)
        pub_year = self.parse_pubdate(xml_detail)
        tags = self.parse_tags(xml_detail)
        serie, serie_index = self.parse_serie(xml_detail)
        cover = self.parse_cover(xml_detail)

        if title is not None and authors is not None:
            mi = Metadata(title, authors)
            mi.languages = {'ces'}
            mi.comments = as_unicode(comments)
            mi.identifiers = {self.plugin.name:str(self.number)}
            mi.rating = rating
            mi.tags = tags
            mi.publisher = publisher
            mi.pubdate = pub_year
            mi.isbn = isbn
            mi.series = serie
            mi.series_index = serie_index
            mi.cover_url = cover

            if cover:
                self.plugin.cache_identifier_to_cover_url(str(self.number), cover)

            return mi
        else:
            return None
Example #29
    def run(self):
        if self.xml is None:
            raw = None
            url = None
            try:
                url = self.plugin.create_query(self.title, self.authors, self.number)
                self.log('download page search %s'%url)
                raw = self.plugin.browser.open(url, timeout=self.timeout).read().strip()
            except Exception as e:
                self.log.exception('Failed to make identify query: %r'%url)
                return as_unicode(e)

            if raw is not None:
                try:
                    parser = etree.HTMLParser()
                    clean = clean_ascii_chars(raw)
                    self.xml = fromstring(clean, parser=parser)
#                     if len(parser.error_log) > 0: #some errors while parsing
#                         self.log('some errors occurred while parsing the page:')
#                         self.log(parser.error_log)

                except Exception as e:
                    self.log.exception('Failed to parse xml for url: %s'%url)

        self.parse()
Example #30
    def books_prepared(self, view, job):
        self.bpd.hide()
        self.bpd = None
        if job.exception is not None:
            self.gui.device_job_exception(job)
            return
        paths = job.result
        ok_paths = [x for x in paths if isinstance(x, basestring)]
        failed_paths = [x for x in paths if isinstance(x, tuple)]
        if failed_paths:
            if not ok_paths:
                msg = _("Could not download files from the device")
                typ = error_dialog
            else:
                msg = _("Could not download some files from the device")
                typ = warning_dialog
            det_msg = [x[0] + "\n    " + as_unicode(x[1]) for x in failed_paths]
            det_msg = "\n\n".join(det_msg)
            typ(self.gui, _("Could not download files"), msg, det_msg=det_msg, show=True)

        if ok_paths:
            from calibre.gui2.add import Adder

            self.__adder_func = partial(self._add_from_device_adder, on_card=None, model=view.model())
            self._adder = Adder(
                self.gui,
                self.gui.library_view.model().db,
                self.Dispatcher(self.__adder_func),
                spare_server=self.gui.spare_server,
            )
            self._adder.add(ok_paths)
Example #31
	def identify(self, log, result_queue, abort, title, authors,
			identifiers={}, timeout=30):
		'''
		Note this method will retry without identifiers automatically if no
		match is found with identifiers.
		'''
		matches = []
		databazeknih_id = identifiers.get('databazeknih', None)
		log.info(u'\nTitle:%s\nAuthors:%s\n'%(title, authors))
		br = browser()
		if databazeknih_id:
			matches.append(databazeknih.BASE_URL + 'knihy/' + databazeknih_id)
		else:
			query = self.create_query(log, title=title, authors=authors)
			if query is None:
				log.error('Insufficient metadata to construct query')
				return
			try:
				log.info(u'Querying: %s'%query)
			
				response = br.open(query)
			except Exception as e:
				isbn = check_isbn(identifiers.get('isbn', None))
				if isbn and callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
					log.info('Failed to find match for ISBN: %s'%isbn)
					return  # bail out: response was never assigned, so the parsing below would fail
				else:
					err = 'Failed to make identify query: %r'%query
					log.info(err)
					return as_unicode(e)
#					return e
			try:
				raw = response.read().strip()
				raw = raw.decode('utf-8', errors='replace')
				if not raw:
					log.error('Failed to get raw result for query: %r'%query)
					return
				root = fromstring(clean_ascii_chars(raw))
			except:
				msg = 'Failed to parse databazeknih page for query: %r'%query
				log.exception(msg)
				return msg
			
			self._parse_search_results(log, title, authors, root, matches, timeout)

		if abort.is_set():
			return
		
		if not matches:
			if identifiers and title and authors:
				log.info('No matches found with identifiers, retrying using only'
						' title and authors')
				return self.identify(log, result_queue, abort, title=title,
						authors=authors, timeout=timeout)
			log.error('No matches found with query: %r'%query)
			return
			
		log.debug('Starting workers for: %s' % (matches,))	
		from calibre_plugins.databazeknih.worker import Worker
		workers = [Worker(url, result_queue, br, log, i, self) for i, url in
				enumerate(matches)]

		for w in workers:
			w.start()
			time.sleep(0.1)

		while not abort.is_set():
			a_worker_is_alive = False
			for w in workers:
				w.join(0.2)
				if abort.is_set():
					break
				if w.is_alive():
					a_worker_is_alive = True
			if not a_worker_is_alive:
				break

		return None
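
The join loop at the end is a common pattern for supervising worker threads while staying responsive to cancellation: join each worker with a short timeout, re-check the abort flag between joins, and stop once nothing is alive. A condensed, self-contained sketch:

import threading
import time

abort = threading.Event()
workers = [threading.Thread(target=time.sleep, args=(1,)) for _ in range(3)]
for w in workers:
    w.start()

while not abort.is_set():
    a_worker_is_alive = False
    for w in workers:
        w.join(0.2)  # short timeout keeps the loop responsive
        if abort.is_set():
            break
        if w.is_alive():
            a_worker_is_alive = True
    if not a_worker_is_alive:
        break
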
Example #32
 def dump_and_restore(self):
     try:
         self.db.dump_and_restore(self.update_msg.emit)
     except Exception as e:
         import traceback
         self.error = (as_unicode(e), traceback.format_exc())
Example #33
def subset_all_fonts(container, font_stats, report):
    remove = set()
    total_old = total_new = 0
    changed = False
    for name, mt in container.mime_map.iteritems():
        if (mt in OEB_FONTS or name.rpartition('.')[-1].lower()
                in {'otf', 'ttf'}) and mt != guess_type('a.woff'):
            chars = font_stats.get(name, set())
            with container.open(name, 'rb') as f:
                f.seek(0, os.SEEK_END)
                total_old += f.tell()
            if not chars:
                remove.add(name)
                report('Removed unused font: %s' % name)
                continue
            with container.open(name, 'r+b') as f:
                raw = f.read()
                font_name = get_font_names(raw)[-1]
                warnings = []
                container.log('Subsetting font: %s' % (font_name or name))
                try:
                    nraw, old_sizes, new_sizes = subset(raw,
                                                        chars,
                                                        warnings=warnings)
                except UnsupportedFont as e:
                    container.log.warning(
                        'Unsupported font: %s, ignoring.  Error: %s' %
                        (name, as_unicode(e)))
                    continue

                for w in warnings:
                    container.log.warn(w)
                olen = sum(old_sizes.itervalues())
                nlen = sum(new_sizes.itervalues())
                total_new += len(nraw)
                if nlen == olen:
                    report('The font %s was already subset' % font_name)
                else:
                    report(
                        'Decreased the font %s to %.1f%% of its original size'
                        % (font_name, nlen / olen * 100))
                    changed = True
                f.seek(0), f.truncate(), f.write(nraw)

    for name in remove:
        container.remove_item(name)
        changed = True

    if remove:
        for name, mt in container.mime_map.iteritems():
            if mt in OEB_STYLES:
                sheet = container.parsed(name)
                if remove_font_face_rules(container, sheet, remove, name):
                    container.dirty(name)
            elif mt in OEB_DOCS:
                for style in XPath('//h:style')(container.parsed(name)):
                    if style.get('type',
                                 'text/css') == 'text/css' and style.text:
                        sheet = container.parse_css(style.text, name)
                        if remove_font_face_rules(container, sheet, remove,
                                                  name):
                            style.text = sheet.cssText
                            container.dirty(name)
    if total_old > 0:
        report('Reduced total font size to %.1f%% of original' %
               (total_new / total_old * 100))
    else:
        report('No embedded fonts found')
    return changed
Example #34
 def _do_delete(self, path):
     try:
         os.remove(path)
     except EnvironmentError as err:
         self.log('Failed to delete cached thumbnail file:',
                  as_unicode(err))
Example #35
def tweak(ebook_file):
    ''' Command line interface to the Tweak Book tool '''
    fmt = ebook_file.rpartition('.')[-1].lower()
    exploder, rebuilder = get_tools(fmt)
    if exploder is None:
        prints(
            'Cannot tweak %s files. Supported formats are: EPUB, HTMLZ, AZW3, MOBI'
            % fmt.upper(), file=sys.stderr)
        raise SystemExit(1)

    with TemporaryDirectory(
            '_tweak_' +
            os.path.basename(ebook_file).rpartition('.')[0]) as tdir:
        try:
            opf = exploder(ebook_file, tdir, question=ask_cli_question)
        except WorkerError as e:
            prints('Failed to unpack', ebook_file)
            prints(e.orig_tb)
            raise SystemExit(1)
        except Error as e:
            prints(as_unicode(e), file=sys.stderr)
            raise SystemExit(1)

        if opf is None:
            # The question was answered with No
            return

        ed = os.environ.get('EDITOR', 'dummy')
        cmd = shlex.split(ed)
        isvim = bool([x for x in cmd[0].split('/') if x.endswith('vim')])

        proceed = False
        prints('Book extracted to', tdir)

        if not isvim:
            prints('Make your tweaks and once you are done,', __appname__,
                   'will rebuild', ebook_file, 'from', tdir)
            print()
            proceed = ask_cli_question('Rebuild ' + ebook_file + '?')
        else:
            base = os.path.basename(ebook_file)
            with TemporaryFile(base + '.zip') as zipf:
                with ZipFile(zipf, 'w') as zf:
                    zf.add_dir(tdir)
                try:
                    subprocess.check_call(cmd + [zipf])
                except:
                    prints(ed, 'failed, aborting...')
                    raise SystemExit(1)
                with ZipFile(zipf, 'r') as zf:
                    shutil.rmtree(tdir)
                    os.mkdir(tdir)
                    zf.extractall(path=tdir)
            proceed = True

        if proceed:
            prints('Rebuilding', ebook_file, 'please wait ...')
            try:
                rebuilder(tdir, ebook_file)
            except WorkerError as e:
                prints('Failed to rebuild', ebook_file)
                prints(e.orig_tb)
                raise SystemExit(1)
            prints(ebook_file, 'successfully tweaked')
Example #36
    def identify(self,
                 log,
                 result_queue,
                 abort,
                 title=None,
                 authors=None,
                 identifiers={},
                 timeout=90):  # {{{
        from calibre.ebooks.chardet import xml_to_unicode
        from HTMLParser import HTMLParser
        from lxml import etree, html
        import json

        if not self.is_configured():
            return
        query = self.create_query(log,
                                  title=title,
                                  authors=authors,
                                  identifiers=identifiers)
        if not query:
            err = u'Insufficient metadata to construct query'
            log.error(err)
            return err

        try:
            raw = self.browser.open_novisit(query).read()
        except Exception as e:
            log.exception(u'Failed to make identify query: %r' % query)
            return as_unicode(e)

        try:
            doc = html.fromstring(xml_to_unicode(raw, verbose=True)[0])
            entries_block = doc.xpath(u'//div[@class="bSearchResult"]')

            # log.debug(u'HTML: %s' % xml_to_unicode(raw, verbose=True)[0])

            if entries_block:
                entries = doc.xpath(
                    u'//div[contains(@itemprop, "itemListElement")]')
                # log.debug(u'entries_block')
                # for entry in entries:
                #   log.debug('entries %s' % entree.tostring(entry))
                metadata = self.get_metadata(log, entries, title, authors,
                                             identifiers)
                self.get_all_details(log, metadata, abort, result_queue,
                                     identifiers, timeout)
            else:
                # Redirect page: trying to extract ozon_id from javascript data
                h = HTMLParser()
                entry_string = (h.unescape(
                    etree.tostring(doc, pretty_print=True,
                                   encoding='unicode')))
                json_pat = re.compile(r'dataLayer\s*=\s*(.+)?;')
                json_info = re.search(json_pat, entry_string)
                jsondata = json_info.group(1) if json_info else None
                if jsondata:
                    idx = jsondata.rfind('}]')
                    if idx > 0:
                        jsondata = jsondata[:idx + 2]

                # log.debug(u'jsondata: %s' % jsondata)
                dataLayer = json.loads(jsondata) if jsondata else None

                ozon_id = None
                if dataLayer and dataLayer[0] and 'ecommerce' in dataLayer[0]:
                    jsproduct = dataLayer[0]['ecommerce']['detail'][
                        'products'][0]
                    ozon_id = as_unicode(jsproduct['id'])
                    entry_title = as_unicode(jsproduct['name'])

                    log.debug(u'ozon_id %s' % ozon_id)
                    log.debug(u'entry_title %s' % entry_title)

                    if ozon_id:
                        metadata = self.to_metadata_for_single_entry(
                            log, ozon_id, entry_title, authors)
                        identifiers['ozon'] = ozon_id
                        self.get_all_details(log, [metadata],
                                             abort,
                                             result_queue,
                                             identifiers,
                                             timeout,
                                             cachedPagesDict={})

                if not ozon_id:
                    log.error('No SearchResults in Ozon.ru response found!')

        except Exception as e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)
Example #37
    def __init__(self,
                 tree,
                 path,
                 oeb,
                 opts,
                 profile=None,
                 extra_css='',
                 user_css='',
                 base_css=''):
        self.oeb, self.opts = oeb, opts
        self.profile = profile
        if self.profile is None:
            # Use the default profile. This should really be using
            # opts.output_profile, but I don't want to risk changing it, as
            # doing so might well have hard to debug font size effects.
            from calibre.customize.ui import output_profiles
            for x in output_profiles():
                if x.short_name == 'default':
                    self.profile = x
                    break
        if self.profile is None:
            # Just in case the default profile is removed in the future :)
            self.profile = opts.output_profile
        self.body_font_size = self.profile.fbase
        self.logger = oeb.logger
        item = oeb.manifest.hrefs[path]
        basename = os.path.basename(path)
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [html_css_stylesheet()]
        if base_css:
            stylesheets.append(parseString(base_css, validate=False))
        style_tags = xpath(tree,
                           '//*[local-name()="style" or local-name()="link"]')

        # Add cssutils parsing profiles from output_profile
        for profile in self.opts.output_profile.extra_css_modules:
            cssprofiles.addProfile(profile['name'], profile['props'],
                                   profile['macros'])

        parser = CSSParser(fetcher=self._fetch_css_file,
                           log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in style_tags:
            if (elem.tag == XHTML('style')
                    and elem.get('type', CSS_MIME) in OEB_STYLES
                    and media_ok(elem.get('media'))):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = oeb.css_preprocessor(text)
                    # We handle @import rules separately
                    parser.setFetcher(lambda x: ('utf-8', b''))
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    parser.setFetcher(self._fetch_css_file)
                    for rule in stylesheet.cssRules:
                        if rule.type == rule.IMPORT_RULE:
                            ihref = item.abshref(rule.href)
                            if not media_ok(rule.media.mediaText):
                                continue
                            hrefs = self.oeb.manifest.hrefs
                            if ihref not in hrefs:
                                self.logger.warn(
                                    'Ignoring missing stylesheet in @import rule:',
                                    rule.href)
                                continue
                            sitem = hrefs[ihref]
                            if sitem.media_type not in OEB_STYLES:
                                self.logger.warn(
                                    'CSS @import of non-CSS file %r' %
                                    rule.href)
                                continue
                            stylesheets.append(sitem.data)
                    for rule in tuple(
                            stylesheet.cssRules.rulesOfType(
                                CSSRule.PAGE_RULE)):
                        stylesheet.cssRules.remove(rule)
                    # Make links to resources absolute, since these rules will
                    # be folded into a stylesheet at the root
                    replaceUrls(stylesheet,
                                item.abshref,
                                ignoreImportRules=True)
                    stylesheets.append(stylesheet)
            elif (elem.tag == XHTML('link') and elem.get('href')
                  and elem.get('rel', 'stylesheet').lower() == 'stylesheet'
                  and elem.get('type', CSS_MIME).lower() in OEB_STYLES
                  and media_ok(elem.get('media'))):
                href = urlnormalize(elem.attrib['href'])
                path = item.abshref(href)
                sitem = oeb.manifest.hrefs.get(path, None)
                if sitem is None:
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r not in manifest' %
                        (path, item.href))
                    continue
                if not hasattr(sitem.data, 'cssRules'):
                    self.logger.warn(
                        'Stylesheet %r referenced by file %r is not CSS' %
                        (path, item.href))
                    continue
                stylesheets.append(sitem.data)
        csses = {'extra_css': extra_css, 'user_css': user_css}
        for w, x in csses.items():
            if x:
                try:
                    text = x
                    stylesheet = parser.parseString(text,
                                                    href=cssname,
                                                    validate=False)
                    stylesheets.append(stylesheet)
                except:
                    self.logger.exception('Failed to parse %s, ignoring.' % w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
        rules = []
        index = 0
        self.stylesheets = set()
        self.page_rule = {}
        for sheet_index, stylesheet in enumerate(stylesheets):
            href = stylesheet.href
            self.stylesheets.add(href)
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    if media_ok(rule.media.mediaText):
                        for subrule in rule.cssRules:
                            rules.extend(
                                self.flatten_rule(
                                    subrule,
                                    href,
                                    index,
                                    is_user_agent_sheet=sheet_index == 0))
                            index += 1
                else:
                    rules.extend(
                        self.flatten_rule(
                            rule,
                            href,
                            index,
                            is_user_agent_sheet=sheet_index == 0))
                    index = index + 1
        rules.sort()
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(
            ur':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
        select = Select(tree, ignore_inappropriate_pseudo_classes=True)

        for _, _, cssdict, text, _ in rules:
            fl = pseudo_pat.search(text)
            try:
                matches = tuple(select(text))
            except SelectorError as err:
                self.logger.error(
                    'Ignoring CSS rule with invalid selector: %r (%s)' %
                    (text, as_unicode(err)))
                continue

            if fl is not None:
                fl = fl.group(1)
                if fl == 'first-letter' and getattr(
                        self.oeb, 'plumber_output_format',
                        '').lower() in {u'mobi', u'docx'}:
                    # Fake first-letter
                    from lxml.builder import ElementMaker
                    E = ElementMaker(namespace=XHTML_NS)
                    for elem in matches:
                        for x in elem.iter('*'):
                            if x.text:
                                punctuation_chars = []
                                text = unicode(x.text)
                                while text:
                                    category = unicodedata.category(text[0])
                                    if category[0] not in {'P', 'Z'}:
                                        break
                                    punctuation_chars.append(text[0])
                                    text = text[1:]

                                special_text = u''.join(punctuation_chars) + \
                                        (text[0] if text else u'')
                                span = E.span(special_text)
                                span.set('data-fake-first-letter', '1')
                                span.tail = text[1:]
                                x.text = None
                                x.insert(0, span)
                                self.style(span)._update_cssdict(cssdict)
                                break
                else:  # Element pseudo-class
                    for elem in matches:
                        self.style(elem)._update_pseudo_class(fl, cssdict)
            else:
                for elem in matches:
                    self.style(elem)._update_cssdict(cssdict)
        for elem in xpath(tree, '//h:*[@style]'):
            self.style(elem)._apply_style_attr(url_replacer=item.abshref)
        num_pat = re.compile(r'[0-9.]+$')
        for elem in xpath(tree, '//h:img[@width or @height]'):
            style = self.style(elem)
            # Check if either height or width is not default
            is_styled = style._style.get('width', 'auto') != 'auto' or \
                    style._style.get('height', 'auto') != 'auto'
            if not is_styled:
                # Update img style dimension using width and height
                upd = {}
                for prop in ('width', 'height'):
                    val = elem.get(prop, '').strip()
                    try:
                        del elem.attrib[prop]
                    except:
                        pass
                    if val:
                        if num_pat.match(val) is not None:
                            val += 'px'
                        upd[prop] = val
                if upd:
                    style._update_cssdict(upd)
Example #38
0
    def load_ebook(self, pathtoebook, open_at=None, reopen_at=None):
        if self.iterator is not None:
            self.save_current_position()
            self.iterator.__exit__()
        self.iterator = EbookIterator(pathtoebook)
        self.history.clear()
        self.open_progress_indicator(_('Loading ebook...'))
        worker = Worker(
            target=partial(self.iterator.__enter__, view_kepub=True))
        worker.start()
        while worker.isAlive():
            worker.join(0.1)
            QApplication.processEvents()
        if worker.exception is not None:
            if isinstance(worker.exception, DRMError):
                from calibre.gui2.dialogs.drm_error import DRMErrorMessage
                DRMErrorMessage(self).exec_()
            else:
                r = getattr(worker.exception, 'reason', worker.exception)
                error_dialog(self,
                             _('Could not open ebook'),
                             as_unicode(r) or _('Unknown error'),
                             det_msg=worker.traceback,
                             show=True)
            self.close_progress_indicator()
        else:
            self.metadata.show_opf(self.iterator.opf,
                                   self.iterator.book_format)
            self.view.current_language = self.iterator.language
            title = self.iterator.opf.title
            if not title:
                title = os.path.splitext(os.path.basename(pathtoebook))[0]
            if self.iterator.toc:
                self.toc_model = TOC(self.iterator.spine, self.iterator.toc)
                self.toc.setModel(self.toc_model)
                if self.show_toc_on_open:
                    self.action_table_of_contents.setChecked(True)
            else:
                self.toc_model = TOC(self.iterator.spine)
                self.toc.setModel(self.toc_model)
                self.action_table_of_contents.setChecked(False)
            if isbytestring(pathtoebook):
                pathtoebook = force_unicode(pathtoebook, filesystem_encoding)
            vh = vprefs.get('viewer_open_history', [])
            try:
                vh.remove(pathtoebook)
            except:
                pass
            vh.insert(0, pathtoebook)
            vprefs.set('viewer_open_history', vh[:50])
            self.build_recent_menu()

            self.footnotes_dock.close()
            self.action_table_of_contents.setDisabled(not self.iterator.toc)
            self.current_book_has_toc = bool(self.iterator.toc)
            self.current_title = title
            self.setWindowTitle(title + ' [%s]' % self.iterator.book_format +
                                ' - ' + self.base_window_title)
            self.pos.setMaximum(sum(self.iterator.pages))
            self.pos.setSuffix(' / %d' % sum(self.iterator.pages))
            self.vertical_scrollbar.setMinimum(100)
            self.vertical_scrollbar.setMaximum(100 * sum(self.iterator.pages))
            self.vertical_scrollbar.setSingleStep(10)
            self.vertical_scrollbar.setPageStep(100)
            self.set_vscrollbar_value(1)
            self.current_index = -1
            QApplication.instance().alert(self, 5000)
            previous = self.set_bookmarks(self.iterator.bookmarks)
            if reopen_at is not None:
                previous = reopen_at
            if open_at is None and previous is not None:
                self.goto_bookmark(previous)
            else:
                if open_at is None:
                    self.next_document()
                else:
                    if open_at > self.pos.maximum():
                        open_at = self.pos.maximum()
                    if open_at < self.pos.minimum():
                        open_at = self.pos.minimum()
                    self.goto_page(open_at, loaded_check=False)
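
The polling loop above keeps the Qt event loop alive while the book is opened on a worker thread. A minimal, framework-free sketch of the same pattern, with process_events standing in for QApplication.processEvents (all names here are illustrative):

import threading

def run_blocking(target, process_events, poll_interval=0.1):
    # Run target on a worker thread, pumping UI events while waiting.
    result = {'value': None, 'exception': None}

    def wrapper():
        try:
            result['value'] = target()
        except Exception as e:
            result['exception'] = e

    worker = threading.Thread(target=wrapper)
    worker.start()
    while worker.is_alive():
        worker.join(poll_interval)  # brief wait, then yield to the UI
        process_events()
    return result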
Example #39
0
    def tick(self):
        now = monotonic()
        read_needed, write_needed, readable, remove, close_needed = [], [], [], [], []
        has_ssl = self.ssl_context is not None
        for s, conn in self.connection_map.iteritems():
            if now - conn.last_activity > self.opts.timeout:
                if conn.handle_timeout():
                    conn.last_activity = now
                else:
                    remove.append((s, conn))
                    continue
            wf = conn.wait_for
            if wf is READ or wf is RDWR:
                if wf is RDWR:
                    write_needed.append(s)
                if conn.read_buffer.has_data:
                    readable.append(s)
                else:
                    if has_ssl:
                        conn.drain_ssl_buffer()
                        if conn.ready:
                            (readable if conn.read_buffer.has_data else read_needed).append(s)
                        else:
                            close_needed.append((s, conn))
                    else:
                        read_needed.append(s)
            elif wf is WRITE:
                write_needed.append(s)

        for s, conn in remove:
            self.log('Closing connection because of extended inactivity: %s' % conn.state_description)
            self.close(s, conn)

        for s, conn in close_needed:
            self.close(s, conn)

        if readable:
            writable = []
        else:
            try:
                readable, writable, _ = select.select([self.socket.fileno(), self.control_out.fileno()] + read_needed, write_needed, [], self.opts.timeout)
            except ValueError:  # self.socket.fileno() == -1
                self.ready = False
                self.log.error('Listening socket was unexpectedly terminated')
                return
            except (select.error, socket.error) as e:
                # select.error has no errno attribute. errno is instead
                # e.args[0]
                if getattr(e, 'errno', e.args[0]) in socket_errors_eintr:
                    return
                for s, conn in tuple(self.connection_map.iteritems()):
                    try:
                        select.select([s], [], [], 0)
                    except (select.error, socket.error) as e:
                        if getattr(e, 'errno', e.args[0]) not in socket_errors_eintr:
                            self.close(s, conn)  # Bad socket, discard
                return

        if not self.ready:
            return

        ignore = set()
        for s, conn, event in self.get_actions(readable, writable):
            if s in ignore:
                continue
            try:
                conn.handle_event(event)
                if not conn.ready:
                    self.close(s, conn)
            except JobQueueFull:
                self.log.exception('Server busy handling request: %s' % conn.state_description)
                if conn.ready:
                    if conn.response_started:
                        self.close(s, conn)
                    else:
                        try:
                            conn.report_busy()
                        except Exception:
                            self.close(s, conn)
            except Exception as e:
                ignore.add(s)
                ssl_terminated = getattr(conn, 'ssl_terminated', False)
                if ssl_terminated:
                    self.log.warn('Client tried to initiate SSL renegotiation, closing connection')
                    self.close(s, conn)
                else:
                    self.log.exception('Unhandled exception in state: %s' % conn.state_description)
                    if conn.ready:
                        if conn.response_started:
                            self.close(s, conn)
                        else:
                            try:
                                conn.report_unhandled_exception(e, traceback.format_exc())
                            except Exception:
                                self.close(s, conn)
                    else:
                        self.log.error('Error in SSL handshake, terminating connection: %s' % as_unicode(e))
                        self.close(s, conn)
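
A minimal sketch of the inactivity sweep at the top of tick() (timeouts only, not the SSL close path), assuming connection objects expose last_activity and handle_timeout as above:

from time import monotonic

def expired_connections(connection_map, timeout, now=None):
    # Yield (sock, conn) pairs that have been idle longer than timeout
    # and did not extend their deadline via handle_timeout().
    now = monotonic() if now is None else now
    for s, conn in list(connection_map.items()):
        if now - conn.last_activity > timeout:
            if conn.handle_timeout():
                conn.last_activity = now
            else:
                yield s, conn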
Example #40
0
 def upgrade_connection_to_ws(self, buf, inheaders, event):
     if self.write(buf):
         if self.websocket_handler is None:
             self.websocket_handler = DummyHandler()
         self.read_frame, self.current_recv_opcode = ReadFrame(), None
         self.in_websocket_mode = True
         try:
             self.websocket_handler.handle_websocket_upgrade(self.websocket_connection_id, weakref.ref(self), inheaders)
         except Exception as err:
             self.log.exception('Error in WebSockets upgrade handler:')
             self.websocket_close(UNEXPECTED_ERROR, 'Unexpected error in handler: %r' % as_unicode(err))
         self.handle_event = self.ws_duplex
         self.set_ws_state()
         self.end_send_optimization()
Example #41
0
    def identify(  # {{{
        self,
        log,
        result_queue,
        abort,
        title=None,
        authors=None,
        identifiers={},
        timeout=30
    ):
        from lxml import etree
        entry = XPath('//atom:entry')

        query = self.create_query(
            log, title=title, authors=authors, identifiers=identifiers
        )
        if not query:
            log.error('Insufficient metadata to construct query')
            return
        br = self.browser
        log('Making query:', query)
        try:
            raw = br.open_novisit(query, timeout=timeout).read()
        except Exception as e:
            log.exception('Failed to make identify query: %r' % query)
            return as_unicode(e)

        try:
            parser = etree.XMLParser(recover=True, no_network=True)
            feed = etree.fromstring(
                xml_to_unicode(clean_ascii_chars(raw), strip_encoding_pats=True)[0],
                parser=parser
            )
            entries = entry(feed)
        except Exception as e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)

        if not entries and title and not abort.is_set():
            if identifiers:
                log('No results found, retrying without identifiers')
                return self.identify(
                    log,
                    result_queue,
                    abort,
                    title=title,
                    authors=authors,
                    timeout=timeout
                )
            ntitle = cleanup_title(title)
            if ntitle and ntitle != title:
                log('No results found, retrying without sub-title')
                return self.identify(
                    log,
                    result_queue,
                    abort,
                    title=ntitle,
                    authors=authors,
                    timeout=timeout
                )

        # There is no point running these queries in threads as google
        # throttles requests returning 403 Forbidden errors
        self.get_all_details(br, log, entries, abort, result_queue, timeout)
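
The retry-with-less-metadata strategy above recurs throughout these identify() implementations. A generic sketch (names are illustrative): try the richest query first, then progressively weaker ones, e.g. queries = [{'title': t, 'identifiers': ids}, {'title': t}, {'title': cleanup_title(t)}].

def search_with_fallbacks(search, queries, log):
    # Try each query dict in order, returning the first non-empty result.
    for query in queries:
        results = search(**query)
        if results:
            return results
        log('No results for %r, trying next fallback' % (query,))
    return []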
Example #42
0
    def parse_request_line(self, buf, event, first=False):  # {{{
        line = self.readline(buf)
        if line is None:
            return
        if line == b'\r\n':
            # Ignore a single leading empty line, as per RFC 2616 sec 4.1
            if first:
                return self.set_state(READ, self.parse_request_line,
                                      Accumulator())
            return self.simple_response(
                httplib.BAD_REQUEST,
                'Multiple leading empty lines not allowed')

        try:
            method, uri, req_protocol = line.strip().split(b' ', 2)
            rp = int(req_protocol[5]), int(req_protocol[7])
            self.method = method.decode('ascii').upper()
        except Exception:
            return self.simple_response(httplib.BAD_REQUEST,
                                        "Malformed Request-Line")

        if self.method not in HTTP_METHODS:
            return self.simple_response(httplib.BAD_REQUEST,
                                        "Unknown HTTP method")

        try:
            self.request_protocol = protocol_map[rp]
        except KeyError:
            return self.simple_response(httplib.HTTP_VERSION_NOT_SUPPORTED)
        self.response_protocol = protocol_map[min((1, 1), rp)]
        scheme, authority, path = parse_request_uri(uri)
        if b'#' in path:
            return self.simple_response(httplib.BAD_REQUEST,
                                        "Illegal #fragment in Request-URI.")

        if scheme:
            try:
                self.scheme = scheme.decode('ascii')
            except ValueError:
                return self.simple_response(httplib.BAD_REQUEST,
                                            'Un-decodeable scheme')

        qs = b''
        if b'?' in path:
            path, qs = path.split(b'?', 1)
            try:
                self.query = MultiDict.create_from_query_string(qs)
            except Exception:
                return self.simple_response(httplib.BAD_REQUEST,
                                            'Unparseable query string')

        try:
            path = '%2F'.join(
                unquote(x).decode('utf-8') for x in quoted_slash.split(path))
        except ValueError as e:
            return self.simple_response(httplib.BAD_REQUEST, as_unicode(e))
        self.path = tuple(
            filter(None, (x.replace('%2F', '/') for x in path.split('/'))))
        self.header_line_too_long_error_code = httplib.REQUEST_ENTITY_TOO_LARGE
        self.request_line = line.rstrip()
        self.set_state(READ, self.parse_header_line, HTTPHeaderParser(),
                       Accumulator())
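
A self-contained sketch of the Request-Line split above; unlike the single-digit indexing in the original, it also parses multi-digit version numbers (split_request_line is an illustrative name, not part of the server):

def split_request_line(line):
    # b'GET /path HTTP/1.1' -> ('GET', b'/path', (1, 1))
    method, uri, protocol = line.strip().split(b' ', 2)
    if not protocol.startswith(b'HTTP/'):
        raise ValueError('Malformed Request-Line: %r' % line)
    major, _, minor = protocol[5:].partition(b'.')
    return method.decode('ascii').upper(), uri, (int(major), int(minor))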
Example #43
0
def main(args=sys.argv):
    # Ensure viewer can continue to function if GUI is closed
    os.environ.pop('CALIBRE_WORKER_TEMP_DIR', None)
    reset_base_dir()
    scheme = QWebEngineUrlScheme(FAKE_PROTOCOL.encode('ascii'))
    scheme.setSyntax(QWebEngineUrlScheme.Syntax.Host)
    scheme.setFlags(QWebEngineUrlScheme.SecureScheme)
    QWebEngineUrlScheme.registerScheme(scheme)
    override = 'calibre-ebook-viewer' if islinux else None
    processed_args = []
    internal_book_data = internal_book_data_path = None
    for arg in args:
        if arg.startswith('--internal-book-data='):
            internal_book_data_path = arg.split('=', 1)[1]
            continue
        processed_args.append(arg)
    if internal_book_data_path:
        try:
            with lopen(internal_book_data_path, 'rb') as f:
                internal_book_data = json.load(f)
        finally:
            try:
                os.remove(internal_book_data_path)
            except EnvironmentError:
                pass
    args = processed_args
    app = Application(args,
                      override_program_name=override,
                      windows_app_uid=VIEWER_APP_UID)

    parser = option_parser()
    opts, args = parser.parse_args(args)
    oat = opts.open_at
    if oat and not (oat.startswith('toc:') or oat.startswith('toc-href:')
                    or oat.startswith('toc-href-contains:')
                    or oat.startswith('epubcfi(/') or is_float(oat)
                    or oat.startswith('ref:')):
        raise SystemExit('Not a valid --open-at value: {}'.format(
            opts.open_at))

    listener = None
    if get_session_pref('singleinstance', False):
        try:
            listener = ensure_single_instance(args, opts.open_at)
        except Exception as e:
            import traceback
            error_dialog(None,
                         _('Failed to start viewer'),
                         as_unicode(e),
                         det_msg=traceback.format_exc(),
                         show=True)
            raise SystemExit(1)

    acc = EventAccumulator(app)
    app.file_event_hook = acc
    app.load_builtin_fonts()
    app.setWindowIcon(QIcon(I('viewer.png')))
    migrate_previous_viewer_prefs()
    main = EbookViewer(open_at=opts.open_at,
                       continue_reading=opts.continue_reading,
                       force_reload=opts.force_reload,
                       calibre_book_data=internal_book_data)
    main.set_exception_handler()
    if len(args) > 1:
        acc.events.append(os.path.abspath(args[-1]))
    acc.got_file.connect(main.handle_commandline_arg)
    main.show()
    main.msg_from_anotherinstance.connect(main.another_instance_wants_to_talk,
                                          type=Qt.QueuedConnection)
    if listener is not None:
        t = Thread(name='ConnListener',
                   target=listen,
                   args=(listener, main.msg_from_anotherinstance))
        t.daemon = True
        t.start()
    QTimer.singleShot(0, acc.flush)
    if opts.raise_window:
        main.raise_()
    if opts.full_screen:
        main.set_full_screen(True)

    app.exec_()
    if listener is not None:
        listener.close()
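
The --open-at validation in main() restated as a small standalone predicate (a sketch; is_valid_open_at is not a calibre function):

OPEN_AT_PREFIXES = ('toc:', 'toc-href:', 'toc-href-contains:',
                    'epubcfi(/', 'ref:')

def is_valid_open_at(value):
    # Accept the documented prefixes, or a bare float page number.
    if any(value.startswith(p) for p in OPEN_AT_PREFIXES):
        return True
    try:
        float(value)
    except (TypeError, ValueError):
        return False
    return True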
Example #44
0
    def _load_index(self):
        'Load the index, automatically removing incorrectly sized thumbnails and pruning to fit max_size'
        try:
            os.makedirs(self.location)
        except OSError as err:
            if err.errno != errno.EEXIST:
                self.log('Failed to make thumbnail cache dir:',
                         as_unicode(err))
        self.total_size = 0
        self.items = OrderedDict()
        order = self._read_order()

        def listdir(*args):
            try:
                return os.listdir(os.path.join(*args))
            except EnvironmentError:
                return ()  # not a directory or no permission or whatever

        entries = ('/'.join((parent, subdir, entry))
                   for parent in listdir(self.location)
                   for subdir in listdir(self.location, parent)
                   for entry in listdir(self.location, parent, subdir))

        invalidate = set()
        try:
            with open(os.path.join(self.location, 'invalidate'), 'rb') as f:
                raw = f.read()
        except EnvironmentError as err:
            if getattr(err, 'errno', None) != errno.ENOENT:
                self.log('Failed to read thumbnail invalidate data:',
                         as_unicode(err))
        else:
            try:
                os.remove(os.path.join(self.location, 'invalidate'))
            except EnvironmentError as err:
                self.log('Failed to remove thumbnail invalidate data:',
                         as_unicode(err))
            else:

                def record(line):
                    try:
                        uuid, book_id = line.partition(' ')[0::2]
                        book_id = int(book_id)
                        return (uuid, book_id)
                    except Exception:
                        return None

                invalidate = {record(x) for x in raw.splitlines()}
        items = []
        try:
            for entry in entries:
                try:
                    uuid, name = entry.split('/')[0::2]
                    book_id, timestamp, size, thumbnail_size = name.split('-')
                    book_id, timestamp, size = int(book_id), float(
                        timestamp), int(size)
                    thumbnail_size = tuple(
                        map(int,
                            thumbnail_size.partition('x')[0::2]))
                except (ValueError, TypeError, IndexError, KeyError,
                        AttributeError):
                    continue
                key = (uuid, book_id)
                path = os.path.join(self.location, entry)
                if self.thumbnail_size == thumbnail_size and key not in invalidate:
                    items.append(
                        (key, Entry(path, size, timestamp, thumbnail_size)))
                    self.total_size += size
                else:
                    self._do_delete(path)
        except EnvironmentError as err:
            self.log('Failed to read thumbnail cache dir:', as_unicode(err))

        self.items = OrderedDict(
            sorted(items, key=lambda x: order.get(hash(x[0]), 0)))
        self._apply_size()
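
The cache stores all entry metadata in the file path itself. A sketch of the decode half of that round trip (the path format comes from the insert() example earlier; decode_cache_entry is an illustrative name):

def decode_cache_entry(entry):
    # 'uuid/subdir/book_id-timestamp-size-WxH' -> key plus metadata
    uuid, subdir, name = entry.split('/')
    book_id, timestamp, size, thumb = name.split('-')
    width, _, height = thumb.partition('x')
    return ((uuid, int(book_id)), float(timestamp), int(size),
            (int(width), int(height)))

For example, decode_cache_entry('grp/7/1007-1633024800.25-2048-60x80') yields (('grp', 1007), 1633024800.25, 2048, (60, 80)).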
Example #45
0
    def identify(self,
                 log,
                 result_queue,
                 abort,
                 title=None,
                 authors=None,
                 identifiers={},
                 timeout=30):
        '''
        .. note::
            this method will retry without identifiers automatically if no
            match is found with identifiers.
        '''
        matches = []
        # Unlike the other metadata sources, if we have a shelfari id then we
        # do not need to fire a "search" at Shelfari.com. Instead we will be
        # able to go straight to the URL for that book.
        shelfari_id = identifiers.get('shelfari', None)
        isbn = check_isbn(identifiers.get('isbn', None))
        br = self.browser
        if shelfari_id:
            matches.append('%s/books/%s' % (Shelfari.BASE_URL, shelfari_id))
        else:
            query = self._create_query(log,
                                       title=title,
                                       authors=authors,
                                       identifiers=identifiers)
            if query is None:
                log.error('Insufficient metadata to construct query')
                return
            try:
                log.info('Querying: %s' % query)
                response = br.open_novisit(query, timeout=timeout)
                if isbn:
                    # Check whether we got redirected to a book page for ISBN searches.
                    # If we did, will use the url.
                    # If we didn't then treat it as no matches on Shelfari
                    location = response.geturl()
                    if '/search/' not in location:
                        log.info('ISBN match location: %r' % location)
                        matches.append(location)
            except Exception as e:
                err = 'Failed to make identify query: %r' % query
                log.exception(err)
                return as_unicode(e)

            # For ISBN based searches we have already done everything we need to
            # So anything from this point below is for title/author based searches.
            if not isbn:
                try:
                    raw = response.read().strip()
                    #open('E:\\t.html', 'wb').write(raw)
                    raw = raw.decode('utf-8', errors='replace')
                    if not raw:
                        log.error('Failed to get raw result for query: %r' %
                                  query)
                        return
                    root = fromstring(clean_ascii_chars(raw))
                except:
                    msg = 'Failed to parse shelfari page for query: %r' % query
                    log.exception(msg)
                    return msg
                # Now grab the first value from the search results, provided the
                # title and authors appear to be for the same book
                self._parse_search_results(log, title, authors, root, matches,
                                           timeout)

        if abort.is_set():
            return

        if not matches:
            # If there's no matches, normally we would try to query with less info, but shelfari's search is already fuzzy
            log.error('No matches found with query: %r' % query)
            return

        # Setup worker threads to look more thoroughly at matching books to extract information
        workers = [
            Worker(url, result_queue, br, log, i, self)
            for i, url in enumerate(matches)
        ]

        # Start the workers and stagger them so we don't hammer shelfari :)
        for w in workers:
            w.start()
            time.sleep(0.1)

        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None
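
The start-stagger-join choreography at the end is shared by nearly every identify() in this collection. A reusable sketch, assuming Thread-like workers and a threading.Event for abort (run_workers is an illustrative name):

import time

def run_workers(workers, abort, stagger=0.1, poll=0.2):
    # Start workers with a small delay between each so the remote site
    # is not hammered, then poll until all finish or abort is set.
    for w in workers:
        w.start()
        time.sleep(stagger)
    while not abort.is_set():
        alive = False
        for w in workers:
            w.join(poll)
            if abort.is_set():
                break
            if w.is_alive():
                alive = True
        if not alive:
            break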
Example #46
0
    def parse_comments(self, root, raw):
        from urllib import unquote
        ans = ''
        ns = tuple(self.selector('#bookDescription_feature_div noscript'))
        if ns:
            ns = ns[0]
            if len(ns) == 0 and ns.text:
                import html5lib
                # html5lib parsed noscript as CDATA
                ns = html5lib.parseFragment('<div>%s</div>' % (ns.text), treebuilder='lxml', namespaceHTMLElements=False)[0]
            else:
                ns.tag = 'div'
            ans = self._render_comments(ns)
        else:
            desc = root.xpath('//div[@id="ps-content"]/div[@class="content"]')
            if desc:
                ans = self._render_comments(desc[0])

        desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
        if desc:
            ans += self._render_comments(desc[0])
        else:
            # Idiot chickens from amazon strike again. This data is now stored
            # in a JS variable inside a script tag URL encoded.
            m = re.search(br'var\s+iframeContent\s*=\s*"([^"]+)"', raw)
            if m is not None:
                try:
                    import html5lib  # not imported on this branch above
                    text = unquote(m.group(1)).decode('utf-8')
                    nr = html5lib.parse(text, treebuilder='lxml', namespaceHTMLElements=False)
                    desc = nr.xpath('//div[@id="productDescription"]/*[@class="content"]')
                    if desc:
                        ans += self._render_comments(desc[0])
                except Exception as e:
                    self.log.warn('Parsing of obfuscated product description failed with error: %s' % as_unicode(e))

        return ans
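
A trimmed, version-agnostic sketch of the obfuscated-description fallback above (the helper name is illustrative):

import re
try:
    from urllib.parse import unquote
except ImportError:
    from urllib import unquote  # Python 2

def extract_iframe_description(raw):
    # Pull the URL-encoded description out of the inline JS variable.
    m = re.search(br'var\s+iframeContent\s*=\s*"([^"]+)"', raw)
    if m is None:
        return None
    return unquote(m.group(1).decode('ascii'))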
Example #47
0
def mobi_exploder(path, tdir, question=lambda x: True):
    from calibre.ebooks.mobi.tweak import explode, BadFormat
    try:
        return explode(path, tdir, question=question)
    except BadFormat as e:
        raise Error(as_unicode(e))
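
The wrap-and-rethrow shape of mobi_exploder generalizes to a tiny decorator (a sketch; translate_errors is not from the source):

import functools

def translate_errors(catch, raise_as):
    # Convert a library-specific exception type into our own Error.
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            try:
                return fn(*args, **kwargs)
            except catch as e:
                raise raise_as(str(e))
        return wrapper
    return decorator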
Example #48
0
    def identify(
            self,
            log,
            result_queue,
            abort,
            title=None,
            authors=None,  # {{{
            identifiers={},
            timeout=30):
        import json

        br = self.browser
        br.addheaders = [
            ('Referer', 'https://www.edelweiss.plus/'),
            ('X-Requested-With', 'XMLHttpRequest'),
            ('Cache-Control', 'no-cache'),
            ('Pragma', 'no-cache'),
        ]
        if 'edelweiss' in identifiers:
            items = [identifiers['edelweiss']]
        else:
            log.error(
                'Currently Edelweiss returns random books for search queries')
            return
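            # NOTE: everything below this return is currently unreachable;
            # it is kept for when Edelweiss search becomes usable again.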
            query = self.create_query(log,
                                      title=title,
                                      authors=authors,
                                      identifiers=identifiers)
            if not query:
                log.error('Insufficient metadata to construct query')
                return
            log('Using query URL:', query)
            try:
                raw = br.open(query, timeout=timeout).read().decode('utf-8')
            except Exception as e:
                log.exception('Failed to make identify query: %r' % query)
                return as_unicode(e)
            items = re.search(r'window[.]items\s*=\s*(.+?);', raw)
            if items is None:
                log.error('Failed to get list of matching items')
                log.debug('Response text:')
                log.debug(raw)
                return
            items = json.loads(items.group(1))

        if (not items and identifiers and title and authors
                and not abort.is_set()):
            return self.identify(log,
                                 result_queue,
                                 abort,
                                 title=title,
                                 authors=authors,
                                 timeout=timeout)

        if not items:
            return

        workers = []
        items = items[:5]
        for i, item in enumerate(get_basic_data(self.browser, log, *items)):
            sku = item['sku']
            for isbn in item['isbns']:
                self.cache_isbn_to_identifier(isbn, sku)
            if item['cover']:
                self.cache_identifier_to_cover_url(sku, item['cover'])
            fmt = item['format'].lower()
            if 'audio' in fmt or 'mp3' in fmt:
                continue  # Audio-book, ignore
            workers.append(
                Worker(item, i, result_queue, br.clone_browser(), timeout, log,
                       self))

        if not workers:
            return

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break
Example #49
0
    def identify(self,
                 log,
                 result_queue,
                 abort,
                 title=None,
                 authors=None,
                 identifiers={},
                 timeout=30,
                 nested=False):
        matches = []

        CBDB_id = identifiers.get('cbdb', None)
        isbn = check_isbn(identifiers.get('isbn', None))
        br = self.browser

        if CBDB_id:
            matches.append(BASE_BOOK_URL % (BASE_URL, CBDB_id))
        else:
            query = self.create_query(log,
                                      title=title,
                                      authors=authors,
                                      identifiers=identifiers)
            if query is None:
                log.error('Insufficient metadata to construct query')
                return
            try:
                log.info('Querying: %s' % query)
                response = br.open_novisit(query, timeout=timeout)
                if isbn:
                    # Check whether we got redirected to a book page for ISBN searches.
                    # If we did, will use the url.
                    # If we didn't then treat it as no matches on CBDB
                    location = response.geturl()
                    if '/kniha-' in location:
                        log.info('ISBN match location: %r' % location)
                        matches.append(location)
            except IOError as e:
                err = 'Connection problem. Check your Internet connection'
                log.warning(err)
                return as_unicode(e)

            except Exception as e:
                err = 'Failed to make identify query: %r' % query
                # testing w/o inet
                log.exception(err)
                return as_unicode(e)

            # For ISBN based searches we have already done everything we need to
            # So anything from this point below is for title/author based searches.
            # CBDB doesn't redirect anymore when there's just one match
            if not isbn or not matches:
                try:
                    raw = response.read().strip()
                    # open('E:\\t.html', 'wb').write(raw)
                    # raw = open('S:\\t.html', 'rb').read()
                    raw = raw.decode('utf-8', errors='replace')
                    if not raw:
                        log.error('Failed to get raw result for query: %r' %
                                  query)
                        return

                    cln = clean_ascii_chars(raw)
                    idxs = cln.find('<!DOCTYPE')
                    if idxs == -1:
                        log.error('Failed to find HTML document')
                        return

                    vld = cln[idxs:]
                    # log.info(vld)

                    idxs = vld.find("<head>")
                    if idxs == -1:
                        log.error('Failed to find HEAD element')
                        return

                    # <!DOCTYPE .. <head>
                    hdr = vld[:idxs]

                    idxs = vld.find('<h2>Nalezeno')
                    if idxs == -1:
                        log.error('Incorrect document structure 1')
                        return

                    idxe = vld.find('</h2>', idxs)
                    if idxe == -1:
                        log.error('Incorrect document structure 2')
                        return

                    arr = vld[idxs:idxe].split(':')
                    if len(arr) != 2:
                        log.error('Incorrect document structure 3')
                        return

                    cnt = int(arr[1])
                    # a publication found
                    if cnt != 0:
                        hdr += '<HEAD/>' + '<BODY>' + \
                            '<H3>' + str(cnt) + '</H3>'

                        idxs = vld.find('<table', idxe)
                        if idxs == -1:
                            log.error('Incorrect document structure 11')
                            return

                        idxe = vld.find('</table>', idxs)
                        if idxe == -1:
                            log.error('Incorrect document structure 12')
                            return

                        hdr += vld[idxs:(idxe + 8)] + '</BODY>' + '</HTML>'

                        # rebuild HTML to contain just relevant data
                        # first line ~ result count
                        # table ~ results
                        vld = hdr
                    else:
                        # nothing found, so send an empty HTML
                        vld = '<HTML/>'

                    # log.info('vld')
                    # log.info(vld)
                    root = fromstring(vld)

                except:
                    msg = 'Failed to parse CBDB page for query: %r' % query
                    log.exception(msg)
                    return msg

                # Now grab values from the search results, provided the
                # title and authors appear to be for the same book.
                # ISBN searches will of course return at most one result.
                if isbn:
                    self._parse_isbn_search_results(log, root, matches)
                else:
                    self._parse_search_results(log, title, authors, root,
                                               matches, timeout)

        if abort.is_set():
            return

        if not matches:
            if nested:
                return

            log.info('No matches found, trying to strip accents')

            if (not self.identify(log,
                                  result_queue,
                                  abort,
                                  title=self.strip_accents(title),
                                  authors=self.strip_accents(authors),
                                  timeout=30,
                                  nested=True)):
                log.info('No matches found, trying to strip numbers')

                if (not self.identify(log,
                                      result_queue,
                                      abort,
                                      title=self.strip_accents(
                                          title.rstrip(string.digits)),
                                      authors=self.strip_accents(authors),
                                      timeout=30,
                                      nested=True)):
                    log.error('No matches found with query: %r' % query)

            return

        # log.info('Lets process matches ...')
        from calibre_plugins.CBDB.worker import Worker
        workers = [
            Worker(url, result_queue, br, log, i, self)
            for i, url in enumerate(matches)
        ]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None
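
The repeated find()/find() slicing in the parser above can be factored into one helper (a sketch; slice_between is an illustrative name):

def slice_between(text, start_marker, end_marker, pos=0):
    # Return (substring including both markers, index past the end),
    # or (None, -1) when either marker is missing.
    i = text.find(start_marker, pos)
    if i == -1:
        return None, -1
    j = text.find(end_marker, i)
    if j == -1:
        return None, -1
    end = j + len(end_marker)
    return text[i:end], end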
Example #50
0
def get_usb_info(usbdev, debug=False):  # {{{
    '''
    Get the USB info (manufacturer/product names and serial number).
    Requires communication with the hub the device is connected to.

    :param usbdev: A usb device as returned by :func:`scan_usb_devices`
    '''
    ans = {}
    hub_map = {
        devinfo.DevInst: path
        for devinfo, path in DeviceSet(
            guid=GUID_DEVINTERFACE_USB_HUB).interfaces()
    }
    for parent in iterancestors(usbdev.devinst):
        parent_path = hub_map.get(parent)
        if parent_path is not None:
            break
    else:
        if debug:
            prints(
                'Cannot get USB info as parent of device is not a HUB or device has no parent (was probably disconnected)'
            )
        return ans
    for devlist, devinfo in DeviceSet(
            guid=GUID_DEVINTERFACE_USB_DEVICE).devices():
        if devinfo.DevInst == usbdev.devinst:
            device_port = get_device_registry_property(devlist, byref(devinfo),
                                                       SPDRP_ADDRESS)[1]
            break
    else:
        return ans
    if not device_port:
        if debug:
            prints(
                'Cannot get usb info as the SPDRP_ADDRESS property is not present in the registry (can happen with broken USB hub drivers)'
            )
        return ans
    handle = CreateFile(parent_path, GENERIC_READ | GENERIC_WRITE,
                        FILE_SHARE_READ | FILE_SHARE_WRITE, None,
                        OPEN_EXISTING, 0, None)
    try:
        buf, dd = get_device_descriptor(handle, device_port)
        if dd.idVendor == usbdev.vendor_id and dd.idProduct == usbdev.product_id and dd.bcdDevice == usbdev.bcd:
            # Don't need to read the language since we only care about English names
            # buf, langs = get_device_languages(handle, device_port)
            # print(111, langs)
            for index, name in ((dd.iManufacturer, 'manufacturer'),
                                (dd.iProduct, 'product'), (dd.iSerialNumber,
                                                           'serial_number')):
                if index:
                    try:
                        buf, ans[name] = get_device_string(handle,
                                                           device_port,
                                                           index,
                                                           buf=buf)
                    except OSError as err:
                        if debug:
                            # Note that I have observed that this fails
                            # randomly after some time of my Kindle being
                            # connected. Disconnecting and reconnecting causes
                            # it to start working again.
                            prints(
                                'Failed to read %s from device, with error: [%d] %s'
                                % (name, err.winerror, as_unicode(err)))
    finally:
        CloseHandle(handle)
    return ans
Example #51
0
 def format_errorstack(self, errs):
     return '\n'.join('%d:%s' % (code, as_unicode(msg))
                      for code, msg in errs)
Example #52
0
    def open(self, connected_device, library_uuid):
        self.dev = self._filesystem_cache = None

        try:
            self.dev = self.create_device(connected_device)
        except Exception as e:
            self.blacklisted_devices.add(connected_device)
            raise OpenFailed('Failed to open %s: Error: %s' %
                             (connected_device, as_unicode(e)))

        try:
            storage = sorted(self.dev.storage_info,
                             key=operator.itemgetter('id'))
        except self.libmtp.MTPError as e:
            if "The device has no storage information." in unicode_type(e):
                # This happens on newer Android devices while waiting for
                # the user to allow access. Apparently what happens is
                # that when the user clicks allow, the device disconnects
                # and re-connects as a new device.
                name = self.dev.friendly_name or ''
                if not name:
                    if connected_device.manufacturer:
                        name = connected_device.manufacturer
                    if connected_device.product:
                        name = name and (name + ' ')
                        name += connected_device.product
                    name = name or _('Unnamed device')
                raise OpenActionNeeded(
                    name,
                    _('The device {0} is not allowing connections.'
                      ' Unlock the screen on the {0}, tap "Allow" on any connection popup message you see,'
                      ' then either wait a minute or restart calibre. You might'
                      ' also have to change the mode of the USB connection on the {0}'
                      ' to "Media Transfer mode (MTP)" or similar.').format(
                          name), (name, self.dev.serial_number))
            raise

        storage = [x for x in storage if x.get('rw', False)]
        if not storage:
            self.blacklisted_devices.add(connected_device)
            raise OpenFailed('No storage found for device %s' %
                             (connected_device, ))
        snum = self.dev.serial_number
        if snum in self.prefs.get('blacklist', []):
            self.blacklisted_devices.add(connected_device)
            self.dev = None
            raise BlacklistedDevice(
                'The %s device has been blacklisted by the user' %
                (connected_device, ))
        self._main_id = storage[0]['id']
        self._carda_id = self._cardb_id = None
        if len(storage) > 1:
            self._carda_id = storage[1]['id']
        if len(storage) > 2:
            self._cardb_id = storage[2]['id']
        self.current_friendly_name = self.dev.friendly_name
        if not self.current_friendly_name:
            self.current_friendly_name = self.dev.model_name or _(
                'Unknown MTP device')
        self.current_serial_num = snum
        self.currently_connected_dev = connected_device
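
The storage-to-slot assignment near the end of open() as a standalone sketch (assign_storage_ids is illustrative):

def assign_storage_ids(storage):
    # Map up to three writable storage areas onto main/carda/cardb.
    ids = [s['id'] for s in storage]
    main_id = ids[0] if ids else None
    carda_id = ids[1] if len(ids) > 1 else None
    cardb_id = ids[2] if len(ids) > 2 else None
    return main_id, carda_id, cardb_id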
Example #53
0
    def identify(self,
                 log,
                 result_queue,
                 abort,
                 title=None,
                 authors=None,
                 identifiers={},
                 timeout=90):  # {{{
        from calibre.ebooks.chardet import xml_to_unicode
        from HTMLParser import HTMLParser
        from lxml import etree, html

        if not self.is_configured():
            return
        query = self.create_query(log,
                                  title=title,
                                  authors=authors,
                                  identifiers=identifiers)
        if not query:
            err = u'Insufficient metadata to construct query'
            log.error(err)
            return err

        try:
            raw = self.browser.open_novisit(query).read()
        except Exception as e:
            log.exception(u'Failed to make identify query: %r' % query)
            return as_unicode(e)

        try:
            doc = html.fromstring(xml_to_unicode(raw, verbose=True)[0])
            entries_block = doc.xpath(u'//div[@class="bSearchResult"]')

            if entries_block:
                entries = doc.xpath(
                    u'//div[contains(@itemprop, "itemListElement")]')
                # for entry in entries:
                #   log.debug('entries %s' % entree.tostring(entry))
                metadata = self.get_metadata(log, entries, title, authors,
                                             identifiers)
                self.get_all_details(log, metadata, abort, result_queue,
                                     identifiers, timeout)
            else:
                # Redirect page: trying to extract ozon_id from javascript data
                h = HTMLParser()
                entry_string = (h.unescape(
                    unicode(etree.tostring(doc, pretty_print=True))))
                id_title_pat = re.compile(
                    u'products":\[{"id":(\d{7}),"name":"([а-яА-Я :\-0-9]+)')
                # result containing ozon_id and entry_title
                entry_info = re.search(id_title_pat, entry_string)
                ozon_id = entry_info.group(1) if entry_info else None
                entry_title = entry_info.group(2) if entry_info else None

                if ozon_id:
                    metadata = self.to_metadata_for_single_entry(
                        log, ozon_id, entry_title, authors)
                    identifiers['ozon'] = ozon_id
                    self.get_all_details(log, [metadata],
                                         abort,
                                         result_queue,
                                         identifiers,
                                         timeout,
                                         cachedPagesDict={})
                else:
                    log.error('No SearchResults in Ozon.ru response found')

        except Exception as e:
            log.exception('Failed to parse identify results')
            return as_unicode(e)
Example #54
0
    def ws_data_received(self, data, opcode, frame_starting, frame_finished, is_final_frame_of_message):
        if opcode in CONTROL_CODES:
            return self.ws_control_frame(opcode, data)

        message_starting = self.current_recv_opcode is None
        if message_starting:
            if opcode == CONTINUATION:
                self.log.error('Client sent continuation frame with no message to continue')
                self.websocket_close(PROTOCOL_ERROR, 'Continuation frame without any message to continue')
                return
            self.current_recv_opcode = opcode
        elif frame_starting and opcode != CONTINUATION:
            self.log.error('Client sent continuation frame with non-zero opcode')
            self.websocket_close(PROTOCOL_ERROR, 'Continuation frame with non-zero opcode')
            return
        message_finished = frame_finished and is_final_frame_of_message
        if self.current_recv_opcode == TEXT:
            if message_starting:
                self.frag_decoder.reset()
            empty_data = len(data) == 0
            try:
                data = self.frag_decoder(data)
            except ValueError:
                self.frag_decoder.reset()
                self.log.error('Client sent undecodeable UTF-8')
                return self.websocket_close(INCONSISTENT_DATA, 'Not valid UTF-8')
            if message_finished:
                if (not data and not empty_data) or self.frag_decoder.state:
                    self.frag_decoder.reset()
                    self.log.error('Client sent undecodeable UTF-8')
                    return self.websocket_close(INCONSISTENT_DATA, 'Not valid UTF-8')
        if message_finished:
            self.current_recv_opcode = None
            self.frag_decoder.reset()
        try:
            self.handle_websocket_data(data, message_starting, message_finished)
        except Exception as err:
            self.log.exception('Error in WebSockets data handler:')
            self.websocket_close(UNEXPECTED_ERROR, 'Unexpected error in handler: %r' % as_unicode(err))
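
frag_decoder is not shown above; a plausible stand-in wraps an incremental UTF-8 decoder. Note that UnicodeDecodeError is a subclass of ValueError, which matches the except clause in the handler (this class is an assumption, not the calibre implementation):

import codecs

class Utf8FragmentDecoder(object):
    # Validates UTF-8 text that may be split across WebSocket frames.
    def __init__(self):
        self.reset()

    def reset(self):
        self._decoder = codecs.getincrementaldecoder('utf-8')()

    @property
    def state(self):
        # Non-empty while a multi-byte sequence is still incomplete.
        return self._decoder.getstate()[0]

    def __call__(self, data):
        return self._decoder.decode(data)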
Example #55
0
    def identify(self, log, result_queue, abort, title=None, authors=None,  # {{{
            identifiers={}, timeout=30):
        from urlparse import parse_qs

        book_url = self._get_book_url(identifiers.get('edelweiss', None))
        br = self.browser
        if book_url:
            entries = [(book_url, identifiers['edelweiss'])]
        else:
            entries = []
            query = self.create_query(log, title=title, authors=authors,
                    identifiers=identifiers)
            if not query:
                log.error('Insufficient metadata to construct query')
                return
            log('Using query URL:', query)
            try:
                raw = br.open_novisit(query, timeout=timeout).read()
            except Exception as e:
                log.exception('Failed to make identify query: %r'%query)
                return as_unicode(e)

            try:
                root = parse_html(raw)
            except Exception as e:
                log.exception('Failed to parse identify results')
                return as_unicode(e)
            from css_selectors import Select
            select = Select(root)
            has_isbn = check_isbn(identifiers.get('isbn', None)) is not None
            if not has_isbn:
                author_tokens = set(x.lower() for x in self.get_author_tokens(authors, only_first_author=True))
            for entry in select('div.listRow div.listRowMain'):
                a = entry.xpath('descendant::a[contains(@href, "sku=") and contains(@href, "productDetailPage.aspx")]')
                if not a:
                    continue
                href = a[0].get('href')
                prefix, qs = href.partition('?')[0::2]
                sku = parse_qs(qs).get('sku', None)
                if sku and sku[0]:
                    sku = sku[0]
                    div = tuple(select('div.sku.attGroup'))
                    if div:
                        text = astext(div[0])
                        isbns = [check_isbn(x.strip()) for x in text.split(',')]
                        for isbn in isbns:
                            if isbn:
                                self.cache_isbn_to_identifier(isbn, sku)
                    for img in entry.xpath('descendant::img[contains(@src, "/jacket_covers/thumbnail/")]'):
                        self.cache_identifier_to_cover_url(sku, img.get('src').replace('/thumbnail/', '/flyout/'))

                    div = tuple(select('div.format.attGroup'))
                    text = astext(div[0]).lower()
                    if 'audio' in text or 'mp3' in text:  # Audio-book, ignore
                        continue
                    if not has_isbn:
                        # edelweiss returns matches based only on title, so we
                        # filter by author manually
                        div = tuple(select('div.contributor.attGroup'))
                        try:
                            entry_authors = set(self.get_author_tokens([x.strip() for x in astext(div[0]).lower().split(',')]))
                        except IndexError:
                            entry_authors = set()
                        if not entry_authors.issuperset(author_tokens):
                            continue
                    entries.append((self._get_book_url(sku), sku))

        if (not entries and identifiers and title and authors and
                not abort.is_set()):
            return self.identify(log, result_queue, abort, title=title,
                    authors=authors, timeout=timeout)

        if not entries:
            return

        workers = [Worker(skul, url, i, result_queue, br.clone_browser(), timeout, log, self)
                   for i, (url, skul) in enumerate(entries[:5])]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break
Example #56
0
 def vacuum(self):
     try:
         self.db.vacuum()
     except Exception as e:
         import traceback
         self.error = (as_unicode(e), traceback.format_exc())
Example #57
0
def subset_all_fonts(container, font_stats, report):
    remove = set()
    total_old = total_new = 0
    changed = False
    for name, mt in iter_subsettable_fonts(container):
        chars = font_stats.get(name, set())
        with container.open(name, 'rb') as f:
            f.seek(0, os.SEEK_END)
            total_old += f.tell()
        if not chars:
            remove.add(name)
            report(_('Removed unused font: %s')%name)
            continue
        with container.open(name, 'r+b') as f:
            raw = f.read()
            try:
                font_name = get_font_names(raw)[-1]
            except Exception as e:
                container.log.warning(
                    'Corrupted font: %s, ignoring.  Error: %s'%(
                        name, as_unicode(e)))
                continue
            warnings = []
            container.log('Subsetting font: %s'%(font_name or name))
            try:
                nraw, old_sizes, new_sizes = subset(raw, chars,
                                                warnings=warnings)
            except UnsupportedFont as e:
                container.log.warning(
                    'Unsupported font: %s, ignoring.  Error: %s'%(
                        name, as_unicode(e)))
                continue

            for w in warnings:
                container.log.warn(w)
            olen = sum(itervalues(old_sizes))
            nlen = sum(itervalues(new_sizes))
            total_new += len(nraw)
            if nlen == olen:
                report(_('The font %s was already subset')%font_name)
            else:
                report(_('Decreased the font {0} to {1} of its original size').format(
                    font_name, ('%.1f%%' % (nlen/olen * 100))))
                changed = True
            f.seek(0), f.truncate(), f.write(nraw)

    for name in remove:
        container.remove_item(name)
        changed = True

    if remove:
        for name, mt in iteritems(container.mime_map):
            if mt in OEB_STYLES:
                sheet = container.parsed(name)
                if remove_font_face_rules(container, sheet, remove, name):
                    container.dirty(name)
            elif mt in OEB_DOCS:
                for style in XPath('//h:style')(container.parsed(name)):
                    if style.get('type', 'text/css') == 'text/css' and style.text:
                        sheet = container.parse_css(style.text, name)
                        if remove_font_face_rules(container, sheet, remove, name):
                            style.text = css_text(sheet)
                            container.dirty(name)
    if total_old > 0:
        report(_('Reduced total font size to %.1f%% of original')%(
            total_new/total_old*100))
    else:
        report(_('No embedded fonts found'))
    return changed
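
The final report divides two byte totals; under Python 2 integer division this truncates to 0% unless the module enables true division. A defensive restatement of that arithmetic (size_report is illustrative):

def size_report(total_old, total_new):
    # Guard against division by zero and integer truncation.
    if total_old <= 0:
        return 'No embedded fonts found'
    pct = (100.0 * total_new) / total_old
    return 'Reduced total font size to %.1f%% of original' % pct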
Example #58
0
    def identify(self,
                 log,
                 result_queue,
                 abort,
                 title=None,
                 authors=None,
                 identifiers={},
                 timeout=30):
        '''
        Note this method will retry without identifiers automatically if no
        match is found with identifiers.
        '''
        matches = []
        # Unlike the other metadata sources, if we have a kyobobook id then we
        # do not need to fire a "search" at kyobobook.com. Instead we will be
        # able to go straight to the URL for that book.
        kyobobook_id = identifiers.get('kyobobook', None)
        isbn = check_isbn(identifiers.get('isbn', None))
        br = self.browser
        if kyobobook_id:
            matches.append('%s/product/detailViewKor.laf?barcode=%s' %
                           (Kyobobook.BASE_URL, kyobobook_id))
        else:
            query = self.create_query(log,
                                      title=title,
                                      authors=authors,
                                      identifiers=identifiers)
            if query is None:
                log.error('Insufficient metadata to construct query')
                return
            try:
                log.info('Querying: %s' % query)
                response = br.open_novisit(query, timeout=timeout)

                try:
                    raw = response.read().strip()
                    # Kyobobook serves its pages in euc-kr, not utf-8
                    raw = raw.decode('euc-kr', errors='ignore')
                    if not raw:
                        log.error('Failed to get raw result for query: %r' %
                                  query)
                        return
                    root = fromstring(clean_ascii_chars(raw))
                except Exception:
                    msg = 'Failed to parse kyobobook page for query: %r' % query
                    log.exception(msg)
                    return msg

                if isbn:
                    self._parse_search_isbn_results(log, isbn, root, matches,
                                                    timeout)

                # For ISBN based searches we have already done everything we
                # need to do, so anything from this point on is for
                # title/author based searches.
                if not isbn:
                    # Now grab the first value from the search results, provided the
                    # title and authors appear to be for the same book
                    self._parse_search_results(log, title, authors, root,
                                               matches, timeout)

            except Exception as e:
                err = 'Failed to make identify query: %r' % query
                log.exception(err)
                return as_unicode(e)

        if abort.is_set():
            return

        if not matches:
            if identifiers and title and authors:
                log.info(
                    'No matches found with identifiers, retrying using only'
                    ' title and authors')
                return self.identify(log,
                                     result_queue,
                                     abort,
                                     title=title,
                                     authors=authors,
                                     timeout=timeout)
            log.error('No matches found with query: %r' % query)
            return

        from calibre_plugins.kyobobook.worker import Worker
        workers = [
            Worker(url, result_queue, br, log, i, self)
            for i, url in enumerate(matches)
        ]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None
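
The start/poll loop above is a reusable abort-aware pattern: thread start-up
is staggered, then the threads are polled with short join() timeouts so an
abort Event is honoured within a fraction of a second. A self-contained
sketch of the same pattern (run_workers is a hypothetical name):

import threading
import time

def run_workers(workers, abort, stagger=0.1, poll=0.2):
    # Stagger start-up so all requests are not fired at once
    for w in workers:
        w.start()
        time.sleep(stagger)
    # Short joins keep the loop responsive to abort.set()
    while not abort.is_set():
        alive = False
        for w in workers:
            w.join(poll)
            if abort.is_set():
                return
            if w.is_alive():
                alive = True
        if not alive:
            break

# Usage: run_workers([threading.Thread(target=fn) for fn in tasks],
#                    threading.Event())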
Example #59
0
    def find_page_breaks(self, item):
        if self.page_break_selectors is None:
            self.page_break_selectors = set()
            stylesheets = [
                x.data for x in self.oeb.manifest if x.media_type in OEB_STYLES
            ]
            for rule in rules(stylesheets):
                before = getattr(
                    rule.style.getPropertyCSSValue('page-break-before'),
                    'cssText', '').strip().lower()
                after = getattr(
                    rule.style.getPropertyCSSValue('page-break-after'),
                    'cssText', '').strip().lower()
                try:
                    if before and before not in {'avoid', 'auto', 'inherit'}:
                        self.page_break_selectors.add(
                            (rule.selectorText, True))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty('page-break-before')
                except Exception:
                    pass
                try:
                    if after and after not in {'avoid', 'auto', 'inherit'}:
                        self.page_break_selectors.add(
                            (rule.selectorText, False))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty('page-break-after')
                except Exception:
                    pass
        page_breaks = set()
        if not self.page_break_selectors:
            return [], []
        select = Select(item.data)
        body = item.data.xpath('//h:body', namespaces=NAMESPACES)
        if not body:
            return [], []
        descendants = frozenset(body[0].iterdescendants('*'))
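        # Only elements inside <body> can be page-break targets; the
        # frozenset makes that membership check cheap in the loop below.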

        for selector, before in self.page_break_selectors:
            try:
                for elem in select(selector):
                    if elem in descendants and elem.tag.rpartition(
                            '}')[2].lower() not in {
                                'html', 'body', 'head', 'style', 'script',
                                'meta', 'link'
                            }:
                        elem.set('pb_before', '1' if before else '0')
                        page_breaks.add(elem)
            except SelectorError as err:
                self.log.warn(
                    'Ignoring page breaks specified with invalid CSS selector: %r (%s)'
                    % (selector, as_unicode(err)))

        for i, elem in enumerate(item.data.iter('*')):
            try:
                elem.set('pb_order', str(i))
            except TypeError:  # Can't set attributes on comment nodes etc.
                continue

        page_breaks = list(page_breaks)
        page_breaks.sort(key=lambda x: int(x.get('pb_order')))
        page_break_ids, page_breaks_ = [], []
        for i, x in enumerate(page_breaks):
            x.set('id', x.get('id', 'calibre_pb_%d' % i))
            id = x.get('id')
            try:
                xp = XPath('//*[@id="%s"]' % id)
            except:
                try:
                    xp = XPath("//*[@id='%s']" % id)
                except:
                    # The id contains both a quote and an apostrophe (or some
                    # other problematic character). Just replace it, since it
                    # is unlikely to work anywhere else either.
                    id = 'calibre_pb_%d' % i
                    x.set('id', id)
                    xp = XPath('//*[@id=%r]' % id)
            page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
            page_break_ids.append(id)

        for elem in item.data.iter(etree.Element):
            elem.attrib.pop('pb_order', False)
            elem.attrib.pop('pb_before', False)

        return page_breaks_, page_break_ids
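
The pb_order dance above is worth noting: CSS selector matches come back in
no particular order, so every element is first stamped with its document
order, the matched set is sorted by that stamp, and the temporary attributes
are stripped again. A stand-alone sketch of the same trick with lxml
(in_document_order is a hypothetical helper):

from lxml import etree

def in_document_order(root, matched):
    # Stamp every element with its position in document order
    for i, el in enumerate(root.iter('*')):
        el.set('tmp_order', str(i))
    ordered = sorted(matched, key=lambda el: int(el.get('tmp_order')))
    # Strip the temporary attribute again
    for el in root.iter('*'):
        el.attrib.pop('tmp_order', None)
    return ordered

root = etree.fromstring('<d><a/><b/><c/></d>')
print([el.tag for el in in_document_order(root, [root[2], root[0]])])
# -> ['a', 'c']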
Example #60
0
    def identify(
            self,
            log,
            result_queue,
            abort,
            title=None,
            authors=None,  # {{{
            identifiers={},
            timeout=30):
        '''
        Note this method will retry without identifiers automatically if no
        match is found with identifiers.
        '''
        from calibre.utils.cleantext import clean_ascii_chars
        from calibre.ebooks.chardet import xml_to_unicode
        from lxml.html import tostring
        import html5lib

        testing = getattr(self, 'running_a_test', False)

        query, domain = self.create_query(log,
                                          title=title,
                                          authors=authors,
                                          identifiers=identifiers)
        if query is None:
            log.error('Insufficient metadata to construct query')
            return
        br = self.browser
        if testing:
            print('Using user agent for amazon: %s' % self.user_agent)
        try:
            raw = br.open_novisit(query, timeout=timeout).read().strip()
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and \
                    e.getcode() == 404:
                log.error('Query malformed: %r' % query)
                return
            attr = getattr(e, 'args', None) or [None]
            if isinstance(attr[0], socket.timeout):
                msg = _('Amazon timed out. Try again later.')
                log.error(msg)
            else:
                msg = 'Failed to make identify query: %r' % query
                log.exception(msg)
            return as_unicode(msg)

        raw = clean_ascii_chars(
            xml_to_unicode(raw,
                           strip_encoding_pats=True,
                           resolve_entities=True)[0])

        if testing:
            import tempfile
            with tempfile.NamedTemporaryFile(prefix='amazon_results_',
                                             suffix='.html',
                                             delete=False) as f:
                f.write(raw.encode('utf-8'))
            print('Downloaded html for results page saved in', f.name)

        matches = []
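        # Amazon signals a missing page with a <title> that starts '404 - '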
        found = '<title>404 - ' not in raw

        if found:
            try:
                root = html5lib.parse(raw,
                                      treebuilder='lxml',
                                      namespaceHTMLElements=False)
            except Exception:
                msg = 'Failed to parse amazon page for query: %r' % query
                log.exception(msg)
                return msg

            errmsg = root.xpath('//*[@id="errorMessage"]')
            if errmsg:
                msg = tostring(errmsg[0], method='text',
                               encoding=unicode).strip()
                log.error(msg)
                # The error is almost always a "not found" error
                found = False

        if found:
            matches = self.parse_results_page(root)

        if abort.is_set():
            return

        if not matches:
            if identifiers and title and authors:
                log('No matches found with identifiers, retrying using only'
                    ' title and authors. Query: %r' % query)
                return self.identify(log,
                                     result_queue,
                                     abort,
                                     title=title,
                                     authors=authors,
                                     timeout=timeout)
            log.error('No matches found with query: %r' % query)
            return

        workers = [
            Worker(url,
                   result_queue,
                   br,
                   log,
                   i,
                   domain,
                   self,
                   testing=testing) for i, url in enumerate(matches)
        ]

        for w in workers:
            w.start()
            # Don't send all requests at the same time
            time.sleep(0.1)

        while not abort.is_set():
            a_worker_is_alive = False
            for w in workers:
                w.join(0.2)
                if abort.is_set():
                    break
                if w.is_alive():
                    a_worker_is_alive = True
            if not a_worker_is_alive:
                break

        return None
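
Both identify() implementations above funnel the raw bytes through a lenient
HTML parser before any XPath work. A minimal sketch of that step, using the
same html5lib call as Example #60 (parse_lenient is a hypothetical name):

import html5lib

def parse_lenient(raw_html):
    # namespaceHTMLElements=False keeps the lxml tree namespace-free, so
    # plain XPath expressions like //*[@id="x"] work without prefixes
    return html5lib.parse(raw_html,
                          treebuilder='lxml',
                          namespaceHTMLElements=False)

root = parse_lenient('<p id="x">hello')
print(root.xpath('//*[@id="x"]/text()'))  # ['hello']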