Example #1
    def run(self):
        job = ParallelJob('move_library',
                'Move library from %s to %s'%(self.from_, self.to),
                lambda x,y:x,
                args=[self.from_, self.to])
        server = Server(pool_size=1)
        server.add_job(job)

        while not job.is_finished:
            time.sleep(0.2)
            job.update(consume_notifications=False)
            while True:
                try:
                    title = job.notifications.get_nowait()[0]
                    self.count += 1
                    self.result_queue.put((float(self.count)/self.total, title))
                except Empty:
                    break

        job.update()
        server.close()
        if not job.result:
            self.failed = True
            self.details = job.details

        if os.path.exists(job.log_path):
            os.remove(job.log_path)
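
All of the examples on this page follow the same life cycle: create a Server, wrap a registered worker function in a ParallelJob, add the job, call job.update() repeatedly until job.is_finished, then close the server and inspect job.result, job.failed and job.details. The sketch below distills that skeleton; the import paths and the 'my_task' worker name are assumptions for illustration, not taken from any example above.

# Minimal sketch of the shared pattern (assumed imports and worker name).
import time

from calibre.utils.ipc.server import Server     # assumed import path
from calibre.utils.ipc.job import ParallelJob   # assumed import path

def run_single_job(args):
    server = Server(pool_size=1)            # one worker process is enough here
    job = ParallelJob('my_task',            # hypothetical registered worker name
                      'Human-readable description',
                      lambda x, y: x,       # "done" callback; a no-op in these examples
                      args=args)
    server.add_job(job)
    try:
        while not job.is_finished:
            time.sleep(0.2)
            job.update()                    # pull status from the worker process
    finally:
        server.close()
    if job.failed:
        raise RuntimeError(job.details)     # job.details carries the worker's log
    return job.result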
Example #2
def set_metadata(stream, mi):
    if not podofo:
        raise Unavailable(podofo_err)
    with TemporaryFile('_podofo_read.pdf') as inputf, \
            TemporaryFile('_podofo_write.pdf') as outputf:
        server = Server(pool_size=1)
        with open(inputf, 'wb') as f:
            shutil.copyfileobj(stream, f)
        job = ParallelJob('write_pdf_metadata', 'Write pdf metadata',
            lambda x,y:x,  args=[inputf, outputf, mi.title, mi.authors,
                mi.book_producer, mi.tags])
        server.add_job(job)
        while not job.is_finished:
            time.sleep(0.1)
            job.update()

        job.update()
        server.close()
        if job.failed:
            prints(job.details)
        elif job.result:
            with open(outputf, 'rb') as f:
                f.seek(0, 2)
                if f.tell() > 100:
                    f.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(f, stream)
                    stream.flush()
    stream.seek(0)
Example #3
def get_metadata(stream, cpath=None):
    if not podofo:
        raise Unavailable(podofo_err)
    pt = PersistentTemporaryFile('_podofo.pdf')
    pt.write(stream.read())
    pt.close()
    server = Server(pool_size=1)
    job = ParallelJob('read_pdf_metadata', 'Read pdf metadata',
        lambda x,y:x,  args=[pt.name, cpath])
    server.add_job(job)
    while not job.is_finished:
        time.sleep(0.1)
        job.update()

    job.update()
    server.close()
    if job.result is None:
        raise ValueError('Failed to read metadata: ' + job.details)
    title, authors, creator, tags, ok = job.result
    if not ok:
        print 'Failed to extract cover:'
        print job.details
    if title == '_':
        title = getattr(stream, 'name', _('Unknown'))
        title = os.path.splitext(title)[0]

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if tags:
        mi.tags = tags
    if os.path.exists(pt.name): os.remove(pt.name)
    if ok:
        mi.cover = cpath
    return mi
Example #4
def do_store_locations(books_to_scan, options, notification=lambda x,y:x):
    '''
    Master job, to launch child jobs to modify each ePub
    '''
    debug_print("start")
    server = Server()
    
    debug_print("options=%s" % (options))
    # Queue all the jobs
#     args = ['calibre_plugins.sonyutilities.jobs', 'do_sonyutilities_all',
    args = ['calibre_plugins.sonyutilities.jobs', 'do_store_bookmarks',
            (books_to_scan, options)]
#    debug_print("args=%s" % (args))
    debug_print("len(books_to_scan)=%d" % (len(books_to_scan)))
    job = ParallelJob('arbitrary', "Store locations", done=None, args=args)
    server.add_job(job)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, 'Reading device database')

    # dequeue the job results as they arrive, saving the results
    total = 1
    count = 0
    stored_locations = dict()
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            debug_print("Job not finished")
            continue
#        debug_print("Job finished")
        # A job really finished. Get the information.
        stored_locations = job.result
        import pydevd;pydevd.settrace()
#        book_id = job._book_id
#        stored_locations[book_id] = stored_location
        count += 1
        notification(float(count)/total, 'Storing locations')
        # Add this job's output to the current log
        #debug_print("Stored_location=", stored_locations)
        number_bookmarks = len(stored_locations) if stored_locations else 0
        debug_print("Stored_location count=%d" % number_bookmarks)
        debug_print(job.details)
        if count >= total:
            # All done!
            break

    server.close()
    debug_print("finished")
    # return the map as the job result
    return stored_locations, options
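
Example #4 (and Examples #14 and #20-#22 below) avoids the sleep-and-poll loop by blocking on server.changed_jobs_queue, which yields a job each time one changes state or emits a notification. A change does not mean completion, so the consumer must call job.update() and re-check job.is_finished before counting it as done. A hedged sketch of that loop, assuming the jobs have already been queued:

# Sketch of the event-driven wait used by the coordinator-style examples.
# Assumes `server` already has `total` ParallelJobs queued on it.
def wait_for_all(server, total, notification=lambda x, y: x):
    done, results = 0, []
    while done < total:
        job = server.changed_jobs_queue.get()   # blocks until some job changes
        job.update()                            # refresh its state and notifications
        if not job.is_finished:
            continue                            # just a progress notification
        done += 1
        results.append(job.result)
        notification(float(done) / total,
                     '%d of %d jobs finished' % (done, total))
    server.close()
    return results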
Example #5
    def run(self):
        jobs, ids = set([]), set([])
        for t in self.tasks:
            for b in t:
                ids.add(b[0])
        progress = Progress(self.result_queue, self.tdir)
        server = Server() if self.spare_server is None else self.spare_server
        try:
            for i, task in enumerate(self.tasks):
                job = ParallelJob('read_metadata',
                    'Read metadata (%d of %d)'%(i, len(self.tasks)),
                    lambda x,y:x,  args=[task, self.tdir])
                jobs.add(job)
                server.add_job(job)

            while not self.canceled:
                time.sleep(0.2)
                running = False
                for job in jobs:
                    while True:
                        try:
                            id = job.notifications.get_nowait()[-1]
                            if id in ids:
                                progress(id)
                                ids.remove(id)
                        except Empty:
                            break
                    job.update(consume_notifications=False)
                    if not job.is_finished:
                        running = True

                if not running:
                    break
        finally:
            server.close()
        time.sleep(1)

        if self.canceled:
            return

        for id in ids:
            progress(id)

        for job in jobs:
            if job.failed:
                prints(job.details)
            if os.path.exists(job.log_path):
                try:
                    os.remove(job.log_path)
                except:
                    pass
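
Examples #1, #5, #8 and #9 read per-item progress straight from job.notifications and therefore call job.update(consume_notifications=False), so that update() refreshes the job's state without discarding queued entries the caller has not read yet. What each queued tuple contains depends on the worker: Example #1 takes a title from index 0, Example #5 an id from index -1. A small sketch of that drain-then-update step:

# Sketch: drain a job's notification queue, then update without consuming.
from queue import Empty   # the Python 2 examples import Empty from Queue instead

def drain_notifications(job, handle):
    while True:
        try:
            payload = job.notifications.get_nowait()
        except Empty:
            break
        handle(payload)                          # caller decides what the tuple means
    job.update(consume_notifications=False)      # keep unread entries for the next pass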
Example #6
    def __init__(self):
        QAbstractTableModel.__init__(self)
        SearchQueryParser.__init__(self, ['all'])

        self.wait_icon = (QIcon(I('jobs.png')))
        self.running_icon = (QIcon(I('exec.png')))
        self.error_icon = (QIcon(I('dialog_error.png')))
        self.done_icon = (QIcon(I('ok.png')))

        self.jobs = []
        self.add_job = Dispatcher(self._add_job)
        self.server = Server(limit=int(config['worker_limit'] / 2.0),
                             enforce_cpu_limit=config['enforce_cpu_limit'])
        self.threaded_server = ThreadedJobServer()
        self.changed_queue = Queue()

        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update, type=Qt.QueuedConnection)
        self.timer.start(1000)
Example #7
def set_metadata(stream, mi):
    if not podofo:
        raise Unavailable(podofo_err)
    with TemporaryFile('_podofo_read.pdf') as inputf, \
            TemporaryFile('_podofo_write.pdf') as outputf:
        server = Server(pool_size=1)
        with open(inputf, 'wb') as f:
            shutil.copyfileobj(stream, f)
        job = ParallelJob('write_pdf_metadata',
                          'Write pdf metadata',
                          lambda x, y: x,
                          args=[
                              inputf, outputf, mi.title, mi.authors,
                              mi.book_producer, mi.tags
                          ])
        server.add_job(job)
        while not job.is_finished:
            time.sleep(0.1)
            job.update()

        job.update()
        server.close()
        if job.failed:
            prints(job.details)
        elif job.result:
            with open(outputf, 'rb') as f:
                f.seek(0, 2)
                if f.tell() > 100:
                    f.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(f, stream)
                    stream.flush()
    stream.seek(0)
Example #8
    def run(self):
        job = ParallelJob('move_library',
                          'Move library from %s to %s' % (self.from_, self.to),
                          lambda x, y: x,
                          args=[self.from_, self.to])
        server = Server(pool_size=1)
        server.add_job(job)

        while not job.is_finished:
            time.sleep(0.2)
            job.update(consume_notifications=False)
            while True:
                try:
                    title = job.notifications.get_nowait()[0]
                    self.count += 1
                    self.result_queue.put(
                        (float(self.count) / self.total, title))
                except Empty:
                    break

        job.update()
        server.close()
        if not job.result:
            self.failed = True
            self.details = job.details

        if os.path.exists(job.log_path):
            os.remove(job.log_path)
Example #9
    def run(self):
        jobs, ids = set([]), set([])
        for t in self.tasks:
            for b in t:
                ids.add(b[0])
        progress = Progress(self.result_queue, self.tdir)
        server = Server() if self.spare_server is None else self.spare_server
        try:
            for i, task in enumerate(self.tasks):
                job = ParallelJob('read_metadata',
                                  'Read metadata (%d of %d)' %
                                  (i, len(self.tasks)),
                                  lambda x, y: x,
                                  args=[task, self.tdir])
                jobs.add(job)
                server.add_job(job)

            while not self.canceled:
                time.sleep(0.2)
                running = False
                for job in jobs:
                    while True:
                        try:
                            id = job.notifications.get_nowait()[-1]
                            if id in ids:
                                progress(id)
                                ids.remove(id)
                        except Empty:
                            break
                    job.update(consume_notifications=False)
                    if not job.is_finished:
                        running = True

                if not running:
                    break
        finally:
            server.close()
        time.sleep(1)

        if self.canceled:
            return

        for id in ids:
            progress(id)

        for job in jobs:
            if job.failed:
                prints(job.details)
            if os.path.exists(job.log_path):
                try:
                    os.remove(job.log_path)
                except:
                    pass
Example #10
    def __init__(self):
        QAbstractTableModel.__init__(self)
        SearchQueryParser.__init__(self, ["all"])

        self.wait_icon = QVariant(QIcon(I("jobs.png")))
        self.running_icon = QVariant(QIcon(I("exec.png")))
        self.error_icon = QVariant(QIcon(I("dialog_error.png")))
        self.done_icon = QVariant(QIcon(I("ok.png")))

        self.jobs = []
        self.add_job = Dispatcher(self._add_job)
        self.server = Server(limit=int(config["worker_limit"] / 2.0), enforce_cpu_limit=config["enforce_cpu_limit"])
        self.threaded_server = ThreadedJobServer()
        self.changed_queue = Queue()

        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update, type=Qt.QueuedConnection)
        self.timer.start(1000)
Example #11
def process_pages(pages, opts, update, tdir):
    '''
    Render all identified comic pages.
    '''
    progress = Progress(len(pages), update)
    server = Server()
    jobs = []
    tasks = [(p, os.path.join(tdir, os.path.basename(p))) for p in pages]
    tasks = server.split(pages)
    for task in tasks:
        jobs.append(
            ParallelJob('render_pages', '', progress, args=[task, tdir, opts]))
        server.add_job(jobs[-1])
    while True:
        time.sleep(1)
        running = False
        for job in jobs:
            while True:
                try:
                    x = job.notifications.get_nowait()
                    progress(*x)
                except Empty:
                    break
            job.update()
            if not job.is_finished:
                running = True
        if not running:
            break
    server.close()
    ans, failures = [], []

    for job in jobs:
        if job.failed or job.result is None:
            raise Exception(
                _('Failed to process comic: \n\n%s') % job.log_file.read())
        pages, failures_ = job.result
        ans += pages
        failures += failures_
    return ans, failures
Example #12
def process_pages(pages, opts, update, tdir):
    '''
    Render all identified comic pages.
    '''
    progress = Progress(len(pages), update)
    server = Server()
    jobs = []
    tasks = [(p, os.path.join(tdir, os.path.basename(p))) for p in pages]
    tasks = server.split(pages)
    for task in tasks:
        jobs.append(ParallelJob('render_pages', '', progress,
                                args=[task, tdir, opts]))
        server.add_job(jobs[-1])
    while True:
        time.sleep(1)
        running = False
        for job in jobs:
            while True:
                try:
                    x = job.notifications.get_nowait()
                    progress(*x)
                except Empty:
                    break
            job.update()
            if not job.is_finished:
                running = True
        if not running:
            break
    server.close()
    ans, failures = [], []

    for job in jobs:
        if job.failed or job.result is None:
            raise Exception(_('Failed to process comic: \n\n%s')%
                    job.log_file.read())
        pages, failures_ = job.result
        ans += pages
        failures += failures_
    return ans, failures
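
process_pages (Examples #11 and #12) fans the work out over every available worker process: server.split(pages) partitions the page list into one chunk per worker, one ParallelJob is queued per chunk, and the poll loop keeps running until no job reports itself unfinished. A condensed sketch of that fan-out, with a hypothetical 'render_item' worker name and the same assumed import paths as in the first sketch:

# Sketch of the split-and-fan-out pattern (hypothetical worker name).
import time

from calibre.utils.ipc.job import ParallelJob   # assumed import path, as above

def fan_out(server, items, opts):
    jobs = []
    for chunk in server.split(items):    # one chunk of work items per worker process
        job = ParallelJob('render_item', '', lambda x, y: x, args=[chunk, opts])
        jobs.append(job)
        server.add_job(job)
    while True:
        time.sleep(1)
        running = False
        for job in jobs:
            job.update()
            if not job.is_finished:
                running = True
        if not running:
            break
    return [job.result for job in jobs]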
Example #13
def get_metadata(stream, cpath=None):
    if not podofo:
        raise Unavailable(podofo_err)
    pt = PersistentTemporaryFile('_podofo.pdf')
    pt.write(stream.read())
    pt.close()
    server = Server(pool_size=1)
    job = ParallelJob('read_pdf_metadata',
                      'Read pdf metadata',
                      lambda x, y: x,
                      args=[pt.name, cpath])
    server.add_job(job)
    while not job.is_finished:
        time.sleep(0.1)
        job.update()

    job.update()
    server.close()
    if job.result is None:
        raise ValueError('Failed to read metadata: ' + job.details)
    title, authors, creator, tags, ok = job.result
    if not ok:
        print 'Failed to extract cover:'
        print job.details
    if title == '_':
        title = getattr(stream, 'name', _('Unknown'))
        title = os.path.splitext(title)[0]

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if tags:
        mi.tags = tags
    if os.path.exists(pt.name): os.remove(pt.name)
    if ok:
        mi.cover = cpath
    return mi
Example #14
def do_download_worker(book_list,
                       options,
                       cpus,
                       merge=False,
                       notification=lambda x,y:x):
    '''
    Master job, to launch child jobs to extract ISBN for a set of books
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process
    '''
    server = Server(pool_size=cpus)

    logger.info(options['version'])
    total = 0
    alreadybad = []
    # Queue all the jobs
    logger.info("Adding jobs for URLs:")
    for book in book_list:
        logger.info("%s"%book['url'])
        if book['good']:
            total += 1
            args = ['calibre_plugins.fanficfare_plugin.jobs',
                    'do_download_for_worker',
                    (book,options,merge)]
            job = ParallelJob('arbitrary_n',
                              "url:(%s) id:(%s)"%(book['url'],book['calibre_id']),
                              done=None,
                              args=args)
            job._book = book
            server.add_job(job)
        else:
            # was already bad before the subprocess ever started.
            alreadybad.append(book)
    
    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        book_list.remove(job._book)
        book_list.append(job.result)
        book_id = job._book['calibre_id']
        count = count + 1
        notification(float(count)/total, '%d of %d stories finished downloading'%(count,total))
        # Add this job's output to the current log
        logger.info('Logfile for book ID %s (%s)'%(book_id, job._book['title']))
        logger.info(job.details)

        if count >= total:
            ## ordering first by good vs bad, then by listorder.
            good_list = filter(lambda x : x['good'], book_list)
            bad_list = filter(lambda x : not x['good'], book_list)
            good_list = sorted(good_list,key=lambda x : x['listorder'])
            bad_list = sorted(bad_list,key=lambda x : x['listorder'])
            
            logger.info("\n"+_("Download Results:")+"\n%s\n"%("\n".join([ "%(url)s %(comment)s" % book for book in good_list+bad_list])))
            
            logger.info("\n"+_("Successful:")+"\n%s\n"%("\n".join([book['url'] for book in good_list])))
            logger.info("\n"+_("Unsuccessful:")+"\n%s\n"%("\n".join([book['url'] for book in bad_list])))
            break

    server.close()
    
    # return the book list as the job result
    return book_list
Example #15
class JobManager(QAbstractTableModel, AdaptSQP):  # {{{

    job_added = pyqtSignal(int)
    job_done = pyqtSignal(int)

    def __init__(self):
        QAbstractTableModel.__init__(self)
        SearchQueryParser.__init__(self, ['all'])

        self.wait_icon = (QIcon(I('jobs.png')))
        self.running_icon = (QIcon(I('exec.png')))
        self.error_icon = (QIcon(I('dialog_error.png')))
        self.done_icon = (QIcon(I('ok.png')))

        self.jobs = []
        self.add_job = Dispatcher(self._add_job)
        self.server = Server(limit=config['worker_limit'] // 2,
                             enforce_cpu_limit=config['enforce_cpu_limit'])
        self.threaded_server = ThreadedJobServer()
        self.changed_queue = Queue()

        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update,
                                   type=Qt.ConnectionType.QueuedConnection)
        self.timer.start(1000)

    def columnCount(self, parent=QModelIndex()):
        return 5

    def rowCount(self, parent=QModelIndex()):
        return len(self.jobs)

    def headerData(self, section, orientation, role):
        if role != Qt.ItemDataRole.DisplayRole:
            return None
        if orientation == Qt.Orientation.Horizontal:
            return ({
                0: _('Job'),
                1: _('Status'),
                2: _('Progress'),
                3: _('Running time'),
                4: _('Start time'),
            }.get(section, ''))
        else:
            return (section + 1)

    def show_tooltip(self, arg):
        widget, pos = arg
        QToolTip.showText(pos, self.get_tooltip())

    def get_tooltip(self):
        running_jobs = [j for j in self.jobs if j.run_state == j.RUNNING]
        waiting_jobs = [j for j in self.jobs if j.run_state == j.WAITING]
        lines = [
            ngettext('There is a running job:', 'There are {} running jobs:',
                     len(running_jobs)).format(len(running_jobs))
        ]
        for job in running_jobs:
            desc = job.description
            if not desc:
                desc = _('Unknown job')
            p = 100. if job.is_finished else job.percent
            lines.append('%s:  %.0f%% done' % (desc, p))
        l = ngettext('There is a waiting job', 'There are {} waiting jobs',
                     len(waiting_jobs)).format(len(waiting_jobs))
        lines.extend(['', l])
        for job in waiting_jobs:
            desc = job.description
            if not desc:
                desc = _('Unknown job')
            lines.append(desc)
        return '\n'.join(['calibre', ''] + lines)

    def data(self, index, role):
        try:
            if role not in (Qt.ItemDataRole.DisplayRole,
                            Qt.ItemDataRole.DecorationRole):
                return None
            row, col = index.row(), index.column()
            job = self.jobs[row]

            if role == Qt.ItemDataRole.DisplayRole:
                if col == 0:
                    desc = job.description
                    if not desc:
                        desc = _('Unknown job')
                    return (desc)
                if col == 1:
                    return (job.status_text)
                if col == 2:
                    p = 100. if job.is_finished else job.percent
                    return (p)
                if col == 3:
                    rtime = job.running_time
                    if rtime is None:
                        return None
                    return human_readable_interval(rtime)
                if col == 4 and job.start_time is not None:
                    return (strftime('%H:%M -- %d %b',
                                     time.localtime(job.start_time)))
            if role == Qt.ItemDataRole.DecorationRole and col == 0:
                state = job.run_state
                if state == job.WAITING:
                    return self.wait_icon
                if state == job.RUNNING:
                    return self.running_icon
                if job.killed or job.failed:
                    return self.error_icon
                return self.done_icon
        except:
            import traceback
            traceback.print_exc()
        return None

    def update(self):
        try:
            self._update()
        except BaseException:
            import traceback
            traceback.print_exc()

    def _update(self):
        # Update running time
        for i, j in enumerate(self.jobs):
            if j.run_state == j.RUNNING:
                idx = self.index(i, 3)
                self.dataChanged.emit(idx, idx)

        # Update parallel jobs
        jobs = set()
        while True:
            try:
                jobs.add(self.server.changed_jobs_queue.get_nowait())
            except Empty:
                break

        # Update device jobs
        while True:
            try:
                jobs.add(self.changed_queue.get_nowait())
            except Empty:
                break

        # Update threaded jobs
        while True:
            try:
                jobs.add(self.threaded_server.changed_jobs.get_nowait())
            except Empty:
                break

        if jobs:
            needs_reset = False
            for job in jobs:
                orig_state = job.run_state
                job.update()
                if orig_state != job.run_state:
                    needs_reset = True
                    if job.is_finished:
                        self.job_done.emit(len(self.unfinished_jobs()))
            if needs_reset:
                self.modelAboutToBeReset.emit()
                self.jobs.sort()
                self.modelReset.emit()
            else:
                for job in jobs:
                    idx = self.jobs.index(job)
                    self.dataChanged.emit(self.index(idx, 0),
                                          self.index(idx, 3))

        # Kill parallel jobs that have gone on too long
        try:
            wmax_time = gprefs['worker_max_time'] * 60
        except:
            wmax_time = 0

        if wmax_time > 0:
            for job in self.jobs:
                if isinstance(job, ParallelJob):
                    rtime = job.running_time
                    if (rtime is not None and rtime > wmax_time
                            and job.duration is None):
                        job.timed_out = True
                        self.server.kill_job(job)

    def _add_job(self, job):
        self.modelAboutToBeReset.emit()
        self.jobs.append(job)
        self.jobs.sort()
        self.job_added.emit(len(self.unfinished_jobs()))
        self.modelReset.emit()

    def done_jobs(self):
        return [j for j in self.jobs if j.is_finished]

    def unfinished_jobs(self):
        return [j for j in self.jobs if not j.is_finished]

    def row_to_job(self, row):
        return self.jobs[row]

    def rows_to_jobs(self, rows):
        return [self.jobs[row] for row in rows]

    def has_device_jobs(self, queued_also=False):
        for job in self.jobs:
            if isinstance(job, DeviceJob):
                if job.duration is None:  # Running or waiting
                    if (job.is_running or queued_also):
                        return True
        return False

    def has_jobs(self):
        for job in self.jobs:
            if job.is_running:
                return True
        return False

    def run_job(self,
                done,
                name,
                args=[],
                kwargs={},
                description='',
                core_usage=1):
        job = ParallelJob(name, description, done, args=args, kwargs=kwargs)
        job.core_usage = core_usage
        self.add_job(job)
        self.server.add_job(job)
        return job

    def run_threaded_job(self, job):
        self.add_job(job)
        self.threaded_server.add_job(job)

    def launch_gui_app(self, name, args=(), kwargs=None, description=''):
        job = ParallelJob(name,
                          description,
                          lambda x: x,
                          args=list(args),
                          kwargs=kwargs or {})
        self.server.run_job(job, gui=True, redirect_output=False)

    def _kill_job(self, job):
        if isinstance(job, ParallelJob):
            self.server.kill_job(job)
        elif isinstance(job, ThreadedJob):
            self.threaded_server.kill_job(job)
        else:
            job.kill_on_start = True

    def hide_jobs(self, rows):
        for r in rows:
            self.jobs[r].hidden_in_gui = True
        for r in rows:
            self.dataChanged.emit(self.index(r, 0), self.index(r, 0))

    def show_hidden_jobs(self):
        for j in self.jobs:
            j.hidden_in_gui = False
        for r in range(len(self.jobs)):
            self.dataChanged.emit(self.index(r, 0), self.index(r, 0))

    def kill_job(self, job, view):
        if isinstance(job, DeviceJob):
            return error_dialog(
                view, _('Cannot kill job'),
                _('Cannot kill jobs that communicate with the device')).exec_(
                )
        if job.duration is not None:
            return error_dialog(view, _('Cannot kill job'),
                                _('Job has already run')).exec_()
        if not getattr(job, 'killable', True):
            return error_dialog(view,
                                _('Cannot kill job'),
                                _('This job cannot be stopped'),
                                show=True)
        self._kill_job(job)

    def kill_multiple_jobs(self, jobs, view):
        devjobs = [j for j in jobs if isinstance(j, DeviceJob)]
        if devjobs:
            error_dialog(
                view, _('Cannot kill job'),
                _('Cannot kill jobs that communicate with the device')).exec_(
                )
            jobs = [j for j in jobs if not isinstance(j, DeviceJob)]
        jobs = [j for j in jobs if j.duration is None]
        unkillable = [j for j in jobs if not getattr(j, 'killable', True)]
        if unkillable:
            names = '\n'.join(as_unicode(j.description) for j in unkillable)
            error_dialog(
                view,
                _('Cannot kill job'),
                _('Some of the jobs cannot be stopped. Click "Show details"'
                  ' to see the list of unstoppable jobs.'),
                det_msg=names,
                show=True)
            jobs = [j for j in jobs if getattr(j, 'killable', True)]
        jobs = [j for j in jobs if j.duration is None]
        for j in jobs:
            self._kill_job(j)

    def kill_all_jobs(self):
        for job in self.jobs:
            if (isinstance(job, DeviceJob) or job.duration is not None
                    or not getattr(job, 'killable', True)):
                continue
            self._kill_job(job)

    def terminate_all_jobs(self):
        self.server.killall()
        for job in self.jobs:
            if (isinstance(job, DeviceJob) or job.duration is not None
                    or not getattr(job, 'killable', True)):
                continue
            if not isinstance(job, ParallelJob):
                self._kill_job(job)

    def universal_set(self):
        return {
            i
            for i, j in enumerate(self.jobs)
            if not getattr(j, 'hidden_in_gui', False)
        }

    def get_matches(self, location, query, candidates=None):
        if candidates is None:
            candidates = self.universal_set()
        ans = set()
        if not query:
            return ans
        query = lower(query)
        for j in candidates:
            job = self.jobs[j]
            if job.description and query in lower(job.description):
                ans.add(j)
        return ans

    def find(self, query):
        query = query.strip()
        rows = self.parse(query)
        return rows
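
Examples #15 and #16 show the GUI side: JobManager is a Qt table model that owns a Server and a ThreadedJobServer, drains their changed-job queues on a one-second QTimer, and exposes run_job() as the entry point other GUI code calls. A hedged usage sketch follows; the gui.job_manager attribute, the 'fetch_news' worker name, the args value and the callback signature are assumptions for illustration, not confirmed by the examples above.

# Hypothetical caller of JobManager.run_job; names and signature are assumed.
def fetch_done(job):                      # completion callback, assumed to receive the job
    if job.failed:
        print(job.details)
        return
    print('result:', job.result)

job = gui.job_manager.run_job(Dispatcher(fetch_done),   # bounce callback to the GUI thread
                              'fetch_news',             # hypothetical worker name
                              args=['recipe.recipe'],   # hypothetical worker arguments
                              description='Fetch news')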
Example #16
class JobManager(QAbstractTableModel, SearchQueryParser):  # {{{

    job_added = pyqtSignal(int)
    job_done  = pyqtSignal(int)

    def __init__(self):
        QAbstractTableModel.__init__(self)
        SearchQueryParser.__init__(self, ['all'])

        self.wait_icon     = QVariant(QIcon(I('jobs.png')))
        self.running_icon  = QVariant(QIcon(I('exec.png')))
        self.error_icon    = QVariant(QIcon(I('dialog_error.png')))
        self.done_icon     = QVariant(QIcon(I('ok.png')))

        self.jobs          = []
        self.add_job       = Dispatcher(self._add_job)
        self.server        = Server(limit=int(config['worker_limit']/2.0),
                                enforce_cpu_limit=config['enforce_cpu_limit'])
        self.threaded_server = ThreadedJobServer()
        self.changed_queue = Queue()

        self.timer         = QTimer(self)
        self.timer.timeout.connect(self.update, type=Qt.QueuedConnection)
        self.timer.start(1000)

    def columnCount(self, parent=QModelIndex()):
        return 5

    def rowCount(self, parent=QModelIndex()):
        return len(self.jobs)

    def headerData(self, section, orientation, role):
        if role != Qt.DisplayRole:
            return NONE
        if orientation == Qt.Horizontal:
            return QVariant({
              0: _('Job'),
              1: _('Status'),
              2: _('Progress'),
              3: _('Running time'),
              4: _('Start time'),
            }.get(section, ''))
        else:
            return QVariant(section+1)

    def show_tooltip(self, arg):
        widget, pos = arg
        QToolTip.showText(pos, self.get_tooltip())

    def get_tooltip(self):
        running_jobs = [j for j in self.jobs if j.run_state == j.RUNNING]
        waiting_jobs = [j for j in self.jobs if j.run_state == j.WAITING]
        lines = [_('There are %d running jobs:')%len(running_jobs)]
        for job in running_jobs:
            desc = job.description
            if not desc:
                desc = _('Unknown job')
            p = 100. if job.is_finished else job.percent
            lines.append('%s:  %.0f%% done'%(desc, p))
        lines.extend(['', _('There are %d waiting jobs:')%len(waiting_jobs)])
        for job in waiting_jobs:
            desc = job.description
            if not desc:
                desc = _('Unknown job')
            lines.append(desc)
        return '\n'.join(['calibre', '']+ lines)

    def data(self, index, role):
        try:
            if role not in (Qt.DisplayRole, Qt.DecorationRole):
                return NONE
            row, col = index.row(), index.column()
            job = self.jobs[row]

            if role == Qt.DisplayRole:
                if col == 0:
                    desc = job.description
                    if not desc:
                        desc = _('Unknown job')
                    return QVariant(desc)
                if col == 1:
                    return QVariant(job.status_text)
                if col == 2:
                    p = 100. if job.is_finished else job.percent
                    return QVariant(p)
                if col == 3:
                    rtime = job.running_time
                    if rtime is None:
                        return NONE
                    return QVariant('%dm %ds'%(int(rtime)//60, int(rtime)%60))
                if col == 4 and job.start_time is not None:
                    return QVariant(time.strftime('%H:%M -- %d %b', time.localtime(job.start_time)))
            if role == Qt.DecorationRole and col == 0:
                state = job.run_state
                if state == job.WAITING:
                    return self.wait_icon
                if state == job.RUNNING:
                    return self.running_icon
                if job.killed or job.failed:
                    return self.error_icon
                return self.done_icon
        except:
            import traceback
            traceback.print_exc()
        return NONE

    def update(self):
        try:
            self._update()
        except BaseException:
            import traceback
            traceback.print_exc()

    def _update(self):
        # Update running time
        for i, j in enumerate(self.jobs):
            if j.run_state == j.RUNNING:
                idx = self.index(i, 3)
                self.dataChanged.emit(idx, idx)

        # Update parallel jobs
        jobs = set([])
        while True:
            try:
                jobs.add(self.server.changed_jobs_queue.get_nowait())
            except Empty:
                break

        # Update device jobs
        while True:
            try:
                jobs.add(self.changed_queue.get_nowait())
            except Empty:
                break

        # Update threaded jobs
        while True:
            try:
                jobs.add(self.threaded_server.changed_jobs.get_nowait())
            except Empty:
                break

        if jobs:
            needs_reset = False
            for job in jobs:
                orig_state = job.run_state
                job.update()
                if orig_state != job.run_state:
                    needs_reset = True
                    if job.is_finished:
                        self.job_done.emit(len(self.unfinished_jobs()))
            if needs_reset:
                self.layoutAboutToBeChanged.emit()
                self.jobs.sort()
                self.layoutChanged.emit()
            else:
                for job in jobs:
                    idx = self.jobs.index(job)
                    self.dataChanged.emit(
                        self.index(idx, 0), self.index(idx, 3))

        # Kill parallel jobs that have gone on too long
        try:
            wmax_time = gprefs['worker_max_time'] * 60
        except:
            wmax_time = 0

        if wmax_time > 0:
            for job in self.jobs:
                if isinstance(job, ParallelJob):
                    rtime = job.running_time
                    if (rtime is not None and rtime > wmax_time and
                            job.duration is None):
                        job.timed_out = True
                        self.server.kill_job(job)

    def _add_job(self, job):
        self.layoutAboutToBeChanged.emit()
        self.jobs.append(job)
        self.jobs.sort()
        self.job_added.emit(len(self.unfinished_jobs()))
        self.layoutChanged.emit()

    def done_jobs(self):
        return [j for j in self.jobs if j.is_finished]

    def unfinished_jobs(self):
        return [j for j in self.jobs if not j.is_finished]

    def row_to_job(self, row):
        return self.jobs[row]

    def has_device_jobs(self, queued_also=False):
        for job in self.jobs:
            if isinstance(job, DeviceJob):
                if job.duration is None:  # Running or waiting
                    if (job.is_running or queued_also):
                        return True
        return False

    def has_jobs(self):
        for job in self.jobs:
            if job.is_running:
                return True
        return False

    def run_job(self, done, name, args=[], kwargs={},
                           description='', core_usage=1):
        job = ParallelJob(name, description, done, args=args, kwargs=kwargs)
        job.core_usage = core_usage
        self.add_job(job)
        self.server.add_job(job)
        return job

    def run_threaded_job(self, job):
        self.add_job(job)
        self.threaded_server.add_job(job)

    def launch_gui_app(self, name, args=[], kwargs={}, description=''):
        job = ParallelJob(name, description, lambda x: x,
                args=args, kwargs=kwargs)
        self.server.run_job(job, gui=True, redirect_output=False)

    def _kill_job(self, job):
        if isinstance(job, ParallelJob):
            self.server.kill_job(job)
        elif isinstance(job, ThreadedJob):
            self.threaded_server.kill_job(job)
        else:
            job.kill_on_start = True

    def hide_jobs(self, rows):
        for r in rows:
            self.jobs[r].hidden_in_gui = True
        for r in rows:
            self.dataChanged.emit(self.index(r, 0), self.index(r, 0))

    def show_hidden_jobs(self):
        for j in self.jobs:
            j.hidden_in_gui = False
        for r in xrange(len(self.jobs)):
            self.dataChanged.emit(self.index(r, 0), self.index(r, 0))

    def kill_job(self, row, view):
        job = self.jobs[row]
        if isinstance(job, DeviceJob):
            return error_dialog(view, _('Cannot kill job'),
                         _('Cannot kill jobs that communicate with the device')).exec_()
        if job.duration is not None:
            return error_dialog(view, _('Cannot kill job'),
                         _('Job has already run')).exec_()
        if not getattr(job, 'killable', True):
            return error_dialog(view, _('Cannot kill job'),
                    _('This job cannot be stopped'), show=True)
        self._kill_job(job)

    def kill_multiple_jobs(self, rows, view):
        jobs = [self.jobs[row] for row in rows]
        devjobs = [j for j in jobs if isinstance(j, DeviceJob)]
        if devjobs:
            error_dialog(view, _('Cannot kill job'),
                         _('Cannot kill jobs that communicate with the device')).exec_()
            jobs = [j for j in jobs if not isinstance(j, DeviceJob)]
        jobs = [j for j in jobs if j.duration is None]
        unkillable = [j for j in jobs if not getattr(j, 'killable', True)]
        if unkillable:
            names = u'\n'.join(as_unicode(j.description) for j in unkillable)
            error_dialog(view, _('Cannot kill job'),
                    _('Some of the jobs cannot be stopped. Click Show details'
                        ' to see the list of unstoppable jobs.'), det_msg=names,
                    show=True)
            jobs = [j for j in jobs if getattr(j, 'killable', True)]
        jobs = [j for j in jobs if j.duration is None]
        for j in jobs:
            self._kill_job(j)

    def kill_all_jobs(self):
        for job in self.jobs:
            if (isinstance(job, DeviceJob) or job.duration is not None or
                    not getattr(job, 'killable', True)):
                continue
            self._kill_job(job)

    def terminate_all_jobs(self):
        self.server.killall()
        for job in self.jobs:
            if (isinstance(job, DeviceJob) or job.duration is not None or
                    not getattr(job, 'killable', True)):
                continue
            if not isinstance(job, ParallelJob):
                self._kill_job(job)

    def universal_set(self):
        return set([i for i, j in enumerate(self.jobs) if not getattr(j,
            'hidden_in_gui', False)])

    def get_matches(self, location, query, candidates=None):
        if candidates is None:
            candidates = self.universal_set()
        ans = set()
        if not query:
            return ans
        query = lower(query)
        for j in candidates:
            job = self.jobs[j]
            if job.description and query in lower(job.description):
                ans.add(j)
        return ans

    def find(self, query):
        query = query.strip()
        rows = self.parse(query)
        return rows
Example #17
 def add_spare_server(self, *args):
     self.spare_servers.append(
         Server(limit=int(config['worker_limit'] / 2.0)))
Example #18
    def _run(self, tdir):
        from calibre.library.save_to_disk import config
        server = Server() if self.spare_server is None else self.spare_server
        ids = set(self.ids)
        tasks = server.split(list(ids))
        jobs = set([])
        c = config()
        recs = {}
        for pref in c.preferences:
            recs[pref.name] = getattr(self.opts, pref.name)

        plugboards = self.db.prefs.get('plugboards', {})
        template_functions = self.db.prefs.get('user_template_functions', [])

        for i, task in enumerate(tasks):
            tids = [x[-1] for x in task]
            data = self.collect_data(tids, tdir)
            dpath = os.path.join(tdir, '%d.json'%i)
            with open(dpath, 'wb') as f:
                f.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))

            job = ParallelJob('save_book',
                    'Save books (%d of %d)'%(i, len(tasks)),
                    lambda x,y:x,
                    args=[tids, dpath, plugboards, template_functions, self.path, recs])
            jobs.add(job)
            server.add_job(job)


        while not self.canceled:
            time.sleep(0.2)
            running = False
            for job in jobs:
                self.get_notifications(job, ids)
                if not job.is_finished:
                    running = True

            if not running:
                break

        for job in jobs:
            if not job.result:
                continue
            for id_, title, ok, tb in job.result:
                if id_ in ids:
                    self.result_queue.put((id_, title, ok, tb))
                    ids.remove(id_)

        server.close()
        time.sleep(1)

        if self.canceled:
            return

        for job in jobs:
            if job.failed:
                prints(job.details)
                self.error = job.details
            if os.path.exists(job.log_path):
                try:
                    os.remove(job.log_path)
                except:
                    pass
Example #19
    def _run(self, tdir):
        from calibre.library.save_to_disk import config
        server = Server() if self.spare_server is None else self.spare_server
        ids = set(self.ids)
        tasks = server.split(list(ids))
        jobs = set([])
        c = config()
        recs = {}
        for pref in c.preferences:
            recs[pref.name] = getattr(self.opts, pref.name)

        plugboards = self.db.prefs.get('plugboards', {})
        template_functions = self.db.prefs.get('user_template_functions', [])

        for i, task in enumerate(tasks):
            tids = [x[-1] for x in task]
            data = self.collect_data(tids, tdir)
            dpath = os.path.join(tdir, '%d.json' % i)
            with open(dpath, 'wb') as f:
                f.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))

            job = ParallelJob('save_book',
                              'Save books (%d of %d)' % (i, len(tasks)),
                              lambda x, y: x,
                              args=[
                                  tids, dpath, plugboards, template_functions,
                                  self.path, recs
                              ])
            jobs.add(job)
            server.add_job(job)

        while not self.canceled:
            time.sleep(0.2)
            running = False
            for job in jobs:
                self.get_notifications(job, ids)
                if not job.is_finished:
                    running = True

            if not running:
                break

        for job in jobs:
            if not job.result:
                continue
            for id_, title, ok, tb in job.result:
                if id_ in ids:
                    self.result_queue.put((id_, title, ok, tb))
                    ids.remove(id_)

        server.close()
        time.sleep(1)

        if self.canceled:
            return

        for job in jobs:
            if job.failed:
                prints(job.details)
                self.error = job.details
            if os.path.exists(job.log_path):
                try:
                    os.remove(job.log_path)
                except:
                    pass
Example #20
def do_download_worker(book_list, options, cpus, notification=lambda x, y: x):
    '''
    Master job, to launch child jobs to extract ISBN for a set of books
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process
    '''
    server = Server(pool_size=cpus)

    logger.info(options['version'])
    total = 0
    alreadybad = []
    # Queue all the jobs
    logger.info("Adding jobs for URLs:")
    for book in book_list:
        logger.info("%s" % book['url'])
        if book['good']:
            total += 1
            args = [
                'calibre_plugins.fanficfare_plugin.jobs',
                'do_download_for_worker', (book, options)
            ]
            job = ParallelJob('arbitrary_n',
                              "url:(%s) id:(%s)" %
                              (book['url'], book['calibre_id']),
                              done=None,
                              args=args)
            job._book = book
            server.add_job(job)
        else:
            # was already bad before the subprocess ever started.
            alreadybad.append(book)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        book_list.remove(job._book)
        book_list.append(job.result)
        book_id = job._book['calibre_id']
        count = count + 1
        notification(
            float(count) / total,
            '%d of %d stories finished downloading' % (count, total))
        # Add this job's output to the current log
        logger.info('Logfile for book ID %s (%s)' %
                    (book_id, job._book['title']))
        logger.info(job.details)

        if count >= total:
            logger.info("\n" + _("Successful:") + "\n%s\n" % ("\n".join([
                book['url'] for book in filter(lambda x: x['good'], book_list)
            ])))
            logger.info("\n" + _("Unsuccessful:") + "\n%s\n" % ("\n".join([
                book['url']
                for book in filter(lambda x: not x['good'], book_list)
            ])))
            break

    server.close()

    # return the book list as the job result
    return book_list
Example #21
def do_download_worker(book_list,
                       options,
                       cpus,
                       merge=False,
                       notification=lambda x, y: x):
    '''
    Coordinator job, to launch child jobs to extract ISBN for a set of books
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process
    '''
    server = Server(pool_size=cpus)

    logger.info(options['version'])
    total = 0
    alreadybad = []
    # Queue all the jobs
    logger.info("Adding jobs for URLs:")
    for book in book_list:
        logger.info("%s" % book['url'])
        if book['good']:
            total += 1
            args = [
                'calibre_plugins.fanficfare_plugin.jobs',
                'do_download_for_worker', (book, options, merge)
            ]
            job = ParallelJob('arbitrary_n',
                              "url:(%s) id:(%s)" %
                              (book['url'], book['calibre_id']),
                              done=None,
                              args=args)
            job._book = book
            server.add_job(job)
        else:
            # was already bad before the subprocess ever started.
            alreadybad.append(book)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        book_list.remove(job._book)
        book_list.append(job.result)
        book_id = job._book['calibre_id']
        count = count + 1
        notification(
            float(count) / total,
            _('%d of %d stories finished downloading') % (count, total))
        # Add this job's output to the current log
        logger.info('Logfile for book ID %s (%s)' %
                    (book_id, job._book['title']))
        logger.info(job.details)

        if count >= total:
            book_list = sorted(book_list, key=lambda x: x['listorder'])
            logger.info("\n" + _("Download Results:") + "\n%s\n" % ("\n".join([
                "%(status)s %(url)s %(comment)s" % book for book in book_list
            ])))

            good_lists = defaultdict(list)
            bad_lists = defaultdict(list)
            for book in book_list:
                if book['good']:
                    good_lists[book['status']].append(book)
                else:
                    bad_lists[book['status']].append(book)

            order = [
                _('Add'),
                _('Update'),
                _('Meta'),
                _('Different URL'),
                _('Rejected'),
                _('Skipped'),
                _('Bad'),
                _('Error'),
            ]
            j = 0
            for d in [good_lists, bad_lists]:
                for status in order:
                    if d[status]:
                        l = d[status]
                        logger.info("\n" + status + "\n%s\n" %
                                    ("\n".join([book['url'] for book in l])))
                        for book in l:
                            book['reportorder'] = j
                            j += 1
                    del d[status]
                # just in case a status is added but doesn't appear in order.
                for status in d.keys():
                    logger.info("\n" + status + "\n%s\n" %
                                ("\n".join([book['url']
                                            for book in d[status]])))
            break

    server.close()

    # return the book list as the job result
    return book_list
Example #22
def do_download_worker(book_list,
                       options,
                       cpus,
                       merge=False,
                       notification=lambda x, y: x):
    '''
    Coordinator job, to launch child jobs to do downloads.
    This is run as a worker job in the background to keep the UI more
    responsive and get around any memory leak issues as it will launch
    a child job for each book as a worker process
    '''
    ## Now running one BG proc per site, which downloads for the same
    ## site in serial.
    logger.info("CPUs:%s" % cpus)
    server = Server(pool_size=cpus)

    logger.info(options['version'])

    sites_lists = defaultdict(list)
    [sites_lists[x['site']].append(x) for x in book_list if x['good']]

    totals = {}
    # can't do direct assignment in list comprehension?  I'm sure it
    # makes sense to some pythonista.
    # [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
    [totals.update({x['url']: 0.0}) for x in book_list if x['good']]
    # logger.debug(sites_lists.keys())

    # Queue all the jobs
    jobs_running = 0
    for site in sites_lists.keys():
        site_list = sites_lists[site]
        logger.info(
            _("Launch background process for site %s:") % site + "\n" +
            "\n".join([x['url'] for x in site_list]))
        # logger.debug([ x['url'] for x in site_list])
        args = [
            'calibre_plugins.fanficfare_plugin.jobs', 'do_download_site',
            (site, site_list, options, merge)
        ]
        job = ParallelJob('arbitrary_n',
                          "site:(%s)" % site,
                          done=None,
                          args=args)
        job._site_list = site_list
        job._processed = False
        server.add_job(job)
        jobs_running += 1

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # logger.debug("job get job._processed:%s"%job._processed)
        # A job can 'change' when it is not finished, for example if it
        # produces a notification.
        msg = None
        try:
            ## msg = book['url']
            (percent, msg) = job.notifications.get_nowait()
            # logger.debug("%s<-%s"%(percent,msg))
            if percent == 10.0:  # Only when signaling d/l done.
                count += 1
                totals[msg] = 1.0 / len(totals)
                # logger.info("Finished: %s"%msg)
            else:
                totals[msg] = percent / len(totals)
            notification(
                max(0.01, sum(totals.values())),
                _('%(count)d of %(total)d stories finished downloading') % {
                    'count': count,
                    'total': len(totals)
                })
        except Empty:
            pass
        # without update, is_finished will never be set.  however, we
        # do want to get all the notifications for status so we don't
        # miss the 'done' ones.
        job.update(consume_notifications=False)

        # if not job._processed:
        #     sleep(0.5)
        ## Can have a race condition where job.is_finished before
        ## notifications for all downloads have been processed.
        ## Or even after the job has been finished.
        # logger.debug("job.is_finished(%s) or job._processed(%s)"%(job.is_finished, job._processed))
        if not job.is_finished:
            continue

        ## only process each job once.  We can get more than one loop
        ## after job.is_finished.
        if not job._processed:
            # sleep(1)
            # A job really finished. Get the information.

            ## This is where bg proc details end up in GUI log.
            ## job.details is the whole debug log for each proc.
            logger.info("\n\n" + ("=" * 80) + " " +
                        job.details.replace('\r', ''))
            # logger.debug("Finished background process for site %s:\n%s"%(job._site_list[0]['site'],"\n".join([ x['url'] for x in job._site_list ])))
            for b in job._site_list:
                book_list.remove(b)
            book_list.extend(job.result)
            job._processed = True
            jobs_running -= 1

        ## Can't use individual count--I've seen stories all reported
        ## finished before results of all jobs processed.
        if jobs_running == 0:
            book_list = sorted(book_list, key=lambda x: x['listorder'])
            logger.info("\n" + _("Download Results:") + "\n%s\n" % ("\n".join([
                "%(status)s %(url)s %(comment)s" % book for book in book_list
            ])))

            good_lists = defaultdict(list)
            bad_lists = defaultdict(list)
            for book in book_list:
                if book['good']:
                    good_lists[book['status']].append(book)
                else:
                    bad_lists[book['status']].append(book)

            order = [
                _('Add'),
                _('Update'),
                _('Meta'),
                _('Different URL'),
                _('Rejected'),
                _('Skipped'),
                _('Bad'),
                _('Error'),
            ]
            j = 0
            for d in [good_lists, bad_lists]:
                for status in order:
                    if d[status]:
                        l = d[status]
                        logger.info("\n" + status + "\n%s\n" %
                                    ("\n".join([book['url'] for book in l])))
                        for book in l:
                            book['reportorder'] = j
                            j += 1
                    del d[status]
                # just in case a status is added but doesn't appear in order.
                for status in d.keys():
                    logger.info("\n" + status + "\n%s\n" %
                                ("\n".join([book['url']
                                            for book in d[status]])))
            break

    server.close()

    # return the book list as the job result
    return book_list
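
For context, a coordinator like do_download_worker is itself queued as a background job from the plugin's GUI code, so the UI stays responsive while it spawns its per-site children. Below is a minimal, hypothetical sketch of that dispatch, assuming calibre's standard job_manager API; the names start_downloads and downloads_finished are illustrative and not part of the plugin code above.

from calibre.gui2 import Dispatcher

def start_downloads(gui, book_list, options, merge=False):
    # Hypothetical GUI-side launcher; 'gui' is the calibre main window object.
    cpus = gui.job_manager.server.pool_size
    args = ['calibre_plugins.fanficfare_plugin.jobs', 'do_download_worker',
            (book_list, options, cpus, merge)]
    # 'arbitrary_n' means the job receives a notification callable, which is how
    # the notification(...) calls inside do_download_worker reach the jobs list.
    gui.job_manager.run_job(Dispatcher(downloads_finished), 'arbitrary_n',
                            args=args,
                            description='Downloading %d stories' % len(book_list))

def downloads_finished(job):
    # Hypothetical completion callback: job.result is the book_list
    # returned by do_download_worker.
    if job.failed:
        return
    book_list = job.result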
Example No. 23
def do_extract_worker(books_to_scan, failed_ids, no_format_ids,
                      cpus, notification=lambda x,y:x):
    '''
    Master job, to launch child jobs to extract ISBN for a set of books
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process
    '''
    server = Server(pool_size=cpus)

    # Queue all the jobs
    for book_id, title, modified_date, existing_isbn, paths_for_formats in books_to_scan:
        args = ['calibre_plugins.extract_isbn.jobs', 'do_extract_isbn_for_book_worker',
                (title, paths_for_formats)]
        job = ParallelJob('arbitrary', str(book_id), done=None, args=args)
        job._book_id = book_id
        job._title = title
        job._modified_date = modified_date
        job._existing_isbn = existing_isbn
        server.add_job(job)

    # This function is run as an 'arbitrary_n' job, so a progress notifier is available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator.
    notification(0.01, 'Extracting ISBN')

    # dequeue the job results as they arrive, saving the results
    total = len(books_to_scan)
    count = 0
    extracted_ids, same_isbn_ids = [], []
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        isbn = job.result
        book_id = job._book_id
        title = job._title
        count = count + 1
        notification(float(count)/total, 'Extracted ISBN')
        # Add this job's output to the current log
        print('Logfile for book ID %d (%s)'%(book_id, title))
        print(job.details)
        if isbn:
            if job._existing_isbn == isbn:
                print('  Extracted ISBN is identical to the existing one: %s'%(isbn,))
                same_isbn_ids.append((book_id, title))
            else:
                print('  Extracted new ISBN: %s'%(isbn,))
                extracted_ids.append((book_id, title, job._modified_date, isbn))
        else:
            print('  Failed to extract ISBN')
            failed_ids.append((book_id, title))
        print('===================================================')

        if count >= total:
            # All done!
            break

    server.close()
    # return the map as the job result
    return extracted_ids, same_isbn_ids, failed_ids, no_format_ids
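
Each ParallelJob above names a module-level function by dotted path ('calibre_plugins.extract_isbn.jobs', 'do_extract_isbn_for_book_worker'); calibre imports that module inside the child process, calls the function with the args tuple, and whatever it returns becomes job.result back in the coordinator. The sketch below shows only that contract with a deliberately naive text scan; the real plugin parses each format properly, so treat the regex, the TXT-only check, and the assumed (format, path) pair layout as illustrative assumptions.

import re

# Simplistic ISBN-10/ISBN-13 pattern, for illustration only.
ISBN_RE = re.compile(r'\b(?:97[89][-\s]?)?(?:\d[-\s]?){9}[\dXx]\b')

def do_extract_isbn_for_book_worker(title, paths_for_formats):
    # Runs in a separate worker process. paths_for_formats is assumed here to be
    # a list of (format, path) pairs; the return value becomes job.result above
    # (an ISBN string, or None, which lands the book in failed_ids).
    for fmt, path in paths_for_formats:
        if fmt.lower() != 'txt':        # naive: only scan plain-text formats
            continue
        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
            match = ISBN_RE.search(f.read())
        if match:
            return re.sub(r'[-\s]', '', match.group(0))
    return None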
Example No. 24
def do_count_statistics(books_to_scan, pages_algorithm, use_goodreads, nltk_pickle, cpus, notification=lambda x, y: x):
    """
    Master job, to launch child jobs to count pages in this list of books
    """
    server = Server(pool_size=cpus)

    # Queue all the jobs
    for book_id, title, book_path, goodreads_id, statistics_to_run in books_to_scan:
        args = [
            "calibre_plugins.count_pages.jobs",
            "do_statistics_for_book",
            (book_path, pages_algorithm, goodreads_id, use_goodreads, statistics_to_run, nltk_pickle),
        ]
        job = ParallelJob("arbitrary", str(book_id), done=None, args=args)
        job._book_id = book_id
        job._title = title
        job._pages_algorithm = pages_algorithm
        job._goodreads_id = goodreads_id
        job._use_goodreads = use_goodreads
        job._statistics_to_run = statistics_to_run
        server.add_job(job)

    # This function is run as an 'arbitrary_n' job, so a progress notifier is available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator.
    notification(0.01, "Counting Statistics")

    # dequeue the job results as they arrive, saving the results
    total = len(books_to_scan)
    count = 0
    book_stats_map = dict()
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        results = job.result
        book_id = job._book_id
        book_stats_map[book_id] = results
        count = count + 1
        notification(float(count) / total, "Counting Statistics")

        # Add this job's output to the current log
        print("-------------------------------")
        print("Logfile for book ID %d (%s)" % (book_id, job._title))

        for stat in job._statistics_to_run:
            if stat == cfg.STATISTIC_PAGE_COUNT:
                if job._use_goodreads:
                    if job._goodreads_id is not None:
                        if stat in results and results[stat]:
                            print("\tGoodreads edition has %d pages" % results[stat])
                        else:
                            print("\tFAILED TO GET PAGE COUNT FROM GOODREADS")
                else:
                    if stat in results and results[stat]:
                        print("\tFound %d pages" % results[stat])
            elif stat == cfg.STATISTIC_WORD_COUNT:
                if stat in results and results[stat]:
                    print("\tFound %d words" % results[stat])
            elif stat == cfg.STATISTIC_FLESCH_READING:
                if stat in results and results[stat]:
                    print("\tComputed %.1f Flesch Reading" % results[stat])
            elif stat == cfg.STATISTIC_FLESCH_GRADE:
                if stat in results and results[stat]:
                    print("\tComputed %.1f Flesch-Kincaid Grade" % results[stat])
            elif stat == cfg.STATISTIC_GUNNING_FOG:
                if stat in results and results[stat]:
                    print("\tComputed %.1f Gunning Fog Index" % results[stat])

        print(job.details)

        if count >= total:
            # All done!
            break

    server.close()
    # return the map as the job result
    return book_stats_map
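
All of the coordinator jobs above share the same skeleton: queue one ParallelJob per unit of work on a private Server, block on changed_jobs_queue, call job.update() on every wake-up (otherwise is_finished is never set), and collect job.result once a job reports finished. A stripped-down sketch of that shared pattern, assuming the calibre.utils.ipc import paths used by these plugins:

from calibre.utils.ipc.job import ParallelJob
from calibre.utils.ipc.server import Server

def run_parallel(job_specs, cpus, notification=lambda x, y: x):
    # job_specs: list of (name, args) where args is
    # ['plugin.module.path', 'worker_function_name', (worker_args, ...)]
    server = Server(pool_size=cpus)
    for name, args in job_specs:
        server.add_job(ParallelJob('arbitrary', name, done=None, args=args))

    results, total = {}, len(job_specs)
    while len(results) < total:
        job = server.changed_jobs_queue.get()   # blocks until some job changes state
        job.update()                            # refresh is_finished and drain notifications
        if not job.is_finished:
            continue                            # it only produced a progress notification
        results[job.description] = job.result
        notification(float(len(results)) / total, 'Finished %s' % job.description)

    server.close()
    return results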