Example #1
0
 def launch_gui_app(self, name, args=(), kwargs=None, description=''):
     """Launch a GUI application job on the job server.

     The job callable is a pass-through lambda; the actual work runs in
     the GUI process started by the server.
     """
     gui_job = ParallelJob(name, description, lambda x: x,
                           args=list(args), kwargs=kwargs or {})
     # GUI jobs manage their own output; do not redirect it.
     self.server.run_job(gui_job, gui=True, redirect_output=False)
Example #2
0
def set_metadata(stream, mi):
    """Write the metadata in ``mi`` into the PDF held by ``stream``, in place.

    The PDF is copied to a temporary file, a single worker process
    rewrites its metadata via podofo, and the rewritten file replaces
    the contents of ``stream`` only when the output looks sane.

    :param stream: seekable, writable file-like object containing the PDF
    :param mi: metadata object providing title, authors, book_producer, tags
    :raises Unavailable: when the podofo extension is not available
    """
    if not podofo:
        raise Unavailable(podofo_err)
    with TemporaryFile('_podofo_read.pdf') as inputf, \
            TemporaryFile('_podofo_write.pdf') as outputf:
        server = Server(pool_size=1)
        try:
            with open(inputf, 'wb') as f:
                shutil.copyfileobj(stream, f)
            job = ParallelJob('write_pdf_metadata',
                              'Write pdf metadata',
                              lambda x, y: x,
                              args=[
                                  inputf, outputf, mi.title, mi.authors,
                                  mi.book_producer, mi.tags
                              ])
            server.add_job(job)
            # Poll until the worker is done; update() pumps job state.
            while not job.is_finished:
                time.sleep(0.1)
                job.update()

            job.update()
        finally:
            # Always release the worker pool, even if polling raised.
            server.close()
        if job.failed:
            prints(job.details)
        elif job.result:
            with open(outputf, 'rb') as f:
                f.seek(0, 2)
                # Guard against truncated/empty worker output: only
                # replace the stream when the result is non-trivial.
                if f.tell() > 100:
                    f.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(f, stream)
                    stream.flush()
    stream.seek(0)
Example #3
0
 def launch_gui_app(self, name, args=(), kwargs=None, description=''):
     """Launch a GUI application job on the job server.

     Uses immutable/None defaults instead of mutable ``[]``/``{}``
     defaults, which are shared across calls and can be silently
     mutated by any callee.
     """
     job = ParallelJob(name,
                       description,
                       lambda x: x,
                       # Copy args and substitute a fresh dict for a
                       # missing kwargs so no state is shared between calls.
                       args=list(args),
                       kwargs=kwargs or {})
     self.server.run_job(job, gui=True, redirect_output=False)
Example #4
0
 def run_job(self, done, name, args=(), kwargs=None,
                        description='', core_usage=1):
     """Create a ParallelJob, register it and hand it to the server.

     Mutable default arguments (``[]``/``{}``) are replaced by
     immutable/None defaults so no state is shared across calls.

     :param done: callback invoked with the job when it completes
     :param core_usage: number of cores the job is assumed to use
     :return: the created ParallelJob
     """
     job = ParallelJob(name, description, done,
                       args=list(args), kwargs=kwargs or {})
     job.core_usage = core_usage
     self.add_job(job)
     self.server.add_job(job)
     return job
Example #5
0
    def run(self):
        """Move the library in a single worker process, forwarding
        per-title progress fractions to ``self.result_queue``.

        On completion sets ``self.failed`` and ``self.details`` when the
        job produced no result, then removes the worker's log file.
        """
        job = ParallelJob('move_library',
                          'Move library from %s to %s' % (self.from_, self.to),
                          lambda x, y: x,
                          args=[self.from_, self.to])
        server = Server(pool_size=1)
        server.add_job(job)

        while not job.is_finished:
            time.sleep(0.2)
            # Keep notifications in the queue so the loop below can
            # drain them itself.
            job.update(consume_notifications=False)
            # Each notification names one title that has been moved;
            # use the running count to compute the progress fraction.
            while True:
                try:
                    title = job.notifications.get_nowait()[0]
                    self.count += 1
                    self.result_queue.put(
                        (float(self.count) / self.total, title))
                except Empty:
                    break

        job.update()
        server.close()
        if not job.result:
            self.failed = True
            self.details = job.details

        # Best-effort removal of the worker's log file.
        if os.path.exists(job.log_path):
            os.remove(job.log_path)
Example #6
0
    def run(self):
        """Read metadata for all queued tasks in worker processes.

        Distributes ``self.tasks`` across server workers, forwards
        per-book progress notifications via ``progress``, and cleans up
        worker log files at the end. Honors ``self.canceled``.
        """
        jobs, ids = set(), set()
        for t in self.tasks:
            for b in t:
                ids.add(b[0])
        progress = Progress(self.result_queue, self.tdir)
        server = Server() if self.spare_server is None else self.spare_server
        try:
            for i, task in enumerate(self.tasks):
                job = ParallelJob('read_metadata',
                                  'Read metadata (%d of %d)' %
                                  (i, len(self.tasks)),
                                  lambda x, y: x,
                                  args=[task, self.tdir])
                jobs.add(job)
                server.add_job(job)

            while not self.canceled:
                time.sleep(0.2)
                running = False
                for job in jobs:
                    # Drain notifications ourselves; update() below is
                    # told not to consume them so none are lost.
                    while True:
                        try:
                            book_id = job.notifications.get_nowait()[-1]
                            if book_id in ids:
                                progress(book_id)
                                ids.remove(book_id)
                        except Empty:
                            break
                    job.update(consume_notifications=False)
                    if not job.is_finished:
                        running = True

                if not running:
                    break
        finally:
            server.close()
        time.sleep(1)

        if self.canceled:
            return

        # Report any ids whose notifications were never observed.
        for book_id in ids:
            progress(book_id)

        for job in jobs:
            if job.failed:
                prints(job.details)
            if os.path.exists(job.log_path):
                try:
                    os.remove(job.log_path)
                except OSError:
                    # Best-effort cleanup; a stale log file is harmless.
                    pass
Example #7
0
def process_pages(pages, opts, update, tdir):
    '''
    Render all identified comic pages.

    :param pages: list of page image paths to render
    :param opts: rendering options passed through to worker processes
    :param update: progress callback
    :param tdir: directory rendered pages are written into
    :return: (rendered pages, failures) accumulated across all jobs
    :raises Exception: if any worker job failed or returned no result
    '''
    progress = Progress(len(pages), update)
    server = Server()
    jobs = []
    # Let the server split the page list into per-worker chunks.
    # (An earlier unused precomputation of (src, dst) pairs that was
    # immediately overwritten has been removed.)
    tasks = server.split(pages)
    try:
        for task in tasks:
            jobs.append(
                ParallelJob('render_pages', '', progress,
                            args=[task, tdir, opts]))
            server.add_job(jobs[-1])
        while True:
            time.sleep(1)
            running = False
            for job in jobs:
                # Forward queued progress notifications to the callback.
                while True:
                    try:
                        x = job.notifications.get_nowait()
                        progress(*x)
                    except Empty:
                        break
                job.update()
                if not job.is_finished:
                    running = True
            if not running:
                break
    finally:
        # Release the worker pool even if the polling loop raised.
        server.close()
    ans, failures = [], []

    for job in jobs:
        if job.failed or job.result is None:
            raise Exception(
                _('Failed to process comic: \n\n%s') % job.log_file.read())
        pages, failures_ = job.result
        ans += pages
        failures += failures_
    return ans, failures
Example #8
0
def get_metadata(stream, cpath=None):
    """Read metadata (and optionally the cover) from a PDF stream.

    The stream is copied to a temporary file and a single worker
    process extracts the metadata via podofo.

    :param stream: file-like object containing the PDF
    :param cpath: optional path the extracted cover is written to
    :return: a MetaInformation instance
    :raises Unavailable: when the podofo extension is not available
    :raises ValueError: when the worker produced no result
    """
    if not podofo:
        raise Unavailable(podofo_err)
    pt = PersistentTemporaryFile('_podofo.pdf')
    pt.write(stream.read())
    pt.close()
    server = Server(pool_size=1)
    try:
        job = ParallelJob('read_pdf_metadata',
                          'Read pdf metadata',
                          lambda x, y: x,
                          args=[pt.name, cpath])
        server.add_job(job)
        while not job.is_finished:
            time.sleep(0.1)
            job.update()

        job.update()
    finally:
        server.close()
        # Remove the temp copy even on the failure path; previously it
        # leaked when ValueError was raised below.
        if os.path.exists(pt.name):
            os.remove(pt.name)
    if job.result is None:
        raise ValueError('Failed to read metadata: ' + job.details)
    title, authors, creator, tags, ok = job.result
    if not ok:
        # print() calls (not Python 2 print statements) for consistency
        # with the rest of the codebase.
        print('Failed to extract cover:')
        print(job.details)
    if title == '_':
        title = getattr(stream, 'name', _('Unknown'))
        title = os.path.splitext(title)[0]

    mi = MetaInformation(title, authors)
    if creator:
        mi.book_producer = creator
    if tags:
        mi.tags = tags
    if ok:
        mi.cover = cpath
    return mi
Example #9
0
def do_extract_worker(books_to_scan, failed_ids, no_format_ids,
                      cpus, notification=lambda x,y:x):
    '''
    Master job, to launch child jobs to extract ISBN for a set of books
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process

    Returns (extracted_ids, same_isbn_ids, failed_ids, no_format_ids).
    '''
    server = Server(pool_size=cpus)

    # Queue all the jobs
    for book_id, title, modified_date, existing_isbn, paths_for_formats in books_to_scan:
        args = ['calibre_plugins.extract_isbn.jobs', 'do_extract_isbn_for_book_worker',
                (title, paths_for_formats)]
        job = ParallelJob('arbitrary', str(book_id), done=None, args=args)
        # Stash per-book context on the job so it can be recovered when
        # the result is dequeued below.
        job._book_id = book_id
        job._title = title
        job._modified_date = modified_date
        job._existing_isbn = existing_isbn
        server.add_job(job)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, 'Extracting ISBN')

    # dequeue the job results as they arrive, saving the results
    total = len(books_to_scan)
    count = 0
    extracted_ids, same_isbn_ids = [], []
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        isbn = job.result
        book_id = job._book_id
        title = job._title
        count = count + 1
        notification(float(count)/total, 'Extracted ISBN')
        # Add this job's output to the current log
        print('Logfile for book ID %d (%s)'%(book_id, title))
        print(job.details)
        if isbn:
            if job._existing_isbn == isbn:
                print('  Identical ISBN extracted of: %s'%(isbn,))
                same_isbn_ids.append((book_id, title))
            else:
                print('  New ISBN extracted of: %s'%(isbn,))
                extracted_ids.append((book_id, title, job._modified_date, isbn))
        else:
            print('  Failed to extract ISBN')
            failed_ids.append((book_id, title))
        print('===================================================')

        if count >= total:
            # All done!
            break

    server.close()
    # return the map as the job result
    return extracted_ids, same_isbn_ids, failed_ids, no_format_ids
Example #10
0
def do_download_worker(book_list,
                       options,
                       cpus,
                       merge=False,
                       notification=lambda x, y: x):
    '''
    Coordinator job, to launch child jobs to extract ISBN for a set of books
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process

    Returns the updated book_list (entries for good books are replaced
    by the corresponding job results).
    '''
    server = Server(pool_size=cpus)

    logger.info(options['version'])
    total = 0
    alreadybad = []
    # Queue all the jobs
    logger.info("Adding jobs for URLs:")
    for book in book_list:
        logger.info("%s" % book['url'])
        if book['good']:
            total += 1
            args = [
                'calibre_plugins.fanficfare_plugin.jobs',
                'do_download_for_worker', (book, options, merge)
            ]
            job = ParallelJob('arbitrary_n',
                              "url:(%s) id:(%s)" %
                              (book['url'], book['calibre_id']),
                              done=None,
                              args=args)
            # Remember which book this job belongs to so its entry in
            # book_list can be swapped for the job result later.
            job._book = book
            server.add_job(job)
        else:
            # was already bad before the subprocess ever started.
            alreadybad.append(book)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        book_list.remove(job._book)
        book_list.append(job.result)
        book_id = job._book['calibre_id']
        count = count + 1
        notification(
            float(count) / total,
            _('%d of %d stories finished downloading') % (count, total))
        # Add this job's output to the current log
        logger.info('Logfile for book ID %s (%s)' %
                    (book_id, job._book['title']))
        logger.info(job.details)

        if count >= total:
            book_list = sorted(book_list, key=lambda x: x['listorder'])
            logger.info("\n" + _("Download Results:") + "\n%s\n" % ("\n".join([
                "%(status)s %(url)s %(comment)s" % book for book in book_list
            ])))

            good_lists = defaultdict(list)
            bad_lists = defaultdict(list)
            for book in book_list:
                if book['good']:
                    good_lists[book['status']].append(book)
                else:
                    bad_lists[book['status']].append(book)

            order = [
                _('Add'),
                _('Update'),
                _('Meta'),
                _('Different URL'),
                _('Rejected'),
                _('Skipped'),
                _('Bad'),
                _('Error'),
            ]
            j = 0
            # Log books grouped by status (good statuses first) and
            # assign each a stable 'reportorder' index; entries are
            # deleted as they are logged so the fallback loop below only
            # sees statuses missing from 'order'.
            for d in [good_lists, bad_lists]:
                for status in order:
                    if d[status]:
                        l = d[status]
                        logger.info("\n" + status + "\n%s\n" %
                                    ("\n".join([book['url'] for book in l])))
                        for book in l:
                            book['reportorder'] = j
                            j += 1
                    del d[status]
                # just in case a status is added but doesn't appear in order.
                for status in d.keys():
                    logger.info("\n" + status + "\n%s\n" %
                                ("\n".join([book['url']
                                            for book in d[status]])))
            break

    server.close()

    # return the book list as the job result
    return book_list
Example #11
0
def do_download_worker(book_list,
                       options,
                       cpus,
                       merge=False,
                       notification=lambda x, y: x):
    '''
    Coordinator job, to launch child jobs to do downloads.
    This is run as a worker job in the background to keep the UI more
    responsive and get around any memory leak issues as it will launch
    a child job for each book as a worker process

    Returns the updated book_list (entries for good books are replaced
    by the per-site job results).
    '''
    ## Now running one BG proc per site, which downloads for the same
    ## site in serial.
    logger.info("CPUs:%s" % cpus)
    server = Server(pool_size=cpus)

    logger.info(options['version'])

    # Group good books by site; one background job is launched per site.
    sites_lists = defaultdict(list)
    [sites_lists[x['site']].append(x) for x in book_list if x['good']]

    # Per-URL completion fractions; each entry contributes its share of
    # the overall progress in the notification below.
    totals = {}
    # can't do direct assignment in list comprehension?  I'm sure it
    # makes sense to some pythonista.
    # [ totals[x['url']]=0.0 for x in book_list if x['good'] ]
    [totals.update({x['url']: 0.0}) for x in book_list if x['good']]
    # logger.debug(sites_lists.keys())

    # Queue all the jobs
    jobs_running = 0
    for site in sites_lists.keys():
        site_list = sites_lists[site]
        logger.info(
            _("Launch background process for site %s:") % site + "\n" +
            "\n".join([x['url'] for x in site_list]))
        # logger.debug([ x['url'] for x in site_list])
        args = [
            'calibre_plugins.fanficfare_plugin.jobs', 'do_download_site',
            (site, site_list, options, merge)
        ]
        job = ParallelJob('arbitrary_n',
                          "site:(%s)" % site,
                          done=None,
                          args=args)
        # Stash the site's book list and a processed flag on the job so
        # results are merged exactly once per job below.
        job._site_list = site_list
        job._processed = False
        server.add_job(job)
        jobs_running += 1

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # logger.debug("job get job._processed:%s"%job._processed)
        # A job can 'change' when it is not finished, for example if it
        # produces a notification.
        msg = None
        try:
            ## msg = book['url']
            (percent, msg) = job.notifications.get_nowait()
            # logger.debug("%s<-%s"%(percent,msg))
            if percent == 10.0:  # Only when signaling d/l done.
                count += 1
                totals[msg] = 1.0 / len(totals)
                # logger.info("Finished: %s"%msg)
            else:
                totals[msg] = percent / len(totals)
            notification(
                max(0.01, sum(totals.values())),
                _('%(count)d of %(total)d stories finished downloading') % {
                    'count': count,
                    'total': len(totals)
                })
        except Empty:
            pass
        # without update, is_finished will never be set.  however, we
        # do want to get all the notifications for status so we don't
        # miss the 'done' ones.
        job.update(consume_notifications=False)

        # if not job._processed:
        #     sleep(0.5)
        ## Can have a race condition where job.is_finished before
        ## notifications for all downloads have been processed.
        ## Or even after the job has been finished.
        # logger.debug("job.is_finished(%s) or job._processed(%s)"%(job.is_finished, job._processed))
        if not job.is_finished:
            continue

        ## only process each job once.  We can get more than one loop
        ## after job.is_finished.
        if not job._processed:
            # sleep(1)
            # A job really finished. Get the information.

            ## This is where bg proc details end up in GUI log.
            ## job.details is the whole debug log for each proc.
            logger.info("\n\n" + ("=" * 80) + " " +
                        job.details.replace('\r', ''))
            # logger.debug("Finished background process for site %s:\n%s"%(job._site_list[0]['site'],"\n".join([ x['url'] for x in job._site_list ])))
            # Replace this site's original entries with the job result.
            for b in job._site_list:
                book_list.remove(b)
            book_list.extend(job.result)
            job._processed = True
            jobs_running -= 1

        ## Can't use individual count--I've seen stories all reported
        ## finished before results of all jobs processed.
        if jobs_running == 0:
            book_list = sorted(book_list, key=lambda x: x['listorder'])
            logger.info("\n" + _("Download Results:") + "\n%s\n" % ("\n".join([
                "%(status)s %(url)s %(comment)s" % book for book in book_list
            ])))

            good_lists = defaultdict(list)
            bad_lists = defaultdict(list)
            for book in book_list:
                if book['good']:
                    good_lists[book['status']].append(book)
                else:
                    bad_lists[book['status']].append(book)

            order = [
                _('Add'),
                _('Update'),
                _('Meta'),
                _('Different URL'),
                _('Rejected'),
                _('Skipped'),
                _('Bad'),
                _('Error'),
            ]
            j = 0
            # Log books grouped by status (good statuses first) and
            # assign each a stable 'reportorder' index; entries are
            # deleted as they are logged so the fallback loop below only
            # sees statuses missing from 'order'.
            for d in [good_lists, bad_lists]:
                for status in order:
                    if d[status]:
                        l = d[status]
                        logger.info("\n" + status + "\n%s\n" %
                                    ("\n".join([book['url'] for book in l])))
                        for book in l:
                            book['reportorder'] = j
                            j += 1
                    del d[status]
                # just in case a status is added but doesn't appear in order.
                for status in d.keys():
                    logger.info("\n" + status + "\n%s\n" %
                                ("\n".join([book['url']
                                            for book in d[status]])))
            break

    server.close()

    # return the book list as the job result
    return book_list
Example #12
0
def do_download_worker(book_list, options, cpus, notification=lambda x, y: x):
    '''
    Master job, to launch child jobs to extract ISBN for a set of books
    This is run as a worker job in the background to keep the UI more
    responsive and get around the memory leak issues as it will launch
    a child job for each book as a worker process

    Returns the updated book_list (entries for good books are replaced
    by the corresponding job results).
    '''
    server = Server(pool_size=cpus)

    logger.info(options['version'])
    total = 0
    alreadybad = []
    # Queue all the jobs
    logger.info("Adding jobs for URLs:")
    for book in book_list:
        logger.info("%s" % book['url'])
        if book['good']:
            total += 1
            args = [
                'calibre_plugins.fanficfare_plugin.jobs',
                'do_download_for_worker', (book, options)
            ]
            job = ParallelJob('arbitrary_n',
                              "url:(%s) id:(%s)" %
                              (book['url'], book['calibre_id']),
                              done=None,
                              args=args)
            # Remember which book this job belongs to so its entry in
            # book_list can be swapped for the job result later.
            job._book = book
            server.add_job(job)
        else:
            # was already bad before the subprocess ever started.
            alreadybad.append(book)

    # This server is an arbitrary_n job, so there is a notifier available.
    # Set the % complete to a small number to avoid the 'unavailable' indicator
    notification(0.01, _('Downloading FanFiction Stories'))

    # dequeue the job results as they arrive, saving the results
    count = 0
    while True:
        job = server.changed_jobs_queue.get()
        # A job can 'change' when it is not finished, for example if it
        # produces a notification. Ignore these.
        job.update()
        if not job.is_finished:
            continue
        # A job really finished. Get the information.
        book_list.remove(job._book)
        book_list.append(job.result)
        book_id = job._book['calibre_id']
        count = count + 1
        notification(
            float(count) / total,
            '%d of %d stories finished downloading' % (count, total))
        # Add this job's output to the current log
        logger.info('Logfile for book ID %s (%s)' %
                    (book_id, job._book['title']))
        logger.info(job.details)

        if count >= total:
            # Summarize outcomes once every queued job has reported.
            logger.info("\n" + _("Successful:") + "\n%s\n" % ("\n".join([
                book['url'] for book in filter(lambda x: x['good'], book_list)
            ])))
            logger.info("\n" + _("Unsuccessful:") + "\n%s\n" % ("\n".join([
                book['url']
                for book in filter(lambda x: not x['good'], book_list)
            ])))
            break

    server.close()

    # return the book list as the job result
    return book_list
Example #13
0
    def _run(self, tdir):
        """Save the books in ``self.ids`` to disk using worker processes.

        Splits the ids across workers, pushes per-book
        ``(id, title, ok, traceback)`` tuples to ``self.result_queue``,
        records any job failure details in ``self.error``, and removes
        worker log files when done. Honors ``self.canceled``.
        """
        from calibre.library.save_to_disk import config
        server = Server() if self.spare_server is None else self.spare_server
        ids = set(self.ids)
        tasks = server.split(list(ids))
        jobs = set()
        c = config()
        recs = {}
        # Snapshot save-to-disk preferences from self.opts into a plain
        # dict so the workers do not need the live config object.
        for pref in c.preferences:
            recs[pref.name] = getattr(self.opts, pref.name)

        plugboards = self.db.prefs.get('plugboards', {})
        template_functions = self.db.prefs.get('user_template_functions', [])

        for i, task in enumerate(tasks):
            tids = [x[-1] for x in task]
            data = self.collect_data(tids, tdir)
            dpath = os.path.join(tdir, '%d.json' % i)
            with open(dpath, 'wb') as f:
                f.write(json.dumps(data, ensure_ascii=False).encode('utf-8'))

            job = ParallelJob('save_book',
                              'Save books (%d of %d)' % (i, len(tasks)),
                              lambda x, y: x,
                              args=[
                                  tids, dpath, plugboards, template_functions,
                                  self.path, recs
                              ])
            jobs.add(job)
            server.add_job(job)

        # Poll until all jobs are finished or the user cancels,
        # forwarding progress notifications as they arrive.
        while not self.canceled:
            time.sleep(0.2)
            running = False
            for job in jobs:
                self.get_notifications(job, ids)
                if not job.is_finished:
                    running = True

            if not running:
                break

        for job in jobs:
            if not job.result:
                continue
            for id_, title, ok, tb in job.result:
                if id_ in ids:
                    self.result_queue.put((id_, title, ok, tb))
                    ids.remove(id_)

        server.close()
        time.sleep(1)

        if self.canceled:
            return

        for job in jobs:
            if job.failed:
                prints(job.details)
                self.error = job.details
            if os.path.exists(job.log_path):
                try:
                    os.remove(job.log_path)
                except OSError:
                    # Best-effort cleanup; a leftover log is harmless.
                    pass