Example #1
    def __call__(self, mon, thr):
        self.mon = mon
        self.status_queue = queue.PriorityQueue()
        self.mon.register_event_queue(self.status_queue,
                                      (self._MON_SAYS_STOP, -1))

        self.mon.set_status_prefix("d")
        self.mon.report_status("loading...")

        self.proxies = ProxySet(self, self.mon, self.args,
                                self.proxy_sort_key)
        self.mon.report_status("loading... (proxies OK)")

        self.db = url_database.ensure_database(self.args)
        self.prepare_database()

        for _ in range(self.args.total_workers):
            wt = CaptureWorker(self)
            self.mon.add_work_thread(wt)
            self.idle_workers.add(wt)

        self.dispatcher_loop()
Example #2
    def __call__(self, mon, thr):
        self.mon = mon
        self.status_queue = queue.Queue()
        self.mon.register_event_queue(self.status_queue, (self._MON_SAYS_STOP,))

        self.mon.set_status_prefix("d")
        self.mon.report_status("loading...")

        self.proxies = ProxySet(self, mon, self.args)
        self.mon.report_status("loading... (proxies OK)")

        self.locations = {
            loc: LocationState(loc, self.args.destinations, self.output_dir)
            for loc in self.proxies.locations.keys()
        }
        self.mon.report_status("loading... (locations OK)")

        # We only need one worker thread per proxy, because scamper
        # parallelizes work internally.
        for _ in range(self.args.max_simultaneous_proxies):
            wt = TracerouteWorker(self)
            self.mon.add_work_thread(wt)
            self.idle_workers.add(wt)
        self.mon.report_status("loading... (work threads OK)")

        # kick things off by starting one proxy
        (proxy, until_next, n_locations) = self.proxies.start_a_proxy()
        self.mon.report_status(
            "{}/{}/{} locations active, {} started, "
            "{} till next".format(
                len(self.proxies.active_proxies),
                n_locations,
                len(self.locations),
                proxy.label() if proxy else None,
                until_next,
            )
        )

        while n_locations:
            time_now = time.monotonic()
            # Technically, until_next being None means "wait for a proxy
            # to exit", but use an hour as a backstop.  (When a proxy does
            # exit, this will get knocked down to zero below.)
            if until_next is None:
                until_next = 3600
            time_next = time_now + until_next
            pending_stop = False
            while time_now < time_next:
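                # Process status messages as they arrive, for roughly
                # until_next seconds at most.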
                for msg in queue_iter(self.status_queue, until_next):
                    if msg[0] == self._PROXY_ONLINE:
                        self.proxies.note_proxy_online(msg[1])
                        self.mon.report_status("proxy {} online".format(msg[1].label()))
                        self.mon.idle(1)

                    elif msg[0] == self._PROXY_OFFLINE:
                        self.mon.report_status("proxy {} offline".format(msg[1].label()))
                        self.proxies.note_proxy_offline(msg[1])
                        # Wait no more than 5 minutes before trying to
                        # start another proxy.  (XXX This hardwires a
                        # specific provider's policy.)
                        time_now = time.monotonic()
                        time_next = min(time_next, time_now + 300)
                        until_next = time_next - time_now

                    elif msg[0] == self._BATCH_COMPLETE:
                        locstate = self.active_workers[msg[1]]
                        del self.active_workers[msg[1]]
                        self.idle_workers.add(msg[1])
                        locstate.complete_job()
                        self.mon.report_status("{} batch complete".format(locstate.location))

                    elif msg[0] == self._BATCH_FAILED:
                        locstate = self.active_workers[msg[1]]
                        del self.active_workers[msg[1]]
                        self.idle_workers.add(msg[1])
                        locstate.fail_job()
                        self.mon.report_status("{} batch failed".format(locstate.location))

                    elif msg[0] == self._DROP_WORKER:
                        worker = msg[1]
                        self.idle_workers.discard(worker)
                        if worker in self.active_workers:
                            self.active_workers[worker].fail_job()
                            del self.active_workers[worker]

                    elif msg[0] == self._MON_SAYS_STOP:
                        self.mon.report_status("interrupt pending")
                        pending_stop = True

                    else:
                        self.mon.report_error("bogus message: {!r}".format(msg))

                for loc, state in self.locations.items():
                    if state.next_task is None:
                        self.mon.report_status("{} finished".format(loc))
                        if loc in self.proxies.locations:
                            self.proxies.locations[loc].finished()

                if pending_stop:
                    self.mon.report_status("interrupted")
                    self.mon.maybe_pause_or_stop()
                    # don't start new work yet, the set of proxies
                    # available may be totally different now

                else:
                    for proxy in self.proxies.active_proxies:
                        if not self.idle_workers:
                            break
                        if not proxy.online:
                            continue
                        state = self.locations[proxy.loc]
                        if not state.active_task and state.next_task is not None:
                            worker = self.idle_workers.pop()
                            self.active_workers[worker] = state
                            state.queue_job(worker, proxy)
                            self.mon.report_status("queuing job for {}".format(proxy.label()))

                time_now = time.monotonic()
                until_next = time_next - time_now

            # when we get to this point, it's time to start another proxy
            (proxy, until_next, n_locations) = self.proxies.start_a_proxy()
            self.mon.report_status(
                "{}/{}/{} locations active, {} started, "
                "{} till next".format(
                    len(self.proxies.active_proxies),
                    n_locations,
                    len(self.locations),
                    proxy.label() if proxy else None,
                    until_next,
                )
            )

        # done, kill off all the workers
        self.mon.report_status("finished")
        assert not self.active_workers
        for w in self.idle_workers:
            w.finished()
Example #3
    def __call__(self, mon, thr):
        self.mon = mon
        self.status_queue = queue.Queue()
        self.mon.register_event_queue(self.status_queue,
                                      (self._MON_SAYS_STOP, ))

        self.mon.set_status_prefix("d")
        self.mon.report_status("loading...")

        self.proxies = ProxySet(self,
                                mon,
                                self.args,
                                include_locations=self.dns_servers)
        self.mon.report_status("loading... (proxies OK)")

        for loc in list(self.dns_servers.keys()):
            if loc not in self.proxies.locations:
                del self.dns_servers[loc]

        assert list(self.dns_servers.keys()) == \
               list(self.proxies.locations.keys())

        self.locations = {
            loc: LocationState(loc, self.dns_servers[loc], self.hostnames,
                               self.output_dir)
            for loc in self.dns_servers.keys()
        }
        self.mon.report_status("loading... (locations OK)")

        # One work thread per active proxy.
        for _ in range(self.args.max_simultaneous_proxies):
            wt = DNSWorker(self)
            self.mon.add_work_thread(wt)
            self.idle_workers.add(wt)
        self.mon.report_status("loading... (work threads OK)")

        # kick things off by starting one proxy
        (proxy, until_next, n_locations) = self.proxies.start_a_proxy()
        self.mon.report_status("{}/{}/{} locations active, {} started, "
                               "{} till next".format(
                                   len(self.proxies.active_proxies),
                                   n_locations, len(self.locations),
                                   proxy.label() if proxy else None,
                                   until_next))

        while n_locations:
            time_now = time.monotonic()
            # Technically, until_next being None means "wait for a proxy
            # to exit", but use an hour as a backstop.  (When a proxy does
            # exit, this will get knocked down to zero below.)
            if until_next is None:
                until_next = 3600
            time_next = time_now + until_next
            pending_stop = False
            while time_now < time_next:
                for msg in queue_iter(self.status_queue, until_next):
                    if msg[0] == self._PROXY_ONLINE:
                        self.proxies.note_proxy_online(msg[1])
                        self.mon.report_status("proxy {} online".format(
                            msg[1].label()))
                        self.mon.idle(1)

                    elif msg[0] == self._PROXY_OFFLINE:
                        self.mon.report_status("proxy {} offline".format(
                            msg[1].label()))
                        self.proxies.note_proxy_offline(msg[1])
                        # Wait no more than 5 minutes before trying to
                        # start another proxy.  (XXX This hardwires a
                        # specific provider's policy.)
                        time_now = time.monotonic()
                        time_next = min(time_next, time_now + 300)
                        until_next = time_next - time_now

                    elif msg[0] == self._BATCH_COMPLETE:
                        locstate = self.active_workers[msg[1]]
                        del self.active_workers[msg[1]]
                        self.idle_workers.add(msg[1])
                        locstate.complete_job()
                        self.mon.report_status("{} batch complete".format(
                            locstate.location))

                    elif msg[0] == self._BATCH_FAILED:
                        locstate = self.active_workers[msg[1]]
                        del self.active_workers[msg[1]]
                        self.idle_workers.add(msg[1])
                        locstate.fail_job()
                        self.mon.report_status("{} batch failed".format(
                            locstate.location))

                    elif msg[0] == self._DROP_WORKER:
                        worker = msg[1]
                        self.idle_workers.discard(worker)
                        if worker in self.active_workers:
                            self.active_workers[worker].fail_job()
                            del self.active_workers[worker]

                    elif msg[0] == self._MON_SAYS_STOP:
                        self.mon.report_status("interrupt pending")
                        pending_stop = True

                    else:
                        self.mon.report_error(
                            "bogus message: {!r}".format(msg))

                for loc, state in self.locations.items():
                    if state.finished_p():
                        self.mon.report_status("{} finished".format(loc))
                        if loc in self.proxies.locations:
                            self.proxies.locations[loc].finished()

                if pending_stop:
                    self.mon.report_status("interrupted")
                    self.mon.maybe_pause_or_stop()
                    # don't start new work yet, the set of proxies
                    # available may be totally different now

                else:
                    for proxy in self.proxies.active_proxies:
                        if not self.idle_workers:
                            break
                        if not proxy.online:
                            continue
                        state = self.locations[proxy.loc]
                        if state.idle_p():
                            worker = self.idle_workers.pop()
                            self.active_workers[worker] = state
                            state.queue_job(worker, proxy)
                            self.mon.report_status("queuing job for {}".format(
                                proxy.label()))

                time_now = time.monotonic()
                until_next = time_next - time_now

            # when we get to this point, it's time to start another proxy
            (proxy, until_next, n_locations) = self.proxies.start_a_proxy()
            self.mon.report_status("{}/{}/{} locations active, {} started, "
                                   "{} till next".format(
                                       len(self.proxies.active_proxies),
                                       n_locations, len(self.locations),
                                       proxy.label() if proxy else None,
                                       until_next))

        # done, kill off all the workers
        self.mon.report_status("finished")
        assert not self.active_workers
        for w in self.idle_workers:
            w.finished()
Example #4
class CaptureDispatcher:
    def __init__(self, args):
        # complete initialization deferred till we're on the right thread
        self.args                    = args
        self.idle_workers            = set()
        self.active_workers          = {}
        self.locations               = {}
        self.overall_jobsize         = 0
        self.proxies                 = None
        self.mon                     = None
        self.db                      = None
        self.status_queue            = None
        self.status_queue_serializer = 0

    def __call__(self, mon, thr):
        self.mon = mon
        self.status_queue = queue.PriorityQueue()
        self.mon.register_event_queue(self.status_queue,
                                      (self._MON_SAYS_STOP, -1))

        self.mon.set_status_prefix("d")
        self.mon.report_status("loading...")

        self.proxies = ProxySet(self, self.mon, self.args,
                                self.proxy_sort_key)
        self.mon.report_status("loading... (proxies OK)")

        self.db = url_database.ensure_database(self.args)
        self.prepare_database()

        for _ in range(self.args.total_workers):
            wt = CaptureWorker(self)
            self.mon.add_work_thread(wt)
            self.idle_workers.add(wt)

        self.dispatcher_loop()

    # Status queue helper constants and methods.
    _PROXY_OFFLINE  = 1
    _PROXY_ONLINE   = 2
    _BATCH_COMPLETE = 3
    _BATCH_FAILED   = 4
    _DROP_WORKER    = 5
    _MON_SAYS_STOP  = 6 # Stop after handling all incoming work

    # Entries in a PriorityQueue must be totally ordered.  We just
    # want to service all COMPLETE messages ahead of all others, and
    # STOP messages after all others, so we give them all a serial
    # number which goes in the tuple right after the command code,
    # before the data.  This also means we don't have to worry about
    # unsortable data.
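    # For example, an entry like (_BATCH_COMPLETE, 17, worker, result)
    # sorts ahead of (_MON_SAYS_STOP, -1) because its command code is
    # smaller, and two entries with the same command code are ordered by
    # their serial numbers, so the worker/result payloads (which may not
    # be orderable) never get compared.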
    def oq(self):
        self.status_queue_serializer += 1
        return self.status_queue_serializer

    # worker-to-dispatcher API
    def complete_batch(self, worker, result):
        self.status_queue.put((self._BATCH_COMPLETE, self.oq(),
                               worker, result))

    def fail_batch(self, worker, exc_info):
        self.status_queue.put((self._BATCH_FAILED, self.oq(), worker))

    def drop_worker(self, worker):
        self.status_queue.put((self._DROP_WORKER, self.oq(), worker))

    # proxy-to-dispatcher API
    def proxy_online(self, proxy):
        self.status_queue.put((self._PROXY_ONLINE, self.oq(), proxy))

    def proxy_offline(self, proxy):
        self.status_queue.put((self._PROXY_OFFLINE, self.oq(), proxy))

    def _invalid_message(self, *args):
        self.mon.report_error("invalid status queue message {!r}"
                              .format(args))

    def dispatcher_loop(self):

        # Kick things off by starting one proxy.
        (proxy, until_next, n_locations) = self.proxies.start_a_proxy()

        while n_locations:
            time_now = time.monotonic()
            # Technically, until_next being None means "wait for a proxy
            # to exit", but use an hour as a backstop.  (When a proxy does
            # exit, this will get knocked down to zero below.)
            if until_next is None:
                until_next = 3600
            time_next = time_now + until_next
            pending_stop = False

            while time_now < time_next:
                self.update_progress_statistics(n_locations, until_next)

                for msg in queue_iter(self.status_queue, until_next):
                    if msg[0] == self._PROXY_ONLINE:
                        self.proxies.note_proxy_online(msg[2])

                    elif msg[0] == self._PROXY_OFFLINE:
                        self.proxies.note_proxy_offline(msg[2])
                        # Wait no more than 5 minutes before trying to
                        # start another proxy.  (XXX This hardwires a
                        # specific provider's policy.)
                        time_now = time.monotonic()
                        time_next = min(time_next, time_now + 300)
                        until_next = time_next - time_now

                    elif msg[0] == self._BATCH_COMPLETE:
                        worker, result = msg[2], msg[3]
                        locstate, _ = self.active_workers[worker]
                        del self.active_workers[worker]
                        self.idle_workers.add(worker)
                        self.record_batch(locstate, *result)

                    elif msg[0] == self._BATCH_FAILED:
                        worker = msg[2]
                        # We might've already gotten a COMPLETE message
                        # with more precision.
                        if worker in self.active_workers:
                            locstate, batch = self.active_workers[worker]
                            del self.active_workers[worker]
                            self.idle_workers.add(worker)
                            self.record_batch(locstate, [], batch)

                    elif msg[0] == self._DROP_WORKER:
                        worker = msg[2]
                        self.idle_workers.discard(worker)
                        if worker in self.active_workers:
                            # Release the worker's batch so its URLs can be
                            # reassigned; the worker itself is gone, so it
                            # does not go back into idle_workers.
                            locstate, batch = self.active_workers[worker]
                            del self.active_workers[worker]
                            self.record_batch(locstate, [], batch)

                    elif msg[0] == self._MON_SAYS_STOP:
                        self.mon.report_status("interrupt pending")
                        pending_stop = True

                    else:
                        self.mon.report_error("bogus message: {!r}"
                                              .format(message))

                for loc, state in self.locations.items():
                    if state.todo == 0 and loc in self.proxies.locations:
                        self.proxies.locations[loc].finished()

                if pending_stop:
                    self.mon.report_status("interrupted")
                    self.mon.maybe_pause_or_stop()
                    # don't start new work yet, the set of proxies
                    # available may be totally different now

                else:
                    # One-second delay before starting new work, because
                    # proxies aren't always 100% up when they say they are.
                    self.mon.idle(1)

                    while self.idle_workers:
                        assigned_work = False
                        for proxy in self.proxies.active_proxies:
                            if not proxy.online:
                                continue
                            state = self.locations[proxy.loc]
                            if state.n_workers >= self.args.workers_per_loc:
                                continue
                            batch = self.select_batch(state)
                            if not batch:
                                # All work for this location is
                                # assigned to other workers already.
                                continue

                            state.n_workers += 1
                            state.in_progress.update(row[0] for row in batch)
                            worker = self.idle_workers.pop()
                            self.active_workers[worker] = (state, batch)
                            worker.queue_batch(state, batch)
                            assigned_work = True
                            if not self.idle_workers:
                                break

                        if not assigned_work:
                            break

                time_now = time.monotonic()
                until_next = time_next - time_now

            # when we get to this point, it's time to start another proxy
            (proxy, until_next, n_locations) = self.proxies.start_a_proxy()

        # done, kill off all the workers
        self.mon.report_status("finished")
        assert not self.active_workers
        for w in self.idle_workers:
            w.finished()

    def proxy_sort_key(self, loc, method):
        # Consider locales that currently have no workers at all first.
        # Consider locales with more work to do first.
        # Consider locales whose proxy is 'direct' first.
        # Consider locales named 'us' first.
        # As a final tie breaker use alphabetical order of locale name.
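        # For instance, an idle, directly-reachable 'us' locale with 1000
        # URLs left yields (False, -1000, False, False, 'us'); it sorts
        # ahead of every locale that already has workers, and ahead of any
        # idle locale with fewer URLs remaining.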
        state = self.locations[loc]
        return (state.n_workers != 0,
                -state.todo,
                method != 'direct',
                loc != 'us',
                loc)

    def select_batch(self, loc):
        with self.db, self.db.cursor() as cr:

            query = ('SELECT c.url as uid, s.url as url'
                     '  FROM capture_progress c, url_strings s'
                     ' WHERE c.url = s.id')

            query += ' AND NOT c."l_{0}"'.format(loc.locale)

            if loc.in_progress:
                query += ' AND c.url NOT IN ('
                query += ','.join(str(u) for u in loc.in_progress)
                query += ')'

            query += ' LIMIT {0}'.format(self.args.batch_size)
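
            # The assembled query looks like, for instance:
            #   SELECT c.url as uid, s.url as url
            #     FROM capture_progress c, url_strings s
            #    WHERE c.url = s.id AND NOT c."l_us"
            #      AND c.url NOT IN (101,103) LIMIT 50
            # with 'us' standing in for the locale and 50 for batch_size.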
            cr.execute(query)
            return cr.fetchall()

    def record_batch(self, loc, successes, failures):
        locale = loc.locale
        loc.n_workers -= 1
        for r in failures:
            loc.in_progress.remove(r[0])

        if not successes:
            return

        with self.db, self.db.cursor() as cr:
            for s in successes:
                url_id = s[0]
                r      = s[1]
                loc.in_progress.remove(url_id)

                redir_url = None
                redir_url_id = None
                if r['canon']:
                    redir_url = r['canon']
                    if redir_url == r['ourl']:
                        redir_url_id = url_id
                    elif redir_url is not None:
                        try:
                            (redir_url_id, _) = \
                                url_database.add_url_string(cr, redir_url)
                        except (ValueError, UnicodeError):
                            addendum = "invalid redir url: " + redir_url
                            if ('detail' not in r or r['detail'] is None):
                                r['detail'] = addendum
                            else:
                                r['detail'] += " | " + addendum

                detail_id = self.capture_detail.get(r['detail'])
                if detail_id is None:
                    cr.execute("INSERT INTO capture_detail(id, detail) "
                               "  VALUES(DEFAULT, %s)"
                               "  RETURNING id", (r['detail'],))
                    detail_id = cr.fetchone()[0]
                    self.capture_detail[r['detail']] = detail_id

                result = url_database.categorize_result(r['status'],
                                                        r['detail'],
                                                        url_id,
                                                        redir_url_id)

                to_insert = {
                    "locale":       locale,
                    "url":          url_id,
                    "result":       result,
                    "detail":       detail_id,
                    "redir_url":    redir_url_id,
                    "log":          r['log'],
                    "html_content": r['content'],
                    "screenshot":   r['render']
                }
                cr.execute("INSERT INTO captured_pages"
                           "(locale, url, access_time, result, detail,"
                           " redir_url, capture_log, html_content,"
                           " screenshot)"
                           "VALUES ("
                           "  %(locale)s,"
                           "  %(url)s,"
                           "  TIMESTAMP 'now',"
                           "  %(result)s,"
                           "  %(detail)s,"
                           "  %(redir_url)s,"
                           "  %(log)s,"
                           "  %(html_content)s,"
                           "  %(screenshot)s)",
                           to_insert)
                cr.execute('UPDATE capture_progress SET "l_{0}" = TRUE '
                           ' WHERE url = {1}'.format(locale, url_id))
                loc.todo -= 1

    def update_progress_statistics(self, n_locations, until_next):
        jobsize = 0
        plreport = []
        for plstate in self.locations.values():
            jobsize = max(jobsize, plstate.todo)
            plreport.append((-plstate.todo, plstate.locale))

        plreport.sort()
        plreport = " ".join("{}:{}".format(pl[1], -pl[0]) for pl in plreport)

        self.mon.report_status("Processing {}/{} URLs | {}/{}/{} active, {} till next | {}"
                               .format(jobsize, self.overall_jobsize,
                                       len(self.proxies.active_proxies),
                                       n_locations,
                                       len(self.locations),
                                       until_next,
                                       plreport))

    def prepare_database(self):
        self.locations = { loc: PerLocaleState(loc, proxy)
                           for loc, proxy in self.proxies.locations.items() }
        with self.db, self.db.cursor() as cr:
            # Cache the status table in memory; it's reasonably small.
            self.mon.report_status("Preparing database... (capture detail)")
            cr.execute("SELECT detail, id FROM capture_detail;")
            self.capture_detail = { row.detail: row.id for row in cr }

            # The capture_progress table tracks what we've done so far.
            # It is regenerated from scratch each time this program is run,
            # based on the contents of the urls_* and captured_pages tables.
            self.mon.maybe_pause_or_stop()
            self.mon.report_status("Preparing database... "
                                   "(capture progress)")

            l_columns = ",\n  ".join(
                "\"l_{0}\" BOOLEAN NOT NULL DEFAULT FALSE"
                .format(loc) for loc in self.locations.keys())

            cr.execute("CREATE TEMPORARY TABLE capture_progress ("
                       "  url INTEGER PRIMARY KEY,"
                       + l_columns + ");")

            # Determine the set of URLs yet to be captured from the selected
            # tables.
            self.mon.maybe_pause_or_stop()
            self.mon.report_status("Preparing database... "
                                   "(capture progress rows)")

            cr.execute("SELECT table_name FROM information_schema.tables"
                       " WHERE table_schema = %s"
                       "   AND table_type = 'BASE TABLE'"
                       "   AND table_name LIKE 'urls_%%'",
                       (self.args.schema,))
            all_url_tables = set(row[0] for row in cr)

            if self.args.tables is None:
                want_url_tables = all_url_tables
            else:
                want_url_tables = set("urls_"+t.strip()
                                      for t in self.args.tables.split(","))
                if not want_url_tables.issubset(all_url_tables):
                    raise RuntimeError("Requested URL tables do not exist: "
                                       + ", ".join(
                                           t[5:] for t in
                                           want_url_tables - all_url_tables))

            for tbl in want_url_tables:
                self.mon.maybe_pause_or_stop()
                self.mon.report_status("Preparing database... "
                                       "(capture progress rows: {})"
                                       .format(tbl))

                # Only one row per URL, even if it appears in more than one
                # source table.
                cr.execute("INSERT INTO capture_progress (url) "
                           "        SELECT url FROM "+tbl+
                           " EXCEPT SELECT url FROM capture_progress")

            self.mon.maybe_pause_or_stop()
            self.mon.report_status("Preparing database... (analyzing)")
            cr.execute("ANALYZE captured_pages")

            for loc in self.locations.keys():
                self.mon.maybe_pause_or_stop()
                self.mon.report_status("Preparing database... "
                                       "(capture progress values: {})"
                                       .format(loc))

                cr.execute('UPDATE capture_progress c SET "l_{0}" = TRUE'
                           '  FROM captured_pages p'
                           ' WHERE c.url = p.url AND p.locale = \'{0}\''
                           .format(loc))

                self.mon.maybe_pause_or_stop()
                self.mon.report_status("Preparing database... (indexing: {})"
                                       .format(loc))
                cr.execute("CREATE INDEX \"capture_progress_l_{0}_idx\""
                           "  ON capture_progress(\"l_{0}\");"
                           .format(loc))

            self.mon.maybe_pause_or_stop()
            self.mon.report_status("Preparing database... (analyzing)")
            cr.execute("ANALYZE capture_progress")

            self.mon.maybe_pause_or_stop()
            self.mon.report_status("Preparing database... (statistics)")

            query = "SELECT COUNT(*)"
            for loc in self.locations.keys():
                query += ', SUM("l_{0}"::INTEGER) AS "l_{0}"'.format(loc)
            query += " FROM capture_progress"
            cr.execute(query)

            # Compute the number of unvisited URLs for each locale,
            # and remove locales where that number is zero from the
            # working set.

            counts = cr.fetchone()
            self.overall_jobsize = counts[0]
            for loc, done in zip(self.locations.keys(), counts[1:]):
                todo = self.overall_jobsize - done
                assert todo >= 0
                if todo:
                    self.locations[loc].todo = todo
                else:
                    self.locations[loc].proxy.finished()

            self.mon.maybe_pause_or_stop()
            self.mon.report_status("Database prepared.")