Example #1
0
    def test_uneven(self):
        """Check worker_pool when one of two workers answers most jobs.

        Four jobs are given to a pool of two mock children.  The second
        child (c1) answers three of them while the first child (c0) only
        answers at the very end.  Every value yielded by the pool is
        compared against an expected 3-tuple; the first element looks like
        the job index each worker currently holds (None once idle), the
        second the index of the job just answered and the third the raw
        reply line -- presumably (job_ids, finished, result); confirm
        against worker_pool.
        """
        children = []

        def child_created(child):
            # Only the second child gets a reply queued up front, so the
            # first next(pool) below has something to return.
            if children:
                child.stdout_write(b"AA\n")
            children.append(child)

        jobs = enumerate((b"job1\n", b"job2\n", b"job3\n", b"job4\n"))
        try:
            pool = worker_pool(child_created, 2, jobs, popen=_MockPopen)
            response = next(pool)
            self.assertEqual(len(children), 2)
            c0, c1 = children
            self.assertEqual(c0.stdin_readline(), b"job1\n")
            self.assertEqual(c1.stdin_readline(), b"job2\n")
            self.assertEqual(response, ([0, 2], 1, b"AA\n"))
            self.assertEqual(c1.stdin_readline(), b"job3\n")
            # Replies are written before each next(pool) so the pool never
            # waits on an empty pipe.
            c1.stdout_write(b"BB\n")
            self.assertEqual(next(pool), ([0, 3], 2, b"BB\n"))
            self.assertEqual(c1.stdin_readline(), b"job4\n")
            c1.stdout_write(b"CC\n")
            self.assertEqual(next(pool), ([0, None], 3, b"CC\n"))
            c0.stdout_write(b"DD\n")
            self.assertEqual(next(pool), ([None, None], 0, b"DD\n"))
            self.assertRaises(StopIteration, next, pool)
        finally:
            # Close the mock pipes even when an assertion above fails.
            for c in children:
                c.close_pipes()
Example #2
0
    def test_overkill(self):
        """Check worker_pool when more workers are requested than jobs.

        Ten workers are requested for a single job; the test asserts that
        only one child is created, that it receives the job, and that the
        pool is exhausted after yielding its reply.
        """
        children = []

        def child_created(child):
            # Queue the first child's reply up front so that next(pool)
            # below has something to return.
            if not children:
                child.stdout_write(b"AA\n")
            children.append(child)

        jobs = enumerate((b"job1\n",))
        try:
            pool = worker_pool(child_created, 10, jobs, popen=_MockPopen)
            response = next(pool)
            self.assertEqual(len(children), 1)
            c = children[0]
            self.assertEqual(c.stdin_readline(), b"job1\n")
            self.assertEqual(response, ([None], 0, b"AA\n"))
            self.assertRaises(StopIteration, next, pool)
        finally:
            # Close the mock pipes even when an assertion above fails.
            for c in children:
                c.close_pipes()
Example #3
0
    def test_one(self):
        """Check a single-worker pool handling two jobs one after another.

        The only child receives job 0, its reply is yielded, then it
        receives job 1 and that reply is yielded; afterwards the pool must
        raise StopIteration.
        """
        children = []

        def child_created(child):
            # need to respond or pool will block test
            child.stdout_write(b"AA\n")
            children.append(child)

        jobs = enumerate((b"job1\n", b"job2\n"))
        try:
            pool = worker_pool(child_created, 1, jobs, popen=_MockPopen)
            response = next(pool)
            self.assertEqual(len(children), 1)
            c = children[0]
            self.assertEqual(c.stdin_readline(), b"job1\n")
            self.assertEqual(response, ([1], 0, b"AA\n"))
            self.assertEqual(c.stdin_readline(), b"job2\n")
            # Reply to the second job before asking the pool for it.
            c.stdout_write(b"BB\n")
            self.assertEqual(next(pool), ([None], 1, b"BB\n"))
            self.assertRaises(StopIteration, next, pool)
        finally:
            # Close the mock pipes even when an assertion above fails.
            for c in children:
                c.close_pipes()
Example #4
0
    def test_batch(self):
        """Check that a pool kept alive accepts a further batch via send().

        With ``stop_when_jobs_done=False`` the pool yields
        ``(None, None, None)`` once the first batch is finished instead of
        raising StopIteration.  A second batch, numbered from 1, is then
        passed in with ``pool.send()`` and the value returned by that call
        is checked.
        """
        children = []

        def child_created(child):
            # Queue the first child's reply up front so that next(pool)
            # below has something to return.
            if not children:
                child.stdout_write(b"AA\n")
            children.append(child)

        jobs = enumerate((b"job1\n",))
        try:
            pool = worker_pool(
                child_created, 2, jobs,
                stop_when_jobs_done=False, popen=_MockPopen)
            response = next(pool)
            self.assertEqual(len(children), 1)
            c0 = children[0]
            self.assertEqual(c0.stdin_readline(), b"job1\n")
            self.assertEqual(response, ([None], 0, b"AA\n"))
            self.assertEqual(next(pool), (None, None, None))
            # need to write in advance to avoid blocking test
            c0.stdout_write(b"BB\n")
            response = pool.send(enumerate((b"job2\n", b"job3\n"), 1))
            self.assertEqual(response, ([None, 2], 1, b"BB\n"))
        finally:
            # Close the mock pipes even when an assertion above fails.
            for c in children:
                c.close_pipes()
Example #5
0
    def _portal_update(self, portal_ini, activity_date):
        if activity_date:
            past = re.match(PAST_RE, activity_date)
            if past:
                days, hours, minutes = (
                    int(x) if x else 0 for x in past.groups()
                )
                activity_date = datetime.now() - timedelta(
                    days=days,
                    seconds=(hours * 60 + minutes) * 60
                )
            else:
                activity_date = isodate(activity_date, None)
        else:
            activity_date = datetime.now() - timedelta(days=7)

        log = None
        if self.options.log:
            log = open(self.options.log, 'a')

        registry = LocalCKAN()

        def changed_package_id_runs(start_date):
            while True:
                packages, next_date = self._changed_packages_since(
                    registry, start_date)
                if next_date is None:
                    return
                yield packages, next_date
                start_date = next_date

        cmd = [
            sys.argv[0],
            'canada',
            'copy-datasets',
            '-c',
            portal_ini
        ]
        if self.options.mirror:
            cmd.append('-m')

        pool = worker_pool(
            cmd,
            self.options.processes,
            [],
            stop_when_jobs_done=False,
            stop_on_keyboard_interrupt=False,
            )

        # Advance generator so we may call send() below
        pool.next()

        def append_log(finished, package_id, action, reason):
            if not log:
                return
            log.write(json.dumps([
                datetime.now().isoformat(),
                finished,
                package_id,
                action,
                reason,
                ]) + '\n')
            log.flush()

        with _quiet_int_pipe():
            append_log(
                None,
                None,
                "started updating from:",
                activity_date.isoformat()
            )

            for packages, next_date in (
                    changed_package_id_runs(activity_date)):
                job_ids, finished, result = pool.send(enumerate(packages))
                stats = completion_stats(self.options.processes)
                while result is not None:
                    package_id, action, reason = json.loads(result)
                    print job_ids, stats.next(), finished, package_id, \
                        action, reason
                    append_log(finished, package_id, action, reason)
                    job_ids, finished, result = pool.next()

                print " --- next batch starting at: " + next_date.isoformat()
                append_log(
                    None,
                    None,
                    "next batch starting at:",
                    next_date.isoformat()
                )
                self._portal_update_activity_date = next_date.isoformat()
            self._portal_update_completed = True
    def _portal_update(self, source, activity_date):
        if activity_date:
            past = re.match(PAST_RE, activity_date)
            if past:
                days, hours, minutes = (
                    int(x) if x else 0 for x in past.groups()
                )
                activity_date = datetime.now() - timedelta(
                    days=days,
                    seconds=(hours * 60 + minutes) * 60
                )
            else:
                activity_date = isodate(activity_date, None)
        else:
            activity_date = datetime.now() - timedelta(days=7)

        log = None
        if self.options.log:
            log = open(self.options.log, 'a')

        if self.options.push_apikey and not self.options.fetch:
            registry = LocalCKAN()
        elif self.options.fetch:
            registry = RemoteCKAN(source)
        else:
            print "exactly one of -f or -a options must be specified"
            return

        def changed_package_id_runs(start_date):
            while True:
                package_ids, next_date = self._changed_package_ids_since(
                    registry, start_date)
                if next_date is None:
                    return
                yield package_ids, next_date
                start_date = next_date

        cmd = [
            sys.argv[0],
            'canada',
            'copy-datasets',
            source,
            '-c',
            self.options.config
        ]
        if self.options.push_apikey:
            cmd.extend(['-a', self.options.push_apikey])
        else:
            cmd.append('-f')
        if self.options.mirror:
            cmd.append('-m')

        pool = worker_pool(
            cmd,
            self.options.processes,
            [],
            stop_when_jobs_done=False,
            stop_on_keyboard_interrupt=False,
            )

        # Advance generator so we may call send() below
        pool.next()

        def append_log(finished, package_id, action, reason):
            if not log:
                return
            log.write(json.dumps([
                datetime.now().isoformat(),
                finished,
                package_id,
                action,
                reason,
                ]) + '\n')
            log.flush()

        with _quiet_int_pipe():
            append_log(
                None,
                None,
                "started updating from:",
                activity_date.isoformat()
            )

            for package_ids, next_date in (
                    changed_package_id_runs(activity_date)):
                job_ids, finished, result = pool.send(enumerate(package_ids))
                stats = completion_stats(self.options.processes)
                while result is not None:
                    package_id, action, reason = json.loads(result)
                    print job_ids, stats.next(), finished, package_id, \
                        action, reason
                    append_log(finished, package_id, action, reason)
                    job_ids, finished, result = pool.next()

                print " --- next batch starting at: " + next_date.isoformat()
                append_log(
                    None,
                    None,
                    "next batch starting at:",
                    next_date.isoformat()
                )
                self._portal_update_activity_date = next_date.isoformat()
            self._portal_update_completed = True