def test_uneven(self):
    children = []

    def child_created(child):
        # second child responds
        if children:
            child.stdout_write(b"AA\n")
        children.append(child)

    pool = worker_pool(
        child_created, 2,
        enumerate((b"job1\n", b"job2\n", b"job3\n", b"job4\n")),
        popen=_MockPopen)
    response = next(pool)
    self.assertEqual(len(children), 2)
    c0, c1 = children
    self.assertEqual(c0.stdin_readline(), b"job1\n")
    self.assertEqual(c1.stdin_readline(), b"job2\n")
    self.assertEqual(response, ([0, 2], 1, b"AA\n"))
    self.assertEqual(c1.stdin_readline(), b"job3\n")
    c1.stdout_write(b"BB\n")
    self.assertEqual(next(pool), ([0, 3], 2, b"BB\n"))
    self.assertEqual(c1.stdin_readline(), b"job4\n")
    c1.stdout_write(b"CC\n")
    self.assertEqual(next(pool), ([0, None], 3, b"CC\n"))
    c0.stdout_write(b"DD\n")
    self.assertEqual(next(pool), ([None, None], 0, b"DD\n"))
    self.assertRaises(StopIteration, next, pool)
    for c in children:
        c.close_pipes()
def test_overkill(self):
    children = []

    def child_created(child):
        if not children:
            child.stdout_write(b"AA\n")
        children.append(child)

    pool = worker_pool(
        child_created, 10,
        enumerate((b"job1\n",)),
        popen=_MockPopen)
    response = next(pool)
    self.assertEqual(len(children), 1)
    c = children[0]
    self.assertEqual(c.stdin_readline(), b"job1\n")
    self.assertEqual(response, ([None], 0, b"AA\n"))
    self.assertRaises(StopIteration, next, pool)
    for c in children:
        c.close_pipes()
def test_one(self):
    children = []

    def child_created(child):
        # need to respond or the pool will block the test
        child.stdout_write(b"AA\n")
        children.append(child)

    pool = worker_pool(
        child_created, 1,
        enumerate((b"job1\n", b"job2\n")),
        popen=_MockPopen)
    response = next(pool)
    self.assertEqual(len(children), 1)
    c = children[0]
    self.assertEqual(c.stdin_readline(), b"job1\n")
    self.assertEqual(response, ([1], 0, b"AA\n"))
    self.assertEqual(c.stdin_readline(), b"job2\n")
    c.stdout_write(b"BB\n")
    self.assertEqual(next(pool), ([None], 1, b"BB\n"))
    self.assertRaises(StopIteration, next, pool)
    for c in children:
        c.close_pipes()
def test_batch(self):
    children = []

    def child_created(child):
        if not children:
            child.stdout_write(b"AA\n")
        children.append(child)

    pool = worker_pool(
        child_created, 2,
        enumerate((b"job1\n",)),
        stop_when_jobs_done=False,
        popen=_MockPopen)
    response = next(pool)
    self.assertEqual(len(children), 1)
    c0 = children[0]
    self.assertEqual(c0.stdin_readline(), b"job1\n")
    self.assertEqual(response, ([None], 0, b"AA\n"))
    # (None, None, None) signals the pool is waiting for the next batch
    self.assertEqual(next(pool), (None, None, None))
    # need to write in advance to avoid blocking the test
    c0.stdout_write(b"BB\n")
    response = pool.send(enumerate((b"job2\n", b"job3\n"), 1))
    self.assertEqual(response, ([None, 2], 1, b"BB\n"))
    for c in children:
        c.close_pipes()
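# ---------------------------------------------------------------------------
# A minimal sketch (an assumption, not the project's actual helper) of what a
# _MockPopen-style stand-in could look like.  worker_pool only needs an object
# exposing .stdin (where it writes jobs) and .stdout (where it reads results),
# so a fake can wire up two os.pipe() pairs and let the test drive the "child"
# end directly.  The tests above pass a child_created callback in place of a
# command, so the fake presumably invokes it once its pipes exist.  The class
# and attribute names below are illustrative only.
import os


class _FakePopenSketch(object):

    def __init__(self, command, **kwargs):
        job_r, job_w = os.pipe()        # pool writes jobs -> "child" reads
        result_r, result_w = os.pipe()  # "child" writes results -> pool reads
        self.stdin = os.fdopen(job_w, 'wb')      # seen by worker_pool
        self.stdout = os.fdopen(result_r, 'rb')  # seen by worker_pool
        self._child_stdin = os.fdopen(job_r, 'rb')
        self._child_stdout = os.fdopen(result_w, 'wb')
        if callable(command):
            command(self)  # lets tests respond as soon as the child exists

    def stdin_readline(self):
        # what worker_pool last wrote to this child
        return self._child_stdin.readline()

    def stdout_write(self, data):
        # simulate the child replying on its stdout
        self._child_stdout.write(data)
        self._child_stdout.flush()

    def close_pipes(self):
        for f in (self.stdin, self.stdout,
                  self._child_stdin, self._child_stdout):
            f.close()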
def _portal_update(self, portal_ini, activity_date):
    # activity_date may be a relative offset (PAST_RE) or an ISO date string;
    # default to one week back when nothing is given
    if activity_date:
        past = re.match(PAST_RE, activity_date)
        if past:
            days, hours, minutes = (
                int(x) if x else 0 for x in past.groups())
            activity_date = datetime.now() - timedelta(
                days=days,
                seconds=(hours * 60 + minutes) * 60)
        else:
            activity_date = isodate(activity_date, None)
    else:
        activity_date = datetime.now() - timedelta(days=7)

    log = None
    if self.options.log:
        log = open(self.options.log, 'a')

    registry = LocalCKAN()

    def changed_package_id_runs(start_date):
        while True:
            packages, next_date = self._changed_packages_since(
                registry, start_date)
            if next_date is None:
                return
            yield packages, next_date
            start_date = next_date

    # pool of copy-datasets subprocesses fed jobs over stdin
    cmd = [
        sys.argv[0], 'canada', 'copy-datasets',
        '-c', portal_ini]
    if self.options.mirror:
        cmd.append('-m')

    pool = worker_pool(
        cmd,
        self.options.processes,
        [],
        stop_when_jobs_done=False,
        stop_on_keyboard_interrupt=False)

    # Advance generator so we may call send() below
    pool.next()

    def append_log(finished, package_id, action, reason):
        if not log:
            return
        log.write(json.dumps([
            datetime.now().isoformat(),
            finished,
            package_id,
            action,
            reason,
        ]) + '\n')
        log.flush()

    with _quiet_int_pipe():
        append_log(
            None, None, "started updating from:",
            activity_date.isoformat())

        for packages, next_date in (
                changed_package_id_runs(activity_date)):
            job_ids, finished, result = pool.send(enumerate(packages))
            stats = completion_stats(self.options.processes)
            # drain worker results; result is None once the pool
            # is ready for the next batch
            while result is not None:
                package_id, action, reason = json.loads(result)
                print job_ids, stats.next(), finished, package_id, \
                    action, reason
                append_log(finished, package_id, action, reason)
                job_ids, finished, result = pool.next()

            print " --- next batch starting at: " + next_date.isoformat()
            append_log(
                None, None, "next batch starting at:",
                next_date.isoformat())
            self._portal_update_activity_date = next_date.isoformat()
        self._portal_update_completed = True
def _portal_update(self, source, activity_date):
    # activity_date may be a relative offset (PAST_RE) or an ISO date string;
    # default to one week back when nothing is given
    if activity_date:
        past = re.match(PAST_RE, activity_date)
        if past:
            days, hours, minutes = (
                int(x) if x else 0 for x in past.groups())
            activity_date = datetime.now() - timedelta(
                days=days,
                seconds=(hours * 60 + minutes) * 60)
        else:
            activity_date = isodate(activity_date, None)
    else:
        activity_date = datetime.now() - timedelta(days=7)

    log = None
    if self.options.log:
        log = open(self.options.log, 'a')

    if self.options.push_apikey and not self.options.fetch:
        registry = LocalCKAN()
    elif self.options.fetch:
        registry = RemoteCKAN(source)
    else:
        print "exactly one of -f or -a options must be specified"
        return

    def changed_package_id_runs(start_date):
        while True:
            package_ids, next_date = self._changed_package_ids_since(
                registry, start_date)
            if next_date is None:
                return
            yield package_ids, next_date
            start_date = next_date

    # pool of copy-datasets subprocesses fed package ids over stdin
    cmd = [
        sys.argv[0], 'canada', 'copy-datasets', source,
        '-c', self.options.config]
    if self.options.push_apikey:
        cmd.extend(['-a', self.options.push_apikey])
    else:
        cmd.append('-f')
    if self.options.mirror:
        cmd.append('-m')

    pool = worker_pool(
        cmd,
        self.options.processes,
        [],
        stop_when_jobs_done=False,
        stop_on_keyboard_interrupt=False)

    # Advance generator so we may call send() below
    pool.next()

    def append_log(finished, package_id, action, reason):
        if not log:
            return
        log.write(json.dumps([
            datetime.now().isoformat(),
            finished,
            package_id,
            action,
            reason,
        ]) + '\n')
        log.flush()

    with _quiet_int_pipe():
        append_log(
            None, None, "started updating from:",
            activity_date.isoformat())

        for package_ids, next_date in (
                changed_package_id_runs(activity_date)):
            job_ids, finished, result = pool.send(enumerate(package_ids))
            stats = completion_stats(self.options.processes)
            # drain worker results; result is None once the pool
            # is ready for the next batch
            while result is not None:
                package_id, action, reason = json.loads(result)
                print job_ids, stats.next(), finished, package_id, \
                    action, reason
                append_log(finished, package_id, action, reason)
                job_ids, finished, result = pool.next()

            print " --- next batch starting at: " + next_date.isoformat()
            append_log(
                None, None, "next batch starting at:",
                next_date.isoformat())
            self._portal_update_activity_date = next_date.isoformat()
        self._portal_update_completed = True
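# ---------------------------------------------------------------------------
# A hedged sketch (an assumption, not the actual copy-datasets implementation)
# of the child side of the protocol that _portal_update relies on: each worker
# reads one package id per line from stdin and answers with one JSON line
# [package_id, action, reason] on stdout, which the parent decodes with
# json.loads(result) above.  The function name and placeholder outcome are
# illustrative only.
import json
import sys


def _copy_datasets_worker_sketch(stdin=sys.stdin, stdout=sys.stdout):
    for line in iter(stdin.readline, ''):
        package_id = line.strip()
        if not package_id:
            continue
        # ... copy or update the dataset on the target CKAN here ...
        action, reason = 'updated', ''  # placeholder outcome
        stdout.write(json.dumps([package_id, action, reason]) + '\n')
        stdout.flush()  # the pool reads replies line by line, so flush each one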