def start_queue(
        connection_string,
        job_filepath,
        schema_name="public",
        workers=10,
        cooperative=None,
        schedule_frequency=None,
        job_args=None,
        children=None,
):
    """Creates, configures and runs a job queue.

    :param connection_string: a Postgres connection string
    :param job_filepath: the path to a python script that can configure the queue
    :param schema_name: the name of the schema that contains the queue tables
    :param workers: the number of concurrent workers to run
    :param cooperative: an optional Cooperative used to coordinate multiple queue processes
    :param schedule_frequency: how often, in seconds, to check for scheduled jobs that are due
    :param job_args: arguments passed through to the job module's setup_jobs function
    :param children: PIDs of child processes to forward stop signals to
    """
    model_access = ModelAccess(get_pg_core(connection_string), search_path=schema_name)
    model_access.open(autocommit=True)

    queue = Queue(model_access, worker_count=workers, cooperative=cooperative,
                  schedule_frequency=schedule_frequency)

    job_module = load_module(job_filepath, None)
    job_module.setup_jobs(queue, job_args)

    def stop(sig, _):
        """Stops the queue in the manner specified by the signal.

        :param sig: the signal received
        """
        queue.stop(stop_mode=STOP_SIGNALS[sig])
        if children:
            for pid in children:
                os.kill(pid, sig)
                try:
                    os.waitpid(pid, 0)
                except ChildProcessError:
                    # Child already shut down before we started waiting on it.
                    pass

    for sig in STOP_SIGNALS:
        signal.signal(sig, stop)

    # SIGINFO is only available on BSD-derived platforms (e.g. macOS).
    signal.signal(signal.SIGINFO, lambda n, f: print(queue.status(), file=sys.stderr))

    log_queue_info(job_filepath, workers)
    queue.start()
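# A sketch of the script that job_filepath points at. start_queue loads it with
# load_module and then calls its setup_jobs(queue, job_args) function, so the
# script only has to register handlers. The handler name, its signature, and the
# payload shape below are illustrative assumptions, not part of pypgq.

def send_welcome_email(payload):
    # Hypothetical handler body; real handlers would do the actual work.
    print("welcoming", payload["name"])

def setup_jobs(queue, job_args):
    # Called by start_queue once the queue is constructed.
    queue.add_handler("send_welcome_email", send_welcome_email)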
class PyPGQueueTestCase(TestCase):

    def setUp(self):
        self.da = ModelAccess(get_pg_core(CONNECTION_STRING)).open(autocommit=True)
        self._clean_tables()

    def tearDown(self):
        self._clean_tables()

    def _clean_tables(self):
        self.da.update(M.SerializationKey.table_name, dict(active_job_id=None))
        self.da.delete(M.Job.table_name)
        self.da.delete(M.ScheduledJob.table_name)
        self.da.delete(M.SerializationKey.table_name)

    def _table_counts(self):
        return (self.da.count(M.Job.table_name),
                self.da.count(M.ScheduledJob.table_name),
                self.da.count(M.SerializationKey.table_name))

    def _get_ts(self, seconds_in_future=0):
        return utc_now() + timedelta(seconds=seconds_in_future)

    def test_queue_job(self):
        self.assertEqual(self._table_counts(), (0, 0, 0))

        pypgq.queue_job(self.da, "Job A", dict(name="Trey", status="Start"))
        pypgq.queue_job(self.da, "Job B")
        pypgq.queue_job(self.da, "Job C", dict(name="Julie", status="In Progress"), "tenant/1")
        pypgq.queue_job(self.da, "Job D", None, "tenant/1")
        pypgq.queue_job(self.da, "Job E", None, "tenant/2")

        self.assertEqual(self._table_counts(), (5, 0, 2))

    def test_schedule_job(self):
        self.assertEqual(self._table_counts(), (0, 0, 0))

        pypgq.queue_job(self.da, "Job A", dict(name="Trey", status="Start"),
                        scheduled_at=self._get_ts())
        pypgq.queue_job(self.da, "Job B", scheduled_at=self._get_ts())
        pypgq.queue_job(self.da, "Job C", dict(name="Julie", status="In Progress"), "tenant/1",
                        scheduled_at=self._get_ts())
        pypgq.queue_job(self.da, "Job D", None, "tenant/1", scheduled_at=self._get_ts(2))
        pypgq.queue_job(self.da, "Job E", None, "tenant/2", scheduled_at=self._get_ts(2))

        self.assertEqual(self._table_counts(), (0, 5, 2))

        queue = pypgq.Queue(self.da, schedule_frequency=1)

        self.assertEqual(queue._schedule_jobs(), 3)
        self.assertEqual(self._table_counts(), (3, 2, 2))

        sleep(2)

        self.assertEqual(queue._schedule_jobs(), 2)
        self.assertEqual(self._table_counts(), (5, 0, 2))

        self.assertIsNone(queue._schedule_jobs())

    def test_run_jobs(self):
        qda = ModelAccess(get_pg_core(CONNECTION_STRING)).open(autocommit=True)
        queue = pypgq.Queue(
            qda, worker_count=2,
            cooperative=None)  # pypgq.Cooperative(pypgq.Cooperative.advisory_lock, (1, 1))
        queue._sleep_time = 1.0
        queue.add_handler("sleep_job", sleep_job)
        queue.add_handler("exception_job", exception_job)

        self.assertEqual(queue.status(),
                         {"waiting_jobs": 0, "running_jobs": 0, "running_job_ids": (),
                          "completed_jobs": 0, "stop_mode": pypgq.StopMode.never})

        j1, _ = pypgq.queue_job(self.da, "sleep_job", {"seconds": 5})
        j2, _ = pypgq.queue_job(self.da, "sleep_job", {"seconds": 5})
        j3, _ = pypgq.queue_job(self.da, "exception_job", {"seconds": 5})

        sqt = Thread(target=queue.start)
        sqt.start()

        # NOTE: this is kind of tricky. We need to wait for the right intervals to
        # check the status, and it's not easy to get right. A better approach would
        # be some kind of event system on the queue that calls functions when jobs
        # start, stop, etc.
        sleep(2.5)

        self.assertEqual(queue.status(),
                         {"waiting_jobs": 1, "running_jobs": 2, "running_job_ids": (j1.id, j2.id),
                          "completed_jobs": 0, "stop_mode": pypgq.StopMode.never})

        sleep(3.5)

        self.assertEqual(queue.status(),
                         {"waiting_jobs": 0, "running_jobs": 1, "running_job_ids": (j3.id,),
                          "completed_jobs": 2, "stop_mode": pypgq.StopMode.never})

        sleep(5.0)

        self.assertEqual(queue.status(),
                         {"waiting_jobs": 0, "running_jobs": 0, "running_job_ids": (),
                          "completed_jobs": 3, "stop_mode": pypgq.StopMode.never})

        queue.stop(None, pypgq.StopMode.when_all_done)
        sqt.join()

    def test_job_serialization(self):
        qda = ModelAccess(get_pg_core(CONNECTION_STRING)).open(autocommit=True)
        queue = pypgq.Queue(qda, worker_count=2)
        queue._sleep_time = 1.0
        queue.add_handler("sleep_job", sleep_job)
        queue.add_handler("exception_job", exception_job)

        j1, _ = pypgq.queue_job(self.da, "sleep_job", {"seconds": 3}, "serialize")
        j2, _ = pypgq.queue_job(self.da, "sleep_job", {"seconds": 3}, "serialize")

        sqt = Thread(target=queue.start)
        sqt.start()

        sleep(1.5)

        self.assertEqual(queue.status(),
                         {"waiting_jobs": 1, "running_jobs": 1, "running_job_ids": (j1.id,),
                          "completed_jobs": 0, "stop_mode": pypgq.StopMode.never})

        sleep(3.0)

        self.assertEqual(queue.status(),
                         {"waiting_jobs": 0, "running_jobs": 1, "running_job_ids": (j2.id,),
                          "completed_jobs": 1, "stop_mode": pypgq.StopMode.never})

        sleep(2.0)

        self.assertEqual(queue.status(),
                         {"waiting_jobs": 0, "running_jobs": 0, "running_job_ids": (),
                          "completed_jobs": 2, "stop_mode": pypgq.StopMode.never})

        queue.stop(None, pypgq.StopMode.when_all_done)
        sqt.join()
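# The NOTE in test_run_jobs suggests replacing the sleep-based status checks
# with event callbacks. A minimal sketch of that idea, assuming a hypothetical
# on_job_done(callback) hook on Queue (pypgq does not expose one today):

from threading import Event

def wait_for_completions(queue, expected, timeout=30.0):
    """Block until the queue reports `expected` finished jobs, or time out."""
    done = Event()
    seen = []

    def on_done(job):
        # The callback argument's shape is also assumed; the tests above show
        # that job records have an .id attribute.
        seen.append(job.id)
        if len(seen) >= expected:
            done.set()

    queue.on_job_done(on_done)  # hypothetical hook, not part of pypgq
    done.wait(timeout)
    return seen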
class TransferTracker(object):
    """The TransferTracker class keeps track of data being transferred from one
    database or table to another. Specifically, it makes it possible to record
    the old and new ids for a relation, and query for the new ID.
    """

    def __init__(self, data_access_core):
        """Initialize the TransferTracker class."""
        self.data_access = ModelAccess(data_access_core)

    def open(self, autocommit=False):
        """Call-through to data_access.open."""
        self.data_access.open(autocommit=autocommit)
        return self

    def close(self, commit=True):
        """Call-through to data_access.close."""
        self.data_access.close(commit=commit)
        return self

    def commit(self):
        """Call-through to data_access.commit."""
        self.data_access.commit()
        return self

    def rollback(self):
        """Call-through to data_access.rollback."""
        self.data_access.rollback()
        return self

    def setup(self):
        """Creates the default relations and transfers tables.

        The SQL used may not work on all databases. (It was written for SQLite3.)
        """
        cmds = [
            """
            create table if not exists relations (
                id integer not null primary key,
                name text not null unique,
                completed_at datetime
            );
            """,
            """
            create table if not exists transfers (
                relation_id integer not null references relations (id) on delete cascade,
                old_id text not null,
                new_id text,
                primary key (relation_id, old_id)
            );
            """,
            """
            create index if not exists transfers_relation_id_idx on transfers (relation_id);
            """
        ]

        for cmd in cmds:
            self.data_access.execute(cmd)

        self.data_access.commit()
        return self

    def cleanup(self):
        """Clean up the database, calling vacuum and analyze."""
        self.data_access.execute("vacuum")
        self.data_access.execute("analyze")

    def reset(self, relation_name=None):
        """Reset the transfer info for a particular relation, or, if none is
        given, for all relations.
        """
        if relation_name is not None:
            self.data_access.delete("relations", dict(name=relation_name))
        else:
            self.data_access.delete("relations", "1=1")
        return self

    def start_transfer(self, relation_name):
        """Write records to the data source indicating that a transfer has been
        started for a particular relation.
        """
        self.reset(relation_name)
        relation = Relation(name=relation_name)
        self.data_access.insert_model(relation)
        return relation

    def register_transfer(self, relation, old_id, new_id):
        """Register the old and new ids for a particular record in a relation."""
        transfer = Transfer(relation_id=relation.id, old_id=old_id, new_id=new_id)
        self.data_access.insert_model(transfer)
        return transfer

    def complete_transfer(self, relation, cleanup=True):
        """Write records to the data source indicating that a transfer has been
        completed for a particular relation.
        """
        relation.completed_at = utc_now().isoformat()
        self.data_access.update_model(relation)
        if cleanup:
            self.cleanup()
        return relation

    def is_transfer_complete(self, relation_name):
        """Checks to see if a transfer has been completed."""
        phold = self.data_access.sql_writer.to_placeholder()
        return self.data_access.find_model(
            Relation,
            ("name = {0} and completed_at is not null".format(phold),
             [relation_name])) is not None

    def get_new_id(self, relation_name, old_id, strict=False):
        """Given a relation name and an old ID, get the new ID for a record.

        If strict is true, a KeyError is raised if no record is found for the
        relation and old ID.
        """
""" record = self.data_access.find( "relations as r inner join transfers as t on r.id = t.relation_id", (("r.name", relation_name), ("t.old_id", old_id)), columns="new_id") if record: return record[0] else: if strict: raise KeyError("{0} with id {1} not found".format(relation_name, old_id)) def id_getter(self, relation_name, strict=False): """Returns a function that accepts an old_id and returns the new ID for the enclosed relation name.""" def get_id(old_id): """Get the new ID for the enclosed relation, given an old ID.""" return self.get_new_id(relation_name, old_id, strict) return get_id