Example #1
def start_queue(
    connection_string,
    job_filepath,
    schema_name="public",
    workers=10,
    cooperative=None,
    schedule_frequency=None,
    job_args=None,
    children=None,
):
    """Creates, configures and runs a job queue.

  :param connection_string: a Postgres connection string
  :param job_filepath: the path to a python script that can configure the queue
  :param schema_name: the name of the schema that contains the queue tables
  :param workers: the number of concurrent workers to run
  """
    model_access = ModelAccess(get_pg_core(connection_string), search_path=schema_name)
    model_access.open(autocommit=True)
    queue = Queue(model_access, worker_count=workers,
                  cooperative=cooperative, schedule_frequency=schedule_frequency)
    job_module = load_module(job_filepath, None)
    job_module.setup_jobs(queue, job_args)

    def stop(sig, _):
        """Stops the queue in the manner specified by the signal.

    :param sig: the signal receieved
    """
        queue.stop(stop_mode=STOP_SIGNALS[sig])
        if children:
            for pid in children:
                os.kill(pid, sig)
                try:
                    os.waitpid(pid, 0)
                except ChildProcessError:
                    # Child already shut down before we started waiting on it.
                    pass

    for sig in STOP_SIGNALS:
        signal.signal(sig, stop)
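    # NOTE: SIGINFO is only available on BSD-derived platforms such as macOS; it does not exist on Linux.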
    signal.signal(signal.SIGINFO, lambda n, f: print(queue.status(), file=sys.stderr))

    log_queue_info(job_filepath, workers)

    queue.start()
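
A minimal usage sketch for start_queue, with hypothetical values throughout: start_queue loads the module at job_filepath and calls its setup_jobs(queue, job_args) hook, so a job script only needs to register handlers on the queue it receives. The connection string, file name, and send_email handler below are assumptions for illustration, not part of the library.

# --- jobs.py (hypothetical job script) ---
def send_email(payload):
    """Hypothetical handler; payload is assumed to be the dict stored with the queued job."""
    print("sending email to", payload.get("to"))

def setup_jobs(queue, job_args):
    """Called by start_queue with the configured queue and the pass-through job_args."""
    queue.add_handler("send_email", send_email)

# --- runner (hypothetical connection string and path) ---
start_queue("postgresql://localhost/jobs_db", "jobs.py", workers=4)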
Example #2
def setUp(self):
  self.da = ModelAccess(get_pg_core(CONNECTION_STRING)).open(autocommit=True)
  self._clean_tables()
Example #3
class PyPGQueueTestCase(TestCase):
  def setUp(self):
    self.da = ModelAccess(get_pg_core(CONNECTION_STRING)).open(autocommit=True)
    self._clean_tables()

  def tearDown(self):
    self._clean_tables()

  def _clean_tables(self):
    self.da.update(M.SerializationKey.table_name, dict(active_job_id=None))
    self.da.delete(M.Job.table_name)
    self.da.delete(M.ScheduledJob.table_name)
    self.da.delete(M.SerializationKey.table_name)

  def _table_counts(self):
    return (self.da.count(M.Job.table_name),
            self.da.count(M.ScheduledJob.table_name),
            self.da.count(M.SerializationKey.table_name))

  def _get_ts(self, seconds_in_future=0):
    return utc_now() + timedelta(seconds=seconds_in_future)

  def test_queue_job(self):
    self.assertEqual(self._table_counts(), (0, 0, 0))
    pypgq.queue_job(self.da, "Job A", dict(name="Trey", status="Start"))
    pypgq.queue_job(self.da, "Job B")
    pypgq.queue_job(self.da, "Job C", dict(name="Julie", status="In Progress"), "tenant/1")
    pypgq.queue_job(self.da, "Job D", None, "tenant/1")
    pypgq.queue_job(self.da, "Job E", None, "tenant/2")
    self.assertEqual(self._table_counts(), (5, 0, 2))

  def test_schedule_job(self):
    self.assertEqual(self._table_counts(), (0, 0, 0))
    pypgq.queue_job(self.da, "Job A", dict(name="Trey", status="Start"), scheduled_at=self._get_ts())
    pypgq.queue_job(self.da, "Job B", scheduled_at=self._get_ts())
    pypgq.queue_job(self.da, "Job C", dict(name="Julie", status="In Progress"), "tenant/1", scheduled_at=self._get_ts())
    pypgq.queue_job(self.da, "Job D", None, "tenant/1", scheduled_at=self._get_ts(2))
    pypgq.queue_job(self.da, "Job E", None, "tenant/2", scheduled_at=self._get_ts(2))
    self.assertEqual(self._table_counts(), (0, 5, 2))

    queue = pypgq.Queue(self.da, schedule_frequency=1)
    self.assertEqual(queue._schedule_jobs(), 3)
    self.assertEqual(self._table_counts(), (3, 2, 2))
    sleep(2)
    self.assertEqual(queue._schedule_jobs(), 2)
    self.assertEqual(self._table_counts(), (5, 0, 2))
    self.assertIsNone(queue._schedule_jobs())

  def test_run_jobs(self):
    qda = ModelAccess(get_pg_core(CONNECTION_STRING)).open(autocommit=True)
    queue = pypgq.Queue(
      qda,
      worker_count=2,
      # To exercise cooperative scheduling instead, pass something like:
      #   cooperative=pypgq.Cooperative(pypgq.Cooperative.advisory_lock, (1, 1))
      cooperative=None)
    queue._sleep_time = 1.0
    queue.add_handler("sleep_job", sleep_job)
    queue.add_handler("exception_job", exception_job)

    self.assertEqual(queue.status(),
                     {"waiting_jobs": 0,
                      "running_jobs": 0,
                      "running_job_ids": (),
                      "completed_jobs": 0,
                      "stop_mode": pypgq.StopMode.never})

    j1, _ = pypgq.queue_job(self.da, "sleep_job", {"seconds": 5})
    j2, _ = pypgq.queue_job(self.da, "sleep_job", {"seconds": 5})
    j3, _ = pypgq.queue_job(self.da, "exception_job", {"seconds": 5})

    sqt = Thread(target=queue.start)
    sqt.start()

    # NOTE: this is kind of tricky. We need to wait for the right intervals to check the status,
    #       and it's not easy to get right. A better approach would be an event system on the
    #       queue that fires callbacks when jobs start, stop, and so on.
    sleep(2.5)
    self.assertEqual(queue.status(),
                     {"waiting_jobs": 1,
                      "running_jobs": 2,
                      "running_job_ids": (j1.id, j2.id),
                      "completed_jobs": 0,
                      "stop_mode": pypgq.StopMode.never})
    sleep(3.5)
    self.assertEqual(queue.status(),
                     {"waiting_jobs": 0,
                      "running_jobs": 1,
                      "running_job_ids": (j3.id, ),
                      "completed_jobs": 2,
                      "stop_mode": pypgq.StopMode.never})
    sleep(5.0)
    self.assertEqual(queue.status(),
                     {"waiting_jobs": 0,
                      "running_jobs": 0,
                      "running_job_ids": (),
                      "completed_jobs": 3,
                      "stop_mode": pypgq.StopMode.never})

    queue.stop(None, pypgq.StopMode.when_all_done)
    sqt.join()

  def test_job_serialization(self):
    qda = ModelAccess(get_pg_core(CONNECTION_STRING)).open(autocommit=True)
    queue = pypgq.Queue(qda, worker_count=2)
    queue._sleep_time = 1.0
    queue.add_handler("sleep_job", sleep_job)
    queue.add_handler("exception_job", exception_job)

    j1, _ = pypgq.queue_job(self.da, "sleep_job", {"seconds": 3}, "serialize")
    j2, _ = pypgq.queue_job(self.da, "sleep_job", {"seconds": 3}, "serialize")

    sqt = Thread(target=queue.start)
    sqt.start()

    sleep(1.5)
    self.assertEqual(queue.status(),
                     {"waiting_jobs": 1,
                      "running_jobs": 1,
                      "running_job_ids": (j1.id, ),
                      "completed_jobs": 0,
                      "stop_mode": pypgq.StopMode.never})
    sleep(3.0)
    self.assertEqual(queue.status(),
                     {"waiting_jobs": 0,
                      "running_jobs": 1,
                      "running_job_ids": (j2.id, ),
                      "completed_jobs": 1,
                      "stop_mode": pypgq.StopMode.never})
    sleep(2.0)
    self.assertEqual(queue.status(),
                     {"waiting_jobs": 0,
                      "running_jobs": 0,
                      "running_job_ids": (),
                      "completed_jobs": 2,
                      "stop_mode": pypgq.StopMode.never})

    queue.stop(None, pypgq.StopMode.when_all_done)
    sqt.join()
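
The tests above reference sleep_job and exception_job handlers defined elsewhere in the test module. A plausible sketch of what they might look like, assuming a handler receives the payload dict passed to pypgq.queue_job (the signatures are assumptions, not the module's actual definitions):

from time import sleep

def sleep_job(payload):
  """Sleeps for the requested number of seconds, simulating a long-running job."""
  sleep(payload["seconds"])

def exception_job(payload):
  """Sleeps, then raises, so the tests can observe how the queue handles a failing job."""
  sleep(payload["seconds"])
  raise RuntimeError("exception_job always fails")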
Example #4
def __init__(self, data_access_core):
  """Initialize the TransferTracker class."""
  self.data_access = ModelAccess(data_access_core)
Example #5
class TransferTracker(object):
  """The TransferTracker class keeps track of data being transferred from one database or table to
  another. Specifically, it makes it possible to record the old and new IDs for a relation, and
  to query for the new ID.
  """
  def __init__(self, data_access_core):
    """Initialize the TransferTracker class."""
    self.data_access = ModelAccess(data_access_core)

  def open(self, autocommit=False):
    """Call-through to data_access.open."""
    self.data_access.open(autocommit=autocommit)
    return self

  def close(self, commit=True):
    """Call-through to data_access.close."""
    self.data_access.close(commit=commit)
    return self

  def commit(self):
    """Call-through to data_access.commit."""
    self.data_access.commit()
    return self

  def rollback(self):
    """Call-through to data_access.rollback."""
    self.data_access.rollback()
    return self

  def setup(self):
    """Creates the default relations and transfers tables. The SQL used may not work on all
    databases. (It was written for SQLite3.)
    """
    cmds = [
      """
        create table if not exists relations (
          id integer not null primary key,
          name text not null unique,
          completed_at datetime
        );
      """,
      """
        create table if not exists transfers (
          relation_id integer not null references relations (id) on delete cascade,
          old_id text not null,
          new_id text,
          primary key (relation_id, old_id)
        );
      """,
      """
        create index if not exists transfers_relation_id_idx on transfers (relation_id);
      """
    ]

    for cmd in cmds:
      self.data_access.execute(cmd)
    self.data_access.commit()
    return self

  def cleanup(self):
    """Cleanup the database, calling vacuum and analyze."""
    self.data_access.execute("vacuum")
    self.data_access.execute("analyze")

  def reset(self, relation_name=None):
    """Reset the transfer info for a particular relation, or if none is given, for all relations.
    """
    if relation_name is not None:
      self.data_access.delete("relations", dict(name=relation_name))
    else:
      self.data_access.delete("relations", "1=1")
    return self

  def start_transfer(self, relation_name):
    """Write records to the data source indicating that a transfer has been started for a particular
    relation.
    """
    self.reset(relation_name)
    relation = Relation(name=relation_name)
    self.data_access.insert_model(relation)
    return relation

  def register_transfer(self, relation, old_id, new_id):
    """Register the old and new ids for a particular record in a relation."""
    transfer = Transfer(relation_id=relation.id, old_id=old_id, new_id=new_id)
    self.data_access.insert_model(transfer)
    return transfer

  def complete_transfer(self, relation, cleanup=True):
    """Write records to the data source indicating that a transfer has been completed for a
    particular relation.
    """
    relation.completed_at = utc_now().isoformat()
    self.data_access.update_model(relation)
    if cleanup:
      self.cleanup()
    return relation

  def is_transfer_complete(self, relation_name):
    """Checks to see if a tansfer has been completed."""
    phold = self.data_access.sql_writer.to_placeholder()
    return self.data_access.find_model(
      Relation,
      ("name = {0} and completed_at is not null".format(phold), [relation_name])) is not None

  def get_new_id(self, relation_name, old_id, strict=False):
    """Given a relation name and its old ID, get the new ID for a relation. If strict is true, an
    error is thrown if no record is found for the relation and old ID.
    """
    record = self.data_access.find(
      "relations as r inner join transfers as t on r.id = t.relation_id",
      (("r.name", relation_name), ("t.old_id", old_id)),
      columns="new_id")
    if record:
      return record[0]
    if strict:
      raise KeyError("{0} with id {1} not found".format(relation_name, old_id))

  def id_getter(self, relation_name, strict=False):
    """Returns a function that accepts an old_id and returns the new ID for the enclosed relation
    name."""
    def get_id(old_id):
      """Get the new ID for the enclosed relation, given an old ID."""
      return self.get_new_id(relation_name, old_id, strict)
    return get_id
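
A minimal end-to-end sketch of the TransferTracker API above. The get_sqlite_core factory is hypothetical; substitute whatever core constructor your project provides (the setup docstring notes the SQL was written for SQLite3).

# get_sqlite_core is a hypothetical stand-in for the project's core factory.
tracker = TransferTracker(get_sqlite_core("transfers.db")).open()
tracker.setup()

# Record the ID mapping produced while copying the "users" relation.
relation = tracker.start_transfer("users")
tracker.register_transfer(relation, old_id="17", new_id="1017")
tracker.complete_transfer(relation)

# Later, resolve old IDs to new ones, e.g. when rewriting foreign keys.
assert tracker.is_transfer_complete("users")
get_user_id = tracker.id_getter("users", strict=True)
assert get_user_id("17") == "1017"
tracker.close()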