def __init__(self):
    """Start the LoQuTuS server: load config, build the job queue, and spin up workers.

    Side effects (in order): reads ``.env`` if present, configures logging,
    starts the queue's manager thread, starts ``nworkers`` worker processes,
    and optionally resumes a previously saved queue from disk.
    """
    super().__init__()
    # Prefer settings from a local .env file; fall back to built-in defaults.
    if Path(".env").exists():
        self.config = Configuration.load_env_file(".env")
    else:
        self.config = Configuration()
    # NOTE(review): presumably _setup_logging assigns self.log, which is used
    # below — confirm in the enclosing class (not visible in this chunk).
    self._setup_logging(None)
    self.queue = JobQueue(
        name="default_queue",
        queue_file=self.config.queue_file,
        completed_limit=self.config.completed_limit,
        config=self.config,
    )
    # Kick off the queue's background save/prune loop, then hand it our logger.
    self.queue._start_manager_thread()
    self.queue.log = self.log
    self.log.info(f"Starting up LoQuTuS server - {VERSION}")
    self.pool = None
    # Name-mangled private method of this class; starts the worker pool.
    self.__start_workers(self.config.nworkers)
    self.log.info(f"Visit {self.config.url}/qstatus to view the queue status")
    # Resume jobs saved by a previous run, if enabled and a queue file exists.
    if self.config.resume_on_start_up and Path(self.config.queue_file).exists():
        self.log.info("Attempting to resume queue")
        self.queue.load()
import os
from pathlib import Path

import requests
import click
import ujson

from lqts.schema import JobSpec, JobID
from .click_ext import OptionNargs
import lqts.environment
from lqts.config import Configuration

# Load client settings from a local .env file when present, otherwise use defaults.
if Path(".env").exists():
    config = Configuration.load_env_file(".env")
else:
    config = Configuration()


def encode_path(p):
    """Replace backslashes with forward slashes in *p* (normalizes Windows paths)."""
    return p.replace("\\", "/")


# qsub CLI entry point; the decorator stack continues beyond this chunk.
@click.command("qsub")
@click.argument("command", nargs=1)
@click.argument("args", nargs=-1)
@click.option("--priority", default=1, type=int)
@click.option("--logfile", default="", type=str, help="Name of log file")
@click.option(
    "--log",
    is_flag=True,
    default=False,
class JobQueue(BaseModel):
    """A job queue persisted to a simple sectioned text file.

    Jobs move between three dicts over their lifetime:
    ``queued_jobs`` -> ``running_jobs`` -> ``completed_jobs``.
    A background thread (started via :meth:`_start_manager_thread`) periodically
    prunes the completed list and saves the queue whenever it is dirty.
    """

    name: str = "default"
    queue_file: str = ""        # path the queue is saved to / loaded from
    completed_limit: int = 500  # completed-job count that triggers pruning
    queued_jobs: Dict[JobID, Job] = {}
    running_jobs: Dict[JobID, Job] = {}
    completed_jobs: Dict[JobID, Job] = {}
    pruned_jobs: Dict[JobID, Job] = {}   # jobs removed by the most recent prune()
    job_groups: Dict[int, JobGroup] = {}
    next_group_number: int = 1
    # NOTE(review): this default is evaluated once at class-definition time
    # (pydantic copies it per instance); it is only meaningful after the first
    # on_queue_change() call updates it.
    last_changed: datetime = datetime.now()
    is_dirty: bool = False  # True when there are unsaved changes
    flags: list = []        # out-of-band control flags, e.g. "abort" stops the run loop
    log: Any = None         # logger injected by the server after construction
    config: Configuration = Configuration()

    def start_up(self):
        """Start the background management thread."""
        self._start_manager_thread()

    def on_queue_change(self, *args, **kwargs):
        """Record that the queue changed so the run loop will save it."""
        self.last_changed = datetime.now()
        self.is_dirty = True

    def get_job_group(self, group_id: int) -> List[Job]:
        """Return all running or queued jobs belonging to group *group_id*."""
        return [
            job
            for job in itertools.chain(
                self.running_jobs.values(), self.queued_jobs.values()
            )
            if job.job_id.group == group_id
        ]

    def find_job(self, job_id: JobID) -> (Job, "JobQueue"):
        """Look for a job in the queued and running jobs.

        Returns ``(job, containing_dict)`` so the caller can also mutate the
        dict the job lives in, or ``(None, None)`` when the job is neither
        queued nor running.
        """
        if job_id in self.queued_jobs:
            return self.queued_jobs[job_id], self.queued_jobs
        elif job_id in self.running_jobs:
            return self.running_jobs[job_id], self.running_jobs
        return None, None

    def submit(self, job_specs: List[JobSpec]) -> List[JobID]:
        """Create a new job group from *job_specs* and queue every job.

        Returns the list of newly assigned job ids, in submission order.
        """
        global LOGGER
        group = JobGroup(group_number=self.next_group_number)
        self.job_groups[group.group_number] = group
        self.next_group_number += 1

        for job_spec in job_specs:
            job_id = group.next_job_id()
            job = Job(job_id=job_id, job_spec=job_spec)
            group.jobs[job_id] = job
            job.submitted = datetime.now()
            self.queued_jobs[job_id] = job

        if len(job_specs) == 1:
            LOGGER.info(
                f"+++ Assimilated job {job.job_id} at {job.submitted.isoformat()} - {job.job_spec.command}"
            )
        elif len(job_specs) > 1:
            first_job_id, *_, last_job_id = list(group.jobs.keys())
            LOGGER.info(
                f"+++ Assimilated jobs {first_job_id} - {last_job_id} at {group.jobs[first_job_id].submitted.isoformat()}"
            )
        self.on_queue_change()
        return list(group.jobs.keys())

    def running_count(self) -> int:
        """Return the total core count of all running jobs.

        This is sum(cores) over running jobs — the number used to decide
        whether another job can be started.
        """
        return sum(job.job_spec.cores for job in self.running_jobs.values())

    def next_job(self) -> Job:
        """Return the next runnable job (best per Job ordering), or None if empty."""
        if not self.queued_jobs:
            return None
        # min() gives the same job as sorted(...)[0] without sorting everything.
        return min(self.queued_jobs.values())

    def on_job_started(self, started_job: Job):
        """Move *started_job* from queued to running and stamp its start time."""
        job = self.queued_jobs.pop(started_job.job_id)
        job.status = JobStatus.Running
        job.started = datetime.now()
        self.running_jobs[job.job_id] = job
        self.on_queue_change()
        if self.log is not None:
            self.log.info(
                f">>> Started job {job.job_id} at {job.started.isoformat()}"
            )

    def on_job_finished(self, completed_job: Job):
        """Move *completed_job* from running to completed, copying its final state."""
        try:
            job = self.running_jobs.pop(completed_job.job_id)
            job.status = completed_job.status
            job.completed = completed_job.completed
            self.completed_jobs[job.job_id] = job
            if self.log is not None:
                self.log.info(
                    f"--- Completed job {job.job_id} at {job.completed.isoformat()}"
                )
        except KeyError:
            # Job was not in running_jobs (e.g. already deleted) — best effort.
            pass
        self.on_queue_change()

    def check_can_job_run(self, job_id: JobID) -> bool:
        """Check whether a job is able to run.

        Three conditions must be met:

        1. The job must be in self.queued_jobs
        2. No dependencies may be in self.queued_jobs
        3. No dependencies may be in self.running_jobs
        """
        if job_id not in self.queued_jobs:
            return False
        job = self.queued_jobs[job_id]
        waiting_on: List[JobID] = [
            id_
            for id_ in job.job_spec.depends
            if ((id_ in self.running_jobs) or (id_ in self.queued_jobs))
        ]
        if len(waiting_on) > 0:
            if DEBUG:
                print(f">w<{job.job_id} waiting on running jobs: {waiting_on}")
            return False
        else:
            return True

    def prune(self):
        """Keep the list of completed jobs to a defined size.

        When ``completed_jobs`` reaches ``completed_limit``, the oldest entries
        are moved into ``pruned_jobs`` until roughly half the limit remains.
        """
        completed_jobs = len(self.completed_jobs)
        if completed_jobs < self.completed_limit:
            return
        prune_count = completed_jobs - int(self.completed_limit / 2)
        self.pruned_jobs = {}
        for ij, job in enumerate(list(self.completed_jobs.values())):
            if ij >= prune_count:
                # BUG FIX: this was ``return``, which skipped on_queue_change()
                # whenever pruning actually removed jobs, so the pruned queue
                # was never marked dirty and never saved.
                break
            self.completed_jobs.pop(job.job_id)
            self.pruned_jobs[job.job_id] = job
        self.on_queue_change()

    def _runloop(self):
        """Background loop: prune, save when dirty, and poll for an abort flag.

        Sleeps in 2-second slices (15 per outer cycle) so an "abort" flag is
        noticed within ~2 s instead of ~30 s.
        """
        import time

        while True:
            self.prune()
            if self.is_dirty:
                self.save()
            for __ in range(15):
                time.sleep(2)
                if "abort" in self.flags:
                    self.flags.remove("abort")
                    return

    def _start_manager_thread(self):
        """Start the thread that manages the queue.

        Returns
        -------
        t: threading.Thread
            The management thread
        """
        import threading

        t = threading.Thread(target=self._runloop)
        t.start()
        return t

    def shutdown(self):
        """Signal the run loop to exit at its next poll."""
        self.flags.append("abort")

    def save(self):
        """Write the queue to ``queue_file`` as three ``[section]`` blocks of JSON lines."""
        with open(self.queue_file, "w") as fid:
            fid.write("[running_jobs]\n")
            for job_id, job in self.running_jobs.items():
                fid.write(f"{job_id}: {job.json()}\n")
            fid.write("[queued_jobs]\n")
            for job_id, job in self.queued_jobs.items():
                fid.write(f"{job_id}: {job.json()}\n")
            fid.write("[completed_jobs]\n")
            for job_id, job in self.completed_jobs.items():
                fid.write(f"{job_id}: {job.json()}\n")
        self.is_dirty = False

    def load(self):
        """Load the queue from ``queue_file`` (the format written by :meth:`save`).

        Jobs that were running when the file was saved are placed back into
        ``queued_jobs`` with status ``Queued`` so they get re-run.  Also
        advances ``next_group_number`` past the highest group seen.
        """
        max_job_group = 0
        with open(self.queue_file, "r") as fid:
            reading_queue = self.running_jobs
            was_running = False
            for line in fid:
                if "[running_jobs]" in line:
                    # Deliberate: previously running jobs are re-queued.
                    reading_queue = self.queued_jobs
                    was_running = True
                elif "[queued_jobs]" in line:
                    reading_queue = self.queued_jobs
                    was_running = False
                elif "[completed_jobs]" in line:
                    reading_queue = self.completed_jobs
                    was_running = False
                else:
                    # Each line is "<job_id>: <json>"; keep everything after
                    # the first colon (the JSON payload contains colons too).
                    *_, str_job = line.partition(":")
                    job = Job.parse_raw(str_job)
                    if was_running:
                        job.status = JobStatus.Queued
                    max_job_group = max(job.job_id.group, max_job_group)
                    reading_queue[job.job_id] = job
        self.is_dirty = False
        self.next_group_number = max_job_group + 1

    @property
    def all_jobs(self):
        """All running, queued, and completed jobs as a single list."""
        return list(
            itertools.chain(
                self.running_jobs.values(),
                self.queued_jobs.values(),
                self.completed_jobs.values(),
            )
        )

    def pop_job(self, job: Job, queue: Dict[JobID, Job]) -> Job:
        """Remove *job* from *queue*, mark it Deleted, and file it under completed."""
        if job is None:
            return None
        else:
            queue.pop(job.job_id)
            job.status = JobStatus.Deleted
            job.completed = datetime.now()
            self.completed_jobs[job.job_id] = job
            return job

    def qdel(self, job_ids: List[JobID]) -> List[JobID]:
        """Delete one or more jobs.

        A job id with ``index is None`` refers to a whole group: every job in
        that group is deleted.  Returns the ids actually deleted.
        """
        deleted_job_ids = []
        for job_id in list(job_ids):
            if job_id.index is None:
                # NOTE(review): relies on JobGroup being iterable over job ids —
                # confirm JobGroup.__iter__ in lqts.schema.
                for job_id2 in self.job_groups[job_id.group]:
                    job, queue = self.find_job(job_id2)
                    if job is not None:
                        job = self.pop_job(job, queue)
                        deleted_job_ids.append(job.job_id)
            else:
                job, queue = self.find_job(job_id)
                if job is not None:
                    job = self.pop_job(job, queue)
                    deleted_job_ids.append(job.job_id)
        self.on_queue_change()
        return deleted_job_ids

    def clear(self):
        """Mark every running job Deleted and move it to completed_jobs."""
        for job_id in list(self.running_jobs.keys()):
            job = self.running_jobs.pop(job_id)
            job.status = JobStatus.Deleted
            self.completed_jobs[job_id] = job