def dist_train(trainer, paddle_job):
    """Run training locally or submit it as a distributed Paddle job.

    Args:
        trainer: zero-argument callable that performs local training.
        paddle_job: job description handed to JobManager for submission.

    When the RUNNING_ON_CLOUD env var is unset or "NO", the job is
    submitted to the cluster via JobManager; otherwise the trainer runs
    in-process (we are already on the cloud worker).
    """
    if os.getenv("RUNNING_ON_CLOUD", "NO") == "NO":
        job_manager = JobManager(paddle_job)
        # Parenthesized print works under both Python 2 and Python 3.
        if not job_manager.submit():
            print("submit Paddle Job failed.")
        else:
            # Fixed typo: "successed" -> "succeeded".
            print("submit Paddle Job succeeded.")
    else:
        trainer()
class GameModel(object):
    """Top-level game state: calendar, nature, colonists, buildings, jobs."""

    # Founding colonists created at game start, one house each.
    _FOUNDER_NAMES = ["Adam", "Berta", "Carl", "Dorothy", "Elmer",
                      "Faye", "Gordon", "Hilma", "Igor"]

    def __init__(self):
        self.calendar_instance = Calendar()
        self.nature = Nature(self)
        self.state = CentralState(self)
        self.colonist_counter = 0
        self.colonistlist = [self.create_colonist(name)
                             for name in self._FOUNDER_NAMES]
        # Buildings are keyed by a 1-based ID; each founder gets a house
        # in column 0, rows 0..8. (The dead `{0: "dummy"}` placeholder
        # assignment from the original was removed — it was overwritten
        # immediately.)
        self.buildingdict = {
            index + 1: House(self, 0, index, colonist)
            for index, colonist in enumerate(self.colonistlist)
        }
        # Highest ID already in use, so get_building_ID() never hands out
        # a key colliding with an existing house (the original started at
        # 1 and would have returned duplicate IDs 2..9).
        self.buildingID = max(self.buildingdict)
        self.tradingpost = TradingPost(self, 0, 9)
        self.pricelist = {"food": 2, "clothing": 2}
        self.workplacelist = [Farm(self, 0, 10, self.state, 1)]  # List of Farm objects
        self.jm = JobManager(self.state, self.colonistlist, self.workplacelist)

    def create_colonist(self, name):
        """Create a colonist carrying the next sequential colonist ID."""
        self.colonist_counter += 1
        colonist = Colonist(self, name, self.colonist_counter)
        return colonist

    def get_building_ID(self):
        """Return a fresh, unused building ID."""
        self.buildingID += 1
        return self.buildingID

    def resolve_turn_phase(self):
        """Advance the game by one turn: time, production, AI, workplaces."""
        # forward time
        self.calendar_instance.increase_time()
        # determine AI actions
        for workplace in self.workplacelist:
            workplace.initialize_production()
        for colonist in self.colonistlist:
            colonist.update_colonist()
        self.jm.update_jobmanager(self.colonistlist, self.workplacelist)
        for workplace in self.workplacelist:
            workplace.update_workplace()
def trigger_job(instances_db, job_id, jobs_db, botleague_liaison_host,
                docker_tag=None, job_type=JOB_TYPE_EVAL):
    """Insert a synthetic eval job and drive it through JobManager assignment.

    Copies prod instance metadata into the test instances collection so the
    JobManager has real instances to assign against, then verifies the
    assignment bookkeeping. Test collections are wiped in the finally block.

    Args:
        instances_db: test collection of worker instances (populated below
            from a copy of prod instance data).
        job_id: id under which the synthetic job is stored.
        jobs_db: test jobs collection the JobManager operates on.
        botleague_liaison_host: host recorded on the job for callbacks.
        docker_tag: problem image to run; defaults to the test worker image.
        job_type: job category; defaults to JOB_TYPE_EVAL.
    """
    docker_tag = docker_tag or 'deepdriveio/problem-worker-test'
    eval_mgr = JobManager(jobs_db=jobs_db, instances_db=instances_db)
    eval_mgr.check_for_finished_jobs()
    test_job = Box(botleague_liaison_host=botleague_liaison_host,
                   status=JOB_STATUS_CREATED,
                   id=job_id,
                   job_type=job_type,
                   eval_spec=Box(docker_tag=docker_tag,
                                 eval_id=utils.generate_rand_alphanumeric(32),
                                 eval_key='fake_eval_key',
                                 seed=1,
                                 problem='domain_randomization',
                                 pull_request=None,
                                 max_seconds=20))

    # Make a copy of prod instances
    prod_instances_db = get_worker_instances_db(force_firestore_db=True)
    for inst in prod_instances_db.where('id', '>', ''):
        instances_db.set(inst.id, inst)

    try:
        eval_mgr.jobs_db.set(job_id, test_job)
        new_jobs, exceptions = eval_mgr.assign_jobs()
        assert not exceptions
        if new_jobs:
            # We don't actually start instances but we act like we did.
            assert new_jobs[0].status == JOB_STATUS_CREATED or \
                new_jobs[0].instance_id
            if 'instance_id' in new_jobs[0]:
                instance_meta = eval_mgr.instances_db.get(
                    new_jobs[0].instance_id)
                # So we have real instance meta, but inserted the job into a
                # test collection that the instance is not watching.
                # So the job will not actually run.
                assert instance_meta.status == INSTANCE_STATUS_USED
            else:
                log.warning('Test did not find an instance to run. TODO: use'
                            ' test instance data.')
    finally:
        # Always wipe the test collections, even if an assertion failed.
        if jobs_db is not None:
            jobs_db.delete_all_test_data()
        if instances_db is not None:
            instances_db.delete_all_test_data()
def main():
    """Entry point: optionally sweep finished jobs, then run the job loop."""
    manager = JobManager()
    if '--check-for-finished-jobs' in sys.argv:
        manager.check_for_finished_jobs()

    def heartbeat_cycle():
        # Bracket each run with cronitor pings for external monitoring.
        ping_cronitor('run')
        manager.run()
        ping_cronitor('complete')

    SingletonLoop(loop_name=constants.JOB_LOOP_ID, fn=heartbeat_cycle).run()
def __init__(self):
    """Set up calendar, nature, state, founding colonists, buildings, jobs."""
    self.calendar_instance = Calendar()
    self.nature = Nature(self)
    self.state = CentralState(self)
    self.colonist_counter = 0
    founder_names = ["Adam", "Berta", "Carl", "Dorothy", "Elmer",
                     "Faye", "Gordon", "Hilma", "Igor"]
    self.colonistlist = [self.create_colonist(name) for name in founder_names]
    self.buildingID = 1
    # Houses keyed 1..9 in column 0, one per founder. The dead
    # `self.buildingdict = {0: "dummy"}` assignment from the original was
    # removed — it was overwritten immediately by this dict.
    self.buildingdict = {
        index + 1: House(self, 0, index, colonist)
        for index, colonist in enumerate(self.colonistlist)
    }
    self.tradingpost = TradingPost(self, 0, 9)
    self.pricelist = {"food": 2, "clothing": 2}
    self.workplacelist = [Farm(self, 0, 10, self.state, 1)]  # List of Farm objects
    self.jm = JobManager(self.state, self.colonistlist, self.workplacelist)
def handle_failed_result(self, task_id):
    """save the failed status result

    Records an empty result and a Failed status for *task_id*, tells the
    job manager the task is finished, and tears down its container if one
    is still being tracked.

    Args:
        task_id (str): the result under this task_id
    """
    from job_manager import JobManager
    logger = Logger().get()
    logger.debug(
        f"save_result: task_id: {task_id} status: {Status.Failed}")
    # Persist the empty result and Failed status before touching the
    # container, so the state is recorded even if cleanup goes wrong.
    ResourceManager().save_result(task_id, [])
    ResourceManager().update_task_status(task_id, Status.Failed)
    JobManager().finish_task(task_id)
    try:
        container = self.id_to_task_container[task_id][1]
    except Exception as e:
        # No tracked container — it was probably force-killed earlier.
        logger.info(
            f"exception for finding container correspoding to {task_id}, status:{Status.Failed}, maybe the container is forced killed before, {e}"
        )
    else:
        self.id_to_task_container.pop(task_id, None)
        container.stop()
        container.remove()
        logger.debug(
            f"for task:{task_id}, can't run cmd normally, exit normally")
def add_job(self, job_dict):
    """Validate a stitch-job request, persist it and spawn its JobManager.

    Args:
        job_dict: request fields (src_filename, dst_dir, dst_format,
            segments, map_filename).

    Returns:
        The new job id, or None on invalid input or storage failure.
    """
    job = models.StitchJob()
    # TODO: job.id is the same as file_id
    job.id = str(time.time())
    job.src_filename = str(job_dict.get("src_filename", ""))
    job.src_file_id = ""
    job.dst_dir = str(job_dict.get("dst_dir", ""))
    job.dst_format = str(job_dict.get("dst_format", "flv"))
    job.segments = str(job_dict.get("segments", ""))
    job.map_filename = str(job_dict.get("map_filename", ""))
    job.map_file_id = ""
    # Parenthesized print is valid on both Python 2 and Python 3.
    print("src_filename = %s, dst_dir = %s" % (job.src_filename, job.dst_dir))
    # Source filename and destination directory are mandatory.
    if job.src_filename == "" or job.dst_dir == "":
        return None
    print("add_job, job_id: %s" % (job.id))
    try:
        job_db_operator.add(job)
        job_manager = JobManager(job)
        self._job_managers.append(job_manager)
        return job.id
    except Exception as e:
        # TODO: make a response
        # str(e) instead of e.message: .message was removed in Python 3
        # and deprecated since Python 2.6.
        print(str(e))
        return None
def init_server(args):
    """Initialise global server state from the parsed CLI arguments.

    Loads config.json plus the requested module configs from
    args.config_path, optionally overrides the master address, and builds
    the global job and measurement managers.
    """
    global environ
    global job_manager
    global measurement_manager
    base_path = args.config_path
    modules = args.modules
    master = args.master
    # Resolve module names to config file paths relative to the base path.
    modules = ['%s/%s' % (base_path, module) for module in modules]
    environ = load_environ('%s/config.json' % (base_path), modules)
    # Only override the configured master when one was supplied on the CLI.
    if master != '':
        environ['master'] = master
    job_manager = JobManager(environ)
    job_manager.register_completion_cb(job_completed)
    measurement_manager = MeasurementManager(environ)
def post(self, serviceName):
    """Start an import job for a channel of *serviceName* and return its handle."""
    importDataDict = parserClass.parsePostParameters()
    channelName = importDataDict.get('channelName')
    # Validate that both the service and the channel exist before queueing.
    getServiceIdByName(serviceName)
    getChannelByName(serviceName, channelName)
    job = jobClass(importFunction,
                   channelName,
                   importDataDict.get('openDataUrl'),
                   importDataDict,
                   serviceName)
    return JobManager.startJob(job)
def valid(self):
    """Create the output layer, import the selected shapefile and run the job."""
    iface.removeDockWidget(self.ui)
    sqlite = execFileDialog("*.sqlite", "Créer une couche de sortie", "save")
    wk_lyr = JobManager().create_carhab_lyr(sqlite)
    # Find the polygon layer and make it current on the map canvas.
    for lyr in wk_lyr.getQgisLayers():
        if lyr.name().endswith("_polygon"):
            this_lyr = lyr
            iface.mapCanvas().setCurrentLayer(lyr)
    # NOTE(review): if no layer name ends with "_polygon", this_lyr is
    # unbound and the next line raises UnboundLocalError — confirm intent.
    shp_path = self.ui.findChild(QLineEdit,
                                 this_lyr.name().split("_")[-1]).text()
    lyr = ImportLayer().createQgisVectorLayer(shp_path)
    worker = ImportJob(lyr)
    worker.start()
    # Parenthesized print works on both Python 2 and Python 3.
    print("worker started")
    worker.wait()
    print("finished")
def simulate(cluster_manager):
    """Drive one scheduling simulation and return the total ticks elapsed.

    Builds a cluster via *cluster_manager*, feeds it NUM_JOBS identical
    jobs at uniformly random arrival times, ticks until everything
    completes, and prints periodic status plus per-job durations.
    """
    # NUM_NODES = 50
    NUM_NODES = 20
    # NUM_TASKS = 18000
    NUM_JOBS = 40
    JOB_ARRIVAL_DURATION = 1000
    MACHINE_SPEC = ResourceVec(16, 64, 3000, 5)
    # a, m = 3., 2.  # shape and mode of distribution
    # durations = np.round((np.random.pareto(a, NUM_TASKS) + 1) * m)

    # Initialize cluster
    cluster = cluster_manager()

    # Add nodes
    for _ in range(NUM_NODES):
        cluster.addNode(MACHINE_SPEC)

    # Create jobs: sorted, rounded arrival times in [0, JOB_ARRIVAL_DURATION].
    arrival_times = np.sort(
        np.round(
            np.random.uniform(low=0, high=JOB_ARRIVAL_DURATION,
                              size=NUM_JOBS)))
    jobs = []
    for i in range(NUM_JOBS):
        duration = 260
        numTasks = 30
        taskResources = randomResource(MACHINE_SPEC)
        jobs.append(JobManager(duration, numTasks, taskResources))

    print("{0} {1} {0}".format("=" * 15, cluster.name))
    print("Starting simulation.")
    time = 0
    jobIdx = 0

    # Run simulation: one tick per iteration; a job is assigned on the tick
    # whose time equals its (rounded) arrival time. time advances by 1, so
    # every integer arrival value is hit exactly once.
    while cluster.hasUncompletedJobs() or jobIdx < len(jobs):
        while jobIdx < len(jobs) and arrival_times[jobIdx] == time:
            job = jobs[jobIdx]
            cluster.assignJob(job)
            jobIdx += 1
        if time % 100 == 0:
            print(f'{time}s\n{cluster.status()}')
        cluster.tick()
        time += 1

    print(f'All jobs completed in {time}s.')
    # Report per-job latency (end minus start) and its mean.
    job_durations = [job.endTime - job.startTime for job in jobs]
    print(job_durations)
    print(sum(job_durations) / len(job_durations))
    return time
class SchedulerService(rpyc.Service):
    """RPyC service exposing push/cancel operations on the job manager."""

    def __init__(self):
        self.manager = JobManager()
        self.base = automap_base()
        self.engine = create_engine(settings.CONNECTION_STR)
        # Reflect the existing schema so mapped classes become available.
        self.base.prepare(self.engine, reflect=True)
        self.Session = sessionmaker(bind=self.engine)

    def exposed_push(self, job_id):
        """Queue every series of the given project job for processing.

        Leftover debug prints ('here', 'before query', 'after query',
        per-series id) were removed.
        """
        Job = self.base.classes.project_job
        with session_scope(self.Session) as session:
            job = session.query(Job).get(job_id)
            for series in job.project_series_collection:
                self.manager.push(series.id)

    def exposed_cancel(self, job_id):
        """Cancel a previously pushed job."""
        self.manager.cancel(job_id)
def setUp(self):
    """Submit a single-task job and remember its ids and submission time."""
    print("Test with command\n%s\n" % self.command_line_input)
    print("Submitting job...")
    submitted_at = datetime.now()
    job = Job(self.job_name, self.comments, submitted_at)
    job.tasks = [Task(self.location, self.tool_type, self.command_line_input)]
    self.job_id, self.tasks_id = JobManager().submit_job(job)
    # Truncate to whole milliseconds — presumably to match the precision
    # of the stored timestamp (verify against the persistence layer).
    self.now = submitted_at.replace(
        microsecond=submitted_at.microsecond // 1000 * 1000)
def test_GT_1507_class_job_manager(self):
    """JobManager can start, query, and stop a background thread job."""
    importDataDict = {
        TEST_showImageUrl: TEST_showImageUrl,
        TEST_showObjectUrl: TEST_showObjectUrl
    }
    threadJobObj = ThreadJob(backgroundFunction, TEST_channelName,
                             TEST_openDataUrl, importDataDict,
                             TEST_serviceName)
    threadJobObj.start()
    manager = JobManager()
    jobId = manager.startJob(threadJobObj)
    # assertEqual: the assertEquals alias is deprecated and was removed
    # in Python 3.12.
    self.assertEqual(len(jobId), 12)
    self.assertEqual(type(manager.getJob(jobId)), dict)
    manager.stopJob(jobId)
    self.assertEqual(threadJobObj.done, True)
    self.assertEqual(type(manager.getJobs()), list)
    threadJobObj.stop()
def kill_task(self, task_id):
    """try to kill and remove the container correspoding to the given
    task_id, if succeed, update the status at RM

    Args:
        task_id (Task): The id of the task
    """
    logger = Logger().get()
    try:
        container = self.id_to_task_container[task_id][1]
        container.stop()
        container.remove()
        self.id_to_task_container.pop(task_id, None)
        # Only report Killed and finish the task once teardown succeeded.
        ResourceManager().update_task_status(task_id, Status.Killed)
        from job_manager import JobManager
        JobManager().finish_task(task_id)
    except Exception as e:
        # Container already gone (or never tracked) — log and carry on.
        logger.error(
            f"try to kill {task_id}'s container fail, maybe the container is not existed or already killed, exception: {e}"
        )
def test_GT_1507_class_job_manager(self):
    """JobManager can start, query, and stop a background thread job."""
    importDataDict = {
        TEST_showImageUrl: TEST_showImageUrl,
        TEST_showObjectUrl: TEST_showObjectUrl}
    threadJobObj = ThreadJob(
        backgroundFunction, TEST_channelName, TEST_openDataUrl,
        importDataDict, TEST_serviceName)
    threadJobObj.start()
    manager = JobManager()
    jobId = manager.startJob(threadJobObj)
    # assertEqual: the assertEquals alias is deprecated and was removed
    # in Python 3.12.
    self.assertEqual(len(jobId), 12)
    self.assertEqual(type(manager.getJob(jobId)), dict)
    manager.stopJob(jobId)
    self.assertEqual(threadJobObj.done, True)
    self.assertEqual(type(manager.getJobs()), list)
    threadJobObj.stop()
def setUp(self):
    """Point both managers at the throwaway "test" database."""
    # Never touch the production "felucca" database from unit tests.
    self.job_manager = JobManager()
    self.job_manager.db_name = "test"
    self.resource_manager = ResourceManager("test")
class TestJobManager(unittest.TestCase):
    """Integration tests for JobManager job-lifecycle bookkeeping."""

    def setUp(self):
        # Use "test" database for unit tests instead of "felucca"
        self.resource_manager = ResourceManager("test")
        self.job_manager = JobManager()
        self.job_manager.db_name = "test"

    def test_job_status(self):
        """A job becomes Running on init and Finished once all tasks succeed."""
        self.resource_manager.remove_all_jobs_and_tasks()

        # Create a job with three identical tasks (comment previously
        # said "two tasks" — the list below has three entries).
        task_json = {
            "Files": {},
            "Program_Name": "ooanalyzer",
            "Input_File_Args": {
                "-f": "oo.exe"
            },
            "Input_Text_Args": {
                "--timeout": "300"
            },
            "Input_Flag_Args": ["-v"],
            "Output_File_Args": {
                "-j": "output.json",
                "-F": "facts",
                "-R": "results"
            }
        }
        input_json = {
            "Job_Name": "dump_job",
            "Job_Comment": "this is the test json input for job manager",
            "Tasks": [task_json, task_json, task_json]
        }
        job = self.resource_manager.save_new_job_and_tasks(input_json)
        print(Job.to_json(job))
        job_id = job.job_id

        # Initialize the metadata in JM
        self.job_manager.initialize_job(job)
        job = self.resource_manager.get_job_by_id(job_id)
        self.assertEqual(job.status, Status.Running)

        # Finish all tasks; the job should flip to Finished.
        for task in job.tasks:
            task_id = task.task_id
            self.resource_manager.update_task_status(task_id,
                                                     Status.Successful)
            self.job_manager.finish_task(task_id)
        job = self.resource_manager.get_job_by_id(job_id)
        self.assertEqual(job.status, Status.Finished)

    def test_kill_job(self):
        """Killing one job (or all jobs) clears its metadata entry."""
        self.resource_manager.remove_all_jobs_and_tasks()

        # Create a job without tasks
        input_json = {
            "Job_Name": "dump_job",
            "Job_Comment": "this is the test json input for job manager",
            "Tasks": []
        }
        job = self.resource_manager.save_new_job_and_tasks(input_json)
        job_id = job.job_id

        # Initialize the metadata in JM, then kill the single job.
        self.job_manager.initialize_job(job)
        self.job_manager.kill_job(job_id)
        # assertNotIn is clearer than assertEqual(x in y, False).
        self.assertNotIn(job_id, self.job_manager.job_metadata)

        job = self.resource_manager.save_new_job_and_tasks(input_json)
        job_id = job.job_id
        self.job_manager.initialize_job(job)
        self.job_manager.kill_all_jobs()
        self.assertNotIn(job_id, self.job_manager.job_metadata)
# Clear persisted jobs/tasks so the next test starts from an empty database.
self.resource_manager.remove_all_jobs_and_tasks()
def submit_job_through_job_manager(job):
    """Forward *job* to a freshly constructed JobManager for submission."""
    manager = JobManager()
    manager.submit_job(job)
import os
from job_manager import JobManager
from datetime import datetime

# NOTE(review): `logging` and `tornado` are used below but not imported in
# this chunk — confirm they are imported elsewhere in the file.

# Configure logging
log_format = "%(asctime)s %(name)8s %(levelname)5s %(message)s"
logging.basicConfig(
    level=logging.INFO,
    handlers=[logging.FileHandler("test.log"), logging.StreamHandler()],
    format=log_format,
)
logger = logging.getLogger("main")

# Instantiate JobManager instance
jm = JobManager()


class BaseHandler(tornado.web.RequestHandler):
    """Base handler that adds permissive CORS headers to every response."""

    def set_default_headers(self):
        # Allow any origin plus the common auth/content headers and verbs.
        self.set_header("Access-Control-Allow-Origin", "*")
        self.set_header(
            "Access-Control-Allow-Headers",
            "Origin, X-Requested-With, Content-Type, Accept, Authorization")
        self.set_header("Access-Control-Allow-Methods",
                        " POST, PUT, DELETE, OPTIONS, GET")

    def options(self):
        # CORS preflight requests succeed with 204 No Content.
        self.set_status(204)
        self.finish()
def __init__(self, *args, **kwargs):
    """Initialise every base class of this multiply-inheriting app.

    Bases are initialised explicitly rather than via super() — presumably
    the bases are not cooperative; confirm before converting to super().
    """
    # NOTE(review): *args/**kwargs are accepted but silently dropped.
    TkListApp.__init__(self, 'MD5 Checksums')
    JobManager.__init__(self)
    SingleInstanceApp.__init__(self, 'MD5 Checksums App')
def thread_update_kernel(BASE_IMAGE="seipharos/pharos:latest"):
    """Abort every running job, then update the execution kernel image."""
    job_manager = JobManager()
    job_manager.kill_all_jobs()
    ExecutionManager().update_kernel(BASE_IMAGE)
def get_job_list():
    """Return every known job.

    Test command:
        curl --request GET http://localhost:5000/job-list/json
    """
    jobs = ResourceManager().get_job_list()
    print(JobManager().job_metadata)
    return {"Job_List": jobs}
    # NOTE(review): this chunk begins mid-function — the lines below are
    # the tail of a command-line argument list built by the enclosing
    # (not fully visible) pipeline function.
    config_file_path, '--metatr_output_type', output_type, '--out_dir',
    results_object.output_dir
]
update_progress(job, 5)
run_command(commands, job)
update_progress(job, 100)


if __name__ == '__main__':
    # Register pipeline callbacks, then poll the job queue forever.
    job_manager = JobManager(debug_mode=settings.DEBUG)
    job_manager.add_callback(run_metatranscriptomics, 'metatranscriptomics')
    job_manager.add_callback(run_metamos, 'default')
    while True:
        performed = job_manager.run_queued()
        if performed:
            sleep(1)
        else:
            # let's be more friendly for server,
            # if the queue was empty last time.
            sleep(5)
def get(self, serviceName, jobId):
    """Return job *jobId*, after validating that *serviceName* exists."""
    getServiceIdByName(serviceName)
    job = JobManager.getJob(jobId)
    return job
        # NOTE(review): chunk starts mid-dict-comprehension. `callable(key)`
        # tests the attribute *name* (a str, never callable) — was
        # `callable(value)` intended?
        for key, value in self.__dict__.items()
        if not key.startswith('__') and not callable(key)
    }
    # Persist the state snapshot (skipping unserialisable keys).
    state_file = open(SERVER_STATE_FILE, 'w')
    dump(state_variables, state_file, skipkeys=True)
    state_file.close()


# Called on first run of the server to initialise first pull job
def init():
    # Commented out for front end deployment to AWS
    job_manager.add_job(datetime.datetime.now(), PULL_LAND_REGISTRY_JOB, '')
    Base.metadata.create_all(database_engine)


job_manager = JobManager(1, 'JobsManagerThread', 1, database_engine)

if '-i' in sys.argv:
    init()

server_state = ServerState()

external_stylesheets = ['']

# Reflect the dataset table from the database
Table('core_dataset', Base.metadata, autoload=True,
      autoload_with=database_engine, keep_existing=False,
      extend_existing=True)
def handle_successful_result(self, task_id):
    """save the successful status result

    Copies the declared output files out of the task's container, stores
    them under /tmp/Felucca/result/<task_id>, and records the result with
    the ResourceManager. Falls back to recording an empty/failed result
    if the container or its output cannot be retrieved.

    Args:
        task_id (str): the result under this task_id
    """
    from job_manager import JobManager
    logger = Logger().get()
    logger.debug(
        f"save_result: task_id: {task_id} status: {Status.Successful}")
    try:
        output_path = self.id_to_task_container[task_id][0].output
        container = self.id_to_task_container[task_id][1]
    except Exception as e:
        logger.info(
            f"exception for finding container correspoding to {task_id}, status:{Status.Successful} maybe the container is forced killed before, {e}"
        )
        # NOTE(review): the task is marked Killed here even though this
        # handler is for successful results — confirm that is intended.
        ResourceManager().save_result(task_id, [])
        ResourceManager().update_task_status(task_id, Status.Killed)
    else:
        self.id_to_task_container.pop(task_id, None)
        try:
            # get result from container: bundle the declared outputs into
            # a tar archive inside the container, then stream it out.
            container.exec_run("tar -cvf {}.docker {}".format(
                task_id, " ".join(output_path)))
            bits, stat = container.get_archive(f"{task_id}.docker")
            path = f"/tmp/Felucca/result/{task_id}"
            if not os.path.exists(path):
                os.makedirs(path)
            file = open(f"{path}/{task_id}.tar", "wb+")
            for chunk in bits:
                file.write(chunk)
            file.close()
            # extract result tar file: first the outer archive streamed by
            # get_archive, then the inner tar created by exec_run above.
            result_tar = tarfile.open(f"{path}/{task_id}.tar", "r")
            result_tar.extractall(path)
            result_tar.close()
            result_tar = tarfile.open(f"{path}/{task_id}.docker", "r")
            result_tar.extractall(path)
            result_tar.close()
            # delete temp tar file after extraction
            os.remove(f"{path}/{task_id}.tar")
            os.remove(f"{path}/{task_id}.docker")
            logger.debug(f"for task:{task_id} execute and exit normally")
        except Exception as e:
            logger.error(
                f"exception at copying result out of container and extrating the file for {task_id}, {e}"
            )
            ResourceManager().save_result(task_id, [])
            ResourceManager().update_task_status(task_id, Status.Failed)
            container.stop()
            container.remove()
        else:
            # Rewrite the relative output names to their extracted
            # absolute paths before recording the result.
            for index in range(len(output_path)):
                output_path[index] = os.path.join(path, output_path[index])
                print(output_path[index])
            ResourceManager().save_result(task_id, output_path)
            ResourceManager().mark_task_as_finished(task_id)
# Tell the job manager the task is done, then tear down its container.
JobManager().finish_task(task_id)
container.stop()
container.remove()
def get(self, serviceName):
    """List all jobs, after validating that *serviceName* exists."""
    getServiceIdByName(serviceName)
    jobs = JobManager.getJobs()
    return jobs
def kill_job(job_id):
    """Kill the given job and report success."""
    manager = JobManager()
    manager.kill_job(job_id)
    return {"Status": "ok"}
def __init__(self, *args, **kwargs):
    """Create the behaviour and its two-worker job manager."""
    super(StrategyAgent.TrainBehaviour, self).__init__()
    # Same worker addresses as before, built in a loop instead of inline.
    worker_names = [f'strategy_agent_worker{n}@{domain}' for n in (1, 2)]
    self.job_manager = JobManager(workers=worker_names)
def delete(self, serviceName, jobId):
    """Stop job *jobId*, after validating that *serviceName* exists."""
    getServiceIdByName(serviceName)
    result = JobManager.stopJob(jobId)
    return result
def get_result():
    """Persist a task result reported by a worker and acknowledge receipt."""
    task_id = request.form['task_id']
    status = request.form['status']
    ExecutionManager().save_result(task_id, status)
    JobManager().finish_task(task_id)
    return {'is_received': True}
TITLE_KEYWORDS = [ 'engineer', 'software-engineer', 'dataengineer', 'data-engineer', 'data' ] # jobs must contain one group keyword in job description for every group MUST_HAVE_KEYWORD_GROUPS = [['python', 'python3']] # job will update all nice to have keywords founds NICE_TO_HAVE_KEYWORDS = [ 'pandas', 'webscraping', 'dash', 'scrapy', 'etl', 'pipeline' ] # Starting urls from indeed site, can add job titles, experience level, etc INDEED_STARTING_URLS = [ "https://www.indeed.com/jobs?q=data+engineer&jt=fulltime&explvl=entry_level", "https://www.indeed.com/jobs?q=software+engineer&jt=fulltime&explvl=entry_level", ] # amount of jobs opening at a time JOB_TAB_AMOUNT = 5 # resume you are sending out RESUME = 'V1.00' if __name__ == '__main__': user = JobManager(TITLE_KEYWORDS, MUST_HAVE_KEYWORD_GROUPS, NICE_TO_HAVE_KEYWORDS, INDEED_STARTING_URLS, JOB_TAB_AMOUNT, RESUME) print(user) user.start()
def __init__(self):
    """Wire up the job manager and a reflected SQLAlchemy session factory."""
    self.manager = JobManager()
    self.engine = create_engine(settings.CONNECTION_STR)
    self.base = automap_base()
    # Reflect the existing schema so mapped classes become available.
    self.base.prepare(self.engine, reflect=True)
    self.Session = sessionmaker(bind=self.engine)