def rescue_exp(self, central_db_obj, sched_db_obj, trace_id=None):
    """Retrieves the job trace from the database of an experiment worker and
    stores it in the central db.

    Two modes, selected by trace_id:
    - trace_id given: that single experiment is loaded from the central
      database, marked as simulation done, and its trace is checked and
      stored once.
    - trace_id not given: experiments are fetched one at a time with
      load_next_state(central_db_obj, "simulation_failed",
      "simulation_done") and processed until that call returns a false
      value.

    For each processed experiment the trace file is cleaned only when
    check_trace_and_store succeeds; the outcome is printed either way.

    Args:
    - central_db_obj: DB object configured to access the analysis database.
    - sched_db_obj: DB object configured to access the slurm database of
        an experiment worker.
    - trace_id: trace_id of the experiment to which the rescued trace
        corresponds. A false value (None, the default) selects the
        "process every failed experiment" mode.
    """
    there_are_more = True
    while there_are_more:
        ed = ExperimentDefinition()
        if trace_id:
            ed.load(central_db_obj, trace_id)
            ed.mark_simulation_done(central_db_obj)
        else:
            # load_next_state takes the DB object first, followed by the
            # state to look for and the state to move the experiment to.
            # Leaving the DB object out shifts the state names into the
            # wrong parameters.
            there_are_more = ed.load_next_state(
                central_db_obj, "simulation_failed", "simulation_done")
        if there_are_more:
            print("About to run resque({0}):{1}".format(
                ed._trace_id, ed._name))
            er = ExperimentRunner(ed)
            if er.check_trace_and_store(sched_db_obj, central_db_obj):
                er.clean_trace_file()
                print("Exp({0}) Done".format(ed._trace_id))
            else:
                print("Exp({0}) Error!".format(ed._trace_id))
        # A specific trace is rescued exactly once; do not loop.
        if trace_id:
            break
print("Reseting experiments in state {0}".format(state)) there_are_more = True while (there_are_more): ed = ExperimentDefinition() if trace_id is not None: ed.load(central_db_obj, trace_id) if ed._work_state!=state: print("Error, unexpected state {0} for trace {1}".format( ed._work_state, trace_id)) exit() ed.upate_state(central_db_obj, new_state) there_are_more=False else: there_are_more=ed.load_next_state(central_db_obj, state, new_state) if ed._trace_id!=None: print("Reset experiment({0}: {1}): {2} -> {3}".format(ed._trace_id, ed._name, state, new_state)) if new_state == "fresh": ed.del_trace(central_db_obj) ed.update_worker(central_db_obj,"") ed.reset_simulating_time(central_db_obj) if new_state in ["fresh", "simulation_complete", "simulation_done", "pending"]: print("A resetear") ed.del_results(central_db_obj) if new_state in ["analysis_done"]: ed.del_results_like(central_db_obj)