import os

import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Project-local names used below (engine_file, Job, Clusters, create,
# get_job_from_workdir, test_workdir, parse_args, md5sum) are assumed to be
# importable from the surrounding package.


def do_process_fetch(workdir, test=False):
    engine = create_engine(engine_file)
    Session = sessionmaker(bind=engine)
    session = Session()
    clusters = Clusters()
    job = get_job_from_workdir(session, workdir)
    cluster, shell = clusters.get_cluster(job.cluster_name)
    print(job)
    print(job.status, job.name, job.id, job.workdir)
    # if job.status == "S": return
    rsync_return_code = cluster.pull(shell, job)
    assert rsync_return_code == 0
    if test:
        chemtime, target_chemtime = test_workdir(job)
        print(chemtime, target_chemtime)
        # If the simulation hasn't started yet, skip
        if not (chemtime and target_chemtime):
            return
        # If the target chemtime hasn't been achieved, don't stop
        if chemtime != target_chemtime:
            return
    # Stop the simulation if complete
    return  # NOTE: this early return leaves the stop logic below unreachable
    if job.status == "S":
        return
    cluster.cancel(shell, job.cluster_id)
    print("Stopping", job)
    job.status = "S"  # Stopped
    session.add(job)
    session.commit()
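# get_job_from_workdir() is referenced throughout this module but defined
# elsewhere in the project. A minimal sketch, assuming Job is a SQLAlchemy
# model with a local_workdir column holding the path (that column name is an
# assumption inferred from its use in process_resubmit, not confirmed schema):
def get_job_from_workdir(session, workdir):
    """Return the Job whose local workdir matches `workdir`, or None."""
    path = os.path.abspath(workdir)
    return session.query(Job).filter(Job.local_workdir == path).one_or_none()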
def process_resubmit(args):
    engine = create_engine(engine_file)
    Session = sessionmaker(bind=engine)
    session = Session()
    clusters = Clusters()
    print("Checking")
    jobs = [job for job in session.query(Job).order_by(Job.id)
            if job.status != "S"]
    print("Found {} jobs to check on".format(len(jobs)))
    for job in jobs:
        if job.status == "S":
            continue
        if args.cluster and not job.cluster_name.lower().startswith(args.cluster):
            continue
        print(job)
        cluster, shell = clusters.get_cluster(job.cluster_name)
        if not shell:
            continue
        status = cluster.get_status(shell, job)
        job.status = status
        print("Before:", status, job.id)
        # A confout.gro in the local workdir means the run has finished.
        if os.path.exists("{}/confout.gro".format(job.local_workdir)):
            job.status = "S"
        # Resubmit jobs whose allocation completed before the run finished.
        if job.status == "C":
            job = cluster.submit(shell, job)
        print("After:", job.status, job.id)
        session.add(job)
        session.commit()
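# Status codes used in this module (meanings inferred from how they are used
# here, so treat them as assumptions rather than documented semantics):
#   "S"  - stopped/finished locally; skipped by every loop above
#   "C"  - completed its allocation; eligible for resubmission
#   "Q"  - queued
#   "R"  - running
#   "PD" - pending (SLURM's squeue state)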
def main():
    args = parse_args()
    engine = create_engine(engine_file)
    Session = sessionmaker(bind=engine)
    session = Session()
    clusters = Clusters()
    job = get_job_from_workdir(session, args.workdir)
    print(job)
    cluster, shell = clusters.get_cluster(job.cluster_name)
    print(cluster, shell)
    return_code = cluster.delete(shell, job)
    assert return_code == 0
def main():
    args = parse_args()
    engine = create_engine(engine_file)
    Session = sessionmaker(bind=engine)
    session = Session()
    clusters = Clusters()
    job = get_job_from_workdir(session, args.workdir)
    print(job)
    cluster, shell = clusters.get_cluster(job.cluster_name)
    print(cluster, shell)
    rsync_return_code = cluster.push(shell, job)
    assert rsync_return_code == 0
def main():
    args = parse_args()
    engine = create_engine(engine_file)
    Session = sessionmaker(bind=engine)
    session = Session()
    clusters = Clusters()
    for workdir in args.workdir:
        job = get_job_from_workdir(session, workdir)
        print(job)
        cluster, shell = clusters.get_cluster(job.cluster_name)
        print(cluster, shell)
        cluster.cancel(shell, job)
        job.status = "S"  # Stopped
        session.add(job)
        session.commit()
def test_rsync():
    f = "data/M2__monomer__charmm36__charmm22stprot__tip3p__strand_bilayer__run_1.tpr"
    clusters = Clusters()
    names = clusters.clusters.keys()
    names = ["jade", "arcus", "biowulf"]  # override: exercise a fixed subset
    for name in names:
        cluster, shell = clusters.get_cluster(name)
        print(cluster)
        output = "{}.tpr".format(name)
        # Round-trip the file and check it survives unchanged.
        cluster.test_push(shell, f, "/tmp/topol.tpr", verbose=False)
        cluster.test_pull(shell, "/tmp/topol.tpr", output, verbose=False)
        desired = md5sum(f)
        actual = md5sum(output)
        print(desired, actual)
        np.testing.assert_equal(actual, desired)
    print("Testing complete, closing down")
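# md5sum() is referenced above but defined elsewhere in the project. A minimal
# sketch, assuming it returns the hex digest of a file's contents:
import hashlib

def md5sum(path, chunk_size=1 << 20):
    """Return the MD5 hex digest of the file at `path`, read in chunks."""
    digest = hashlib.md5()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()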
def process_fetch(args):
    engine = create_engine(engine_file)
    Session = sessionmaker(bind=engine)
    session = Session()
    clusters = Clusters()
    print("Fetching")
    jobs = [job for job in session.query(Job).order_by(Job.id)]
    print(dir(jobs[0]))  # debug: list the Job attributes
    for i, job in enumerate(jobs):
        if job.status == "S":
            continue
        if not job.cluster_name.lower().startswith(args.cluster):
            continue
        print(i + 1, len(jobs), job)
        cluster, shell = clusters.get_cluster(job.cluster_name)
        if not shell:
            continue
        rsync_return_code = cluster.pull(shell, job)
def main():
    args = parse_args()
    engine = create_engine(engine_file)
    Session = sessionmaker(bind=engine)
    session = Session()
    clusters = Clusters()
    for workdir in args.workdir:
        job = get_job_from_workdir(session, workdir)
        print(workdir, job)
        if not args.refresh:
            continue
        cluster, shell = clusters.get_cluster(job.cluster_name)
        print(cluster, shell)
        actual, desired = job.status, cluster.get_status(shell, job)
        print("actual =", actual, "desired =", desired)
        if actual == desired:
            continue
def main():
    args = parse_args()
    engine = create_engine(engine_file)
    Session = sessionmaker(bind=engine)
    session = Session()
    clusters = Clusters()
    for workdir in args.workdir:
        print(workdir)
        job = get_job_from_workdir(session, workdir)
        if not job:
            continue
        if not args.dry:
            cluster, shell = clusters.get_cluster(job.cluster_name)
            print(cluster, shell)
            cluster.cancel(shell, job)
            job.status = "S"  # Stopped
            session.add(job)
            session.commit()
def main():
    args = parse_args()
    print(args.workdir)
    engine = create_engine(engine_file)
    Job.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    if args.workdir == [] and os.path.exists("workdir"):
        args.workdir = ["workdir"]
    # Restore an existing workdir
    if len(args.workdir):
        for workdir in args.workdir:
            job = get_job_from_workdir(session, workdir)
            print(job)
            if job.status in ("Q", "R", "PD"):
                print("Job already running or queued")
                return
            cluster, shell = Clusters().get_cluster(job.cluster_name)
            job = cluster.submit(shell, job, duration=args.duration,
                                 nodes=args.nodes, partition=args.partition,
                                 ntasks_per_node=args.ntasks_per_node)
            status = cluster.get_status(shell, job)
            job.status = status
            if args.partition and job.partition != args.partition:
                job.partition = args.partition
            if args.nodes and job.nodes != args.nodes:
                job.nodes = args.nodes
            session.add(job)
            session.commit()
    # Create a brand-new workdir
    else:
        cluster, shell = Clusters().get_cluster(args.cluster)
        job = create(args.topol, cluster, shell, args.jobname, args.duration,
                     args.nodes, args.processes, args.script, args.partition,
                     ntasks_per_node=args.ntasks_per_node)
        assert job
        print(job)
        status = cluster.get_status(shell, job)
        print(status)
        if not job.cluster_id:
            job = cluster.submit(shell, job)
        print(status)
        session.add(job)
        session.commit()
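# parse_args() is called by every entry point above but is not shown in this
# file. A minimal sketch for the submit/restore main() follows; every flag
# name is inferred from the attribute accesses above (args.workdir,
# args.duration, args.nodes, ...) and is an assumption, not the project's
# actual interface. The other entry points read further flags such as --dry
# and --refresh, which would be added the same way.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(
        description="Submit a new simulation job or restore existing workdirs")
    parser.add_argument("workdir", nargs="*", default=[],
                        help="existing workdirs to restore; empty to create a new job")
    parser.add_argument("--cluster", help="cluster to create the new job on")
    parser.add_argument("--topol", help="input .tpr topology file")
    parser.add_argument("--jobname", help="name for the new job")
    parser.add_argument("--duration", help="requested walltime")
    parser.add_argument("--nodes", type=int, help="number of nodes")
    parser.add_argument("--processes", type=int, help="number of MPI processes")
    parser.add_argument("--ntasks-per-node", type=int,
                        help="scheduler tasks per node")
    parser.add_argument("--partition", help="scheduler partition/queue")
    parser.add_argument("--script", help="job script template")
    return parser.parse_args()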