# Locate every instance catalog below the catalog root. The pattern matches
# phosim_cat*.txt files inside sub-directories whose names start with "0".
globstr = '{}/0*/phosim_cat*.txt'.format(inst_cat_root)
print("parsl-initial-bundle: globbing {}".format(globstr))

instcat_list_a = glob.glob(globstr)
n_instcats = len(instcat_list_a)
print("parsl-initial-bundle: globbed: {}".format(instcat_list_a))
print("parsl-initial-bundle: globbed {} instance catalogs".format(n_instcats))

# For debugging, the glob result can be replaced by a fixed list such as:
#   ['/mnt/cwd/DC2-R1-2p-WFD-g/000000/instCat/phosim_cat_159479.txt',
#    '/mnt/cwd/DC2-R1-2p-WFD-g/000001/instCat/phosim_cat_159480.txt']

# Work out what needs to be done for these catalogs using the
# instcat_trimmer module. That module requires the imsim python package,
# which is why it lives in a separate module from the rest of the workflow.
# It can be expensive to run on many groups, but it is trivially
# parallelized by splitting up the instance-catalog inputs.
print("parsl-initial-bundle: determining work")
ict.determine_instcat_work(instcat_list_a, worklist)
print("parsl-initial-bundle: determined work")

# NOTE(review): the script exits here with status 0, so nothing below this
# line is executed. Confirm whether the job-success check is meant to be
# disabled.
sys.exit(0)

# Check whether a job has outputs. Since the job has not been started yet,
# every one of these checks is expected to fail, but this lets us exercise
# the utilities.
infile = '/mnt/scripts/bundle_worklist_a.json'  # the worklist file to check
outpath = '/mnt/outputs/'  # where the imSim outputs are written
restartpath = '/mnt/restarts/'  # where all restart files are placed

# Per the original author's notes: for each sensor that has work to do, this
# looks for a gzipped output file without an accompanying checkpoint as the
# measure of success; otherwise the information is written to a new json
# file in the requested restart path so the job can be restarted from it.
jbu.check_job_success(infile, outpath, restartpath)
# Worker script: determine the instance-catalog work for one subset of
# catalogs. The subset index is given as the first command-line argument; the
# matching catalog list is read from
# /mnt/scripts/outputs/instcat_list_subset<index>.json and the output path
# /mnt/scripts/outputs/worklist_subset<index>.json is handed to
# instcat_trimmer.determine_instcat_work.
import json
import os
import sys

# Change to Singularity working directory.
os.chdir('/mnt/cwd')

# Take subset index as argument. Fail with a readable message (exit status 1)
# instead of a bare IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit("error: missing required argument: subset index")
subset_index = sys.argv[1]

# Input and output files for this subset. Command-line arguments are already
# strings, so no conversion is needed before concatenating.
subset_path = '/mnt/scripts/outputs/instcat_list_subset' + subset_index + '.json'
worklist_path = '/mnt/scripts/outputs/worklist_subset' + subset_index + '.json'

# Open up subset matching this.
with open(subset_path, 'r') as f:
    instcat_list_subset = json.load(f)

# Import instcat trimmer. The import has to follow the sys.path change
# because the module is looked up in /mnt/scripts.
sys.path.append('/mnt/scripts')
import instcat_trimmer as ict

ict.determine_instcat_work(instcat_list_subset, worklist_path)