def remove_raw(obs):
    comp_config = load_config_file()
    raw_folder = os.path.join(comp_config['base_data_dir'], str(obs), "raw")
    combined_folder = os.path.join(comp_config['base_data_dir'], str(obs), "combined")
    raw_files = False
    tar_files = False
    combined_files = False
    ics_files = False
    for file in glob.iglob("{0}/*".format(raw_folder)):
        if re.search('dat', file):
            raw_files = True
    for file in glob.iglob("{0}/*".format(combined_folder)):
        if re.search('tar', file):
            tar_files = True
        if re.search(r'ch\d{3}', file):
            combined_files = True
        if re.search('ics', file):
            ics_files = True
    if raw_files:
        munlink_files(raw_folder, "dat")
    if tar_files:
        munlink_files(combined_folder, "tar")
    if combined_files:
        munlink_files(combined_folder, r"ch\d{3}")
    if ics_files:
        munlink_files(combined_folder, "ics")
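# Usage sketch (kept as a comment so it does not run on import; the obs ID
# below is hypothetical). Note that remove_raw() permanently deletes the raw
# and recombined .dat files via munlink_files, so it should only be called
# once downstream data products have been verified:
#
#     remove_raw(1221399680)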
def stokes_fold(run_params):
    """
    Launches the stokes_fold part of the data processing pipeline

    Parameters:
    -----------
    run_params: object
        The run_params object defined by data_processing_pipeline
    """
    launch_line = stokes_launch_line(run_params)
    commands = [launch_line]
    name = "Stokes_Fold_init_{0}_{1}".format(run_params.pulsar, run_params.obsid)
    comp_config = load_config_file()
    batch_dir = "{0}{1}/batch/".format(comp_config['base_product_dir'], run_params.obsid)
    job_id = submit_slurm(name, commands,
                          batch_dir=batch_dir,
                          slurm_kwargs={"time": "00:10:00"},
                          module_list=["mwa_search/{0}".format(run_params.mwa_search),
                                       "dspsr/master", "psrchive/master"],
                          submit=True,
                          vcstools_version="{0}".format(run_params.vcs_tools))
    logger.info("Job successfully submitted: {0}".format(name))
    return job_id
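# Usage sketch (kept as a comment so it does not run on import). run_params is
# normally constructed by data_processing_pipeline, but any object exposing the
# attributes used above would work; all values below are hypothetical:
#
#     from types import SimpleNamespace
#     run_params = SimpleNamespace(pulsar="J0437-4715", obsid=1221399680,
#                                  mwa_search="master", vcs_tools="master")
#     job_id = stokes_fold(run_params)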
def check_recombine(obsID, directory=None, required_size=327680000,
                    required_size_ics=30720000, startsec=None, n_secs=None):
    '''
    Checks that the number of files in directory (/astro/mwavcs/vcs/[obsID]/combined/)
    is the same as that found on the archive and also checks that all files
    have the same size (327680000 by default).
    '''
    comp_config = load_config_file()
    if not directory:
        directory = os.path.join(comp_config['base_data_dir'], str(obsID), "combined")
    base = "\n Checking file size and number of files for obsID {0} in {1} for ".format(
        obsID, directory)
    n_secs = n_secs if n_secs else 1
    logger.info(base + "gps times {0} to {1}".format(startsec, startsec + n_secs - 1)
                if startsec else base + "the whole time range.")

    # we need to get the number of unique seconds from the file names
    files = np.array(get_files(obsID))
    mask = np.array(['.dat' in file for file in files])
    if not startsec:
        times = [time[11:21] for time in files[mask]]
        n_secs = len(set(times))
        command = "ls -l %s/*ch*.dat | ((tee /dev/fd/5 | wc -l >/dev/fd/4) 5>&1 | " %(directory) + \
                  "awk '($5!=%s){print $9}' | tee >> %s/%s_all.txt | xargs rm -rf) 4>&1;" %(required_size, directory, obsID) + \
                  "cat %s/%s_all.txt; rm -rf %s/%s_all.txt" %(directory, obsID, directory, obsID)
        output = subprocess.Popen([command], stdout=subprocess.PIPE, shell=True).stdout
    else:
        output = subprocess.Popen(["count=0;for sec in `seq -w %s %s `;do let count=${count}+`ls -l %s/*${sec}*ch*.dat | " %(startsec, startsec+n_secs-1, directory) + \
                                   "((tee /dev/fd/5 | wc -l >/dev/fd/4) 5>&1 | awk '($5!=%s) " %(required_size) + \
                                   "{print $9}' | tee >> %s/errors_%s.txt | xargs rm -rf) 4>&1`;done;" %(directory,startsec) + \
                                   "echo ${count}; cat %s/errors_%s.txt;rm -rf %s/errors_%s.txt" %(directory,startsec,directory,startsec)],
                                  stdout=subprocess.PIPE, shell=True).stdout
    output = output.readlines()
    files_in_dir = int(output[0].strip())
    expected_files = n_secs * 25

    error, n_ics = check_recombine_ics(directory=directory,
                                       startsec=startsec, n_secs=n_secs,
                                       required_size=required_size_ics)
    files_in_dir += n_ics
    if files_in_dir != expected_files:
        logger.error("We have {0} files but expected {1}".format(files_in_dir, expected_files))
        error = True
    for line in output[1:]:
        if b'dat' in line:
            logger.warning("Deleted {0} due to wrong size.".format(line.strip()))
            error = True
    if not error:
        logger.info("We have all {0} files as expected.".format(files_in_dir))
    return error
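# Usage sketch (kept as a comment so it does not run on import; the obs ID and
# GPS times are hypothetical). A return value of True indicates a problem:
#
#     # check 10 seconds of recombined data starting at a given GPS second
#     error = check_recombine(1221399680, startsec=1221399690, n_secs=10)
#     # or check the whole observation in the default combined directory
#     error = check_recombine(1221399680)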
def opt_parser(loglevels):
    comp_config = load_config_file()
    parser = argparse.ArgumentParser(
        description="scripts to check sanity of downloads and recombine.")
    parser.add_argument("-m", "--mode", type=str, choices=['download','recombine'],
                        help="Mode you want to run: download, recombine",
                        dest='mode', default=None)
    parser.add_argument("-d", "--data_type", type=str, choices=['11','15','16', 'raw','ics','tar_ics'],
                        help="Only necessary when checking downloads. Types refer to those as defined " + \
                             "in voltdownload.py: 11 = Raw, 15 = ICS only, 16 = ICS and tarballs of recombined data.",
                        dest='data_type', default=None)
    parser.add_argument("-o", "--obs", metavar="OBS ID", type=int, dest='obsID',
                        help="Observation ID you want to process [no default]",
                        default=None)
    parser.add_argument("-b", "--begin", metavar="start", type=int, dest='begin',
                        help="gps time of first file to check on [default=%(default)s]",
                        default=None)
    parser.add_argument("-e", "--end", metavar="stop", type=int, dest='end',
                        help="gps time of last file to check on [default=%(default)s]",
                        default=None)
    parser.add_argument("-a", "--all", action="store_true", default=False,
                        help="Perform on entire observation span. Use instead of -b & -e. [default=%(default)s]")
    parser.add_argument("-i", "--increment", metavar="time increment", type=int,
                        dest='increment',
                        help="Effectively the number of seconds to check for, " + \
                             "starting at start time [default=%(default)s]",
                        default=None)
    parser.add_argument("-s", "--size", type=int, dest='size',
                        help="The file size in bytes that you expect all files " + \
                             "to have. By default this is worked out from files on the archive. " + \
                             "We expect 253440000 (download raw), 327680000 " + \
                             "(recombined, not ics), 7865368576 (tarballs)", default=None)
    parser.add_argument("-S", "--size_ics", type=int,
                        help="Size in bytes that you expect the ics files to have. " + \
                             "Default = %(default)s",
                        dest='size_ics', default=30720000)
    parser.add_argument('-w', '--work_dir', type=str, dest='work_dir',
                        help="Directory to check the files in. " + \
                             "Default is {0}[obsID]/[raw,combined]".format(comp_config['base_data_dir']))
    parser.add_argument("-V", "--version", action="store_true", help="Print version and quit")
    parser.add_argument("-L", "--loglvl", type=str, help="Logger verbosity level. Default: INFO",
                        choices=loglevels.keys(), default="INFO")
    return parser.parse_args()
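# Example invocations, assuming this parser backs a stand-alone checks script
# (the script name and obs ID are hypothetical):
#
#     checks.py -m download -d raw -o 1221399680 -a
#     checks.py -m recombine -o 1221399680 -b 1221399690 -i 600 -L DEBUG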
def check_data(obsid, beg=None, dur=None, base_dir=None):
    """
    Checks to see if all of the recombined files exist on disk

    Parameters:
    -----------
    obsid: int
        The observation ID to check
    beg: int
        OPTIONAL - The beginning time of the files to check. If none, will use entire obs. Default: None
    dur: int
        OPTIONAL - The duration in seconds to check since the beginning time. If none, will use entire obs. Default: None
    base_dir: string
        OPTIONAL - The base directory to use. If none, will load from config. Default: None

    Returns:
    ---------
    check: boolean
        True - all files are on disk. False - not all files are on disk
    """
    if base_dir is None:
        comp_config = load_config_file()
        base_dir = comp_config['base_data_dir']
    comb_dir = "{0}{1}/combined".format(base_dir, obsid)

    # guard against non-integer inputs, but leave None values untouched
    if beg is not None and not isinstance(beg, int):
        beg = int(beg)
    if dur is not None and not isinstance(dur, int):
        dur = int(dur)

    # Check to see if the files are combined properly
    if beg is not None and dur is not None:
        logger.info("Checking recombined files beginning at {0} and ending at {1}. Duration: {2} seconds"\
                    .format(beg, (beg+dur), dur))
        error = checks.check_recombine(obsid, startsec=beg, n_secs=dur, directory=comb_dir)
    else:
        logger.warning("No start time information supplied. Comparing files with full obs")
        error = checks.check_recombine(obsid, directory=comb_dir)

    check = not error
    return check
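# Usage sketch (kept as a comment so it does not run on import; the obs ID and
# GPS times are hypothetical):
#
#     if check_data(1221399680, beg=1221399690, dur=600):
#         logger.info("All recombined files present; safe to beamform.")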
def remove_beamformed(obs, pointing=None):
    comp_config = load_config_file()
    pointing_folder = os.path.join(comp_config['base_data_dir'], str(obs), "pointings")
    if not pointing:
        authority = input('No pointing specified, would you like to remove all pointings for this observation? (y/N): ')
        if (authority == "Y") or (authority == "y"):
            pointings = glob.glob("{0}/*:*:*:*:*".format(pointing_folder))
            if not pointings:
                logger.error("No valid pointings in {0}. Exiting...".format(pointing_folder))
                sys.exit(0)
            else:
                for pointing in pointings:
                    logger.info("Checking if pointing {0} has been uploaded to MWA Pulsar Database...".format(pointing))
                    # Upload to MWA Pulsar Database if not there already
                    # Remove each pointing
    return
def binfind(run_params):
    """
    Launches the binfinding part of the data processing pipeline

    Parameters:
    -----------
    run_params: object
        The run_params object defined by data_processing_pipeline
    """
    launch_line = binfinder_launch_line(run_params)
    commands = [launch_line]

    # decide how much time to allocate based on number of pointings
    n_pointings = len(run_params.pointing_dir)
    if n_pointings < 100:
        time = "00:30:00"
    elif n_pointings < 400:
        time = "02:00:00"
    elif n_pointings < 1000:
        time = "05:00:00"
    else:
        time = "10:00:00"

    name = "bf_initiate_{0}_{1}".format(run_params.pulsar, run_params.obsid)
    logger.info("Submitting binfinder script:")
    logger.info("")
    logger.info("Job Name: {}".format(name))
    comp_config = load_config_file()
    batch_dir = "{0}{1}/batch/".format(comp_config['base_product_dir'], run_params.obsid)
    job_id = submit_slurm(name, commands,
                          batch_dir=batch_dir,
                          slurm_kwargs={"time": time},
                          module_list=['mwa_search/{0}'.format(run_params.mwa_search),
                                       'presto/no-python'],
                          submit=True,
                          vcstools_version="{0}".format(run_params.vcs_tools))
    logger.info("Job successfully submitted: {0}".format(name))
    return job_id
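# Usage sketch (kept as a comment so it does not run on import). Compared to
# stokes_fold(), run_params additionally needs a pointing_dir list, whose
# length sets the requested walltime; all values below are hypothetical:
#
#     from types import SimpleNamespace
#     run_params = SimpleNamespace(pulsar="J0437-4715", obsid=1221399680,
#                                  pointing_dir=["04:37:15.81_-47:15:09.0"],
#                                  mwa_search="master", vcs_tools="master")
#     job_id = binfind(run_params)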
def __init__(self, obsid, cal_obsid, metafits, srclist, n_int_bins=6,
             datadir=None, outdir=None, offline=False, beam_model="FEE2016",
             vcstools_version="master"):
    self.obsid = obsid              # target observation ID
    self.cal_obsid = cal_obsid      # calibrator observation ID
    self.offline = offline          # switch to decide if offline correlated data or not
    self.utctime = None             # start UTC time
    self.nfine_chan = None          # number of fine channels
    self.channels = None            # actual channel numbers
    self.fine_cbw = None            # fine channel bandwidth
    self.max_frequency = None       # the maximum frequency used by the RTS to calculate decorrelation
    self.corr_dump_time = None      # correlator dump times (i.e. integration time)
    self.n_dumps_to_average = None  # number of integration times to use for calibration
    self.PB_HA = None               # primary beam HA
    self.PB_DEC = None              # primary beam Dec
    self.freq_base = None           # frequency base for RTS
    self.JD = None                  # time base for RTS
    self.metafits_RTSform = None    # modified metafits file name for RTS
    self.ArrayPositionLat = -26.70331940   # MWA latitude
    self.ArrayPositionLong = 116.6708152   # MWA longitude
    self.n_integration_bins = n_int_bins   # number of visibility integration groups for RTS
    self.base_str = None            # base string to be written to file, will be edited by RTScal
    self.beam_model = beam_model    # the beam model to use for the calibration solutions: 'ANALYTIC' or 'FEE2016'
    self.beam_model_bool = None
    self.vcstools_version = vcstools_version

    comp_config = load_config_file()

    # Check to make sure paths and files exist:
    # First, check that the actual data directory exists
    if datadir is None:
        # use the default data path
        self.data_dir = os.path.join(comp_config['base_data_dir'], str(obsid),
                                     "cal", str(cal_obsid), "vis")
        logger.info("Using default calibrator data path: {0}".format(self.data_dir))
        if os.path.exists(os.path.realpath(self.data_dir)) is False:
            errmsg = "Default data directory ({0}) does not exist. Aborting.".format(self.data_dir)
            logger.error(errmsg)
            raise CalibrationError(errmsg)
    elif os.path.isdir(datadir):
        self.data_dir = os.path.realpath(datadir)
        logger.info("Using the user specified data directory: {0}".format(datadir))
    else:
        errmsg = "Data directory ({0}) does not exist. Aborting.".format(datadir)
        logger.error(errmsg)
        raise CalibrationError(errmsg)

    # Then check if the specified output and batch directories exist
    if outdir is None:
        # this is the default
        logger.info("Assuming default directory structure...")
        self.output_dir = os.path.join(comp_config['base_data_dir'], str(self.obsid),
                                       "cal", str(self.cal_obsid), "rts")
        self.batch_dir = os.path.join(comp_config['base_data_dir'], str(self.obsid), "batch")
        logger.debug("RTS output directory is {0}".format(self.output_dir))
        logger.debug("Batch directory is {0}".format(self.batch_dir))
        mdir(self.output_dir, "RTS", gid=comp_config['gid'])
        mdir(self.batch_dir, "Batch", gid=comp_config['gid'])
    else:
        # mdir handles if the directory already exists
        self.output_dir = os.path.realpath(outdir + "/rts")
        self.batch_dir = os.path.realpath(outdir + "/batch")
        logger.warning("Non-standard RTS output path: {0}".format(self.output_dir))
        logger.warning("Non-standard batch directory path: {0}".format(self.batch_dir))
        mdir(self.output_dir, "RTS", gid=comp_config['gid'])
        mdir(self.batch_dir, "Batch", gid=comp_config['gid'])

    # Then check that the metafits file exists
    if os.path.isfile(metafits) is False:
        # file doesn't exist
        errmsg = "Given metafits file ({0}) does not exist.".format(metafits)
        logger.error(errmsg)
        raise CalibrationError(errmsg)
    elif "_ppds" not in metafits:
        # file doesn't have the correct naming convention
        errmsg = "Looks like you have an old-style metafits. " \
                 "You'll need to download the new version, which is named like: " \
                 "{0}_metafits_ppds.fits.".format(obsid)
        logger.error(errmsg)
        raise CalibrationError(errmsg)
    else:
        logger.info("Metafits file exists and is named correctly.")
        self.metafits = os.path.realpath(metafits)
        logger.debug("    {0}".format(self.metafits))

    # Then check that the source list exists
    if os.path.isfile(srclist) is False:
        # file doesn't exist
        errmsg = "Given source list file ({0}) does not exist.".format(srclist)
        logger.error(errmsg)
        raise CalibrationError(errmsg)
    else:
        logger.info("Checking source list file exists... Ok")
        self.source_list = os.path.realpath(srclist)

    # Check the 'beam_model' is one of the correct choices
    choices = ("FEE2016", "ANALYTIC")
    if self.beam_model not in choices:
        errmsg = "Given beam model: {0} not an available choice: {1}".format(self.beam_model, choices)
        logger.error(errmsg)
        raise CalibrationError(errmsg)
    else:
        logger.info("Using {0} beam model for calibration solution".format(self.beam_model))
        self.beam_model_bool = int(bool(self.beam_model == "ANALYTIC"))  # produces 1 for ANALYTIC, 0 for FEE2016

    # set some RTS flags based on if we have offline correlated data or not
    logger.info("Setting RTS data input flags...")
    if self.offline:
        self.useCorrInput = 1
        self.readDirect = 0
        logger.debug("Offline correlation")
    else:
        self.useCorrInput = 0
        self.readDirect = 1
        logger.debug("Online correlation")
def submit_slurm(name, commands, tmpl=SLURM_TMPL, slurm_kwargs=None,
                 module_list=[], vcstools_version="master", batch_dir="batch/",
                 depend=None, depend_type='afterok', submit=True, outfile=None,
                 queue="cpuq", export="NONE", gpu_res=None, mem=1024,
                 cpu_threads=1, temp_mem=None, nice=0, shebag='#!/bin/bash -l',
                 module_dir=None, load_vcstools=True):
    """
    Cleanly submit SLURM jobs using a simple template.

    Parameters
    ----------
    name : str
        The base name that is used to create the "`name`.batch" and "`name`.out" files.
    commands : list of strs
        The actual bash script commands you want to run.
        Expects a list where each element is a single line of the bash script.
    tmpl : str
        A template header string with format place holders: export, outfile,
        cluster, header and script.
        This is used to create the final string to be written to the job script.
        For this function, it is required to be SLURM compliant.
        Default: `SLURM_TMPL`
    slurm_kwargs : dict [optional]
        A dictionary of SLURM keyword, value pairs to fill in whatever is not
        in the template supplied to `tmpl`.
        Default: `{}` (empty dictionary, i.e. no additional header parameters)
    module_list : list of str [optional]
        A list of module names (including versions if applicable) that will
        be included in the header for the batch scripts.
        e.g. ["vcstools/master", "mwa-voltage/master", "presto/master"] would append
            module load vcstools/master
            module load mwa-voltage/master
            module load presto/master
        to the header of the batch script. This can also invoke "module use ..." commands.
        NOTE: /group/mwa/software/modulefiles is used and vcstools/master is loaded by default.
    vcstools_version : str
        The version of vcstools to load. Default: master.
    batch_dir : str [optional]
        The LOCAL directory where you want to write the batch scripts
        (i.e. it will write to `$PWD/batch_dir`).
        Default: "batch/"
    depend : list or None [optional]
        A list of the SLURM job IDs that you would like this job to depend on.
        If `None` then it is assumed there is no dependency on any other job.
        Default: `None`
    depend_type : str [optional]
        The type of SLURM dependency required. For example, if you wanted the
        job to run after the jobs have been terminated use 'afterany'.
        Default: "afterok"
    submit : boolean [optional]
        Whether to write and submit the job scripts (`True`) or only write the scripts (`False`).
        Default: `True`
    outfile : str [optional]
        The output file name if "`name`.out" is not desirable.
        Default: `None` (i.e. "`batch_dir`/`name`.out")
    queue : str [optional]
        The type of queue you require (cpuq, gpuq or copyq); the script will
        then choose the correct partition and cluster for the job to run on.
        Default: "cpuq"
    export : str [optional]
        Switch that lets SLURM use your login environment on the compute
        nodes ("ALL") or not ("NONE").
        Default: "NONE"
    gpu_res : int [optional]
        Number of GPUs that the SLURM job will reserve.
        Default: `None`
    mem : int [optional]
        The MB of ram required for your slurm job.
        Default: 1024
    cpu_threads : int [optional]
        The number of cpu threads required for your slurm job.
        Default: 1
    temp_mem : int [optional]
        GB of temporary SSD storage to request. Only available on Ozstar.
        Default: `None`
    nice : int [optional]
        The SLURM nice value (higher values mean lower priority).
        Default: 0
    shebag : str [optional]
        The shebang line written at the top of the batch script.
        Default: '#!/bin/bash -l'
    module_dir : str [optional]
        The modulefiles directory to use. If `None`, this is read from the config file.
        Default: `None`
    load_vcstools : boolean [optional]
        Whether to load the vcstools module in the batch script.
        Default: `True`

    Returns
    -------
    jobid : int
        The unique SLURM job ID associated with the submitted job.
    """
    if slurm_kwargs is None:
        slurm_kwargs = {}

    # Work out which partition and cluster to use based on the supercomputer
    # (in config file) and queue required
    comp_config = load_config_file()
    if queue == 'cpuq':
        cluster = comp_config['cpuq_cluster']
        partition = comp_config['cpuq_partition']
    elif queue == 'gpuq':
        cluster = comp_config['gpuq_cluster']
        partition = comp_config['gpuq_partition']
        if gpu_res is None:
            # No gpus reserved so change it to a default of 1
            gpu_res = 1
    elif queue == 'copyq':
        cluster = comp_config['copyq_cluster']
        partition = comp_config['copyq_partition']
    elif queue == 'zcpuq':
        # Download and checks should be done on Zeus's cpuq. This will only work
        # on Galaxy as the Ozstar workflow is different
        cluster = comp_config['zcpuq_cluster']
        partition = comp_config['zcpuq_partition']
    else:
        logger.error("No queue found, please use cpuq, gpuq, copyq or zcpuq")
        # without a valid queue we cannot fill the template, so bail out early
        return None

    header = []

    if batch_dir.endswith("/") is False:
        batch_dir += "/"

    # define file names (both the batch job file and the output file)
    jobfile = batch_dir + name + ".batch"
    if not outfile:
        outfile = batch_dir + name + ".out"

    # create the header from supplied arguments
    for k, v in slurm_kwargs.items():
        if len(k) > 1:
            k = "--" + k + "="
        else:
            k = "-" + k + " "
        header.append("#SBATCH {0}{1}".format(k, v))

    # check if there are dependencies, and if so include that in the header
    if depend is not None:
        # assumes depend is a list but if not will make an educated guess of how to reformat it
        if isinstance(depend, int):
            # assume it's been given a single job id
            header.append("#SBATCH --dependency={0}:{1}".format(depend_type, depend))
        if isinstance(depend, str):
            if ":" in depend:
                # assume it has been given an already formatted string
                if depend.startswith(":"):
                    depend = depend[1:]
            # or a single jobid
            header.append("#SBATCH --dependency={0}:{1}".format(depend_type, depend))
        if isinstance(depend, list):
            depend_str = ""
            for job_id in depend:
                depend_str += ":" + str(job_id)
            header.append("#SBATCH --dependency={0}{1}".format(depend_type, depend_str))

    # add a gpu res to header
    if gpu_res is not None:
        header.append('#SBATCH --gres=gpu:{0}'.format(gpu_res))

    # add temp SSD memory to combat I/O issues. Only available on Ozstar
    hostname = socket.gethostname()
    if temp_mem is not None:
        header.append("#SBATCH --tmp={0}GB".format(temp_mem))

    if module_dir is None:
        module_dir = comp_config['module_dir']

    # now join the header into one string
    header = "\n".join(header)

    # construct the module loads
    if load_vcstools:
        modules = ["module load vcstools/{0}\n".format(vcstools_version)]
    else:
        modules = []
    switches = []
    for m in module_list:
        if m == "vcstools":
            # don't do anything as vcstools is loaded automatically
            continue
        if "module switch" in m:
            # if a module switch command is included rather than just a module name,
            # then add it to a separate list
            switches.append(m)
        elif "module" in m:
            modules.append("{0}\n".format(m))
        else:
            modules.append("module load {0}\n".format(m))

    # join the module loads and switches into a single string
    switches = "\n".join(switches)
    modules = "\n".join(modules)

    # join the commands into a single string
    commands = "\n".join(commands)

    # some little hacks to make jobs work on the shanghai server
    if hostname.startswith('x86') or hostname.startswith('arm'):
        if vcstools_version == 'master':
            vcstools_version = 'cpu-master'
        if export == "NONE":
            export = "ALL"
        if shebag == "#!/bin/bash -l":
            shebag = "#!/bin/bash"

    # format the template script
    tmpl = tmpl.format(shebag=shebag,
                       script=commands,
                       outfile=outfile,
                       header=header,
                       switches=switches,
                       modules=modules,
                       cluster=cluster,
                       partition=partition,
                       export=export,
                       account=comp_config['group_account'][queue],
                       module_dir=module_dir,
                       threads=cpu_threads,
                       mem=mem,
                       nice=nice)

    # write the formatted template to the job file for submission
    with open(jobfile, "w") as fh:
        fh.write(tmpl)

    # submit the job
    batch_submit_line = "sbatch {0}".format(jobfile)
    jobid = None
    if submit:
        submit_cmd = subprocess.Popen(batch_submit_line, shell=True, stdout=subprocess.PIPE)
        for line in submit_cmd.stdout:
            if b"Submitted" in line:
                jobid = str(line.split(b" ")[3].decode())
        if jobid is None:
            logger.debug(batch_submit_line)
            logger.debug(submit_cmd.stdout)
            return None
        else:
            return jobid
    else:
        return None
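# Usage sketch (kept as a comment so it does not run on import; job names and
# commands are hypothetical). depend takes a list of job IDs, so jobs can be
# chained, and submit=False writes the batch script without queueing it:
#
#     jid_fetch = submit_slurm("fetch_1221399680", ["echo fetching data"],
#                              slurm_kwargs={"time": "01:00:00"}, queue="copyq")
#     jid_proc = submit_slurm("process_1221399680", ["echo processing data"],
#                             slurm_kwargs={"time": "02:00:00"},
#                             depend=[jid_fetch], depend_type="afterok",
#                             queue="gpuq", gpu_res=1, mem=4096)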
default="INFO") args = parser.parse_args() # set up the logger for stand-alone execution logger.setLevel(loglevels[args.loglvl]) ch = logging.StreamHandler() ch.setLevel(loglevels[args.loglvl]) formatter = logging.Formatter( '%(asctime)s %(filename)s %(name)s %(lineno)-4d %(levelname)-9s :: %(message)s' ) ch.setFormatter(formatter) logger.addHandler(ch) logger.propagate = False comp_config = load_config_file() if args.base_dir is None: data_dir = os.path.join(comp_config['base_product_dir'], str(args.obsID), "combined") ics_dir = os.path.join(comp_config['base_product_dir'], str(args.obsID), "ics") else: data_dir = "{base_dir}/{obsid}/combined".format(base_dir=args.base_dir, obsid=args.obsID) ics_dir = "{base_dir}/{obsid}/ics".format(base_dir=args.base_dir, obsid=args.obsID) logger.info("Data directory: {data_dir}".format(data_dir=data_dir)) logger.info("ICS output directory: {ics_dir}".format(ics_dir=ics_dir)) data_files = sorted(
def check_download(obsID, directory=None, startsec=None, n_secs=None, data_type='raw'):
    '''
    Checks that the number of files in directory (default is
    /astro/mwavcs/vcs/[obsID]/raw/) is the same as that found on the archive
    and also checks that all files have the same size (253440000 for raw,
    7864340480 for recombined tarballs by default).
    '''
    comp_config = load_config_file()
    if data_type not in ['raw', 'tar_ics', 'ics']:
        logger.error("Wrong data type given to download check.")
        return True
    if not directory:
        directory = os.path.join(comp_config['base_data_dir'], str(obsID), "raw") \
                    if data_type == 'raw' else \
                    os.path.join(comp_config['base_data_dir'], str(obsID), "combined")
    base = "\n Checking file size and number of files for obsID {0} in {1} for ".format(
        obsID, directory)
    n_secs = n_secs if n_secs else 1
    logger.info(base + "gps times {0} to {1}".format(startsec, startsec + n_secs - 1)
                if startsec else base + "the whole time range.")

    # get the file list, common suffix and expected size from the archive
    try:
        files, suffix, required_size = get_files_and_sizes(obsID, data_type,
                                                           mintime=startsec,
                                                           maxtime=startsec + n_secs)
    except Exception:
        return True

    if not startsec:
        n_files_expected = len(files)
        command = "ls -l %s/*%s | ((tee /dev/fd/5 | wc -l >/dev/fd/4) 5>&1 | " %(directory, suffix) + \
                  "awk '($5!=%s){print \"file \" $9 \" has size \" $5 \" (expected %s)\"}' >> %s/%s_all.txt) 4>&1;" %(required_size, required_size, directory, obsID) + \
                  "cat %s/%s_all.txt; rm -rf %s/%s_all.txt" %(directory, obsID, directory, obsID)
        output = subprocess.Popen([command], stdout=subprocess.PIPE, shell=True).stdout
    else:
        n_files_expected = 0
        # remove stray metafits from list that causes int errors
        files = [x for x in files if "metafits" not in x]
        times = [int(time[11:21]) for time in files]
        for sec in range(startsec, startsec + n_secs):
            n_files_expected += times.count(sec)
        output = subprocess.Popen(["count=0;for sec in `seq -w %s %s `;do let count=${count}+`ls -l %s/*${sec}*%s | " %(startsec, startsec+n_secs-1, directory, suffix) + \
                                   "((tee /dev/fd/5 | wc -l >/dev/fd/4) 5>&1 | awk '($5!=%s) " %(required_size) + \
                                   "{print \"file \" $9 \" has size \" $5 \" (expected %s)\"}' >> %s/errors_%s.txt) 4>&1`;done;" %(required_size, directory, startsec) + \
                                   "echo ${count}; cat %s/errors_%s.txt;rm -rf %s/errors_%s.txt" %(directory, startsec, directory, startsec)],
                                  stdout=subprocess.PIPE, shell=True).stdout

    output = output.readlines()
    files_in_dir = int(output[0].strip())

    error = False
    # in case we're checking for downloaded tarballs we also need to check ics-files
    if data_type == 'tar_ics':
        logger.info("Now checking ICS files")
        error, n_ics = check_recombine_ics(directory=directory,
                                           startsec=startsec,
                                           n_secs=n_secs,  # n_files_expected,
                                           obsID=obsID)
        n_files_expected *= 2
        files_in_dir += n_ics

    if files_in_dir != n_files_expected:
        logger.error("We have {0} files but expected {1}".format(files_in_dir, n_files_expected))
        error = True
    for line in output[1:]:
        if b'file' in line:
            logger.error(line)
            error = True
    if not error:
        logger.info("We have all {0} {1} files as expected.".format(files_in_dir, data_type))
    return error
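# Usage sketch (kept as a comment so it does not run on import; the obs ID is
# hypothetical). As with check_recombine(), a return value of True means an error:
#
#     if check_download(1221399680, data_type='raw'):
#         logger.error("Raw download incomplete; resubmit the download job.")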
def search_for_cal_srclist(obsid, cal_id, all_cal_returns=False, all_srclist_returns=False):
    """
    Given an obsid, searches common locations for the rts folder(s) as well as the sourcelist(s)

    Parameters:
    -----------
    obsid: int
        The observation ID
    cal_id: int
        The calibrator ID
    all_cal_returns: boolean
        OPTIONAL - If true, will return all RTS directories found. Default: False
    all_srclist_returns: boolean
        OPTIONAL - If true, will return all sourcelist files found. Default: False

    Returns:
    --------
    cal_path: string
        The location of the calibrator path
    srclist: string
        The pathname of the sourcelist
    """
    comp_config = load_config_file()
    base_dir = comp_config['base_product_dir']
    cal_dir = os.path.join(base_dir, str(obsid), "cal", str(cal_id))
    cal_dirs = []
    srclists = []

    # search all subdirectories
    for root, dirs, files in os.walk(cal_dir):
        if "rts" in dirs:
            cal_dirs.append(os.path.join(root, "rts"))
        for f in files:
            if f.endswith(".txt") and "srclist" in f:
                srclists.append(os.path.join(root, f))

    # handle multiple rts folders with user input
    if not all_cal_returns and len(cal_dirs) > 1:
        valid = False
        while not valid:
            print("Multiple RTS directories found. Please choose one")
            for i, a_dir in enumerate(cal_dirs):
                print("{0}: {1}".format(i + 1, a_dir))
            choice = int(input("Choose a number between 1 and {0}: ".format(len(cal_dirs))))
            if choice >= 1 and choice <= len(cal_dirs):
                valid = True
                my_cal_dir = cal_dirs[choice - 1]
                print("Using RTS directory: {}".format(my_cal_dir))
                cal_dirs = [my_cal_dir]
            else:
                print("## Not a valid choice! ##")
    if not all_cal_returns and len(cal_dirs) == 1:
        cal_dirs = cal_dirs[0]

    # handle multiple sourcelist files with user input
    if not all_srclist_returns and len(srclists) > 1:
        valid = False
        print("Multiple sourcelist files found. Please choose one")
        while not valid:
            for i, a_file in enumerate(srclists):
                print("{0}: {1}".format(i + 1, a_file))
            choice = int(input("Choose a number between 1 and {0}: ".format(len(srclists))))
            if choice >= 1 and choice <= len(srclists):
                valid = True
                my_srclist = srclists[choice - 1]
                print("Using sourcelist: {}".format(my_srclist))
                srclists = [my_srclist]
            else:
                print("## Not a valid choice! ##")
    if not all_srclist_returns and len(srclists) == 1:
        srclists = srclists[0]

    return cal_dirs, srclists
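# Usage sketch (kept as a comment so it does not run on import; the IDs are
# hypothetical). With the default flags a single match is returned as a plain
# string and multiple matches trigger an interactive prompt; with the all_*
# flags set, every match is returned as a list:
#
#     cal_dir, srclist = search_for_cal_srclist(1221399680, 1221342176)
#     all_dirs, all_srclists = search_for_cal_srclist(1221399680, 1221342176,
#                                                     all_cal_returns=True,
#                                                     all_srclist_returns=True)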