def setup_classify(cfg):
    """Creates the required directories and copies files for the LOTAAS classifier"""
    owd = getcwd()
    chdir(cfg["files"]["psr_dir"])
    mdir(cfg["files"]["classify_dir"], cfg["files"]["classify_dir"])  # This should already exist but keep it anyway
    for pointing in cfg["folds"].keys():
        init_bins = list(cfg["folds"][pointing]["init"].keys())[0]
        if int(init_bins) not in (50, 100):
            raise ValueError(
                f"Initial bins for {cfg['source']['name']} is invalid: {init_bins}")
        try:
            pfd_name = glob_pfds(cfg, pointing, init_bins, pfd_type=".pfd")[0]
        except IndexError:
            raise IndexError(
                f"No suitable pfds found: {cfg['files']['psr_dir']}")
        # Copy .pfd file to classify directory
        newfilename = join(cfg["files"]["classify_dir"], basename(pfd_name))
        copyfile(pfd_name, newfilename)
    chdir(owd)
def setup_cfg_dirs(cfg):
    """Creates the necessary folders and symlinks for dpp"""
    # Create pulsar directory
    mdir(cfg["files"]["psr_dir"], cfg["files"]["psr_dir"])
    # Create classify dir
    mdir(cfg["files"]["classify_dir"], cfg["files"]["classify_dir"])
    # Create edited .eph if necessary
    if cfg["source"]["binary"]:
        with open(cfg["source"]["edited_eph_name"], "w") as f:
            f.write(cfg["source"]["edited_eph"])
    # Create symlinks to pointing dirs
    remove_pointings = []
    for pointing in cfg["folds"].keys():
        real = join(comp_config["base_data_dir"], str(cfg["obs"]["id"]), "pointings", pointing)
        sym = join(cfg["files"]["psr_dir"], pointing)
        if exists(real):
            if not exists(sym):
                symlink(real, sym)
        else:
            # Remove the pointing from the dictionary if the real pointing directory isn't found
            logger.warning(f"Expected pointing directory not found: {pointing}. Skipping")
            remove_pointings.append(pointing)
    for pointing in remove_pointings:
        del cfg["folds"][pointing]
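# Illustrative only: a minimal sketch of the cfg dictionary shape the two helpers above
# expect, inferred from the keys they access. The real dpp config contains more fields;
# every value below is a placeholder, not the actual schema.
example_cfg = {
    "obs": {"id": 1221399680},
    "source": {"name": "J0000-0000", "binary": False,
               "edited_eph": "", "edited_eph_name": ""},
    "files": {"psr_dir": "/path/to/psr_dir", "classify_dir": "/path/to/classify"},
    "folds": {"12:49:12.00_+27:12:00.00": {"init": {"100": {}}}},
}
# setup_cfg_dirs(example_cfg)
# setup_classify(example_cfg)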
def download_cal(obs_id, cal_obs_id, data_dir, product_dir,
                 vcstools_version="master", nice=0):
    # Load computer dependent config file
    comp_config = load_config_file()
    batch_dir = os.path.join(product_dir, 'batch')
    product_dir = os.path.join(product_dir, 'cal', str(cal_obs_id))
    vis_dir = os.path.join(data_dir, 'vis')
    mdir(vis_dir, 'Calibrator vis', gid=comp_config['gid'])
    mdir(product_dir, 'Calibrator product', gid=comp_config['gid'])
    mdir(batch_dir, 'Batch', gid=comp_config['gid'])
    # Downloads the visibilities to /astro/mwavcs/vcs/[cal_obs_id]/vis
    # but creates a link to it here /astro/mwavcs/vcs/[obs_id]/cal/[cal_obs_id]
    csvfile = os.path.join(batch_dir, "{0}_dl.csv".format(cal_obs_id))
    create_link(data_dir, 'vis', product_dir, 'vis')
    obsdownload_batch = "caldownload_{0}".format(cal_obs_id)
    secs_to_run = "03:00:00"  # sometimes the staging can take a while...
    module_list = ["manta-ray-client/python3"]
    commands = []
    commands.append("csvfile={0}".format(csvfile))
    commands.append('cd {0}'.format(vis_dir))
    commands.append('if [[ -z ${MWA_ASVO_API_KEY} ]]')
    commands.append('then')
    commands.append(' echo "Error, MWA_ASVO_API_KEY not set"')
    commands.append(' echo "Cannot use client"')
    commands.append(' echo "Please read the MWA ASVO documentation '
                    'about setting this (https://wiki.mwatelescope.org/'
                    'display/MP/MWA+ASVO%3A+Release+Notes)"')
    commands.append(' exit 1')
    commands.append('fi')
    commands.append('echo "obs_id={0}, job_type=d, download_type=vis" > {1}'.
                    format(cal_obs_id, csvfile))
    commands.append('mwa_client --csv={0} --dir={1}'.format(csvfile, vis_dir))
    #commands.append("ln -sfn {0} {1}/{2}".format(data_dir, product_dir, 'vis'))
    commands.append('unzip *.zip')
    submit_slurm(obsdownload_batch, commands,
                 batch_dir=batch_dir,
                 module_list=module_list,
                 slurm_kwargs={"time": secs_to_run, "nice": nice},
                 vcstools_version=vcstools_version,
                 queue="copyq",
                 export="NONE",
                 mem=4096,
                 # Manually handing it the module dir as it should only run
                 module_dir='/group/mwa/software/modulefiles')
def write_batch_files(obsid, begin, end, ra, dec, freq, flaggedtiles,
                      step=500, thetares=0.05, phires=0.05, nnodes=1, eff=1,
                      beam_model='hyperbeam', maploc="$PWD", odir=None,
                      delays=[0] * 16, write=True, write_showspec=False,
                      vcstools_version='master', metafits_loc=None):
    comp_config = load_config_file()
    times = np.arange(begin, end, step=step)
    times = np.append(times, end)
    #nprocesses = 32 * nnodes
    nprocesses = 1 * nnodes
    flags = " ".join(flaggedtiles)

    if odir is None:
        # Make default directories
        product_dir = os.path.join(comp_config['base_data_dir'], obsid,
                                   'pabeam', '{}_{}'.format(ra, dec))
        batch_dir = os.path.join(comp_config['base_data_dir'], obsid, 'batch')
    else:
        product_dir = batch_dir = odir
    mdir(product_dir, 'Product Dir', gid=comp_config['gid'])
    mdir(batch_dir, 'Batch Dir', gid=comp_config['gid'])

    # Loop over all times
    for i in range(len(times)):
        fname = "make_pabeam_{0}_{1}_{2}_{3:.2f}MHz".format(
            ra, dec, times[i], freq / 1e6)
        onamebase = "{0}_{1}_{2:.2f}MHz_tres{3}_pres{4}_{5}_{6}".format(
            obsid, float(times[i]), freq / 1e6, thetares, phires, ra, dec)

        commands = []
        # Write out params
        commands.append("nprocesses={}".format(nprocesses))
        commands.append("obsid={}".format(obsid))
        commands.append("""ra='"{}"'""".format(ra))
        commands.append("""dec='"{}"'""".format(dec))
        commands.append("freq={}".format(freq))
        commands.append("eff={}".format(eff))
        commands.append('flags="{}"'.format(flags))
        commands.append('delays="{}"'.format(delays))
        commands.append("tres={}".format(thetares))
        commands.append("pres={}".format(phires))
        commands.append("obstime={}".format(times[i]))
        commands.append("odir={}".format(product_dir))
        commands.append("metafits_loc={}".format(metafits_loc))
        commands.append('beam="{}"'.format(beam_model))
        # TODO remove this once hyperbeam is installed with python
        commands.append(
            "export PYTHONPATH=$PYTHONPATH:/pawsey/mwa/software/python3/hyperbeam/v0.3.0/lib/python3.8/site-packages"
        )

        # Main command
        pabeam_command = "srun --export=all -u -n ${nprocesses} pabeam.py " +\
            "-o ${obsid} -f ${freq} -t ${obstime} -e ${eff} -p ${ra} ${dec} --metafits ${metafits_loc} " +\
            "--flagged_tiles ${flags} --grid_res ${tres} ${pres} --out_dir ${odir} --beam_model ${beam} --delays ${delays}"
        if write:
            pabeam_command = pabeam_command + " --write"

        commands.append('cd {}'.format(product_dir))
        commands.append('echo "{}"'.format(pabeam_command))
        commands.append(pabeam_command)

        # Combine the output files into one
        commands.append(pabeam_concat_cmd.format(onamebase, onamebase + ".dat"))
        # Remove the partial beam pattern files written by processes
        commands.append("rm {0}\n".format(onamebase + ".*.dat"))

        module_list = ['mpi4py', 'hyperbeam/v0.3.0']
        submit_slurm(fname, commands,
                     batch_dir=batch_dir,
                     module_list=module_list,
                     slurm_kwargs={"time": "12:00:00",
                                   "nodes": nnodes,
                                   "ntasks-per-node": nprocesses},
                     vcstools_version=vcstools_version,
                     queue='cpuq',
                     mem=10240)

        if write_showspec:
            # Now write the showspec batch for this time
            write_showspec_batch(times[i], obsid, ra, dec, freq,
                                 (90 / thetares) + 1, 360 / phires,
                                 onamebase + ".dat", maploc)
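# Illustration only (assumed GPS times): how the simulation epochs above are built.
# One pabeam job is submitted per element of the resulting array.
import numpy as np
_begin, _end, _step = 1221399680, 1221400880, 500
_times = np.append(np.arange(_begin, _end, step=_step), _end)
# -> array([1221399680, 1221400180, 1221400680, 1221400880])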
def launch_pabeam_sim(obsid, pointing, begin, duration,
                      source_name="noname", metafits_file=None,
                      flagged_tiles=None, delays=None, efficiency=1,
                      vcstools_version='master', args=None,
                      common_metadata=None, output_dir=None):
    """Submit a job to run the pabeam code to estimate the system equivalent
    flux density and a dependent job to resume the submit_to_database.py code if `args` is given.

    Parameters
    ----------
    obsid : `int`
        The MWA observation ID.
    pointing : `str`
        The pointing of the simulation in the format HH:MM:SS.SS_DD:MM:SS.SS.
    begin : `int`
        The beginning of the simulation in GPS time.
    duration : `int`
        The duration of the simulation in seconds (used to calculate the end of the simulation).
    source_name : `str`, optional
        The name of the source to be used to label output files. |br| Default: "noname".
    metafits_file : `str`, optional
        The location of the metafits file. If none given will assume the default location.
    flagged_tiles : `list`, optional
        A list of the flagged tiles. If none given will assume no tiles were flagged.
    delays : `list`, optional
        The beamformer delays to use. If none given will retrieve them from the observation metadata.
    efficiency : `float`, optional
        Frequency and pointing dependent array efficiency. |br| Default: 1.
    vcstools_version : `str`, optional
        VCSTools version to load in the job.
    args : `dict`, optional
        The argument parse dictionary from submit_to_database.py.
        If supplied will launch a dependent job with submit_to_database.py to complete the script.
    common_metadata : `list`, optional
        The list of common metadata generated from :py:meth:`vcstools.metadb_utils.get_common_obs_metadata`.
    output_dir : `str`
        The output directory of the simulation results.
        By default will put it in the VCS directory under <obsid>/sefd_simulations.

    Examples
    --------
    A simple example:

    >>> launch_pabeam_sim(1206977296, "12:49:12_+27:12:00", 1206977300, 600, source_name="SEFD_test", output_dir=".")
    """
    # Load computer dependent config file
    comp_config = load_config_file()

    # Ensure metafits file is there
    data_dir = "{}{}".format(comp_config['base_data_dir'], obsid)
    ensure_metafits(data_dir, obsid, "{0}_metafits_ppds.fits".format(obsid))

    # Perform metadata calls
    if common_metadata is None:
        common_metadata = get_common_obs_metadata(obsid)
    # Get frequencies
    centre_freq = common_metadata[5] * 10e5
    low_freq = common_metadata[6][0] * 1.28 * 10e5
    high_freq = common_metadata[6][-1] * 1.28 * 10e5
    sim_freqs = [str(low_freq), str(centre_freq), str(high_freq)]

    # Calculate required pixel res and cores/mem
    array_phase = get_obs_array_phase(obsid)
    fwhm = calc_ta_fwhm(high_freq / 10e5, array_phase=array_phase)  # degrees
    phi_res = theta_res = fwhm / 3
    if phi_res < 0.015:
        # Going any smaller causes memory errors
        phi_res = theta_res = 0.015
    npixels = 360. // phi_res + 90. // theta_res
    cores_required = npixels * len(sim_freqs) // 600
    nodes_required = cores_required // 24 + 1

    # Make directories
    batch_dir = "{}/batch".format(data_dir)
    if output_dir is None:
        sefd_dir = "{}/sefd_simulations".format(data_dir)
    else:
        sefd_dir = output_dir
    if not os.path.exists(batch_dir):
        mdir(batch_dir, "Batch", gid=comp_config['gid'])
    if not os.path.exists(sefd_dir):
        mdir(sefd_dir, "SEFD", gid=comp_config['gid'])

    # Parse defaults
    if metafits_file is None:
        metafits_file = "{0}{1}/{1}_metafits_ppds.fits".format(
            comp_config['base_data_dir'], obsid)

    # Get delays if none given
    if delays is None:
        delays = get_common_obs_metadata(obsid)[4][0]
    logger.debug(delays)
    logger.debug(' '.join(np.array(delays, dtype=str)))

    # Set up pabeam command
    command = 'srun --export=all -u -n {} pabeam.py'.format(int(nodes_required * 24))
    command += ' -o {}'.format(obsid)
    command += ' -b {}'.format(begin)
    command += ' -d {}'.format(int(duration))
    command += ' -s {}'.format(int(duration // 4 - 1))  # force 4 time steps to get reasonable std
    command += ' -e {}'.format(efficiency)
    command += ' --metafits {}'.format(metafits_file)
    command += ' -p {}'.format(pointing)
    command += ' --grid_res {:.3f} {:.3f}'.format(theta_res, phi_res)
    command += ' --delays {}'.format(' '.join(np.array(delays, dtype=str)))
    command += ' --out_dir {}'.format(sefd_dir)
    command += ' --out_name {}'.format(source_name)
    command += ' --freq {}'.format(" ".join(sim_freqs))
    if flagged_tiles is not None:
        logger.debug("flagged_tiles: {}".format(flagged_tiles))
        command += ' --flagged_tiles {}'.format(' '.join(flagged_tiles))

    # Set up and launch job
    batch_file_name = 'pabeam_{}_{}_{}'.format(obsid, source_name, pointing)
    job_id = submit_slurm(batch_file_name, [command],
                          batch_dir=batch_dir,
                          slurm_kwargs={"time": datetime.timedelta(seconds=10 * 60 * 60),
                                        "nodes": int(nodes_required)},
                          module_list=['hyperbeam-python'],
                          queue='cpuq',
                          cpu_threads=24,
                          mem=12288,
                          vcstools_version=vcstools_version)

    if args:
        # Set up dependent submit_to_database.py job
        submit_args = vars(args)
        # Add sefd_file argument
        submit_args['sefd_file'] = "{}/{}*stats".format(sefd_dir, source_name)
        command_str = "submit_to_database.py"
        for key, val in submit_args.items():
            if val:
                if val == True:
                    command_str += " --{}".format(key)
                else:
                    command_str += " --{} {}".format(key, val)

        batch_file_name = 'submit_to_database_{}_{}_{}'.format(obsid, source_name, pointing)
        job_id_dependent = submit_slurm(batch_file_name, [command_str],
                                        batch_dir=batch_dir,
                                        slurm_kwargs={"time": datetime.timedelta(seconds=1 * 60 * 60)},
                                        queue='cpuq',
                                        vcstools_version=vcstools_version,
                                        depend=[job_id])
        return job_id, job_id_dependent
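# Rough worked example (assumed numbers) of the sizing arithmetic above, for a
# hypothetical tied-array FWHM of ~0.3 deg, i.e. phi_res = theta_res = 0.1 deg:
_phi_res = _theta_res = 0.1
_npixels = 360. // _phi_res + 90. // _theta_res  # ~4500 sky pixels
_cores = _npixels * 3 // 600                     # ~22 cores for the 3 simulated frequencies
_nodes = _cores // 24 + 1                        # -> 1 node, i.e. "srun -n 24"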
def __init__(self, obsid, cal_obsid, metafits, srclist, n_int_bins=6,
             datadir=None, outdir=None, offline=False, beam_model="FEE2016",
             vcstools_version="master"):
    self.obsid = obsid                    # target observation ID
    self.cal_obsid = cal_obsid            # calibrator observation ID
    self.offline = offline                # switch to decide if offline correlated data or not
    self.utctime = None                   # start UTC time
    self.nfine_chan = None                # number of fine channels
    self.channels = None                  # actual channel numbers
    self.fine_cbw = None                  # fine channel bandwidth
    self.max_frequency = None             # the maximum frequency used by the RTS to calculate decorrelation
    self.corr_dump_time = None            # correlator dump times (i.e. integration time)
    self.n_dumps_to_average = None        # number of integration times to use for calibration
    self.PB_HA = None                     # primary beam HA
    self.PB_DEC = None                    # primary beam Dec
    self.freq_base = None                 # frequency base for RTS
    self.JD = None                        # time base for RTS
    self.metafits_RTSform = None          # modified metafits file name for RTS
    self.ArrayPositionLat = -26.70331940  # MWA latitude
    self.ArrayPositionLong = 116.6708152  # MWA longitude
    self.n_integration_bins = n_int_bins  # number of visibility integration groups for RTS
    self.base_str = None                  # base string to be written to file, will be edited by RTScal
    self.beam_model = beam_model          # the beam model to use for the calibration solutions: 'ANALYTIC' or 'FEE2016'
    self.beam_model_bool = None
    self.vcstools_version = vcstools_version

    comp_config = load_config_file()

    # Check to make sure paths and files exist:
    # First, check that the actual data directory exists
    if datadir is None:
        # use the default data path
        self.data_dir = os.path.join(comp_config['base_data_dir'], str(obsid),
                                     "cal", str(cal_obsid), "vis")
        logger.info("Using default calibrator data path: {0}".format(self.data_dir))
        if os.path.exists(os.path.realpath(self.data_dir)) is False:
            errmsg = "Default data directory ({0}) does not exist. Aborting.".format(self.data_dir)
            logger.error(errmsg)
            raise CalibrationError(errmsg)
    elif os.path.isdir(datadir):
        self.data_dir = os.path.realpath(datadir)
        logger.info("Using the user specified data directory: {0}".format(datadir))
    else:
        errmsg = "Data directory ({0}) does not exist. Aborting.".format(datadir)
        logger.error(errmsg)
        raise CalibrationError(errmsg)

    # Then check if the specified output and batch directories exist
    if outdir is None:
        # this is the default
        logger.info("Assuming default directory structure...")
        self.output_dir = os.path.join(comp_config['base_data_dir'], str(self.obsid),
                                       "cal", str(self.cal_obsid), "rts")
        self.batch_dir = os.path.join(comp_config['base_data_dir'], str(self.obsid), "batch")
        logger.debug("RTS output directory is {0}".format(self.output_dir))
        logger.debug("Batch directory is {0}".format(self.batch_dir))
        mdir(self.output_dir, "RTS", gid=comp_config['gid'])
        mdir(self.batch_dir, "Batch", gid=comp_config['gid'])
    else:
        # mdir handles if the directory already exists
        self.output_dir = os.path.realpath(outdir + "/rts")
        self.batch_dir = os.path.realpath(outdir + "/batch")
        logger.warning("Non-standard RTS output path: {0}".format(self.output_dir))
        logger.warning("Non-standard batch directory path: {0}".format(self.batch_dir))
        mdir(self.output_dir, "RTS", gid=comp_config['gid'])
        mdir(self.batch_dir, "Batch", gid=comp_config['gid'])

    # Then check that the metafits file exists
    if os.path.isfile(metafits) is False:
        # file doesn't exist
        errmsg = "Given metafits file ({0}) does not exist.".format(metafits)
        logger.error(errmsg)
        raise CalibrationError(errmsg)
    elif "_ppds" not in metafits:
        # file doesn't have the correct naming convention
        errmsg = "Looks like you have an old-style metafits. " \
                 "You'll need to download the new version, which is named like: " \
                 "{0}_metafits_ppds.fits.".format(obsid)
        logger.error(errmsg)
        raise CalibrationError(errmsg)
    else:
        logger.info("Metafits file exists and is named correctly.")
        self.metafits = os.path.realpath(metafits)
        logger.debug("    {0}".format(self.metafits))

    # Then check that the source list exists
    if os.path.isfile(srclist) is False:
        # file doesn't exist
        errmsg = "Given source list file ({0}) does not exist.".format(srclist)
        logger.error(errmsg)
        raise CalibrationError(errmsg)
    else:
        logger.info("Checking source list file exists... Ok")
        self.source_list = os.path.realpath(srclist)

    # Check the 'beam_model' is one of the correct choices
    choices = ("FEE2016", "ANALYTIC")
    if self.beam_model not in choices:
        errmsg = "Given beam model: {0} not an available choice: {1}".format(self.beam_model, choices)
        logger.error(errmsg)
        raise CalibrationError(errmsg)
    else:
        logger.info("Using {0} beam model for calibration solution".format(self.beam_model))
        self.beam_model_bool = int(bool(self.beam_model == "ANALYTIC"))  # produces 1 for ANALYTIC, 0 for FEE2016

    # Set some RTS flags based on whether we have offline correlated data or not
    logger.info("Setting RTS data input flags...")
    if self.offline:
        self.useCorrInput = 1
        self.readDirect = 0
        logger.debug("Offline correlation")
    else:
        self.useCorrInput = 0
        self.readDirect = 1
        logger.debug("Online correlation")
def vcs_download(obsid, start_time, stop_time, increment, data_dir, product_dir,
                 parallel, ics=False, n_untar=2, keep="",
                 vcstools_version="master", nice=0):
    # Load computer dependent config file
    comp_config = load_config_file()
    logger.info("Downloading files from archive")
    voltdownload = "voltdownload.py"
    obsinfo = meta.getmeta(service='obs', params={'obs_id': str(obsid)})
    comb_del_check = meta.combined_deleted_check(obsid, begin=start_time, end=stop_time)
    data_format = obsinfo['dataquality']
    if data_format == 1 or (comb_del_check and data_format == 6):
        # either only the raw data is available (data_format == 1)
        # or there were combined files but they were deleted (comb_del_check and data_format == 6)
        target_dir = link = '/raw'
        if ics:
            logger.error("Data have not been recombined in the archive yet. Exiting")
            sys.exit(0)
        data_type = 11
        dl_dir = "{0}/{1}".format(data_dir, target_dir)
        dir_description = "Raw"
    elif data_format == 6:
        target_dir = link = '/combined'
        if ics:
            data_type = 15
        else:
            data_type = 16
        dl_dir = "{0}/{1}".format(data_dir, target_dir)
        dir_description = "Combined"
    else:
        logger.error("Unable to determine data format from archive. Exiting")
        sys.exit(0)
    mdir(dl_dir, dir_description, gid=comp_config['gid'])
    create_link(data_dir, target_dir, product_dir, link)
    batch_dir = product_dir + "/batch/"

    for time_to_get in range(start_time, stop_time, increment):
        if time_to_get + increment > stop_time:
            increment = stop_time - time_to_get + 1
        # need to subtract 1 from increment since voltdownload wants how many
        # seconds PAST the first one
        voltdownload_batch = "volt_{0}".format(time_to_get)
        check_batch = "check_volt_{0}".format(time_to_get)
        volt_secs_to_run = datetime.timedelta(seconds=500 * increment)
        check_secs_to_run = "15:00"
        if data_type == 16:
            check_secs_to_run = "10:15:00"

        checks = "checks.py"
        # Write out the checks batch file but don't submit it
        commands = []
        commands.append("newcount=0")
        commands.append("let oldcount=$newcount-1")
        commands.append("sed -i -e \"s/oldcount=${{oldcount}}/oldcount=${{newcount}}/\" {0}".
                        format(batch_dir + voltdownload_batch + ".batch"))
        commands.append("oldcount=$newcount; let newcount=$newcount+1")
        commands.append("sed -i -e \"s/_${{oldcount}}.out/_${{newcount}}.out/\" {0}".
                        format(batch_dir + voltdownload_batch + ".batch"))
        checks_command = "-m download -o {0} -w {1} -b {2} -i {3} --data_type {4}".format(
            obsid, dl_dir, time_to_get, increment, data_type)
        commands.append('{0} {1}'.format(checks, checks_command))
        commands.append("if [ $? -eq 1 ];then")
        commands.append("sbatch {0}".format(batch_dir + voltdownload_batch + ".batch"))
        # if we have tarballs we send the untar jobs to the workq
        if data_type == 16:
            commands.append("else")
            untar = 'untar.sh'
            untar_command = "-w {0} -o {1} -b {2} -e {3} -j {4} {5}".format(
                dl_dir, obsid, time_to_get, time_to_get + increment - 1, n_untar, keep)
            commands.append('{0} {1}'.format(untar, untar_command))
            #commands.append("sbatch {0}.batch".format(batch_dir + tar_batch))
        commands.append("fi")

        # Download and checks should be done on Zeus's cpuq. This will only work
        # on Galaxy as the Ozstar workflow is different
        submit_slurm(check_batch, commands,
                     batch_dir=batch_dir,
                     slurm_kwargs={"time": check_secs_to_run, "nice": nice},
                     vcstools_version=vcstools_version,
                     submit=False,
                     outfile=batch_dir + check_batch + "_0.out",
                     queue="zcpuq",
                     export="NONE",
                     mem=10240,
                     # Manually handing it the module dir as it should only run
                     module_dir='/group/mwa/software/modulefiles')

        # Write out the tar batch file if in mode 15
        #if format == 16:
        #    body = []
        #    for t in range(time_to_get, time_to_get+increment):
        #        body.append("aprun tar -xf {0}/1149620392_{1}_combined.tar".format(dl_dir, t))
        #    submit_slurm(tar_batch, body, batch_dir=working_dir+"/batch/",
        #                 slurm_kwargs={"time": "1:00:00", "partition": "gpuq"})

        #module_list = ["mwa-voltage/master"]
        # removed the master version load because by default we load the python 3 version
        module_list = []
        body = []
        body.append("oldcount=0")
        body.append("let newcount=$oldcount+1")
        body.append("if [ ${newcount} -gt 10 ]; then")
        body.append("echo \"Tried ten times, this is silly. Aborting here.\";exit")
        body.append("fi")
        body.append("sed -i -e \"s/newcount=${{oldcount}}/newcount=${{newcount}}/\" {0}\n".
                    format(batch_dir + check_batch + ".batch"))
        body.append("sed -i -e \"s/_${{oldcount}}.out/_${{newcount}}.out/\" {0}".
                    format(batch_dir + check_batch + ".batch"))
        body.append("sbatch -d afterany:${{SLURM_JOB_ID}} {0}".
                    format(batch_dir + check_batch + ".batch"))
        voltdownload_command = "--obs={0} --type={1} --from={2} --duration={3} --parallel={4}" \
                               " --dir={5}".format(obsid, data_type, time_to_get,
                                                   increment - 1, parallel, dl_dir)
        body.append("{0} {1}".format(voltdownload, voltdownload_command))
        submit_slurm(voltdownload_batch, body,
                     batch_dir=batch_dir,
                     module_list=module_list,
                     slurm_kwargs={"time": str(volt_secs_to_run), "nice": nice},
                     vcstools_version=vcstools_version,
                     outfile=batch_dir + voltdownload_batch + "_1.out",
                     queue="copyq",
                     export="NONE",
                     mem=5120,
                     # Manually handing it the module dir as it should only run
                     module_dir='/group/mwa/software/modulefiles')
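# Illustration only (assumed GPS times): how the loop above chunks the download window.
_start, _stop, _inc = 1221399680, 1221399810, 60
_chunks = []
for _t in range(_start, _stop, _inc):
    _chunks.append((_t, _inc if _t + _inc <= _stop else _stop - _t + 1))
# -> [(1221399680, 60), (1221399740, 60), (1221399800, 11)]: the final chunk is
#    clamped so the last voltdownload job still reaches stop_time inclusive.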
def coherent_beam(obs_id, start, stop, data_dir, product_dir, batch_dir,
                  metafits_file, nfine_chan, pointing_list,
                  rts_flag_file=None, bf_formats=None, DI_dir=None,
                  execpath=None, calibration_type='rts', ipfb_filter="LSQ12",
                  vcstools_version="master", nice=0, channels_to_beamform=None,
                  beam_version="FEE2016"):
    """
    This function runs the new version of the beamformer. It is modelled after
    the old function above and will likely be able to be streamlined after
    working implementation (SET)

    Streamlining underway, as well as full replacement of the old function
    (SET March 28, 2018)
    """
    # Load computer dependent config file
    comp_config = load_config_file()

    # If execpath is given, change the make_beam executable command
    # otherwise, it should be on the PATH if vcstools has been installed
    if execpath:
        make_beam_cmd = "{0}/make_beam".format(execpath)
        make_beam_version_cmd = "{0}/make_beam -V".format(execpath)
    else:
        make_beam_cmd = "make_beam"
        make_beam_version_cmd = "make_beam -V"

    make_beam_version = subprocess.Popen(make_beam_version_cmd,
                                         stdout=subprocess.PIPE,
                                         shell=True).communicate()[0]
    logger.info("Current version of make_beam = {0}".format(make_beam_version.strip()))

    metafile = "{0}/{1}.meta".format(product_dir, obs_id)
    channels = None
    # No channels given so first check for a metafile
    if os.path.isfile(metafile):
        logger.info("Found observation metafile: {0}".format(metafile))
        with open(metafile, 'r') as m:
            for line in m.readlines():
                if line.startswith("channels"):
                    channels = line.split(",")[1:]
                    channels = np.array(channels, dtype=int)
    else:
        logger.debug("No metafile in {0}".format(metafile))
    logger.debug("Channels before meta.get_channels: {0}".format(channels))
    # If channels is still None get_channels will get it from the metadata
    channels = meta.get_channels(obs_id, channels=channels)

    # Make a metafile containing the channels so no future metadata calls are required
    if not os.path.isfile(metafile):
        with open(metafile, "w") as m:
            m.write("#Metadata for obs ID {0} required to determine if: normal or "
                    "picket-fence\n".format(obs_id))
            m.write("channels,{0}".format(",".join([str(c) for c in channels])))
    channels = np.array(channels, dtype=int)
    hichans = [c for c in channels if c > 128]
    lochans = [c for c in channels if c <= 128]
    lochans.extend(list(reversed(hichans)))
    ordered_channels = lochans

    if channels_to_beamform is None:
        # If no channels_to_beamform given fold on everything
        channels_to_beamform = ordered_channels

    # Run for each coarse channel. Calculates delays and makes beam
    if not DI_dir:
        logger.error("You need to specify the path to the calibrator files, "
                     "either where the DIJs are or where the Offringa "
                     "calibration_solutions.bin file is. Aborting here")
        sys.exit(0)
    DI_dir = os.path.abspath(DI_dir)

    # make_beam_small requires the start time in UTC, get it from the start
    utctime = gps_to_utc(start)

    P_dir = os.path.join(product_dir, "pointings")
    mdir(P_dir, "Pointings", gid=comp_config['gid'])
    mdir(os.path.join(product_dir, "incoh"), "Incoh", gid=comp_config['gid'])
    # startjobs = True

    # Set up supercomputer dependent parameters
    import socket
    hostname = socket.gethostname()
    if hostname.startswith('john') or hostname.startswith('farnarkle'):
        max_pointing = 120
    else:
        max_pointing = 15
    if comp_config['ssd_dir'] is None:
        temp_mem = temp_mem_single = None
    else:
        # Work out required SSD size
        obs_length = stop - start + 1.
        temp_mem = int(0.0012 * obs_length * max_pointing + 1)
        temp_mem_single = int(0.0024 * obs_length + 2)
        if "-s" not in bf_formats:
            temp_mem = temp_mem * 4
            temp_mem_single = temp_mem_single * 4

    # Set up SLURM requirements
    if len(pointing_list) > max_pointing:
        seconds_to_run = 8 * (stop - start + 1) * max_pointing
    else:
        seconds_to_run = 8 * (stop - start + 1) * len(pointing_list)
    if seconds_to_run > 86399.:
        secs_to_run = datetime.timedelta(seconds=86399)
    else:
        secs_to_run = datetime.timedelta(seconds=seconds_to_run)

    # Get the project id (eg G0057) from the metafits file
    with pyfits.open(metafits_file) as hdul:
        project_id = hdul[0].header['project']

    # Split the pointing list into lists of length max_pointing
    pointing_list_list = list(chunks(pointing_list, max_pointing))
    time_now = str(datetime.datetime.now()).replace(" ", "_")

    logger.info("Running make_beam")
    job_id_list_list = []
    for pl, pointing_list in enumerate(pointing_list_list):
        pointing_str = ",".join(pointing_list)
        # Run one coarse channel per node
        job_id_list = []
        for gpubox, coarse_chan in enumerate(ordered_channels, 1):
            if coarse_chan not in channels_to_beamform:
                continue
            if calibration_type == 'rts':
                #chan_list = get_frequencies(metafits_file, resort=True)
                DI_file = "{0}/DI_JonesMatrices_node{1:0>3}.dat".format(DI_dir, gpubox)
                jones_option = "-J {0}".format(DI_file)
            elif calibration_type == 'offringa':
                #chan_list = get_frequencies(metafits_file, resort=False)
                DI_file = "{0}/calibration_solution.bin".format(DI_dir)
                jones_option = "-O {0} -C {1}".format(DI_file, int(gpubox) - 1)
            else:
                logger.info("Please use an accepted calibration type. Aborting here.")
                sys.exit(0)

            # Making pointing directories
            for pointing in pointing_list:
                mdir("{0}/{1}".format(P_dir, pointing),
                     "Pointing {0}".format(pointing), gid=comp_config['gid'])

            n_omp_threads = 1
            if "v" in bf_formats:
                for pointing in pointing_list:
                    make_beam_small_batch = "mb_{0}_ch{1}".format(pointing, coarse_chan)
                    module_list = [comp_config['container_module']]
                    commands = []
                    commands.append("cd {0}/{1}".format(P_dir, pointing))
                    runline = "srun --export=all -n 1"
                    runline += " -c {}".format(n_omp_threads)
                    if comp_config['container_command'] != '':
                        runline += " {} '".format(comp_config['container_command'])
                    runline += " {}".format(make_beam_cmd)
                    runline += " -o {}".format(obs_id)
                    runline += " -b {}".format(start)
                    runline += " -e {}".format(stop)
                    runline += " -a 128"
                    runline += " -n 128"
                    runline += " -f {}".format(coarse_chan)
                    runline += " {}".format(jones_option)
                    runline += " -d {}/combined".format(data_dir)
                    runline += " -P {}".format(pointing)
                    runline += " -r 10000"
                    runline += " -m {}".format(metafits_file)
                    runline += " -z {}".format(utctime)
                    runline += " {}".format(bf_formats)
                    runline += " -F {}".format(rts_flag_file)
                    runline += " -S {}".format(ipfb_filter)
                    if beam_version == "ANALYTIC":
                        runline += " -H"
                    if comp_config['container_command'] != '':
                        runline += "'"
                    commands.append(runline)

                    job_id = submit_slurm(make_beam_small_batch, commands,
                                          batch_dir=batch_dir,
                                          module_list=module_list,
                                          slurm_kwargs={"time": secs_to_run, "nice": nice},
                                          queue='gpuq',
                                          vcstools_version=vcstools_version,  # forces old version with vdif
                                          submit=True,
                                          export="NONE",
                                          gpu_res=1,
                                          cpu_threads=n_omp_threads,
                                          mem=comp_config['gpu_beamform_mem'],
                                          temp_mem=temp_mem_single)
                    job_id_list.append(job_id)
            else:
                make_beam_small_batch = "mb_{0}_{1}_ch{2}".format(pl, time_now, coarse_chan)
                module_list = [comp_config['container_module']]
                commands = []
                if comp_config['ssd_dir'] is None:
                    commands.append("cd {0}".format(P_dir))
                else:
                    # Write outputs to SSDs if on Ozstar
                    commands.append("cd {0}".format(comp_config['ssd_dir']))
                runline = "srun --export=all -n 1"
                runline += " -c {}".format(n_omp_threads)
                if comp_config['container_command'] != '':
                    runline += " {} '".format(comp_config['container_command'])
                runline += " {}".format(make_beam_cmd)
                runline += " -o {}".format(obs_id)
                runline += " -b {}".format(start)
                runline += " -e {}".format(stop)
                runline += " -a 128"
                runline += " -n 128"
                runline += " -f {}".format(coarse_chan)
                runline += " {}".format(jones_option)
                runline += " -d {}/combined".format(data_dir)
                runline += " -P {}".format(pointing_str)
                runline += " -r 10000"
                runline += " -m {}".format(metafits_file)
                runline += " -z {}".format(utctime)
                runline += " {}".format(bf_formats)
                runline += " -F {}".format(rts_flag_file)
                if beam_version == "ANALYTIC":
                    runline += " -H"
                if comp_config['container_command'] != '':
                    runline += "'"
                commands.append(runline)
                commands.append("")

                if comp_config['ssd_dir'] is not None:
                    # Copy the outputs off the SSDs back to the standard directories
                    for pointing in pointing_list:
                        commands.append("cp {0}/{1}/{2}_{3}_{1}_ch{4}_00*.fits "
                                        "{5}/{1}/".format(comp_config['ssd_dir'], pointing,
                                                          project_id, obs_id, coarse_chan, P_dir))
                    if 'i' in bf_formats:
                        commands.append("cp {0}/{1}/{2}_{3}_{1}_ch{4}_00*.fits "
                                        "{5}/{1}/".format(comp_config['ssd_dir'], "incoh",
                                                          project_id, obs_id, coarse_chan, product_dir))
                commands.append("")

                job_id = submit_slurm(make_beam_small_batch, commands,
                                      batch_dir=batch_dir,
                                      module_list=module_list,
                                      slurm_kwargs={"time": secs_to_run, "nice": nice},
                                      queue='gpuq',
                                      vcstools_version=vcstools_version,
                                      submit=True,
                                      export="NONE",
                                      gpu_res=1,
                                      cpu_threads=n_omp_threads,
                                      mem=comp_config['gpu_beamform_mem'],
                                      temp_mem=temp_mem)
                job_id_list.append(job_id)
        job_id_list_list.append(job_id_list)

    return job_id_list_list, make_beam_small_batch.split('ch')[0]
def vcs_correlate(obsid, start, stop, increment, data_dir, product_dir, ft_res,
                  metafits, vcstools_version="master", nice=0):
    # Load computer dependent config file
    comp_config = load_config_file()
    logger.info("Correlating files at {0} kHz and {1} milliseconds".format(
        ft_res[0], ft_res[1]))

    batch_dir = product_dir + "/batch/"
    target_dir = link = 'vis'

    if data_dir == product_dir:
        corr_dir = "{0}/cal/{1}/{2}".format(product_dir, obsid, target_dir)
    else:
        corr_dir = "{0}/{1}".format(data_dir, target_dir)
        product_dir = "{0}/cal/{1}/".format(product_dir, obsid)
        mdir(product_dir, "Correlator", gid=comp_config['gid'])
    mdir(corr_dir, "Correlator Product", gid=comp_config['gid'])
    create_link(data_dir, target_dir, product_dir, link)

    chan_list = get_frequencies(metafits, resort=True)
    #gpu_int = 0.01  # Code was compiled with a hard-coded 100 sample minimum integration. For 'normal' data this means 0.01 seconds
    gpu_int = 10  # Code was compiled with a hard-coded 100 sample minimum integration. For 'normal' data this means 10 milliseconds.
    integrations = int(ft_res[1] / gpu_int)
    #num_frames = int(1.0 / ft_res[1])
    num_frames = int(1000 / ft_res[1])

    logger.info("Input chan list is {0}".format(chan_list))

    for time_to_get in range(start, stop, increment):
        inc_start = time_to_get
        inc_stop = time_to_get + increment
        for index, channel in enumerate(chan_list):
            gpubox_label = (index + 1)
            f = []
            for time_to_corr in range(inc_start, inc_stop, 1):
                file_to_process = "{0}/combined/{1}_{2}_ch{3:0>2}.dat".format(
                    data_dir, obsid, time_to_corr, channel)
                # check the file exists
                if os.path.isfile(file_to_process):
                    f.append(file_to_process)
            # now have a full list of files for this increment and this channel
            if len(f) > 0:
                corr_batch = "correlator_{0}_gpubox{1:0>2}".format(inc_start, gpubox_label)
                body = []
                to_corr = 0
                for file in f:
                    (current_time, _) = os.path.splitext(os.path.basename(file))
                    (obsid, gpstime, _) = current_time.split('_')
                    t = Time(int(gpstime), format='gps', scale='utc')
                    unix_time = int(t.unix)
                    offline_correlator_command = "-o {0}/{1} -s {2} -r {3} -i {4} -n {5} " \
                        "-c {6:0>2} -d {7}".format(corr_dir, obsid, unix_time, num_frames,
                                                   integrations, int(ft_res[0] / 10),
                                                   gpubox_label, file)
                    body.append("{0} {1}".format("offline_correlator", offline_correlator_command))
                    to_corr += 1

                #module_list = ["module switch PrgEnv-cray PrgEnv-gnu"]
                module_list = ["offline_correlator/v1.0.0"]
                secs_to_run = str(datetime.timedelta(seconds=2 * 12 * num_frames * to_corr))
                # added factor two on 10 April 2017 as galaxy seemed really slow...
                submit_slurm(corr_batch, body,
                             module_list=module_list,
                             slurm_kwargs={"time": secs_to_run, "nice": nice},
                             queue='gpuq',
                             vcstools_version=vcstools_version,
                             batch_dir=batch_dir,
                             export="NONE")
            else:
                logger.error("Couldn't find any recombine files. Aborting here.")
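# Rough worked example (assumed resolution) of the integration arithmetic above:
# for ft_res = (40, 1000), i.e. 40 kHz fine channels and 1000 ms correlator dumps,
_ft_res = (40, 1000)
_integrations = int(_ft_res[1] / 10)  # 100 minimum-size GPU integrations per dump
_num_frames = int(1000 / _ft_res[1])  # 1 correlator dump per second of data
# and the offline_correlator "-n" argument becomes ft_res[0] / 10 = 4.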
def vcs_recombine(obsid, start_time, stop_time, increment, data_dir, product_dir,
                  vcstools_version="master", nice=0):
    # Load computer dependent config file
    comp_config = load_config_file()
    logger.info("Running recombine on files")
    jobs_per_node = 8
    target_dir = link = 'combined'
    mdir(data_dir + '/' + target_dir, 'Combined', gid=comp_config['gid'])
    create_link(data_dir, target_dir, product_dir, link)
    batch_dir = product_dir + "/batch/"
    recombine = "recombine.py"
    checks = "checks.py"
    recombine_binary = "recombine"
    for time_to_get in range(start_time, stop_time, increment):
        process_nsecs = increment if (time_to_get + increment <= stop_time) \
            else (stop_time - time_to_get + 1)
        if jobs_per_node > process_nsecs:
            jobs_per_node = process_nsecs
        nodes = (increment + (-increment % jobs_per_node)) // jobs_per_node + 1  # Integer division with ceiling result plus 1 for master node
        recombine_batch = "recombine_{0}".format(time_to_get)
        check_batch = "check_recombine_{0}".format(time_to_get)

        #module_list = ["module switch PrgEnv-cray PrgEnv-gnu", "python/3.6.3", "numpy/1.13.3", "mwa-voltage/master"]
        module_list = ["mwa-voltage/master"]
        commands = []
        commands.append("newcount=0")
        commands.append("let oldcount=$newcount-1")
        commands.append("sed -i -e \"s/oldcount=${{oldcount}}/oldcount=${{newcount}}/\" {0}".
                        format(batch_dir + recombine_batch + ".batch"))
        commands.append("oldcount=$newcount; let newcount=$newcount+1")
        commands.append("sed -i -e \"s/_${{oldcount}}.out/_${{newcount}}.out/\" {0}".
                        format(batch_dir + recombine_batch + ".batch"))
        checks_command = "-m recombine -o {0} -w {1}/combined/ -b {2} -i {3}".format(
            obsid, data_dir, time_to_get, process_nsecs)
        commands.append("{0} {1}".format(checks, checks_command))
        commands.append("if [ $? -eq 1 ];then")
        commands.append("sbatch {0}".format(batch_dir + recombine_batch + ".batch"))
        commands.append("fi")
        submit_slurm(check_batch, commands,
                     batch_dir=batch_dir,
                     module_list=module_list,
                     slurm_kwargs={"time": "15:00", "nice": nice},
                     vcstools_version=vcstools_version,
                     submit=False,
                     outfile=batch_dir + check_batch + "_0.out",
                     queue='gpuq',
                     export="NONE")

        #module_list = ["module switch PrgEnv-cray PrgEnv-gnu", "python/3.6.3",
        #               "numpy/1.13.3", "mwa-voltage/master", "mpi4py", "cfitsio"]
        module_list = ["mwa-voltage/master", "mpi4py"]
        commands = []
        commands.append("oldcount=0")
        commands.append("let newcount=$oldcount+1")
        commands.append("if [ ${newcount} -gt 10 ]; then")
        commands.append("echo \"Tried ten times, this is silly. Aborting here.\";exit")
        commands.append("fi")
        commands.append("sed -i -e \"s/newcount=${{oldcount}}/newcount=${{newcount}}/\" {0}".
                        format(batch_dir + check_batch + ".batch"))
        commands.append("sed -i -e \"s/_${{oldcount}}.out/_${{newcount}}.out/\" {0}".
                        format(batch_dir + check_batch + ".batch"))
        commands.append("sbatch -d afterany:${{SLURM_JOB_ID}} {0}".
                        format(batch_dir + check_batch + ".batch"))
        recombine_command = "-o {0} -s {1} -w {2} -e {3}".format(
            obsid, time_to_get, data_dir, recombine_binary)
        commands.append("srun --export=all {0} {1}".format(recombine, recombine_command))
        submit_slurm(recombine_batch, commands,
                     batch_dir=batch_dir,
                     module_list=module_list,
                     slurm_kwargs={"time": "06:00:00",
                                   "nodes": str(nodes),
                                   "ntasks-per-node": jobs_per_node,
                                   "nice": nice},
                     vcstools_version=vcstools_version,
                     outfile=batch_dir + recombine_batch + "_1.out",
                     queue='gpuq',
                     export="NONE")
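# Illustration only (assumed values): the node-count formula above is a ceiling
# division plus one extra node for the master process.
_increment, _jobs_per_node = 20, 8
_nodes = (_increment + (-_increment % _jobs_per_node)) // _jobs_per_node + 1  # -> 4 nodes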
#if args.execpath:
#    execpath = args.execpath

# Load computer dependent config file
comp_config = load_config_file()

if args.work_dir:
    logger.warning("YOU ARE MESSING WITH THE DEFAULT DIRECTORY STRUCTURE "
                   "FOR PROCESSING -- BE SURE YOU KNOW WHAT YOU ARE DOING!")
    sleep(5)
    data_dir = product_dir = "{0}/{1}".format(args.work_dir, args.obs)
else:
    data_dir = '{0}{1}'.format(comp_config['base_data_dir'], args.obs)
    product_dir = '{0}{1}'.format(comp_config['base_data_dir'], args.obs)
batch_dir = "{0}/batch".format(product_dir)
mdir(data_dir, "Data", gid=comp_config['gid'])
mdir(product_dir, "Products", gid=comp_config['gid'])
mdir(batch_dir, "Batch", gid=comp_config['gid'])
metafits_file = "{0}/{1}_metafits_ppds.fits".format(data_dir, args.obs)
# TODO: modify metafits downloader to not just do a trivial wget

logger.info("Processing Obs ID {0} from GPS times {1} till {2}".format(
    args.obs, args.begin, args.end))

if args.mode == 'download_ics':
    logger.info("Mode: {0}".format(args.mode))
    vcs_download(args.obs, args.begin, args.end, args.increment,
                 data_dir, product_dir, args.parallel_dl, ics=True,
                 vcstools_version=args.vcstools_version, nice=args.nice)
elif args.mode == 'download':
def create_dpp_dir(kwargs):
    dpp_dir = join(comp_config["base_data_dir"], str(kwargs["obsid"]), "dpp")
    mdir(dpp_dir, dpp_dir)
logger.debug(obs_chans_reordered)
logger.debug(cal1_chans)
logger.debug(cal1_chans_reordered)
logger.debug(cal2_chans)
logger.debug(cal2_chans_reordered)

# Check the input calibration obs have all the frequency channels we need
for fc in obs_chans:
    if not (fc in cal1_chans or fc in cal2_chans):
        logger.error(
            f"Frequency channel {fc} is not found in either of the calibration obs. Exiting")
        sys.exit(1)

# Make output dir
mdir(out_dir, 'Combined Calibration', gid=comp_config['gid'])

# Check the flagged*.txt files are the same and move them to the new directory
if not filecmp.cmp(f"{cal_base_dir}/{args.cal1}/rts/flagged_tiles.txt",
                   f"{cal_base_dir}/{args.cal2}/rts/flagged_tiles.txt"):
    logger.warning(f"{cal_base_dir}/{args.cal1}/rts/flagged_tiles.txt and {cal_base_dir}/{args.cal2}" +
                   "/rts/flagged_tiles.txt are different. It is safer to recalibrate with identical flags")
copyfile(f"{cal_base_dir}/{args.cal1}/rts/flagged_tiles.txt",
         f"{out_dir}/flagged_tiles.txt")
if not filecmp.cmp(f"{cal_base_dir}/{args.cal1}/rts/flagged_channels.txt",
                   f"{cal_base_dir}/{args.cal2}/rts/flagged_channels.txt"):
    logger.warning(f"{cal_base_dir}/{args.cal1}/rts/flagged_channels.txt and {cal_base_dir}/{args.cal2}" +
                   "/rts/flagged_channels.txt are different. It is safer to recalibrate with identical flags")
copyfile(f"{cal_base_dir}/{args.cal1}/rts/flagged_channels.txt",
         f"{out_dir}/flagged_channels.txt")