Example #1
def remove_raw(obs):
    comp_config = load_config_file()
    raw_folder = os.path.join(comp_config['base_data_dir'], str(obs), "raw")
    combined_folder = os.path.join(comp_config['base_data_dir'], str(obs), "combined")
    
    raw_files = False
    tar_files = False
    combined_files = False
    ics_files = False
    for file in glob.iglob("{0}/*".format(raw_folder)):
        if re.search('dat', file):
            raw_files = True
    for file in glob.iglob("{0}/*".format(combined_folder)):
        if re.search('tar', file):
            tar_files = True
        if re.search(r'ch\d{3}', file):
            combined_files = True
        if re.search('ics', file):
            ics_files = True
    
    if raw_files:
        munlink_files(raw_folder, "dat")
    if tar_files:
        munlink_files(combined_folder, "tar")
    if combined_files:
        munlink_files(combined_folder, "ch\d{3}")
    if ics_files:
        munlink_files(combined_folder, "ics")
Example #2
def stokes_fold(run_params):
    """
    Launches the stokes_fold part of the data processing pipeline

    Parameters:
    -----------
    run_params: object
        The run_params object defined by data_processing_pipeline
    """
    launch_line = stokes_launch_line(run_params)
    commands = [launch_line]
    name = "Stokes_Fold_init_{0}_{1}".format(run_params.pulsar,
                                             run_params.obsid)
    comp_config = load_config_file()
    batch_dir = "{0}{1}/batch/".format(comp_config['base_product_dir'],
                                       run_params.obsid)

    job_id = submit_slurm(name, commands,\
                        batch_dir=batch_dir,\
                        slurm_kwargs={"time": "00:10:00"},\
                        module_list=["mwa_search/{0}".format(run_params.mwa_search),\
                                    "dspsr/master", "psrchive/master"],\
                        submit=True, vcstools_version="{0}".format(run_params.vcs_tools))
    logger.info("Job successfully submitted: {0}".format(name))
    return job_id
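
# Minimal usage sketch: stokes_fold expects the run_params object built by
# data_processing_pipeline; here types.SimpleNamespace stands in for it and
# every attribute value is a placeholder.
from types import SimpleNamespace

run_params = SimpleNamespace(pulsar="J0000-0000",    # placeholder pulsar name
                             obsid=1234567890,       # placeholder observation ID
                             mwa_search="master",    # mwa_search module version
                             vcs_tools="master")     # vcstools version to load
job_id = stokes_fold(run_params)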
Example #3
def check_recombine(obsID, directory=None, required_size=327680000, \
                        required_size_ics=30720000, startsec=None, n_secs=None):
    '''
    Checks that the number of files in directory (/astro/mwavcs/vcs/[obsID]/combined/) is the same
    as that found on the archive and also checks that all files have the same size (327680000 by default).
    '''
    comp_config = load_config_file()
    if not directory:
        directory = os.path.join(comp_config['base_data_dir'], str(obsID),
                                 "combined")
    base = "\n Checking file size and number of files for obsID {0} in {1} for ".format(
        obsID, directory)
    n_secs = n_secs if n_secs else 1
    logger.info(base +
                "gps times {0} to {1}".format(startsec, startsec + n_secs -
                                              1) if startsec else base +
                "the whole time range.")
    # we need to get the number of unique seconds from the file names
    files = np.array(get_files(obsID))
    mask = np.array(['.dat' in file for file in files])
    if not startsec:
        times = [time[11:21] for time in files[mask]]
        n_secs = len(set(times))
        command = "ls -l %s/*ch*.dat | ((tee /dev/fd/5 | wc -l >/dev/fd/4) 5>&1 | " %(directory) + \
            "awk '($5!=%s){print $9}' | tee >> %s/%s_all.txt | xargs rm -rf) 4>&1;" %(required_size, directory, obsID) + \
            "cat %s/%s_all.txt; rm -rf %s/%s_all.txt" %(directory, obsID, directory, obsID)
        output = subprocess.Popen([command],
                                  stdout=subprocess.PIPE,
                                  shell=True).stdout
    else:
        output = subprocess.Popen(["count=0;for sec in `seq -w %s %s `;do let count=${count}+`ls -l %s/*${sec}*ch*.dat | " %(startsec, startsec+n_secs-1, directory) + \
                                       "((tee /dev/fd/5 | wc -l >/dev/fd/4) 5>&1 | awk '($5!=%s) " %(required_size) + \
                                       "{print $9}' | tee >> %s/errors_%s.txt | xargs rm -rf) 4>&1`;done;" %(directory,startsec) +\
                                       "echo ${count}; cat %s/errors_%s.txt;rm -rf %s/errors_%s.txt" %(directory,startsec,directory,startsec)],
                                  stdout=subprocess.PIPE, shell=True).stdout

    output = output.readlines()
    files_in_dir = int(output[0].strip())

    expected_files = n_secs * 25
    error = False
    error, n_ics = check_recombine_ics(directory=directory, \
                                           startsec=startsec, n_secs=n_secs, required_size=required_size_ics)
    files_in_dir += n_ics
    if not files_in_dir == expected_files:
        logger.error("We have {0} files but expected {1}".format(
            files_in_dir, expected_files))
        error = True
    for line in output[1:]:
        if b'dat' in line:
            logger.warning("Deleted {0} due to wrong size.".format(
                line.strip()))
            error = True
    if not error:
        logger.info("We have all {0} files as expected.".format(files_in_dir))
    return error
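
# The ls/tee/awk one-liners above count the *ch*.dat files for the requested
# seconds and delete any whose size differs from required_size. A rough
# pure-Python equivalent of that count-and-prune step (illustrative only,
# not part of vcstools):
import os
import glob

def count_and_prune(directory, required_size):
    """Return (file count, deleted paths) for *ch*.dat files in directory."""
    paths = glob.glob(os.path.join(directory, "*ch*.dat"))
    wrong_size = [p for p in paths if os.path.getsize(p) != required_size]
    for p in wrong_size:
        os.remove(p)
    return len(paths), wrong_size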
Example #4
def opt_parser(loglevels):
    comp_config = load_config_file()
    parser = argparse.ArgumentParser(
        description="scripts to check sanity of downloads and recombine.")
    parser.add_argument("-m", "--mode", type=str, choices=['download','recombine'],\
                          help="Mode you want to run: download, recombine", dest='mode', default=None)
    parser.add_argument("-d", "--data_type", type=str, choices=['11','15','16', 'raw','ics','tar_ics'],\
                          help="Only necessary when checking downloads. Types refer to those as definded " + \
                            "in voltdownload.py: 11 = Raw, 15 = ICS only, 16 = ICS and tarballs of recombined data.", \
                            dest='data_type', default=None)
    parser.add_argument("-o", "--obs", metavar="OBS ID", type=int, dest='obsID',\
                            help="Observation ID you want to process [no default]", default=None)
    parser.add_argument("-b", "--begin", metavar="start", type=int, dest='begin',\
                            help="gps time of first file to ckeck on [default=%(default)s]",\
                            default=None)
    parser.add_argument("-e", "--end", metavar="stop", type=int, dest='end',\
                            help="gps time of last file to ckeck on [default=%(default)s]",\
                            default=None)
    parser.add_argument(
        "-a",
        "--all",
        action="store_true",
        default=False,
        help=
        "Perform on entire observation span. Use instead of -b & -e. [default=%(default)s]"
    )
    parser.add_argument("-i", "--increment", metavar="time increment", type=int, \
                            dest='increment',\
                            help="Effectively the number of seconds to ckeck for " +\
                            "starting at start time [default=%(default)s]",\
                            default=None)
    parser.add_argument("-s", "--size", type=int, dest='size',\
                          help="The files size in bytes that you expect all files" +\
                          " to have. Per default will figure this out from files on he archive" +\
                            "We expect 253440000 (download raw), 327680000" +\
                          " (recombined, not ics), 7865368576 (tarballs)", default=None)
    parser.add_argument("-S", "--size_ics", type=int, help='Size in bytes that' +\
                            "you expect the ics files to have. Default = %(default)s", \
                            dest='size_ics', default=30720000)
    parser.add_argument('-w', '--work_dir', type=str, dest='work_dir',\
                            help="Directory to check the files in. " +\
                                 "Default is {0}[obsID]/[raw,combined]".format(comp_config['base_data_dir']))
    parser.add_argument("-V",
                        "--version",
                        action="store_true",
                        help="Print version and quit")
    parser.add_argument("-L",
                        "--loglvl",
                        type=str,
                        help="Logger verbosity level. Default: INFO",
                        choices=loglevels.keys(),
                        default="INFO")
    return parser.parse_args()
Example #5
def check_data(obsid, beg=None, dur=None, base_dir=None):
    """
    Checks to see if all of the recombined files exist on disk

    Parameters:
    -----------
    obsid: int
        The observation ID to check
    beg: int
        OPTIONAL - The beginning time of the files to check. If none, will use entire obs. Default: None
    dur: int
        OPTIONAL - The duration in seconds to check since the beginning time. If none, will use entire obs. Default: None
    base_dir: string
        OPTIONAL - The base directory to use. If none, will load from config. Default: None

    Returns:
    ---------
    check: boolean
        True - all files are on disk. False - not all files are on disk
    """
    if base_dir is None:
        comp_config = load_config_file()
        base_dir = comp_config['base_data_dir']
    comb_dir = "{0}{1}/combined".format(base_dir, obsid)

    if beg is not None and not isinstance(beg, int):
        beg = int(beg)
    if dur is not None and not isinstance(dur, int):
        dur = int(dur)

    #Check to see if the files are combined properly
    if beg is not None and dur is not None:
        logger.info("Checking recombined files beginning at {0} and ending at {1}. Duration: {2} seconds"\
                    .format(beg, (beg+dur), dur))
        error = checks.check_recombine(obsid,
                                       startsec=beg,
                                       n_secs=dur,
                                       directory=comb_dir)
    else:
        logger.warning(
            "No start time information supplied. Comparing files with full obs"
        )
        error = checks.check_recombine(obsid, directory=comb_dir)

    check = not error
    return check
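
# Minimal usage sketch of check_data (uses the module-level logger from these
# examples); the observation ID and times below are placeholders.
if check_data(1234567890, beg=1234567891, dur=600):
    logger.info("All recombined files are on disk.")
else:
    logger.error("Some recombined files are missing.")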
Example #6
def remove_beamformed(obs, pointing=None):
    comp_config = load_config_file()
    pointing_folder = os.path.join(comp_config['base_data_dir'], str(obs), "pointings")
    if not pointing:
        authority = input('No pointing specified, would you like to remove all pointings for this observation? (Y/N): ')
        if (authority == "Y") or (authority == "y"):
            pointings = glob.glob("{0}/*:*:*:*:*".format(pointing_folder))
            if not pointings:
                logger.error("No valid pointings in {0}. Exiting...")
                sys.exit(0)
            else:
                for pointing in pointings:
                    logger.info("Checking if pointing {0} has been uploaded to MWA Pulsar Database...".format(pointing))
                    
            # Upload to MWA Pulsar Database if not there already
            # Remove each pointing
    return
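
# The upload and removal steps above are only sketched as comments. An
# illustrative sketch of the "remove each pointing" step alone (not the
# vcstools implementation; on Lustre a munlink-based delete may be preferred
# over shutil.rmtree). Uses the module-level logger from these examples.
import os
import shutil

def remove_pointing_sketch(pointing_folder, pointing):
    """Delete a single pointing directory if it exists."""
    path = os.path.join(pointing_folder, pointing)
    if os.path.isdir(path):
        logger.info("Removing pointing directory {0}".format(path))
        shutil.rmtree(path)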
Example #7
def binfind(run_params):
    """
    Launches the binfinding part of the data processing pipeline

    Parameters:
    -----------
    run_params: object
        The run_params object defined by data_processing_pipeline
    """
    launch_line = binfinder_launch_line(run_params)
    commands = [launch_line]
    #decide how much time to allocate based on number of pointings
    n_pointings = len(run_params.pointing_dir)
    if n_pointings < 100:
        time = "00:30:00"
    elif n_pointings < 400:
        time = "02:00:00"
    elif n_pointings < 1000:
        time = "05:00:00"
    else:
        time = "10:00:00"

    name = "bf_initiate_{0}_{1}".format(run_params.pulsar, run_params.obsid)
    logger.info("Submitting binfinder script:")
    logger.info("")
    logger.info("Job Name: {}".format(name))
    comp_config = load_config_file()
    batch_dir = "{0}{1}/batch/".format(comp_config['base_product_dir'],
                                       run_params.obsid)
    job_id = submit_slurm(name, commands,\
                        batch_dir=batch_dir,\
                        slurm_kwargs={"time": time},\
                        module_list=['mwa_search/{0}'.format(run_params.mwa_search),\
                                    'presto/no-python'],\
                        submit=True, vcstools_version="{0}".format(run_params.vcs_tools))

    logger.info("Job successfully submitted: {0}".format(name))
    return job_id
Example #8
    def __init__(self,
                 obsid,
                 cal_obsid,
                 metafits,
                 srclist,
                 n_int_bins=6,
                 datadir=None,
                 outdir=None,
                 offline=False,
                 beam_model="FEE2016",
                 vcstools_version="master"):
        self.obsid = obsid  # target observation ID
        self.cal_obsid = cal_obsid  # calibrator observation ID
        self.offline = offline  # switch to decide if offline correlated data or not
        self.utctime = None  # start UTC time
        self.nfine_chan = None  # number of fine channels
        self.channels = None  # actual channel numbers
        self.fine_cbw = None  # fine channel bandwidth
        self.max_frequency = None  # the maximum frequency used by the RTS to calculate decorrelation
        self.corr_dump_time = None  # correlator dump times (i.e. integration time)
        self.n_dumps_to_average = None  # number of integration times to use for calibration
        self.PB_HA = None  # primary beam HA
        self.PB_DEC = None  # primary beam Dec
        self.freq_base = None  # frequency base for RTS
        self.JD = None  # time base for RTS
        self.metafits_RTSform = None  # modified metafits file name for RTS
        self.ArrayPositionLat = -26.70331940  # MWA latitude
        self.ArrayPositionLong = 116.6708152  # MWA longitude
        self.n_integration_bins = n_int_bins  # number of visibility integration groups for RTS
        self.base_str = None  # base string to be written to file, will be edited by RTScal
        self.beam_model = beam_model  # The beam model to use for the calibration solutions. Either 'ANALYTIC' or 'FEE2016'
        self.beam_model_bool = None
        self.vcstools_version = vcstools_version

        comp_config = load_config_file()

        # Check to make sure paths and files exist:
        # First, check that the actual data directory exists
        if datadir is None:
            # use the default data path
            self.data_dir = os.path.join(comp_config['base_data_dir'],
                                         str(obsid), "cal", str(cal_obsid),
                                         "vis")
            logger.info("Using default calibrator data path: {0}".format(
                self.data_dir))
            if os.path.exists(os.path.realpath(self.data_dir)) is False:
                errmsg = "Default data directory ({0}) does not exist. Aborting.".format(
                    self.data_dir)
                logger.error(errmsg)
                raise CalibrationError(errmsg)
        elif os.path.isdir(datadir):
            self.data_dir = os.path.realpath(datadir)
            logger.info(
                "Using the user specified data directory: {0}".format(datadir))
        else:
            errmsg = "Data directory ({0}) does not exist. Aborting.".format(
                datadir)
            logger.error(errmsg)
            raise CalibrationError(errmsg)

        # Then check if the specified output and batch directories exists
        if outdir is None:
            # this is the default
            logger.info("Assuming default directory structure...")
            self.output_dir = os.path.join(comp_config['base_data_dir'],
                                           str(self.obsid), "cal",
                                           str(self.cal_obsid), "rts")
            self.batch_dir = os.path.join(comp_config['base_data_dir'],
                                          str(self.obsid), "batch")
            logger.debug("RTS output directory is {0}".format(self.output_dir))
            logger.debug("Batch directory is {0}".format(self.batch_dir))
            mdir(self.output_dir, "RTS", gid=comp_config['gid'])
            mdir(self.batch_dir, "Batch", gid=comp_config['gid'])
        else:
            # mdir handles if the directory already exists
            self.output_dir = os.path.realpath(outdir + "/rts")
            self.batch_dir = os.path.realpath(outdir + "/batch")
            logger.warning("Non-standard RTS output path: {0}".format(
                self.output_dir))
            logger.warning("Non-standard batch directory path: {0}".format(
                self.batch_dir))
            mdir(self.output_dir, "RTS", gid=comp_config['gid'])
            mdir(self.batch_dir, "Batch", gid=comp_config['gid'])

        # Then check that the metafits file exists
        if os.path.isfile(metafits) is False:
            # file doesn't exist
            errmsg = "Given metafits file ({0}) does not exist.".format(
                metafits)
            logger.error(errmsg)
            raise CalibrationError(errmsg)
        elif "_ppds" not in metafits:
            # file doesn't have the correct naming convention
            errmsg = "Looks like you have an old-style metafits. " \
                     "You'll need to download the new version, which is named like: " \
                     "{0}_metafits_ppds.fits.".format(obsid)
            logger.error(errmsg)
            raise CalibrationError(errmsg)
        else:
            logger.info("Metafits file exists and is named correctly.")
            self.metafits = os.path.realpath(metafits)
            logger.debug("    {0}".format(self.metafits))

        # then check that the source list exists
        if os.path.isfile(srclist) is False:
            # file doesn't exist
            errmsg = "Given source list file ({0}) does not exist.".format(
                srclist)
            logger.error(errmsg)
            raise CalibrationError(errmsg)
        else:
            logger.info("Checking source list file exists... Ok")
            self.source_list = os.path.realpath(srclist)

        # Check the 'beam_model' is one of the correct choices
        choices = ("FEE2016", "ANALYTIC")
        if self.beam_model not in choices:
            errmsg = "Given beam model: {0} not an available choice: {1}".format(
                self.beam_model, choices)
            logger.error(errmsg)
            raise CalibrationError(errmsg)
        else:
            logger.info("Using {0} beam model for calibration solution".format(
                self.beam_model))
            self.beam_model_bool = int(
                bool(self.beam_model ==
                     "ANALYTIC"))  #produces 1 for ANALYTIC, 0 for FEE2016

        # set some RTS flags based on if we have offline correlated data or not
        logger.info("Setting RTS data input flags...")
        if self.offline:
            self.useCorrInput = 1
            self.readDirect = 0
            logger.debug("Offline correlation")
        else:
            self.useCorrInput = 0
            self.readDirect = 1
            logger.debug("Online correlation")
Example #9
def submit_slurm(name,
                 commands,
                 tmpl=SLURM_TMPL,
                 slurm_kwargs=None,
                 module_list=[],
                 vcstools_version="master",
                 batch_dir="batch/",
                 depend=None,
                 depend_type='afterok',
                 submit=True,
                 outfile=None,
                 queue="cpuq",
                 export="NONE",
                 gpu_res=None,
                 mem=1024,
                 cpu_threads=1,
                 temp_mem=None,
                 nice=0,
                 shebag='#!/bin/bash -l',
                 module_dir=None,
                 load_vcstools=True):
    """
    Cleanly submit SLURM jobs using a simple template.

    Parameters
    ----------
    name : str
        The base name that is used to create the "`name`.batch" and "`name`.out" files.

    commands : list of strs
        The actual bash script commands you want to run.
        Expects a list where each element is a single line of the bash script.

    tmpl : str
        A template header string with format place holders: export, outfile,
        cluster, header and script.
        This is used to create the final string to be written to the job script.
        For this function, it is required to be SLURM compliant.
        Default: `SLURM_TMPL`

    slurm_kwargs : dict [optional]
        A dictionary of SLURM keyword, value pairs to fill in whatever is not
        in the template supplied to `tmpl`.
        Default: `{}` (empty dictionary, i.e. no additional header parameters)

    module_list : list of str [optional]
        A list of module names (including versions if applicable) that will
        be included in the header for the batch
        scripts. e.g. ["vcstools/master", "mwa-voltage/master", "presto/master"] would append
            module load vcstools/master
            module load mwa-voltage/master
            module load presto/master
        to the header of the batch script. This can also invoke "module use ..." commands.
        NOTE: /group/mwa/software/modulefiles is used and vcstools/master is loaded by default.

    vcstools_version :  str
        The version of vcstools to load. Default: master.

    batch_dir : str [optional]
        The LOCAL directory where you want to write the batch scripts
        (i.e. it will write to `$PWD/batch_dir`).
        Default: "batch/"

    depend : list or None [optional]
        A list of the SLURM job IDs that your would like this job to depend on.
        If `None` then it is assumed there is no dependency on any other job.
        Default: `None`

    depend_type : str [optional]
        The type of slurm dependency required. For example, if you wanted the
        job to run after the jobs have been terminated use 'afterany'.
        Default: "afterok"

    submit : boolean [optional]
        Whether to write and submit the job scripts (`True`) or only write the scripts (`False`).
        Default: `True`

    outfile : str [optional]
        The output file name if "`name`.out" is not desirable.
        Default: `None` (i.e. "`batch_dir`/`name`.out")

    queue : str [optional]
        The type of queue you require (cpuq, gpuq, copyq or zcpuq); the script will
        then choose the correct partition and cluster for the job to run on.
        Default: "cpuq"

    export : str [optional]
        Switch that lets SLURM use your login environment on the compute
        nodes ("ALL") or not ("NONE").
        Default: "None"

    gpu_res : int [optional]
        Number of GPUs that the SLURM job will reserve.
        Default: "None"

    mem : int [optional]
        The MB of ram required for your slurm job.
        Default: 1024

    cpu_threads : int [optional]
        The number of cpu threads required for your slurm job.
        Default: 1


    Returns
    -------
    jobid : int
        The unique SLURM job ID associated with the submitted job.
    """
    if slurm_kwargs is None:
        slurm_kwargs = {}

    #Work out which partition and cluster to use based on the supercomputer
    #(in config file) and queue required
    comp_config = load_config_file()
    if queue == 'cpuq':
        cluster = comp_config['cpuq_cluster']
        partition = comp_config['cpuq_partition']
    elif queue == 'gpuq':
        cluster = comp_config['gpuq_cluster']
        partition = comp_config['gpuq_partition']
        if gpu_res is None:
            # No gpus reserved so change it to a default of 1
            gpu_res = 1
    elif queue == 'copyq':
        cluster = comp_config['copyq_cluster']
        partition = comp_config['copyq_partition']
    elif queue == 'zcpuq':
        # Download and checks should be done on Zeus's cpuq. This will only work
        # on Galaxy as the Ozstar workflow is different
        cluster = comp_config['zcpuq_cluster']
        partition = comp_config['zcpuq_partition']
    else:
        logger.error("No queue found, please use cpuq, gpuq or copyq")

    header = []

    if batch_dir.endswith("/") is False:
        batch_dir += "/"

    # define file names (both the batch job file and the output file)
    jobfile = batch_dir + name + ".batch"
    if not outfile:
        outfile = batch_dir + name + ".out"

    # create the header from supplied arguments
    for k, v in slurm_kwargs.items():
        if len(k) > 1:
            k = "--" + k + "="
        else:
            k = "-" + k + " "

        header.append("#SBATCH {0}{1}".format(k, v))

    # check if there are dependencies, and if so include that in the header
    if depend is not None:
        #assumes depend is a list but if not will make an educated guess of how to reformat it
        if isinstance(depend, int):
            #assume it's been given a single job id
            header.append("#SBATCH --dependency={0}:{1}".format(
                depend_type, depend))
        if isinstance(depend, str):
            if ":" in depend:
                #assume it has been given an already formatted string
                if depend.startswith(":"):
                    depend = depend[1:]
            #or a single jobid
            header.append("#SBATCH --dependency={0}:{1}".format(
                depend_type, depend))
        if isinstance(depend, list):
            depend_str = ""
            for job_id in depend:
                depend_str += ":" + str(job_id)
            header.append("#SBATCH --dependency={0}{1}".format(
                depend_type, depend_str))

    # add a gpu res to header
    if gpu_res is not None:
        header.append('#SBATCH --gres=gpu:{0}'.format(gpu_res))

    # add temp SSD memory to combat I/O issues. Only available on Ozstar
    hostname = socket.gethostname()
    if temp_mem is not None:
        header.append("#SBATCH --tmp={0}GB".format(temp_mem))

    if module_dir is None:
        module_dir = comp_config['module_dir']

    # now join the header into one string
    header = "\n".join(header)

    # construct the module loads
    if load_vcstools:
        modules = ["module load vcstools/{0}\n".format(vcstools_version)]
    else:
        modules = []
    switches = []
    for m in module_list:
        if m == "vcstools":
            # don't do anything as vcstools is loaded automatically
            continue
        if "module switch" in m:
            # if a module switch command is included rather than just a module name, then add it to a separate list
            switches.append(m)
        elif "module" in m:
            modules.append("{0}\n".format(m))
        else:
            modules.append("module load {0}\n".format(m))

    # join the module loads and switches into a single string
    switches = "\n".join(switches)
    modules = "\n".join(modules)

    # join the commands into a single string
    commands = "\n".join(commands)

    # some little hacks to make jobs work on the shanghai server
    if hostname.startswith('x86') or hostname.startswith('arm'):
        if vcstools_version == 'master':
            vcstools_version = 'cpu-master'
        if export == "NONE":
            export = "ALL"
        if shebag == "#!/bin/bash -l":
            shebag = "#!/bin/bash"

    # format the template script
    tmpl = tmpl.format(shebag=shebag,
                       script=commands,
                       outfile=outfile,
                       header=header,
                       switches=switches,
                       modules=modules,
                       cluster=cluster,
                       partition=partition,
                       export=export,
                       account=comp_config['group_account'][queue],
                       module_dir=module_dir,
                       threads=cpu_threads,
                       mem=mem,
                       nice=nice)

    # write the formatted template to the job file for submission
    with open(jobfile, "w") as fh:
        fh.write(tmpl)

    # submit the jobs
    batch_submit_line = "sbatch {0}".format(jobfile)
    jobid = None
    if submit:
        submit_cmd = subprocess.Popen(batch_submit_line,
                                      shell=True,
                                      stdout=subprocess.PIPE)
        for line in submit_cmd.stdout:
            if b"Submitted" in line:
                jobid = str(line.split(b" ")[3].decode())
        if jobid is None:
            logger.debug(batch_submit_line)
            logger.debug(submit_cmd.stdout)
            return
        else:
            return jobid
    else:
        return
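
# Minimal usage sketch of submit_slurm; the job name, commands, module list
# and resource values below are placeholders.
commands = ["echo 'starting job'",
            "run_my_processing_step.py --obsid 1234567890"]   # hypothetical script
jobid = submit_slurm("example_job_1234567890",
                     commands,
                     batch_dir="batch/",
                     slurm_kwargs={"time": "01:00:00", "nodes": "1"},
                     module_list=["mwa_search/master"],
                     mem=2048,
                     submit=True)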
Example #10
                        default="INFO")

    args = parser.parse_args()

    # set up the logger for stand-alone execution
    logger.setLevel(loglevels[args.loglvl])
    ch = logging.StreamHandler()
    ch.setLevel(loglevels[args.loglvl])
    formatter = logging.Formatter(
        '%(asctime)s  %(filename)s  %(name)s  %(lineno)-4d  %(levelname)-9s :: %(message)s'
    )
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    logger.propagate = False

    comp_config = load_config_file()
    if args.base_dir is None:
        data_dir = os.path.join(comp_config['base_product_dir'],
                                str(args.obsID), "combined")
        ics_dir = os.path.join(comp_config['base_product_dir'],
                               str(args.obsID), "ics")
    else:
        data_dir = "{base_dir}/{obsid}/combined".format(base_dir=args.base_dir,
                                                        obsid=args.obsID)
        ics_dir = "{base_dir}/{obsid}/ics".format(base_dir=args.base_dir,
                                                  obsid=args.obsID)

    logger.info("Data directory: {data_dir}".format(data_dir=data_dir))
    logger.info("ICS output directory: {ics_dir}".format(ics_dir=ics_dir))

    data_files = sorted(
Example #11
def check_download(obsID,
                   directory=None,
                   startsec=None,
                   n_secs=None,
                   data_type='raw'):
    '''
    Checks that the number of files in directory (default is /astro/mwavcs/vcs/[obsID]/raw/) is the same
    as that found on the archive and also checks that all files have the same size (253440000 for raw, 7864340480 for recombined tarballs by default).
    '''
    comp_config = load_config_file()
    if data_type not in ['raw', 'tar_ics', 'ics']:
        logger.error("Wrong data type given to download check.")
        return True
    if not directory:
        directory = os.path.join(
            comp_config['base_data_dir'], str(obsID),
            "raw") if data_type == 'raw' else os.path.join(
                comp_config['base_data_dir'], str(obsID), "combined")
    base = "\n Checking file size and number of files for obsID {0} in {1} for ".format(
        obsID, directory)
    n_secs = n_secs if n_secs else 1
    logger.info(base +
                "gps times {0} to {1}".format(startsec, startsec + n_secs -
                                              1) if startsec else base +
                "the whole time range.")

    # get the list of files on the archive, plus their suffix and expected size
    try:
        files, suffix, required_size = get_files_and_sizes(obsID,
                                                           data_type,
                                                           mintime=startsec,
                                                           maxtime=startsec +
                                                           n_secs)
    except Exception:
        return True

    if not startsec:
        n_files_expected = len(files)
        command = "ls -l %s/*%s | ((tee /dev/fd/5 | wc -l >/dev/fd/4) 5>&1 | " %(directory, suffix) + \
            "awk '($5!=%s){print \"file \" $9 \" has size \" $5 \" (expected %s)\"}' >> %s/%s_all.txt) 4>&1;" %(required_size, required_size,directory, obsID) + \
            "cat %s/%s_all.txt; rm -rf %s/%s_all.txt" %(directory, obsID, directory, obsID)
        output = subprocess.Popen([command],
                                  stdout=subprocess.PIPE,
                                  shell=True).stdout
    else:
        n_files_expected = 0
        #remove stray metafits from list that causes int errors
        files = [x for x in files if "metafits" not in x]
        times = [int(time[11:21]) for time in files]
        for sec in range(startsec, startsec + n_secs):
            n_files_expected += times.count(sec)
        output = subprocess.Popen(["count=0;for sec in `seq -w %s %s `;do let count=${count}+`ls -l %s/*${sec}*%s | " %(startsec, startsec+n_secs-1, directory, suffix) + \
                                       "((tee /dev/fd/5 | wc -l >/dev/fd/4) 5>&1 | awk '($5!=%s) " %(required_size) + \
                                       "{print \"file \" $9 \" has size \" $5 \" (expected %s)\"}' >> %s/errors_%s.txt) 4>&1`;done;" %(required_size,directory,startsec) +\
                                       "echo ${count}; cat %s/errors_%s.txt;rm -rf %s/errors_%s.txt" %(directory,startsec,directory,startsec)],
                                  stdout=subprocess.PIPE, shell=True).stdout
    output = output.readlines()
    files_in_dir = int(output[0].strip())

    error = False

    # in case we're checking for downloaded tarballs also need to check ics-files.
    if data_type == 'tar_ics':
        logger.info("Now checking ICS files")
        error, n_ics = check_recombine_ics(
            directory=directory,
            startsec=startsec,
            n_secs=n_secs,  #n_files_expected,
            obsID=obsID)
        n_files_expected *= 2
        files_in_dir += n_ics

    if not files_in_dir == n_files_expected:
        logger.error("We have {0} files but expected {1}".format(
            files_in_dir, n_files_expected))
        error = True
    for line in output[1:]:
        if b'file' in line:
            logger.error(line)
            error = True
    if not error:
        logger.info("We have all {0} {1} files as expected.".format(
            files_in_dir, data_type))
    return error
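
# Minimal usage sketch of check_download (uses the module-level logger from
# these examples); the observation ID and time range below are placeholders.
failed = check_download(1234567890, startsec=1234567891, n_secs=600,
                        data_type='tar_ics')
if failed:
    logger.error("The download check reported missing or wrong-sized files.")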
def search_for_cal_srclist(obsid,
                           cal_id,
                           all_cal_returns=False,
                           all_srclist_returns=False):
    """
    Given an obsid, searches common locations for the rts folder(s) as well as the sourcelist(s)

    Parameters:
    -----------
    obsid: int
        The observation ID
    cal_id: int
        The calibrator ID
    all_cal_returns: boolean
        OPTIONAL - If true, will return all RTS directories found. Default: False
    all_srclist_returns: boolean
        OPTIONAL - If true, will return all sourcelist files found. Default: False

    Returns:
    --------
    cal_dirs: string or list
        The path to the RTS directory, or a list of paths if all_cal_returns is True
    srclists: string or list
        The path to the sourcelist file, or a list of paths if all_srclist_returns is True
    """
    comp_config = load_config_file()
    base_dir = comp_config['base_product_dir']
    cal_dir = os.path.join(base_dir, str(obsid), "cal", str(cal_id))
    cal_dirs = []
    srclists = []
    #search all subdirectories
    for root, dirs, files in os.walk(cal_dir):
        if "rts" in dirs:
            cal_dirs.append(os.path.join(root, "rts"))
        for f in files:
            if f.endswith(".txt") and "srclist" in f:
                srclists.append(os.path.join(root, f))

    #handle multiple rts folders with user input
    if not all_cal_returns and len(cal_dirs) > 1:
        valid = False
        while not valid:
            print("Multiple RTS files found. Please choose one")
            for i, a_dir in enumerate(cal_dirs):
                print("{0}: {1}".format(i + 1, a_dir))
            choice = int(
                input("Choose a number between 1 and {0}: ".format(
                    len(cal_dirs))))
            if choice >= 1 and choice <= len(cal_dirs):
                valid = True
                my_cal_dir = cal_dirs[choice - 1]
                print("Using RTS directory: {}".format(my_cal_dir))
                cal_dirs = [my_cal_dir]
            else:
                print("## Not a valid choice! ##")

    if not all_cal_returns and len(cal_dirs) == 1:
        cal_dirs = cal_dirs[0]

    #handle multiple sourcelist files with user input
    if not all_srclist_returns and len(srclists) > 1:
        valid = False
        print("Multiple sourcelist files found. Please choose one")
        while not valid:
            for i, a_file in enumerate(srclists):
                print("{0}: {1}".format(i + 1, a_file))
            choice = int(
                input("Choose a number between 1 and {0}: ".format(
                    len(srclists))))
            if choice >= 1 and choice <= len(srclists):
                valid = True
                my_srclist = srclists[choice - 1]
                print("Using sourcelist directory: {}".format(my_srclist))
                srclists = [my_srclist]
            else:
                print("## Not a valid choice! ##")

    if not all_srclist_returns and len(srclists) == 1:
        srclists = srclists[0]

    return cal_dirs, srclists
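
# Minimal usage sketch; the obsid and cal_id are placeholders. With the
# default flags a single path is returned for each (after an interactive
# choice if more than one candidate is found). Uses the module-level logger
# from these examples.
cal_dir, srclist = search_for_cal_srclist(1234567890, 1234567000)
logger.info("RTS directory: {0}".format(cal_dir))
logger.info("Source list: {0}".format(srclist))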