Example #1
0
 def _search_files(self):
     """
     Search for the data-files.

     The value of `self.inputs['job_name']` is used to compose the glob
     search pattern. It is split into parts separated by '_'. The first part
     should (in principle) be identical to the MAC/SAS observation ID (e.g.,
     L29066). The second (optional) part specifies the sub-array-pointing
     (e.g., 1); it defaults to 0 when it is missing or not an integer.

     Returns a list of pairs built from the first two sequences returned by
     `findFiles` (presumably host and file name -- confirm against
     `findFiles`).
     """
     # Shared template for both fall-back cases; kept outside the `try` so
     # the guarded body is only the conversion that can actually raise.
     sap_msg = (
         "Job-name part indicating sub-array-pointing index is %s, "
         "defaulting to 0")
     job_name_parts = self.inputs['job_name'].split('_')
     job = job_name_parts[0]
     sap = 0
     try:
         sap = int(job_name_parts[1])
     except IndexError:
         # No second part in the job name: silently fall back to SAP 0.
         self.logger.debug(sap_msg % "missing")
     except ValueError:
         # `Logger.warn` is a deprecated alias that no longer exists in
         # recent Python versions; `warning` is the supported spelling.
         self.logger.warning(sap_msg % "non-numeric")
     # NOTE(review): the '{,.dppp}' suffix looks like shell brace expansion
     # (matching both '.MS' and '.MS.dppp'); this relies on `findFiles`
     # handing the pattern to a shell -- confirm.
     ms_pattern = os.path.join(
         self.inputs['observation_dir'],
         '%s_SAP%03d_SB???_uv.MS{,.dppp}' % (job, sap))
     self.logger.debug("Searching for data files: %s" % ms_pattern)
     data = findFiles(ms_pattern, '-1d')
     return list(zip(data[0], data[1]))
Example #2
0
def tally_data_map(data, glob, logger=None):
    """
    Verify that the files specified in the data map `data` exist on the
    cluster.

    The glob pattern `glob` should contain the pattern to be used in the
    search. Each item in `data` must provide a `host` and a `file` attribute;
    the directories to search are taken from the `file` attributes.
    If `logger` is given, the composed search pattern is logged at debug
    level.

    This function will return a list of booleans: True for each item in
    `data` that is present on the cluster; False otherwise.

    This method is deprecated, because the new data-map file keeps track of
    the `skip` attribute of each data product in the data-map.
    """
    import os

    # An empty data map yields an empty mask; skip the cluster search, which
    # would otherwise be run with an empty pattern.
    if not data:
        return []

    from lofar.mstools import findFiles

    # Determine the directories to search. Get unique directory names from
    # `data` by creating a set first.
    dirs = list(set(os.path.dirname(d.file) for d in data))

    # Compose the filename glob-pattern: one pattern per directory, separated
    # by spaces.
    glob = ' '.join(os.path.join(d, glob) for d in dirs)

    if logger:
        logger.debug("Searching for file: %s" % glob)

    # Search the files on the cluster using the glob-pattern and turn the
    # result into a set of (host, file) tuples. The result must be
    # materialised: on Python 3 `zip` returns a one-shot iterator, and
    # repeated `in` tests against it would consume it, so that later items
    # are wrongly reported as missing.
    found = set(zip(*findFiles(glob, '-1d')))

    # Return a mask containing True if file exists, False otherwise
    return [(f.host, f.file) in found for f in data]
Example #3
0
def tally_data_map(data, glob, logger=None):
    """
    Verify that the files specified in the data map `data` exist on the
    cluster.

    `data` is a collection of items that each expose a `host` and a `file`
    attribute. `glob` is the file-name pattern to search for; it is combined
    with every distinct directory found in `data`. When `logger` is supplied,
    the resulting search pattern is reported at debug level.

    Returns a list of booleans, one per item in `data` and in the same order:
    True if the item's (host, file) combination was found on the cluster,
    False otherwise.

    This method is deprecated, because the new data-map file keeps track of
    the `skip` attribute of each data product in the data-map.
    """
    import os

    # Nothing to check: return an empty mask instead of searching the
    # cluster with an empty pattern.
    if not data:
        return []

    from lofar.mstools import findFiles

    # Unique directory names; the set removes duplicates.
    dirs = list(set(os.path.dirname(d.file) for d in data))

    # One glob pattern per directory, joined by spaces into a single string.
    glob = ' '.join(os.path.join(d, glob) for d in dirs)

    if logger:
        logger.debug("Searching for file: %s" % glob)

    # Collect the search result as a set of (host, file) tuples. A bare
    # `zip` object is an iterator on Python 3 and would be exhausted by the
    # first membership tests below, giving False for files that do exist.
    found = set(zip(*findFiles(glob, '-1d')))

    # Mask: True where the item was found, False otherwise.
    return [(f.host, f.file) in found for f in data]
Example #4
0
 def _search_files(self):
     """
     Search for the data-files.

     The value of `self.inputs['job_name']` is used to compose the glob
     search pattern. It is split into parts separated by '_'. The first part
     should (in principle) be identical to the MAC/SAS observation ID (e.g.,
     L29066). The second (optional) part specifies the sub-array-pointing
     (e.g., 1); it defaults to 0 when it is missing or not an integer.

     Returns a list of pairs built from the first two sequences returned by
     `findFiles` (presumably host and file name -- confirm against
     `findFiles`).
     """
     # Message template shared by both fall-back branches below.
     sap_msg = (
         "Job-name part indicating sub-array-pointing index is %s, "
         "defaulting to 0"
     )
     job_name_parts = self.inputs['job_name'].split('_')
     job = job_name_parts[0]
     sap = 0
     try:
         sap = int(job_name_parts[1])
     except IndexError:
         # Job name has no second part: keep the default SAP of 0.
         self.logger.debug(sap_msg % "missing")
     except ValueError:
         # Use `warning`; `Logger.warn` is a deprecated alias that has been
         # removed from recent Python versions.
         self.logger.warning(sap_msg % "non-numeric")
     # NOTE(review): '{,.dppp}' looks like shell brace expansion (matching
     # both '.MS' and '.MS.dppp'); this depends on how `findFiles` expands
     # the pattern -- confirm.
     ms_pattern = os.path.join(
         self.inputs['observation_dir'],
         '%s_SAP%03d_SB???_uv.MS{,.dppp}' % (job, sap)
     )
     self.logger.debug("Searching for data files: %s" % ms_pattern)
     data = findFiles(ms_pattern, '-1d')
     # Materialise the pairs: on Python 3 a bare `zip` is a one-shot
     # iterator, which callers could neither index, measure nor traverse
     # more than once.
     return list(zip(data[0], data[1]))