def _search_files(self):
    """
    Search for the data-files.

    The value of `self.inputs['job_name']` is used to compose the glob
    search pattern. It is split into parts separated by '_'. The first part
    should (in principle) be identical to the MAC/SAS observation ID (e.g.,
    L29066). The second (optional) part specifies the sub-array-pointing
    (e.g., 1); it defaults to 0 when it is missing or non-numeric.

    The search is rooted at `self.inputs['observation_dir']`.

    Returns a list of 2-tuples pairing the items of the first two sequences
    returned by `findFiles` (presumably host name and file path -- confirm
    against `findFiles`).
    """
    job_name_parts = self.inputs['job_name'].split('_')
    job = job_name_parts[0]

    errmsg = (
        "Job-name part indicating sub-array-pointing index is %s, "
        "defaulting to 0"
    )
    sap = 0
    # Only the conversion can fail, so it is the only statement guarded.
    try:
        sap = int(job_name_parts[1])
    except IndexError:
        # A job name without a second part is expected; debug level only.
        self.logger.debug(errmsg, "missing")
    except ValueError:
        # `warning` replaces the deprecated `warn` alias.
        self.logger.warning(errmsg, "non-numeric")

    ms_pattern = os.path.join(
        self.inputs['observation_dir'],
        '%s_SAP%03d_SB???_uv.MS{,.dppp}' % (job, sap)
    )
    self.logger.debug("Searching for data files: %s", ms_pattern)
    data = findFiles(ms_pattern, '-1d')
    return list(zip(data[0], data[1]))
def tally_data_map(data, glob, logger=None):
    """
    Verify that the files specified in the data map `data` exist on the
    cluster.

    The glob pattern `glob` should contain the pattern to be used in the
    search; it is joined onto every distinct directory found in `data`.
    Each item of `data` must provide `host` and `file` attributes. If
    `logger` is given, the composed search pattern is logged at debug level.

    This function will return a list of booleans: True for each item in
    `data` that is present on the cluster; False otherwise.

    This method is deprecated, because the new data-map file keeps track of
    the `skip` attribute of each data product in the data-map.
    """
    import os
    from lofar.mstools import findFiles

    # Determine the directories to search. Collect unique directory names
    # and sort them so the composed pattern (and the log line) is stable.
    dirs = sorted({os.path.dirname(d.file) for d in data})
    if not dirs:
        # `data` yielded no items, so the mask is necessarily empty; skip
        # the cluster search with an empty pattern.
        return []

    # Compose the filename glob-pattern: one entry per directory, separated
    # by spaces. A new name is used so the `glob` argument is not clobbered.
    pattern = ' '.join(os.path.join(d, glob) for d in dirs)

    # Search the files on the cluster using the glob-pattern.
    if logger:
        logger.debug("Searching for file: %s" % pattern)

    # Materialize the (host, file) tuples. A bare `zip` is a one-shot
    # iterator under Python 3 and would be consumed by the first membership
    # tests below, yielding wrong False results for later items.
    found = set(zip(*findFiles(pattern, '-1d')))

    # Return a mask containing True if file exists, False otherwise
    return [(f.host, f.file) in found for f in data]
def _search_files(self):
    """
    Search for the data-files.

    The value of `self.inputs['job_name']` is used to compose the glob
    search pattern. It is split into parts separated by '_'. The first part
    should (in principle) be identical to the MAC/SAS observation ID (e.g.,
    L29066). The second (optional) part specifies the sub-array-pointing
    (e.g., 1); it defaults to 0 when it is missing or non-numeric.

    The search is rooted at `self.inputs['observation_dir']`.

    Returns a list of 2-tuples pairing the items of the first two sequences
    returned by `findFiles` (presumably host name and file path -- confirm
    against `findFiles`). A list is returned rather than a bare `zip`
    object so that callers can iterate it repeatedly, index it and take its
    length.
    """
    name_parts = self.inputs['job_name'].split('_')
    job = name_parts[0]

    errmsg = (
        "Job-name part indicating sub-array-pointing index is %s, "
        "defaulting to 0"
    )
    sap = 0
    # Keep the guarded region down to the one conversion that can raise.
    try:
        sap = int(name_parts[1])
    except IndexError:
        # No sub-array-pointing part given; this is the normal default case.
        self.logger.debug(errmsg, "missing")
    except ValueError:
        # `warning` replaces the deprecated `warn` alias.
        self.logger.warning(errmsg, "non-numeric")

    ms_pattern = os.path.join(
        self.inputs['observation_dir'],
        '%s_SAP%03d_SB???_uv.MS{,.dppp}' % (job, sap)
    )
    self.logger.debug("Searching for data files: %s", ms_pattern)
    data = findFiles(ms_pattern, '-1d')
    return list(zip(data[0], data[1]))