def datacatalog_query(query, folder=None, site=None):
    """
    Run a query against the data catalog and return the matching datasets.

    Parameters
    ----------
    query : str
        Query expression handed unchanged to DataCatalog.find_datasets.
    folder : str or None
        Data catalog folder to search.  When None, the value of the
        LCATR_DATACATALOG_FOLDER environment variable is used (a KeyError
        is raised if it is not set).
    site : str or None
        Data catalog site.  When None, the result of getSiteName() is used.

    Returns
    -------
    Whatever DataCatalog.find_datasets returns for the query.
    """
    # Imported lazily so that the module can be loaded without DataCatalog.
    from DataCatalog import DataCatalog

    search_folder = (os.environ['LCATR_DATACATALOG_FOLDER']
                     if folder is None else folder)
    search_site = getSiteName() if site is None else site
    catalog = DataCatalog(folder=search_folder, site=search_site)
    return catalog.find_datasets(query)
def get_met_scan_data(sensor_id, pattern, root_folder='LSST/vendorData',
                      site='slac.lca.archive', sort=False,
                      description='Metrology Scan Files:'):
    """
    Find the metrology scan files registered for a sensor.

    Parameters
    ----------
    sensor_id : str
        Sensor identifier; matched against LSST_NUM in the query.
    pattern : str
        fnmatch-style glob applied to the base name of each dataset path.
    root_folder : str
        Root folder passed to _folder() when LCATR_DATACATALOG_FOLDER
        is not set in the environment.
    site : str
        Data catalog site to query.
    sort : bool
        If True, return the file names in sorted order.
    description : str
        Heading passed to siteUtils.print_file_list.

    Returns
    -------
    file_list : list
        Full paths of the datasets whose base name matches pattern.
    """
    # The environment variable, when present, overrides the derived folder.
    if 'LCATR_DATACATALOG_FOLDER' in os.environ:
        folder = os.environ['LCATR_DATACATALOG_FOLDER']
    else:
        folder = _folder(sensor_id, root_folder=root_folder)

    catalog = DataCatalog(folder=folder, site=site)

    clauses = ['DATA_PRODUCT=="MET_SCAN"',
               'TEST_CATEGORY=="MET"',
               'LSST_NUM=="%(sensor_id)s"']
    query = '&&'.join(clauses) % {'sensor_id': sensor_id}

    matches = catalog.find_datasets(query)
    file_list = [path for path in matches.full_paths()
                 if fnmatch.fnmatch(os.path.basename(path), pattern)]
    if sort:
        file_list.sort()

    siteUtils.print_file_list(description, file_list)
    return file_list
def find(self, FType=None, XtraOpts=None, testName=None, run=None):
    """
    Find the data catalog files belonging to the acquisition step of a run.

    Any argument that is not None overrides (and is stored in) the
    corresponding instance attribute; arguments left as None fall back to
    the value already held by the instance.

    Parameters
    ----------
    FType : str or None
        File name suffix to keep (compared with str.endswith).
    XtraOpts : str or None
        Query expression passed to DataCatalog.find_datasets.
    testName : str or None
        Substring (case-insensitive) that returned paths must contain.
        Also used as the step name when no other acquisition step is found.
    run : str or None
        Run identifier.  A run containing "D" is looked up in the "Dev"
        database / "INT-test" mirror, anything else in "Prod" / "INT-prod".

    Returns
    -------
    files : list
        Full paths of the matching datasets.

    Raises
    ------
    ValueError
        If no run is available, or if the run's experimentSN does not
        contain 'CRYO' (the run is reported as not being a BOT run).
    """
    if FType is not None:
        self.FType = FType
    if XtraOpts is not None:
        self.XtraOpts = XtraOpts
    if testName is not None:
        self.testName = testName
    if run is not None:
        self.run = run

    # Resolve the run from the instance *before* inspecting it, so that a
    # call relying on a previously stored run does not fail on run=None.
    current_run = getattr(self, 'run', None)
    if current_run is None:
        raise ValueError("No run specified")
    run_label = str(current_run)

    if "D" in run_label:
        self.mirrorName = "INT-test"
        self.db = "Dev"
    else:
        self.mirrorName = "INT-prod"
        self.db = "Prod"

    connection = self.connections["connect"][self.db]
    run_info = connection.getRunResults(run=self.run)
    focal_plane = run_info['experimentSN']

    # check that this run is for full focal plane
    if 'CRYO' not in focal_plane.upper():
        message = "Run " + run_label + " not a BOT run"
        print(message)
        raise ValueError(message)

    sourceMap = {
        'INT-prod': 'SLAC-prod/prod/',
        'INT-test': 'SLAC-test/test/',
    }

    # get the path to this run: use the first step whose name contains
    # "acq"; when none is found (or it is the "BOT_acq_sim" placeholder)
    # fall back to the configured test name.
    step_info = connection.getRunFilepaths(run=self.run)
    acq_step = next((step for step in step_info if "acq" in step.lower()),
                    "BOT_acq_sim")
    if acq_step == "BOT_acq_sim":
        acq_step = self.testName

    file_base = ("/LSST/mirror/" + sourceMap[self.mirrorName]
                 + "LCA-10134_Cryostat/" + focal_plane + "/"
                 + run_label + "/" + acq_step + "/v0/")

    query = self.XtraOpts if self.XtraOpts is not None else ''

    datacatalog = DataCatalog(folder=file_base, site=self.site,
                              use_newest_subfolder=False)
    datasets = datacatalog.find_datasets(query)
    paths = datasets.full_paths() if len(datasets) != 0 else []

    suffix = self.FType
    wanted = None if self.testName is None else self.testName.upper()

    files = []
    for item in paths:
        if suffix is not None and not item.endswith(suffix):
            continue
        if wanted is not None and wanted not in item.upper():
            continue
        files.append(item)
    return files
def find(self, mirrorName=None, FType=None, XtraOpts=None, testName=None,
         sensorId=None, run=None, outputFile=None, site=None, Print=None):
    """
    Find data catalog files for a sensor / test, optionally listing them.

    Any argument that is not None overrides (and is stored in) the
    instance attribute of the same name; arguments left as None fall back
    to the value already held by the instance.

    Parameters
    ----------
    mirrorName : str or None
        Selects where to search.  'vendor' and 'SAWG-BNL' build a fixed
        folder from the sensor attributes; any other value obtains the
        folders from self.connect.getRunFilepaths for the run and test.
    FType : str or None
        File name suffix to keep (compared with str.endswith).
    XtraOpts : str or None
        Query expression passed to DataCatalog.find_datasets.
    testName : str or None
        Test / step name.
    sensorId : str or None
        Sensor identifier; run file paths are kept only when their base
        name contains it.
    run : str, int or None
        Run identifier (converted to str for the file path lookup).
    outputFile : str or None
        If set, the found paths are written to this file, one per line.
    site : str or None
        Data catalog site; replaced by "SLAC" for vendorCopy/INT mirrors
        and by "slac.lca.archive" for the vendor mirror.
    Print : bool or None
        If true, print the found paths.

    Returns
    -------
    files : list
        Unique full paths of the matching datasets, in discovery order.
    """
    overrides = (('mirrorName', mirrorName), ('FType', FType),
                 ('XtraOpts', XtraOpts), ('testName', testName),
                 ('sensorId', sensorId), ('outputFile', outputFile),
                 ('site', site), ('Print', Print), ('run', run))
    for attr_name, value in overrides:
        if value is not None:
            setattr(self, attr_name, value)

    sourceMap = {
        'BNL-prod': 'BNL-prod/prod/',
        'BNL-test': 'BNL-test/test/',
        'INT-prod': 'SLAC-prod/prod/',
        'INT-test': 'SLAC-test/test/',
        'vendorCopy-prod': 'SLAC-prod/prod/',
        'vendorCopy-test': 'SLAC-test/test/',
        'vendor': 'vendorData/',
        'SAWG-BNL': 'BNL-SAWG/SAWG/',
    }

    folder = '/LSST/'
    use_latest_activity = False
    site = self.site
    use_query_eT = True

    if 'vendorCopy' in self.mirrorName or "INT" in self.mirrorName:
        site = "SLAC"
    elif self.mirrorName == 'vendor':
        folder = (folder + sourceMap['vendor']
                  + self.CCDType.split('-')[0] + '/'
                  + self.sensorId + '/' + self.db + '/')
        use_latest_activity = True
        site = "slac.lca.archive"
        use_query_eT = False
    elif self.mirrorName == 'SAWG-BNL':
        folder = (folder + 'mirror/' + sourceMap[self.mirrorName]
                  + self.CCDType + '/' + self.sensorId + '/' + self.testName)
        use_latest_activity = True
        use_query_eT = False

    folderList = []
    if use_query_eT:
        run_str = self.run if isinstance(self.run, str) else str(self.run)
        filePaths = self.connect.getRunFilepaths(run=run_str,
                                                 stepName=self.testName)
        # get the unique directory paths of files belonging to this sensor
        for step in filePaths:
            for entry in filePaths[step]:
                virtual_path = entry['virtualPath']
                dirpath = os.path.dirname(virtual_path) + '/'
                if (dirpath not in folderList
                        and self.sensorId in os.path.basename(virtual_path)):
                    folderList.append(dirpath)
    else:
        folderList.append(folder)

    query = self.XtraOpts if self.XtraOpts is not None else ''

    files = []
    seen = set()  # O(1) duplicate check while keeping discovery order
    for search_folder in folderList:
        datacatalog = DataCatalog(folder=search_folder, site=site,
                                  use_newest_subfolder=use_latest_activity)
        datasets = datacatalog.find_datasets(query)
        if len(datasets) == 0:
            continue
        for item in datasets.full_paths():
            if self.FType is not None and not item.endswith(self.FType):
                continue
            if item not in seen:
                seen.add(item)
                files.append(item)

    if self.Print:
        print("File paths for files at %s:" % site)
        for item in files:
            print(item)

    # Write file with list of found data files, if requested.  The decision
    # is based on the collected files rather than on the datasets of the
    # last folder searched, which may be empty or may not exist at all.
    if self.outputFile is not None:
        if files:
            print('Writing output file ', self.outputFile, '...')
            with open(self.outputFile, 'w') as ofile:
                ofile.writelines(line + '\n' for line in files)
        else:
            print("Result file requested, but no files found")

    return files
def get_template_files(root_folder, sensor_type, sensor_id, process_name,
                       **kwargs):
    """
    Get files that serve as templates for a particular process_type

    Parameters
    ----------
    root_folder : str
        Top level data catalog folder for search
    sensor_type : str
        Type of sensor, e.g., 'E2V-CCD'
    sensor_id : str
        Name of the sensor, e.g., 'E2V-CCD250-104'
    process_name : str
        Name of the eTraveler process, e.g., 'fe55_acq' or 'dark_acq'

    Keyword arguments
    -----------
    image_type : str or None
        Type of images to find
    test_type : str or None
        Type of test to find images for
    pattern : str
        fnmatch-style glob (not a regular expression) applied to the base
        name of each file; defaults to '*.fits'
    site : str
        Specifies data catalog database to access; defaults to
        'slac.lca.archive'
    sort : bool
        Sort the file names before returning them
    test_version : str
        Listed for callers' convenience; this function does not read it

    Returns
    ----------
    file_list : list
        List of file names for files that match the process_name
        and sensor_id.  Empty if the data catalog query fails.
    """
    pattern = kwargs.get('pattern', '*.fits')
    image_type = kwargs.get('image_type', None)
    test_type = kwargs.get('test_type', None)

    # The environment variable, when set, overrides the derived folder.
    try:
        folder = os.environ['LCATR_DATACATALOG_FOLDER']
    except KeyError:
        folder = make_datacat_path(root_folder=root_folder,
                                   sensor_type=sensor_type,
                                   sensor_id=sensor_id)

    datacat = DataCatalog(folder=folder,
                          site=kwargs.get('site', 'slac.lca.archive'))

    query = '&&'.join(('LSST_NUM=="%s"' % sensor_id,
                       'ProcessName=="%s"' % process_name))
    if image_type is not None:
        query += '&& IMGTYPE == "%s"' % image_type
    if test_type is not None:
        query += '&& TESTTYPE == "%s"' % test_type

    try:
        datasets = datacat.find_datasets(query)
    except DcClientException as eobj:
        # Make the error message a bit more useful for debugging.
        # Exceptions are not guaranteed to carry a `message` attribute,
        # so fall back to the string form rather than failing here.
        detail = str(getattr(eobj, 'message', eobj))
        msg = detail + (":\nFolder = %s\n" % folder)
        msg += "Query = %s\n" % query
        sys.stderr.write(msg)
        # Best effort: report the failure and return no files.
        return []

    file_list = [item for item in datasets.full_paths()
                 if fnmatch.fnmatch(os.path.basename(item), pattern)]
    file_list = sort_unique(file_list)
    if kwargs.get('sort', False):
        file_list = sorted(file_list)
    return file_list