def check_inputs(self, ekey):
    """ Verify the existence of declared input files/lists; abort if any are missing.

    Parameters
    ----------
    ekey : str
        The section of the WCL to look in.

    Returns
    -------
    dict
        Mapping of input section name to the files that were found.
    """
    self.start_exec_task('check_inputs')
    found = {}
    inputs, _ = intgmisc.get_fullnames(self.inputwcl, self.inputwcl, ekey)
    for section, fnames in inputs.items():
        present, absent = intgmisc.check_files(fnames)
        found[section] = present
        if absent:
            for name in absent:
                miscutils.fwdebug_print("ERROR: input '%s' does not exist." % name,
                                        WRAPPER_OUTPUT_PREFIX)
            # dump the working directory contents to aid debugging, then abort
            os.system("pwd")
            os.system("find . -type f")
            sys.exit(3)
    self.end_exec_task(0)
    return found
def get_filename_id_map(self, prov):
    """ Return a mapping of filename (with compression extension) to desfile id.

    Parameters
    ----------
    prov : dict
        Provenance information keyed by provdefs section names.

    Returns
    -------
    dict
        Mapping of filename to DESFILE.ID; empty when prov lists no files.
    """
    if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
        miscutils.fwdebug_print(f"prov = {prov}")

    # collect every filename referenced by the "used" and "was derived from"
    # provenance sections
    allfiles = set()
    if provdefs.PROV_USED in prov:
        for filenames in prov[provdefs.PROV_USED].values():
            for fname in filenames.split(provdefs.PROV_DELIM):
                allfiles.add(fname.strip())
    if provdefs.PROV_WDF in prov:
        for tuples in prov[provdefs.PROV_WDF].values():
            for filenames in tuples.values():
                for fname in filenames.split(provdefs.PROV_DELIM):
                    allfiles.add(fname.strip())

    # bug fix: previously an empty *list* was returned when there were no
    # files but a dict otherwise; always return a dict now
    if not allfiles:
        return {}

    # build a map between filenames (with compression extension) and desfile ID
    gtt_name = self.load_filename_gtt(allfiles)
    sqlstr = f"""SELECT f.filename || f.compression, d.ID FROM DESFILE d, {gtt_name} f WHERE d.filename=f.filename and nullcmp(d.compression, f.compression) = 1"""
    cursor = self.cursor()
    try:
        cursor.execute(sqlstr)
        rows = cursor.fetchall()
    finally:
        # bug fix: ensure the cursor is closed even if the query fails
        cursor.close()
    return dict(rows)
def insert_dictionary_db(self, query, dictionary):
    """ Execute a query with a bind dictionary.

    Parameters
    ----------
    query : str
        Query statement to execute.
    dictionary : dict
        Bind dictionary to use in the query.

    Returns
    -------
    int
        1 on success; on a database error the error is printed and re-raised.
    """
    cur = self.dbh.cursor()
    try:
        cur.execute(query, dictionary)
        if miscutils.fwdebug_check(6, 'FTMGMT_DEBUG'):
            miscutils.fwdebug_print(
                f"dictionary into database {dictionary}")
        success = 1
    except cx_Oracle.DatabaseError as exc:
        error, = exc.args
        # report well-known Oracle error codes in a friendlier form
        if error.code == 955:
            print('Table already exists')
        elif error.code == 1031:
            print('Insufficient privileges')
        print(error.code)
        print(error.message)
        print(error.context)
        raise
    finally:
        # bug fix: the cursor was never closed
        cur.close()
    return success
def set(self, key, val):
    """ Set the value of a key in the wcl, following section ('a.b.c') notation.

    Parameters
    ----------
    key : str
        The key to set the value for.
    val : str
        The value to set.
    """
    if miscutils.fwdebug_check(9, "WCL_DEBUG"):
        miscutils.fwdebug_print("BEG key=%s, val=%s" % (key, val))

    *sections, valkey = key.split('.')
    # walk down to the containing section via the base-class accessor so any
    # overridden __getitem__/__setitem__ logic is bypassed
    node = self
    for sect in sections:
        node = OrderedDict.__getitem__(node, sect)
    OrderedDict.__setitem__(node, valkey, val)

    if miscutils.fwdebug_check(9, "WCL_DEBUG"):
        miscutils.fwdebug_print("END")
def check_inputs(self, ekey):
    """ Check which input files/lists exist; abort the wrapper if any are missing.

    Parameters
    ----------
    ekey : str
        The section of the WCL to look in.

    Returns
    -------
    dict
        Mapping of input section name to the files that were found.
    """
    self.start_exec_task('check_inputs')
    found = {}
    inputs, _ = intgmisc.get_fullnames(self.inputwcl, self.inputwcl, ekey,
                                       get_outputs=False)
    for section, fnames in inputs.items():
        present, absent = intgmisc.check_files(fnames)
        found[section] = present
        if absent:
            for name in absent:
                miscutils.fwdebug_print(f"ERROR: input '{name}' does not exist.",
                                        WRAPPER_OUTPUT_PREFIX)
            # dump the scratch directory contents to aid debugging, then abort
            os.system("pwd")
            os.system("find . -type f")
            sys.exit(3)
    self.end_exec_task(0)
    return found
def job2home(self, filelist, verify=False):
    """ Transfer files from the job scratch area to the home archive.

    Parameters
    ----------
    filelist : dict
        Dictionary containing the file names and path information.
    verify : bool, optional
        Whether the copies should be verified; default False.

    Returns
    -------
    dict
        The copy results.
    """
    if miscutils.fwdebug_check(3, "JOBFILEMVMT_DEBUG"):
        miscutils.fwdebug_print("len(filelist)=%s" % len(filelist))
    if miscutils.fwdebug_check(6, "JOBFILEMVMT_DEBUG"):
        miscutils.fwdebug_print("filelist=%s" % filelist)

    # if staging outside job, this function shouldn't be called
    if self.home is None:
        raise Exception("Home archive info is None. Should not be calling this function")

    dstfiles = copy.deepcopy(filelist)
    root = self.home['root']
    for finfo in dstfiles.values():
        finfo['dst'] = root + '/' + finfo['dst']

    if self.tstats is not None:
        self.tstats.stat_beg_batch('job2home', 'job_scratch', self.home['name'],
                                   self.__module__ + '.' + self.__class__.__name__)
    status, results = disk_utils_local.copyfiles(dstfiles, self.tstats, verify)
    if self.tstats is not None:
        self.tstats.stat_end_batch(status)
    return results
def make_where_clause(dbh, key, value):
    """ Return a properly formatted string for a where clause.

    Parameters
    ----------
    dbh : object
        Database handle supplying quote() and get_regexp_clause().
    key : str
        Column name.
    value : str or list
        Value specification.  A comma-separated string is treated as a list.
        Supports '%' (like patterns), '!' (negation), 'null', and regexp
        metacharacters (handled via get_regexp_clause).

    Returns
    -------
    str
        The where-clause condition for the given key/value.
    """
    if miscutils.fwdebug_check(1, 'PFWFILELIST_DEBUG'):
        miscutils.fwdebug_print("key = %s" % (key))
        miscutils.fwdebug_print("value = %s" % str(value))

    if ',' in value:
        value = value.replace(' ', '').split(',')

    condition = ""
    if type(value) is list:  # multiple values
        extra = []
        ins = []
        nots = []
        for val in value:
            if '%' in val:
                extra.append(make_where_clause(dbh, key, val))
            elif '!' in val:
                nots.append(make_where_clause(dbh, key, val))
            else:
                ins.append(dbh.quote(val))

        # combine the IN-list and the like-pattern clauses with OR
        ors = []
        if ins:
            ors.append("%s IN (%s)" % (key, ','.join(ins)))
        ors.extend(extra)
        # bug fix: previously a leading ' OR ' was emitted when the IN-list
        # was empty but like-patterns were present
        condition = ' OR '.join(ors)
        if ' OR ' in condition:
            condition = '(%s)' % condition

        if nots:
            # bug fix: previously the negations were appended with no ' AND '
            # separating them from the OR group
            if condition:
                condition += ' AND '
            condition += ' AND '.join(nots)
    elif '*' in value or '^' in value or '$' in value or \
            '[' in value or ']' in value or '&' in value:
        condition = dbh.get_regexp_clause(key, value)
    elif '%' in value and '!' not in value:
        condition = '%s like %s' % (key, dbh.quote(value))
        if '\\' in value:
            condition += " ESCAPE '\\'"
    elif '%' in value and '!' in value:
        condition = '%s not like %s' % (key, dbh.quote(value))
        if '\\' in value:
            condition += " ESCAPE '\\'"
    elif '!' in value:
        if value.lower() == 'null':
            condition = "%s is not NULL" % key
        else:
            condition = '%s != %s' % (key, dbh.quote(value))
    else:
        if value.lower() == 'null':
            condition = "%s is NULL" % key
        else:
            condition = "%s = %s" % (key, dbh.quote(value))

    return condition
def update_job_info(self, wcl, jobnum, jobinfo):
    """Update the pfw_job row with information gathered post job from the condor log.

    Parameters
    ----------
    wcl : dict
        Workflow WCL; supplies the job's task id.
    jobnum : str
        Job number whose row is updated.
    jobinfo : dict
        Column/value pairs to store; nothing is updated when empty.
    """
    if miscutils.fwdebug_check(1, 'PFWDB_DEBUG'):
        miscutils.fwdebug_print("Updating job information post job (%s)" % jobnum)
    if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
        miscutils.fwdebug_print("jobinfo=%s" % jobinfo)

    wherevals = {'task_id': wcl['task_id']['job'][jobnum]}
    if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
        miscutils.fwdebug_print("wherevals = %s" % (wherevals))

    if jobinfo:
        self.update_PFW_row('PFW_JOB', jobinfo, wherevals)
    else:
        if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("Found 0 values to update (%s)" % (wherevals))
        if miscutils.fwdebug_check(6, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("\tjobnum = %s, jobinfo = %s" % (jobnum, jobinfo))
def insert_data_query(self, wcl, modname, datatype, dataname, execname, cmdargs, version):
    """Insert a row into the pfw_data_query table.

    Parameters
    ----------
    wcl : dict
        Workflow WCL; supplies attempt and block task ids.
    modname : str
        Module name.
    datatype : str
        Type of the data ('file' or 'list').
    dataname : str
        Name of the data item.
    execname : str
        Executable whose basename is stored.
    cmdargs : str
        Command-line arguments of the query.
    version : str
        Version of the executable.

    Returns
    -------
    int
        The task id created for the data query.
    """
    if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
        miscutils.fwdebug_print("BEG")

    task_id = self.create_task(name='dataquery',
                               info_table='PFW_DATA_QUERY',
                               parent_task_id=wcl['task_id']['begblock'],
                               root_task_id=int(wcl['task_id']['attempt']),
                               label=None,
                               do_begin=True,
                               do_commit=True)
    row = {'pfw_attempt_id': wcl['pfw_attempt_id'],
           'pfw_block_task_id': wcl['task_id']['block'][wcl['blknum']],
           'modname': modname,
           'datatype': datatype,   # file, list
           'dataname': dataname,
           'task_id': task_id,
           'execname': os.path.basename(execname),
           'cmdargs': cmdargs,
           'version': version}
    self.insert_PFW_row('PFW_DATA_QUERY', row)

    if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
        miscutils.fwdebug_print("END")
    return task_id
def insert_attempt_val(self, config):
    """Insert key/val pairs describing an attempt into the pfw_attempt_val table.

    The keys to save come from the SW_SAVE_RUN_VALS config entry; a list
    value produces one row per element.
    """
    if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
        miscutils.fwdebug_print("Inserting into pfw_attempt_val table\n")

    if pfwdefs.SW_SAVE_RUN_VALS not in config:
        return

    row = {'pfw_attempt_id': config['pfw_attempt_id']}
    for key in miscutils.fwsplit(config.getfull(pfwdefs.SW_SAVE_RUN_VALS), ','):
        row['key'] = key
        val = config.getfull(key)
        # normalize scalars to one-element lists so both cases share a loop
        for v in (val if isinstance(val, list) else [val]):
            row['val'] = v
            self.insert_PFW_row('PFW_ATTEMPT_VAL', row)
def insert_job(self, wcl, jobdict):
    """Insert an entry into the pfw_job table (no commit).

    Also records the newly created task id in wcl['task_id']['job'].
    """
    if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
        miscutils.fwdebug_print("Inserting to pfw_job table\n")

    blknum = wcl[pfwdefs.PF_BLKNUM]
    jobnum = jobdict['jobnum']
    row = {'pfw_attempt_id': wcl['pfw_attempt_id'],
           'pfw_block_task_id': int(wcl['task_id']['block'][blknum]),
           'jobnum': int(jobnum),
           'expect_num_wrap': jobdict['numexpwrap'],
           'pipeprod': wcl['pipeprod'],
           'pipever': wcl['pipever'],
           'task_id': self.create_task(name='job',
                                       info_table='pfw_job',
                                       parent_task_id=wcl['task_id']['block'][blknum],
                                       root_task_id=int(wcl['task_id']['attempt']),
                                       label=None,
                                       do_commit=False)}
    wcl['task_id']['job'][jobnum] = row['task_id']
    if 'jobkeys' in jobdict:
        row['jobkeys'] = jobdict['jobkeys']
    self.insert_PFW_row('PFW_JOB', row)
def stat_beg_batch(self, transfer_name, src, dst, transclass=None):
    """ Record the start of a batch transfer between src and dst.

    Creates a task row plus a transfer_batch row and commits both.

    Parameters
    ----------
    transfer_name : str
        Name of the transfer (used as the task name).
    src : str
        Source label (archive name or job scratch).
    dst : str
        Destination label (archive name or job scratch).
    transclass : str, optional
        Transfer class name; default None.

    Returns
    -------
    int
        The batch task id.
    """
    if miscutils.fwdebug_check(3, 'TRANSFERSTATS_DEBUG'):
        miscutils.fwdebug_print(f"beg {transfer_name} {src} {dst} {transclass}")

    batch_tid = self.create_task(name=transfer_name,
                                 info_table='transfer_batch',
                                 parent_task_id=self.parent_task_id,
                                 root_task_id=self.root_task_id,
                                 label=None,
                                 do_begin=True,
                                 do_commit=False)
    self.currvals.update({'transfer_name': transfer_name,
                          'src': src,
                          'dst': dst,
                          'batch_task_id': batch_tid})
    self.basic_insert_row('transfer_batch',
                          {'src': src,
                           'dst': dst,
                           'transfer_class': transclass,
                           'parent_task_id': self.parent_task_id,
                           'task_id': batch_tid})
    self.commit()

    if miscutils.fwdebug_check(3, 'TRANSFERSTATS_DEBUG'):
        miscutils.fwdebug_print("end")
    return batch_tid
def stat_beg_file(self, filename):
    """ Record the start of a single-file transfer.

    When per-file stats are enabled, inserts a row into the transfer_file
    table (and task table) and commits; otherwise only counts the file.

    Parameters
    ----------
    filename : str
        Name of the file being transferred.

    Returns
    -------
    int
        The file task id (-1 when per-file stats are disabled).
    """
    self.currvals['numfiles'] += 1
    self.currvals['file_task_id'] = -1

    if self.transfer_stats_per_file:
        if miscutils.fwdebug_check(3, 'TRANSFERSTATS_DEBUG'):
            miscutils.fwdebug_print(f"beg - (unknown)")
        batch_tid = self.currvals['batch_task_id']
        if batch_tid is None:
            raise Exception('Cannot call this function without prior calling stat_beg_batch')
        file_tid = self.create_task(name='transfer_file',
                                    info_table='transfer_file',
                                    parent_task_id=batch_tid,
                                    root_task_id=self.root_task_id,
                                    label=None,
                                    do_begin=True,
                                    do_commit=False)
        self.basic_insert_row('transfer_file', {'filename': filename,
                                                'task_id': file_tid,
                                                'batch_task_id': batch_tid})
        self.commit()
        self.currvals['file_task_id'] = file_tid

    if miscutils.fwdebug_check(3, 'TRANSFERSTATS_DEBUG'):
        miscutils.fwdebug_print(f"end - file_task_id = {self.currvals['file_task_id']}")
    return self.currvals['file_task_id']
def write_outputwcl(self, outfilename=None):
    """ Write the output wcl to a file.

    Parameters
    ----------
    outfilename : str, optional
        Name of the output wcl file to write.  Default is ``None``, which
        means the name stored in the input wcl is used.
    """
    if outfilename is None:
        outfilename = self.inputwcl['wrapper']['outputwcl']
    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
        miscutils.fwdebug_print("outfilename = %s" % outfilename,
                                WRAPPER_OUTPUT_PREFIX)

    # make sure the directory for the output wcl exists before writing
    wcl_dir = miscutils.parse_fullname(outfilename, miscutils.CU_PARSE_PATH)
    if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
        miscutils.fwdebug_print("outwcldir = %s" % wcl_dir,
                                WRAPPER_OUTPUT_PREFIX)
    miscutils.coremakedirs(wcl_dir)

    with open(outfilename, 'w') as wclfh:
        self.outputwcl.write(wclfh, True)
def isLoaded(self):
    """ Determine whether this file's data were already loaded into the database.

    Compares the number of rows already ingested (matched by file name)
    with the number of objects in the catalog.

    Returns
    -------
    bool
        True if data from the file were previously ingested.
    """
    numDbObjects = self.numAlreadyIngested()
    numCatObjects = self.getNumObjects()
    if numDbObjects <= 0:
        return False
    if numDbObjects == numCatObjects:
        self.info("INFO: file " + self.fullfilename +
                  " already ingested with the same number of" +
                  " objects. Skipping.")
    else:  # pragma: no cover
        miscutils.fwdebug_print(
            "ERROR: file " + self.fullfilename +
            " already ingested, but the number of objects is" +
            " DIFFERENT: catalog=" + str(numCatObjects) +
            "; DB=" + str(numDbObjects) + ".")
    return True
def insert_PFW_row(self, pfwtable, row):
    """Insert a single row into the given PFW table and commit immediately.

    Parameters
    ----------
    pfwtable : str
        Name of the PFW table to insert into.
    row : dict
        Column/value pairs for the new row.
    """
    self.basic_insert_row(pfwtable, row)
    self.commit()
    if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
        miscutils.fwdebug_print("end")
def home2job(self, filelist):
    """ From inside a job, pull files from the home archive to job scratch.

    Parameters
    ----------
    filelist : dict
        Dictionary containing the file names and path information.

    Returns
    -------
    dict
        The copy results.
    """
    if miscutils.fwdebug_check(3, "JOBFILEMVMT_DEBUG"):
        miscutils.fwdebug_print("len(filelist)=%s" % len(filelist))
    if miscutils.fwdebug_check(6, "JOBFILEMVMT_DEBUG"):
        miscutils.fwdebug_print("filelist=%s" % filelist)

    # if staging outside job, this function shouldn't be called
    if self.home is None:
        raise Exception("Home archive info is None. Should not be calling this function")

    srcfiles = copy.deepcopy(filelist)
    root = self.home['root_http']
    for finfo in srcfiles.values():
        finfo['src'] = root + '/' + finfo['src']

    if self.tstats is not None:
        self.tstats.stat_beg_batch('home2job', self.home['name'], 'job_scratch',
                                   self.__module__ + '.' + self.__class__.__name__)
    status, results = self.HU.copyfiles(srcfiles, self.tstats)
    if self.tstats is not None:
        self.tstats.stat_end_batch(status)
    return results
def ingest_contents(self, listfullnames, **kwargs):
    """ Ingest certain content into non-metadata tables.

    Each file is ingested twice: once for (filetype, tablename,
    didatadefs) and once for (filetype2, tablename2, didatadefs2).

    Parameters
    ----------
    listfullnames : list
        Full names of the files whose contents should be ingested.
    """
    assert isinstance(listfullnames, list)

    # both (filetype, table, datadefs) triples get identical treatment;
    # loop over them instead of duplicating the ingest/report logic
    targets = ((self.filetype, self.tablename, self.didatadefs),
               (self.filetype2, self.tablename2, self.didatadefs2))
    for fname in listfullnames:
        miscutils.fwdebug_print("********************* %s" % fname)
        for ftype, tname, datadefs in targets:
            numrows = dfiutils.datafile_ingest_main(self.dbh, ftype, fname,
                                                    tname, datadefs)
            if numrows in [None, 0]:
                miscutils.fwdebug_print(
                    f"WARN: 0 rows ingested from {fname} for table {tname}")
            elif miscutils.fwdebug_check(1, 'FTMGMT_DEBUG'):
                miscutils.fwdebug_print(
                    f"INFO: {numrows} rows ingested from {fname} for table {tname}")
def target2job(self, filelist):
    """ Transfer files from the target archive to the job scratch directory.

    Parameters
    ----------
    filelist : dict
        Dictionary containing the file names and path information.

    Returns
    -------
    dict
        The copy results.
    """
    if miscutils.fwdebug_check(3, "JOBFILEMVMT_DEBUG"):
        miscutils.fwdebug_print("len(filelist)=%s" % len(filelist))
    if miscutils.fwdebug_check(6, "JOBFILEMVMT_DEBUG"):
        miscutils.fwdebug_print("filelist=%s" % filelist)

    if self.target is None:
        raise Exception("Target archive info is None. Should not be calling this function")

    srcfiles = copy.deepcopy(filelist)
    root = self.target['root']
    for finfo in srcfiles.values():
        finfo['src'] = root + '/' + finfo['src']

    if self.tstats is not None:
        self.tstats.stat_beg_batch('target2job', self.target['name'], 'job_scratch',
                                   self.__module__ + '.' + self.__class__.__name__)
    status, results = disk_utils_local.copyfiles(srcfiles, self.tstats)
    if self.tstats is not None:
        self.tstats.stat_end_batch(status)
    return results
def generateRows(self):
    """ Convert the input CSV data into a list of lists.

    Builds per-column cast callables from the db dictionary, parses the
    CSV file, and (optionally) verifies the resulting row count.

    Returns
    -------
    int
        0 on success, 1 on failure (self.status is set to 1 on failure).
    """
    try:
        # create a list of objects used to cast the data, one per column
        types = []
        for item in self.dbDict[self.hdu].values():
            dtype = item.dtype.upper()
            if dtype == "INT":
                types.append(int)
            elif dtype == "FLOAT":
                types.append(float)
            else:
                types.append(str)
        self.parseCSV(self.fullfilename, types)
        self.orderedColumns = list(self.dbDict[self.hdu])
        if self.checkcount and len(self.idDict) != len(self.sqldata):
            self.status = 1
            miscutils.fwdebug_print(f"Incorrect number of rows in {self.shortfilename}. Count is {len(self.sqldata):d}, should be {len(self.idDict):d}")
            return 1
        return 0
    except Exception as exc:  # pragma: no cover
        # bug fix: was a bare 'except:' which also swallowed
        # SystemExit/KeyboardInterrupt
        print("Exception raised:", exc)
        print("Traceback: ")
        traceback.print_tb(exc.__traceback__)
        print(" ")
        self.status = 1
        return 1
def get_grid_info(self):
    """Create a dictionary of grid job submission options.

    Each known option is looked up first under its underscored name and
    then under the underscore-less name; found values are stored under
    the underscore-less name.

    Returns
    -------
    dict
        The grid submission options that were found.
    """
    known_keys = ['stdout', 'stderr', 'queue', 'psn', 'job_type',
                  'max_wall_time', 'max_time', 'max_cpu_time', 'max_memory',
                  'min_memory', 'count', 'host_count', 'host_types',
                  'host_xcount', 'xcount', 'reservation_id', 'grid_resource',
                  'grid_type', 'grid_host', 'grid_port', 'batch_type',
                  'globus_extra', 'environment', 'dynslots']
    vals = {}
    for key in known_keys:
        newkey = key.replace('_', '')
        found, value = self.search(key)
        if not found:
            found, value = self.search(newkey)
        if found:
            vals[newkey] = value
        elif miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
            miscutils.fwdebug_print("Could not find value for %s(%s)" % (key, newkey))
    return vals
def get_file_fullnames(sect, filewcl, fullwcl):
    """ Get the full names of the files in the specified section.

    Parameters
    ----------
    sect : str
        The WCL section to use (dotted notation; the second part names
        the file section).
    filewcl : WCL
        The WCL to look the section up in.
    fullwcl : WCL
        The full WCL, used to expand variables in the full names.

    Returns
    -------
    set
        The full file names (empty if section or fullname is absent).
    """
    sectname = sect.split('.')[1]
    if miscutils.fwdebug_check(3, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print("INFO: Beg sectname=%s" % sectname)

    fnames = []
    if sectname in filewcl and 'fullname' in filewcl[sectname]:
        expanded = replfuncs.replace_vars(filewcl[sectname]['fullname'], fullwcl)[0]
        fnames = miscutils.fwsplit(expanded, ',')
        if miscutils.fwdebug_check(3, 'INTGMISC_DEBUG'):
            miscutils.fwdebug_print("INFO: fullname = %s" % fnames)
    return set(fnames)
def blocking_transfer(self, filelist):
    """ Perform a blocking transfer between the source and destination archives.

    Parameters
    ----------
    filelist : dict
        Dictionary of the files to transfer.

    Returns
    -------
    tuple
        Transfer results from the copy.
    """
    miscutils.fwdebug_print("\tNumber files to transfer: %d" % len(filelist))
    if miscutils.fwdebug_check(1, "ARCHIVETRANSFER_DEBUG"):
        miscutils.fwdebug_print("\tfilelist: %s" % filelist)

    files2copy = copy.deepcopy(filelist)
    # prepend the archive roots to turn relative paths into full paths
    for finfo in files2copy.values():
        finfo['src'] = '%s/%s' % (self.src_archive_info['root'], finfo['src'])
        finfo['dst'] = '%s/%s' % (self.dst_archive_info['root'], finfo['dst'])
    return disk_utils_local.copyfiles(files2copy, None)
def get_exec_sections(wcl, prefix):
    """ Return the exec sections appearing in the given wcl.

    An exec section is any key of the form prefix followed by digits.

    Parameters
    ----------
    wcl : WCL
        The WCL object to look in.
    prefix : str
        The exec prefix to look for (matched literally).

    Returns
    -------
    dict
        Dictionary of the found exec section names and their contents.
    """
    # bug fix: escape the prefix so regex metacharacters in it are matched
    # literally; also compile the pattern once instead of per key
    pat = re.compile(r"^%s\d+$" % re.escape(prefix))
    execs = {}
    for key, val in wcl.items():
        if miscutils.fwdebug_check(3, "DEBUG"):
            miscutils.fwdebug_print("\tsearching for exec prefix in %s" % key)
        if pat.search(key):
            if miscutils.fwdebug_check(4, "DEBUG"):
                miscutils.fwdebug_print("\tFound exec prefex %s" % key)
            execs[key] = val
    return execs
def basic_register_file_data(self, ftype, fullnames, pfw_attempt_id, wgb_task_id,
                             do_update, update_info=None, filepat=None):
    """ Save artifact, metadata, wgb provenance, and simple contents for files.

    Parameters
    ----------
    ftype : str
        Filetype of the given files.
    fullnames : list
        Full names of the files to register.
    pfw_attempt_id : int or None
        Attempt id to attach to each file (None is allowed).
    wgb_task_id : int
        Id of the task that generated the files.
    do_update : bool
        Whether metadata tasks should perform updates.
    update_info : dict, optional
        Update information passed to the filetype manager.
    filepat : str, optional
        File pattern used when loading the filetype manager.

    Returns
    -------
    dict
        Per-file dict with 'diskinfo' and 'metadata' keys (None for a
        file whose data could not be gathered).
    """
    self.dynam_load_ftmgmt(ftype, filepat)

    results = {}
    for fname in fullnames:
        try:
            metadata = self.ftmgmt.perform_metadata_tasks(fname, do_update, update_info)
            if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                # bug fix: was '"INFO: metadata to ingest" + metadata',
                # a str + dict concatenation that raises TypeError
                miscutils.fwdebug_print(f"INFO: metadata to ingest {metadata}")

            fileinfo = diskutils.get_single_file_disk_info(fname,
                                                           save_md5sum=True,
                                                           archive_root=None)
            fileinfo['filetype'] = ftype
            fileinfo['wgb_task_id'] = int(wgb_task_id)
            fileinfo['pfw_attempt_id'] = None if pfw_attempt_id is None else int(pfw_attempt_id)
            del fileinfo['path']

            results[fname] = {'diskinfo': fileinfo, 'metadata': metadata}
        except IOError:
            miscutils.fwdebug_print(f"\n\nError: Problem gathering data for file {fname}")
            traceback.print_exc(1, sys.stdout)
            results[fname] = None
    return results
def get_single_file_disk_info(fname, save_md5sum=False, archive_root=None):
    """ Gather disk info (name, path, size, md5sum, ...) for a single file.

    Parameters
    ----------
    fname : str
        Full name of the file to examine.
    save_md5sum : bool, optional
        Whether to compute and store the file's md5sum; default False.
    archive_root : str, optional
        If given and the parsed path is absolute, relpath/rel_filename
        entries relative to this root are added.

    Returns
    -------
    dict
        The disk information for the file.
    """
    if miscutils.fwdebug_check(3, "DISK_UTILS_LOCAL_DEBUG"):
        miscutils.fwdebug_print(f"fname={fname}, save_md5sum={save_md5sum}, archive_root={archive_root}")

    parsemask = miscutils.CU_PARSE_PATH | miscutils.CU_PARSE_FILENAME | miscutils.CU_PARSE_COMPRESSION

    (path, filename, compress) = miscutils.parse_fullname(fname, parsemask)
    if miscutils.fwdebug_check(3, "DISK_UTILS_LOCAL_DEBUG"):
        # bug fix: debug output previously printed a literal placeholder
        # instead of the parsed filename
        miscutils.fwdebug_print(f"path={path}, filename={filename}, compress={compress}")

    fdict = {'filename': filename,
             'compression': compress,
             'path': path,
             'filesize': os.path.getsize(fname)}

    if save_md5sum:
        fdict['md5sum'] = get_md5sum_file(fname)

    if archive_root and path.startswith('/'):
        fdict['relpath'] = path[len(archive_root) + 1:]
        compext = "" if compress is None else compress
        # bug fix: rel_filename previously contained a literal placeholder
        # instead of the parsed filename
        fdict['rel_filename'] = f"{fdict['relpath']}/{filename}{compext}"
    return fdict
def _get_file_header_key_info(self, key):
    """ Return the comment and fits data type for a file header key.

    Looks the key up in the configured file header definitions; a
    warning is printed for each missing sub-entry.

    Parameters
    ----------
    key : str
        The key to look for in the header definitions.

    Returns
    -------
    tuple
        (description, fits data type); either may be None if undefined.
    """
    file_header_info = self.config['file_header']
    ucomment = None
    udatatype = None
    if key in file_header_info:
        keyinfo = file_header_info[key]
        if 'description' in keyinfo:
            ucomment = keyinfo['description']
        else:
            miscutils.fwdebug_print("WARN: could not find description for key=%s" % (key))
        if 'fits_data_type' in keyinfo:
            udatatype = keyinfo['fits_data_type']
        else:
            miscutils.fwdebug_print("WARN: could not find fits_data_type for key=%s" % (key))
    return ucomment, udatatype
def list_missing_contents(filemgmt, ftype, filelist):
    """ Return the files from the given list that still need contents ingested.

    Parameters
    ----------
    filemgmt : object
        File management object used to check for ingested contents.
    ftype : str
        Filetype of the given files.
    filelist : list
        List of file dicts to check.

    Returns
    -------
    list
        File names whose contents still need to be ingested.
    """
    if miscutils.fwdebug_check(6, "REGISTER_FILES_DEBUG"):
        miscutils.fwdebug_print(f"filelist={filelist}")

    print("\tChecking which files still need contents ingested", flush=True)
    starttime = time.time()
    results = filemgmt.has_contents_ingested(ftype, filelist)
    endtime = time.time()
    print(f"({endtime - starttime:0.2f} secs)", flush=True)

    # results[name] is False when the file has no contents ingested yet
    misslist = [fname for fname, ingested in results.items() if not ingested]

    print(f"\t\t{len(filelist) - len(misslist):0d} file(s) already have content ingested",
          flush=True)
    print(f"\t\t{len(misslist):0d} file(s) still to have content ingested", flush=True)

    if miscutils.fwdebug_check(6, "REGISTER_FILES_DEBUG"):
        miscutils.fwdebug_print(f"misslist={misslist}")
    return misslist
def _gather_metadata_file(self, fullname, **kwargs):
    """ Gather metadata for a single manifest file.

    Extends the generic metadata with the nite derived from the first
    exposure in the manifest that carries a date.

    Parameters
    ----------
    fullname : str
        Full name of the file to gather metadata for.

    Returns
    -------
    dict
        The gathered metadata.

    Raises
    ------
    KeyError
        If no exposure in the manifest has a date value.
    """
    if miscutils.fwdebug_check(3, 'FTMGMT_DEBUG'):
        miscutils.fwdebug_print(f"INFO: beg file={fullname}")

    metadata = FtMgmtGeneric._gather_metadata_file(self, fullname, **kwargs)

    # need nite for the archive path; manifest json lives on the first line
    with open(fullname, 'r') as jsonfh:
        linedata = json.loads(jsonfh.readline())

    datestr = None
    for exposure in linedata['exposures']:
        if 'date' in exposure:
            datestr = exposure['date']
            break
    if datestr is None:
        raise KeyError('Could not find date value for any exposure in manifest')
    metadata['nite'] = misctime.convert_utc_str_to_nite(datestr)

    if miscutils.fwdebug_check(3, 'FTMGMT_DEBUG'):
        miscutils.fwdebug_print("INFO: end")
    return metadata
def get_job_info(self, wherevals):
    """ Get job information for jobs matching the given where values.

    Parameters
    ----------
    wherevals : dict
        Column/value pairs used to build the where clause; also supplies
        the bind values for the query.

    Returns
    -------
    dict
        Job info rows keyed by task id.  Jobs whose status is not
        PF_EXIT_SUCCESS have their QCF messages attached under 'message'.
    """
    whclause = [f"{c}={self.get_named_bind_string(c)}" for c in wherevals.keys()]
    sql = f"select j.jobkeys as jobkeys,j.jobnum as jobnum, j.expect_num_wrap as expect_num_wrap, j.task_id as task_id, j.pfw_block_task_id as pfw_block_task_id, t.status as status, t.start_time as start_time, t.end_time as end_time from pfw_job j, task t where t.id=j.task_id and {' and '.join(whclause)}"
    if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
        miscutils.fwdebug_print(f"sql> {sql}")
    if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
        miscutils.fwdebug_print(f"params> {wherevals}")

    curs = self.cursor()
    curs.execute(sql, wherevals)
    desc = [d[0].lower() for d in curs.description]
    jobinfo = {}
    get_messages = []
    for line in curs:
        d = dict(zip(desc, line))
        d['message'] = []
        # remember failed jobs so their QCF messages can be attached
        if d['status'] != pfwdefs.PF_EXIT_SUCCESS:
            get_messages.append(d['task_id'])
        jobinfo[d['task_id']] = d

    # bug fix: condition was inverted ('if not get_messages'), so messages
    # were only looked up when there were no failed tasks (with an empty
    # id list) and failed jobs never received their QCF messages
    if get_messages:
        qdbh = qcfdb.QCFDB(connection=self)
        qcmsg = qdbh.get_all_qcf_messages_by_task_id(get_messages, level=3)
        for tid, val in qcmsg.items():
            jobinfo[tid]['message'] = val
    return jobinfo