def check_inputs(self, ekey):
        """ Verify that the input files/lists named in the WCL exist

            Parameters
            ----------
            ekey : str
                The section of the WCL to look in.

            Returns
            -------
            dict
                Maps each section name to the input files that were found.

            Notes
            -----
            If any input is missing, each missing name is reported, the
            working directory and the files beneath it are listed, and the
            process exits with status 3 (the exec task is not ended).
        """
        self.start_exec_task('check_inputs')

        fullnames, _ = intgmisc.get_fullnames(self.inputwcl, self.inputwcl, ekey)

        found = {}
        for section in fullnames:
            present, absent = intgmisc.check_files(fullnames[section])
            found[section] = present
            if not absent:
                continue

            # missing inputs are fatal: report them all, then abort
            for fname in absent:
                miscutils.fwdebug_print(
                    "ERROR: input '%s' does not exist." % fname,
                    WRAPPER_OUTPUT_PREFIX)
            # show where we are and what is actually on disk to aid debugging
            os.system("pwd")
            os.system("find . -type f")
            sys.exit(3)

        self.end_exec_task(0)
        return found
    def get_filename_id_map(self, prov):
        """ Return a mapping of filename to desfile id

            Parameters
            ----------
            prov : dict
                Provenance information.  File names are read from the
                ``provdefs.PROV_USED`` and ``provdefs.PROV_WDF`` entries,
                whose values are ``provdefs.PROV_DELIM``-separated strings.

            Returns
            -------
            dict
                Maps filename (with compression extension) to DESFILE id.
                Empty when ``prov`` names no files.
        """

        if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
            miscutils.fwdebug_print(f"prov = {prov}")

        # collect every delimited filename string from both provenance sections
        namelists = []
        if provdefs.PROV_USED in prov:
            namelists.extend(prov[provdefs.PROV_USED].values())
        if provdefs.PROV_WDF in prov:
            for tuples in prov[provdefs.PROV_WDF].values():
                namelists.extend(tuples.values())

        allfiles = {fname.strip()
                    for filenames in namelists
                    for fname in filenames.split(provdefs.PROV_DELIM)}

        if not allfiles:
            # always hand back a dict so callers see a single return type
            return {}

        # build a map between filenames (with compression extension) and desfile ID
        gtt_name = self.load_filename_gtt(allfiles)
        sqlstr = f"""SELECT f.filename || f.compression, d.ID
                FROM DESFILE d, {gtt_name} f
                WHERE d.filename=f.filename and
                      nullcmp(d.compression, f.compression) = 1"""
        cursor = self.cursor()
        try:
            cursor.execute(sqlstr)
            result = cursor.fetchall()
        finally:
            # release the cursor even when the query fails
            cursor.close()

        return dict(result)
    def insert_dictionary_db(self, query, dictionary):
        """ Execute a statement using bind values taken from a dictionary

            :param query: string with query statement
            :param dictionary: dictionary to use in query
            :return: 1 when the statement executed; on a database error the
                     details are printed and the exception is re-raised
        """
        known_errors = {955: 'Table already exists',
                        1031: 'Insufficient privileges'}

        try:
            dbcursor = self.dbh.cursor()
            dbcursor.execute(query, dictionary)
            if miscutils.fwdebug_check(6, 'FTMGMT_DEBUG'):
                miscutils.fwdebug_print(
                    f"dictionary into database {dictionary}")
        except cx_Oracle.DatabaseError as exc:
            error, = exc.args
            # give a friendlier hint for the error codes we recognise
            if error.code in known_errors:
                print(known_errors[error.code])
            print(error.code)
            print(error.message)
            print(error.context)
            raise
        return 1
Exemple #4
0
    def set(self, key, val):
        """ Store a value in the wcl under a (possibly dotted) key

            Parameters
            ----------
            key : str
                The key to set the value for.  Dots separate nested
                section names; the last component is the key written.

            val : str
                The value to set.
        """

        if miscutils.fwdebug_check(9, "WCL_DEBUG"):
            miscutils.fwdebug_print("BEG key=%s, val=%s" % (key, val))

        *sections, leaf = key.split('.')

        # descend through the sections with the plain OrderedDict accessors
        target = self
        for name in sections:
            target = OrderedDict.__getitem__(target, name)

        OrderedDict.__setitem__(target, leaf, val)

        if miscutils.fwdebug_check(9, "WCL_DEBUG"):
            miscutils.fwdebug_print("END")
    def check_inputs(self, ekey):
        """ Verify that the input files/lists named in the WCL exist

            The full names are looked up in section ``ekey`` of the input
            WCL with ``get_outputs=False``.  Returns a dict mapping each
            section name to the files that were found.  If anything is
            missing, each missing name is reported, the working directory
            and the files beneath it are listed, and the process exits
            with status 3.
        """

        self.start_exec_task('check_inputs')

        fullnames, _ = intgmisc.get_fullnames(self.inputwcl, self.inputwcl,
                                              ekey, get_outputs=False)

        found = {}
        for section in fullnames:
            present, absent = intgmisc.check_files(fullnames[section])
            found[section] = present
            if not absent:
                continue

            # missing inputs are fatal: report them all, then abort
            for fname in absent:
                miscutils.fwdebug_print(
                    f"ERROR: input '{fname}' does not exist.",
                    WRAPPER_OUTPUT_PREFIX)
            os.system("pwd")
            os.system("find . -type f")
            sys.exit(3)

        self.end_exec_task(0)
        return found
Exemple #6
0
    def job2home(self, filelist, verify=False):
        """ Copy files from the job to the home archive

            Parameters
            ----------
            filelist : dict
                Dictionary containing the file names and path information

            verify : bool, optional
                Passed through to the copy routine. Default is ``False``.

            Returns
            -------
            dict
                The results

            Raises
            ------
            Exception
                If no home archive information is available.
        """
        if miscutils.fwdebug_check(3, "JOBFILEMVMT_DEBUG"):
            miscutils.fwdebug_print("len(filelist)=%s" % len(filelist))
        if miscutils.fwdebug_check(6, "JOBFILEMVMT_DEBUG"):
            miscutils.fwdebug_print("filelist=%s" % filelist)

        # if staging outside job, this function shouldn't be called
        if self.home is None:
            raise Exception(
                "Home archive info is None.   Should not be calling this function"
            )

        # work on a copy so the caller's entries keep their relative paths
        absfilelist = copy.deepcopy(filelist)
        for entry in absfilelist.values():
            entry['dst'] = '/'.join((self.home['root'], entry['dst']))

        if self.tstats is not None:
            transclass = self.__module__ + '.' + self.__class__.__name__
            self.tstats.stat_beg_batch('job2home', 'job_scratch',
                                       self.home['name'], transclass)

        status, results = disk_utils_local.copyfiles(absfilelist, self.tstats,
                                                     verify)

        if self.tstats is not None:
            self.tstats.stat_end_batch(status)
        return results
Exemple #7
0
def make_where_clause(dbh, key, value):
    """ Return a properly formatted condition string for a where clause

        Parameters
        ----------
        dbh : object
            Database handle; must provide ``quote`` and
            ``get_regexp_clause``.

        key : str
            Column name the condition applies to.

        value : str or list
            Value(s) to match.  A string containing commas is split into
            a list (after removing spaces).  Within a single value,
            regular-expression characters (``* ^ $ [ ] &``) select a
            regexp clause, ``%`` a LIKE match, ``!`` a negated match and
            ``null`` an IS NULL test.

        Returns
        -------
        str
            The condition.  For a list, plain values become an ``IN``
            list OR-ed with any LIKE conditions, and negated conditions
            are AND-ed onto that.
    """

    if miscutils.fwdebug_check(1, 'PFWFILELIST_DEBUG'):
        miscutils.fwdebug_print("key = %s" % (key))
        miscutils.fwdebug_print("value = %s" % str(value))

    if ',' in value:
        value = value.replace(' ', '').split(',')

    if isinstance(value, list):  # multiple values
        likes = []
        plain = []
        negated = []
        for val in value:
            if '%' in val:
                likes.append(make_where_clause(dbh, key, val))
            elif '!' in val:
                negated.append(make_where_clause(dbh, key, val))
            else:
                plain.append(dbh.quote(val))

        positives = []
        if plain:
            positives.append("%s IN (%s)" % (key, ','.join(plain)))
        positives.extend(likes)

        condition = ' OR '.join(positives)
        if ' OR ' in condition:
            condition = '(%s)' % condition

        if negated:
            # join the negations to whatever precedes them; without the
            # connector the result was not valid SQL
            if condition:
                condition += ' AND '
            condition += ' AND '.join(negated)
        return condition

    if any(char in value for char in '*^$[]&'):
        return dbh.get_regexp_clause(key, value)

    if '%' in value:
        operator = 'not like' if '!' in value else 'like'
        condition = '%s %s %s' % (key, operator, dbh.quote(value))
        if '\\' in value:
            condition += " ESCAPE '\\'"
        return condition

    if '!' in value:
        # NOTE(review): a value containing '!' can never equal 'null', so
        # the "is not NULL" branch looks unreachable -- confirm the
        # intended syntax for negated values before changing it
        if value.lower() == 'null':
            return "%s is not NULL" % key
        return '%s != %s' % (key, dbh.quote(value))

    if value.lower() == 'null':
        return "%s is NULL" % key
    return "%s = %s" % (key, dbh.quote(value))
Exemple #8
0
    def update_job_info(self, wcl, jobnum, jobinfo):
        """Update the pfw_job row for a job.

        The row is selected by the task id stored for ``jobnum`` in
        ``wcl['task_id']['job']`` and updated with the values in
        ``jobinfo``.  Nothing is written when ``jobinfo`` is empty.
        """
        if miscutils.fwdebug_check(1, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("Updating job information post job (%s)" %
                                    jobnum)
        if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("jobinfo=%s" % jobinfo)

        wherevals = {'task_id': wcl['task_id']['job'][jobnum]}
        if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("wherevals = %s" % (wherevals))

        if len(jobinfo) > 0:
            self.update_PFW_row('PFW_JOB', jobinfo, wherevals)
            return

        # nothing to update; only say so when debugging
        if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("Found 0 values to update (%s)" %
                                    (wherevals))
        if miscutils.fwdebug_check(6, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("\tjobnum = %s, jobinfo = %s" %
                                    (jobnum, jobinfo))
Exemple #9
0
    def insert_data_query(self, wcl, modname, datatype, dataname, execname,
                          cmdargs, version):
        """Create a 'dataquery' task and insert its row into pfw_data_query.

        Returns the id of the newly created task.
        """
        if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("BEG")

        task_ids = wcl['task_id']
        parent_tid = task_ids['begblock']

        row = {
            'pfw_attempt_id': wcl['pfw_attempt_id'],
            'pfw_block_task_id': task_ids['block'][wcl['blknum']],
            'modname': modname,
            'datatype': datatype,  # file, list
            'dataname': dataname,
        }
        row['task_id'] = self.create_task(
            name='dataquery',
            info_table='PFW_DATA_QUERY',
            parent_task_id=parent_tid,
            root_task_id=int(wcl['task_id']['attempt']),
            label=None,
            do_begin=True,
            do_commit=True)
        # only the executable's base name is stored
        row['execname'] = os.path.basename(execname)
        row['cmdargs'] = cmdargs
        row['version'] = version

        self.insert_PFW_row('PFW_DATA_QUERY', row)

        if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("END")
        return row['task_id']
Exemple #10
0
    def insert_attempt_val(self, config):
        """Insert key/val pairs into the pfw_attempt_val table.

        The keys to save are listed (comma separated) under
        ``pfwdefs.SW_SAVE_RUN_VALS`` in the config.  One row is inserted
        per value; a key whose value is a list yields one row per item.
        Nothing is inserted when the config has no such entry.
        """
        if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("Inserting into pfw_attempt_val table\n")

        row = {'pfw_attempt_id': config['pfw_attempt_id']}

        if pfwdefs.SW_SAVE_RUN_VALS not in config:
            return

        keys2save = config.getfull(pfwdefs.SW_SAVE_RUN_VALS)
        for key in miscutils.fwsplit(keys2save, ','):
            row['key'] = key
            val = config.getfull(key)
            # treat a scalar like a one-item list so both cases share a loop
            values = val if isinstance(val, list) else [val]
            for item in values:
                row['val'] = item
                self.insert_PFW_row('PFW_ATTEMPT_VAL', row)
Exemple #11
0
    def insert_job(self, wcl, jobdict):
        """Create a 'job' task and insert its row into the pfw_job table.

        The new task id is also recorded in ``wcl['task_id']['job']`` under
        the job number.  ``jobkeys`` is stored only when ``jobdict`` has it.
        """
        if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print("Inserting to pfw_job table\n")

        blknum = wcl[pfwdefs.PF_BLKNUM]
        blktid = int(wcl['task_id']['block'][blknum])

        row = {
            'pfw_attempt_id': wcl['pfw_attempt_id'],
            'pfw_block_task_id': blktid,
            'jobnum': int(jobdict['jobnum']),
            'expect_num_wrap': jobdict['numexpwrap'],
            'pipeprod': wcl['pipeprod'],
            'pipever': wcl['pipever'],
        }

        new_task_id = self.create_task(
            name='job',
            info_table='pfw_job',
            parent_task_id=wcl['task_id']['block'][blknum],
            root_task_id=int(wcl['task_id']['attempt']),
            label=None,
            do_commit=False)
        row['task_id'] = new_task_id
        wcl['task_id']['job'][jobdict['jobnum']] = new_task_id

        if 'jobkeys' in jobdict:
            row['jobkeys'] = jobdict['jobkeys']

        self.insert_PFW_row('PFW_JOB', row)
    def stat_beg_batch(self, transfer_name, src, dst, transclass=None):
        """ Record the start of a batch transfer between src and dst (archive or job scratch)

            Remembers the transfer details in ``self.currvals``, creates a
            task for the batch, inserts a matching row into the
            transfer_batch table, commits, and returns the batch task id.
        """

        if miscutils.fwdebug_check(3, 'TRANSFERSTATS_DEBUG'):
            miscutils.fwdebug_print(f"beg {transfer_name} {src} {dst} {transclass}")

        self.currvals['transfer_name'] = transfer_name
        self.currvals['src'] = src
        self.currvals['dst'] = dst

        batch_id = self.create_task(name=transfer_name,
                                    info_table='transfer_batch',
                                    parent_task_id=self.parent_task_id,
                                    root_task_id=self.root_task_id,
                                    label=None,
                                    do_begin=True,
                                    do_commit=False)
        self.currvals['batch_task_id'] = batch_id

        row = {
            'src': src,
            'dst': dst,
            'transfer_class': transclass,
            'parent_task_id': self.parent_task_id,
            'task_id': self.currvals['batch_task_id'],
        }
        self.basic_insert_row('transfer_batch', row)
        # the task was created with do_commit=False; commit both together
        self.commit()

        if miscutils.fwdebug_check(3, 'TRANSFERSTATS_DEBUG'):
            miscutils.fwdebug_print("end")
        return self.currvals['batch_task_id']
    def stat_beg_file(self, filename):
        """ Record the start of a single file transfer

            The file counter is always incremented.  When per-file
            statistics are enabled, a 'transfer_file' task and a matching
            transfer_file row are created and committed.

            Returns the file task id, or -1 when per-file statistics are
            disabled.
        """

        self.currvals['numfiles'] += 1
        self.currvals['file_task_id'] = -1

        if not self.transfer_stats_per_file:
            return self.currvals['file_task_id']

        if miscutils.fwdebug_check(3, 'TRANSFERSTATS_DEBUG'):
            miscutils.fwdebug_print(f"beg - {filename}")

        batch_id = self.currvals['batch_task_id']
        if batch_id is None:
            raise Exception('Cannot call this function without prior calling stat_beg_batch')

        row = {'filename': filename}
        row['task_id'] = self.create_task(name='transfer_file',
                                          info_table='transfer_file',
                                          parent_task_id=self.currvals['batch_task_id'],
                                          root_task_id=self.root_task_id,
                                          label=None,
                                          do_begin=True,
                                          do_commit=False)
        row['batch_task_id'] = self.currvals['batch_task_id']

        self.basic_insert_row('transfer_file', row)
        self.commit()

        self.currvals['file_task_id'] = row['task_id']
        if miscutils.fwdebug_check(3, 'TRANSFERSTATS_DEBUG'):
            miscutils.fwdebug_print(f"end - file_task_id = {self.currvals['file_task_id']}")
        return self.currvals['file_task_id']
    def write_outputwcl(self, outfilename=None):
        """ Write the output wcl to a file, creating its directory if needed

            Parameters
            ----------
            outfilename : str, optional
                The name of the output wcl file to write. Default is ``None``
                which indicates that the file name is stored in the inputwcl
                (under ``['wrapper']['outputwcl']``).
        """

        target = outfilename
        if target is None:
            target = self.inputwcl['wrapper']['outputwcl']

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("outfilename = %s" % target,
                                    WRAPPER_OUTPUT_PREFIX)

        # make sure the directory that will hold the file exists
        outwcldir = miscutils.parse_fullname(target, miscutils.CU_PARSE_PATH)
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("outwcldir = %s" % outwcldir,
                                    WRAPPER_OUTPUT_PREFIX)
        miscutils.coremakedirs(outwcldir)

        with open(target, 'w') as wclfh:
            self.outputwcl.write(wclfh, True)
Exemple #15
0
    def isLoaded(self):
        """ Report whether this file's data are already in the database

            The file counts as loaded when the database already holds at
            least one object for it.  A message is emitted either way:
            informational when the database and catalog counts agree, an
            error when they differ.

            Returns
            -------
            bool
        """
        numDbObjects = self.numAlreadyIngested()
        numCatObjects = self.getNumObjects()

        if not numDbObjects > 0:
            return False

        if numDbObjects == numCatObjects:
            self.info("INFO: file " + self.fullfilename +
                      " already ingested with the same number of" +
                      " objects. Skipping.")
            return True

        # counts disagree: still treated as loaded, but flag it loudly
        miscutils.fwdebug_print(  # pragma: no cover
            "ERROR: file " + self.fullfilename +
            " already ingested, but the number of objects is" +
            " DIFFERENT: catalog=" + str(numCatObjects) + "; DB=" +
            str(numDbObjects) + ".")
        return True  # pragma: no cover
Exemple #16
0
 def insert_PFW_row(self, pfwtable, row):
     """Insert ``row`` into the PFW table ``pfwtable`` and commit.

     Delegates the insert to ``basic_insert_row`` and commits right away.
     """
     self.basic_insert_row(pfwtable, row)
     self.commit()

     debugging = miscutils.fwdebug_check(3, 'PFWDB_DEBUG')
     if debugging:
         miscutils.fwdebug_print("end")
    def home2job(self, filelist):
        """ From inside a job, pull files from the home archive into job scratch

            Parameters
            ----------
            filelist : dict
                Dictionary containing the file names and path information

            Returns
            -------
            dict of the results

            Raises
            ------
            Exception
                If no home archive information is available.
        """
        if miscutils.fwdebug_check(3, "JOBFILEMVMT_DEBUG"):
            miscutils.fwdebug_print("len(filelist)=%s" % len(filelist))
        if miscutils.fwdebug_check(6, "JOBFILEMVMT_DEBUG"):
            miscutils.fwdebug_print("filelist=%s" % filelist)

        # if staging outside job, this function shouldn't be called
        if self.home is None:
            raise Exception("Home archive info is None.   Should not be calling this function")

        # work on a copy so the caller's entries keep their relative paths
        absfilelist = copy.deepcopy(filelist)
        for entry in absfilelist.values():
            entry['src'] = '/'.join((self.home['root_http'], entry['src']))

        if self.tstats is not None:
            transclass = self.__module__ + '.' + self.__class__.__name__
            self.tstats.stat_beg_batch('home2job', self.home['name'],
                                       'job_scratch', transclass)

        status, results = self.HU.copyfiles(absfilelist, self.tstats)

        if self.tstats is not None:
            self.tstats.stat_end_batch(status)
        return results
Exemple #18
0
    def ingest_contents(self, listfullnames, **kwargs):
        """ Ingest the contents of each file into two non-metadata tables

            Every file is ingested twice: first with ``filetype`` /
            ``tablename`` / ``didatadefs`` and then with the matching
            attributes ending in ``2``.  A warning is printed whenever an
            ingest reports no rows.
        """

        assert isinstance(listfullnames, list)

        for fname in listfullnames:
            miscutils.fwdebug_print("********************* %s" % fname)

            # '' selects the first set of attributes, '2' the second;
            # attributes are looked up lazily so the order of access is
            # the same as two hand-written passes
            for suffix in ('', '2'):
                numrows = dfiutils.datafile_ingest_main(
                    self.dbh,
                    getattr(self, 'filetype' + suffix),
                    fname,
                    getattr(self, 'tablename' + suffix),
                    getattr(self, 'didatadefs' + suffix))

                if numrows in (None, 0):
                    miscutils.fwdebug_print(
                        f"WARN: 0 rows ingested from {fname} for table {getattr(self, 'tablename' + suffix)}"
                    )
                elif miscutils.fwdebug_check(1, 'FTMGMT_DEBUG'):
                    miscutils.fwdebug_print(
                        f"INFO: {numrows} rows ingested from {fname} for table {getattr(self, 'tablename' + suffix)}"
                    )
Exemple #19
0
    def target2job(self, filelist):
        """ Copy files from the target archive into job scratch

            Parameters
            ----------
            filelist : dict
                Dictionary containing the file names and path information

            Returns
            -------
            dict
                The results

            Raises
            ------
            Exception
                If no target archive information is available.
        """
        if miscutils.fwdebug_check(3, "JOBFILEMVMT_DEBUG"):
            miscutils.fwdebug_print("len(filelist)=%s" % len(filelist))
        if miscutils.fwdebug_check(6, "JOBFILEMVMT_DEBUG"):
            miscutils.fwdebug_print("filelist=%s" % filelist)

        if self.target is None:
            raise Exception(
                "Target archive info is None.   Should not be calling this function"
            )

        # work on a copy so the caller's entries keep their relative paths
        absfilelist = copy.deepcopy(filelist)
        for entry in absfilelist.values():
            entry['src'] = '/'.join((self.target['root'], entry['src']))

        if self.tstats is not None:
            transclass = self.__module__ + '.' + self.__class__.__name__
            self.tstats.stat_beg_batch('target2job', self.target['name'],
                                       'job_scratch', transclass)

        status, results = disk_utils_local.copyfiles(absfilelist, self.tstats)

        if self.tstats is not None:
            self.tstats.stat_end_batch(status)
        return results
Exemple #20
0
    def generateRows(self):
        """ Method to convert the input data into a list of lists

            Builds one cast per column from the column definitions in
            ``self.dbDict[self.hdu]`` (``int`` for INT, ``float`` for
            FLOAT, ``str`` otherwise), hands them to ``parseCSV`` along
            with the file name, and records the column order.

            Returns
            -------
            int
                0 on success; 1 (with ``self.status`` set to 1) when the
                row count check fails or an error occurs.
        """
        casts = {"INT": int, "FLOAT": float}
        try:
            # create a list of objects used to cast the data
            types = [casts.get(item.dtype.upper(), str)
                     for item in self.dbDict[self.hdu].values()]
            self.parseCSV(self.fullfilename, types)
            self.orderedColumns = list(self.dbDict[self.hdu])
            if self.checkcount and len(self.idDict) != len(self.sqldata):
                self.status = 1
                miscutils.fwdebug_print(f"Incorrect number of rows in {self.shortfilename}. Count is {len(self.sqldata):d}, should be {len(self.idDict):d}")
                return 1
            return 0
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not reported as an ingest failure
        except Exception as exc:  # pragma: no cover
            print("Exception raised:", exc)
            print("Traceback: ")
            traceback.print_tb(exc.__traceback__)
            print(" ")
            self.status = 1
            return 1
Exemple #21
0
    def get_grid_info(self):
        """Create dictionary of grid job submission options.

        Each option is searched for first under its name with underscores
        and then under the same name with the underscores removed.  The
        result is always keyed by the underscore-free name; options that
        cannot be found are left out.
        """
        option_names = (
            'stdout', 'stderr', 'queue', 'psn', 'job_type',
            'max_wall_time', 'max_time', 'max_cpu_time', 'max_memory',
            'min_memory', 'count', 'host_count', 'host_types',
            'host_xcount', 'xcount', 'reservation_id', 'grid_resource',
            'grid_type', 'grid_host', 'grid_port', 'batch_type',
            'globus_extra', 'environment', 'dynslots',
        )

        vals = {}
        for key in option_names:
            newkey = key.replace('_', '')

            found, value = self.search(key)
            if not found:
                # fall back to the spelling without underscores
                found, value = self.search(newkey)

            if found:
                vals[newkey] = value
            elif miscutils.fwdebug_check(3, 'PFWCONFIG_DEBUG'):
                miscutils.fwdebug_print("Could not find value for %s(%s)" %
                                        (key, newkey))

        return vals
def get_file_fullnames(sect, filewcl, fullwcl):
    """ Get the full names of the files in the specified section.

        Parameters
        ----------
        sect : str
            The WCL section to use, in dotted form; the second component
            is the section name looked up in ``filewcl``.

        filewcl : WCL
            The WCL to use

        fullwcl : WCL
            The full WCL, used to generate the full names

        Returns
        -------
        set
            The full file names; empty when the section is absent or has
            no ``fullname`` entry.
    """
    sectname = sect.split('.')[1]

    if miscutils.fwdebug_check(3, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print("INFO: Beg sectname=%s" % sectname)

    if sectname not in filewcl:
        return set()

    filesect = filewcl[sectname]
    if 'fullname' not in filesect:
        return set()

    # expand variables first, then break the comma separated result apart
    expanded = replfuncs.replace_vars(filesect['fullname'], fullwcl)[0]
    fnames = miscutils.fwsplit(expanded, ',')
    if miscutils.fwdebug_check(3, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print("INFO: fullname = %s" % fnames)

    return set(fnames)
    def blocking_transfer(self, filelist):
        """ Copy files from the source archive to the destination archive

            Parameters
            ----------
            filelist : dict
                Dictionary of the files to transfer; each entry's ``src``
                and ``dst`` are taken relative to the archive roots.

            Returns
            -------
            tuple
                Transfer results
        """
        miscutils.fwdebug_print("\tNumber files to transfer: %d" %
                                len(filelist))
        if miscutils.fwdebug_check(1, "ARCHIVETRANSFER_DEBUG"):
            miscutils.fwdebug_print("\tfilelist: %s" % filelist)

        srcroot = self.src_archive_info['root']
        dstroot = self.dst_archive_info['root']

        # prefix the archive roots on a copy, leaving the caller's dict alone
        files2copy = copy.deepcopy(filelist)
        for entry in files2copy.values():
            entry['src'] = '%s/%s' % (srcroot, entry['src'])
            entry['dst'] = '%s/%s' % (dstroot, entry['dst'])

        return disk_utils_local.copyfiles(files2copy, None)
def get_exec_sections(wcl, prefix):
    """ Collect the exec sections appearing in the given wcl

        A key is an exec section when it consists of ``prefix`` followed
        by one or more digits.

        Parameters
        ----------
        wcl : WCL
            The WCL object to look in.

        prefix : str
            The exec prefix to look for.

        Returns
        -------
        dict
            Dictionary of the found exec section names and their contents.
    """
    pattern = r"^%s\d+$" % prefix

    found = {}
    for name, contents in wcl.items():
        if miscutils.fwdebug_check(3, "DEBUG"):
            miscutils.fwdebug_print("\tsearching for exec prefix in %s" % name)

        if not re.search(pattern, name):
            continue

        if miscutils.fwdebug_check(4, "DEBUG"):
            miscutils.fwdebug_print("\tFound exec prefex %s" % name)
        found[name] = contents
    return found
    def basic_register_file_data(self, ftype, fullnames, pfw_attempt_id, wgb_task_id,
                                 do_update, update_info=None, filepat=None):
        """ Gather metadata and disk information for the given files

            The file type manager for ``ftype`` is loaded first.  For each
            file, its metadata tasks are run and the disk information
            (including md5sum) is collected; the ``path`` entry is dropped
            from the disk information.

            Returns
            -------
            dict
                Maps each file name to ``{'diskinfo': ..., 'metadata': ...}``,
                or to ``None`` when an ``IOError`` occurred for that file.
        """
        self.dynam_load_ftmgmt(ftype, filepat)

        results = {}

        for fname in fullnames:
            try:
                metadata = self.ftmgmt.perform_metadata_tasks(fname, do_update, update_info)
                if miscutils.fwdebug_check(6, 'FILEMGMT_DEBUG'):
                    # format rather than concatenate: metadata need not be a
                    # string, and "str + dict" raises TypeError
                    miscutils.fwdebug_print(f"INFO: metadata to ingest{metadata}")
                fileinfo = diskutils.get_single_file_disk_info(fname,
                                                               save_md5sum=True,
                                                               archive_root=None)
                fileinfo['filetype'] = ftype
                fileinfo['wgb_task_id'] = int(wgb_task_id)
                if pfw_attempt_id is None:
                    fileinfo['pfw_attempt_id'] = None
                else:
                    fileinfo['pfw_attempt_id'] = int(pfw_attempt_id)

                del fileinfo['path']
                results[fname] = {'diskinfo': fileinfo, 'metadata': metadata}

            except IOError:
                # report the problem but keep going with the remaining files
                miscutils.fwdebug_print(f"\n\nError: Problem gathering data for file {fname}")
                traceback.print_exc(1, sys.stdout)
                results[fname] = None
        return results
Exemple #26
0
def get_single_file_disk_info(fname, save_md5sum=False, archive_root=None):
    """ Collect disk information for a single file

        Returns a dict with ``filename``, ``compression``, ``path`` and
        ``filesize``.  ``md5sum`` is added when ``save_md5sum`` is true.
        When ``archive_root`` is given and the path is absolute,
        ``relpath`` and ``rel_filename`` are added as well.
    """
    if miscutils.fwdebug_check(3, "DISK_UTILS_LOCAL_DEBUG"):
        miscutils.fwdebug_print(f"fname={fname}, save_md5sum={save_md5sum}, archive_root={archive_root}")

    parsemask = (miscutils.CU_PARSE_PATH
                 | miscutils.CU_PARSE_FILENAME
                 | miscutils.CU_PARSE_COMPRESSION)
    path, filename, compress = miscutils.parse_fullname(fname, parsemask)
    if miscutils.fwdebug_check(3, "DISK_UTILS_LOCAL_DEBUG"):
        miscutils.fwdebug_print(f"path={path}, filename={filename}, compress={compress}")

    fdict = {
        'filename': filename,
        'compression': compress,
        'path': path,
        'filesize': os.path.getsize(fname),
    }

    if save_md5sum:
        fdict['md5sum'] = get_md5sum_file(fname)

    if not (archive_root and path.startswith('/')):
        return fdict

    # drop the archive root and the separator that follows it
    # NOTE(review): this assumes path begins with archive_root -- confirm
    fdict['relpath'] = path[len(archive_root)+1:]
    compext = "" if compress is None else compress
    fdict['rel_filename'] = f"{fdict['relpath']}/{filename}{compext}"

    return fdict
    def _get_file_header_key_info(self, key):
        """ Look up the comment and fits data type defined for a file header key

            Parameters
            ----------
            key : str
                The key to look for in the header

            Returns
            -------
            tuple of the description and data type; either is ``None``
            when it is not defined (both when the key itself is unknown)
        """

        file_header_info = self.config['file_header']
        if key not in file_header_info:
            return None, None

        keyinfo = file_header_info[key]

        ucomment = None
        if 'description' in keyinfo:
            ucomment = keyinfo['description']
        else:
            miscutils.fwdebug_print("WARN: could not find description for key=%s" % (key))

        udatatype = None
        if 'fits_data_type' in keyinfo:
            udatatype = keyinfo['fits_data_type']
        else:
            miscutils.fwdebug_print("WARN: could not find fits_data_type for key=%s" % (key))

        return ucomment, udatatype
def list_missing_contents(filemgmt, ftype, filelist):
    """ Return the files from the given set which still need contents ingested

        ``filelist`` is a list of file dicts.  The check is delegated to
        ``filemgmt.has_contents_ingested``; timing and summary counts are
        printed along the way.
    """
    if miscutils.fwdebug_check(6, "REGISTER_FILES_DEBUG"):
        miscutils.fwdebug_print(f"filelist={filelist}")

    print("\tChecking which files still need contents ingested", flush=True)
    starttime = time.time()
    results = filemgmt.has_contents_ingested(ftype, filelist)
    endtime = time.time()
    print(f"({endtime - starttime:0.2f} secs)", flush=True)

    # a false result means the contents have not been ingested yet
    misslist = []
    for fname in results:
        if not results[fname]:
            misslist.append(fname)

    num_done = len(filelist) - len(misslist)
    print(
        f"\t\t{num_done:0d} file(s) already have content ingested",
        flush=True)
    print(f"\t\t{len(misslist):0d} file(s) still to have content ingested",
          flush=True)

    if miscutils.fwdebug_check(6, "REGISTER_FILES_DEBUG"):
        miscutils.fwdebug_print(f"misslist={misslist}")

    return misslist
    def _gather_metadata_file(self, fullname, **kwargs):
        """ Gather metadata for a single file

            Starts from the generic metadata and adds ``nite``, derived
            from the date of the first exposure that has one in the JSON
            found on the first line of the file.

            Raises
            ------
            KeyError
                If no exposure in the manifest has a date.
        """

        if miscutils.fwdebug_check(3, 'FTMGMT_DEBUG'):
            miscutils.fwdebug_print(f"INFO: beg  file={fullname}")

        metadata = FtMgmtGeneric._gather_metadata_file(self, fullname,
                                                       **kwargs)

        # need nite for the archive path
        with open(fullname, 'r') as jsonfh:
            linedata = json.loads(jsonfh.readline())

            for exposure in linedata['exposures']:
                if 'date' in exposure:
                    datestr = exposure['date']
                    break
            else:
                raise KeyError(
                    'Could not find date value for any exposure in manifest')

            metadata['nite'] = misctime.convert_utc_str_to_nite(datestr)

        if miscutils.fwdebug_check(3, 'FTMGMT_DEBUG'):
            miscutils.fwdebug_print("INFO: end")
        return metadata
    def get_job_info(self, wherevals):
        """ Get job information

            Parameters
            ----------
            wherevals : dict
                Column name -> value pairs; each becomes an equality
                condition (bound by name) in the query.

            Returns
            -------
            dict
                Maps task id to a dict of the selected pfw_job/task
                columns (lower-case names) plus ``message``.  ``message``
                holds the QCF messages fetched for tasks whose status is
                not ``pfwdefs.PF_EXIT_SUCCESS`` and is an empty list
                otherwise.
        """
        whclause = [f"{col}={self.get_named_bind_string(col)}"
                    for col in wherevals]
        conditions = ' and '.join(whclause)
        sql = ("select j.jobkeys as jobkeys,j.jobnum as jobnum, "
               "j.expect_num_wrap as expect_num_wrap, j.task_id as task_id, "
               "j.pfw_block_task_id as pfw_block_task_id, t.status as status, "
               "t.start_time as start_time, t.end_time as end_time "
               "from pfw_job j, task t where t.id=j.task_id and "
               f"{conditions}")
        if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print(f"sql> {sql}")
        if miscutils.fwdebug_check(3, 'PFWDB_DEBUG'):
            miscutils.fwdebug_print(f"params> {wherevals}")
        curs = self.cursor()
        curs.execute(sql, wherevals)
        desc = [d[0].lower() for d in curs.description]

        jobinfo = {}
        get_messages = []
        for line in curs:
            d = dict(zip(desc, line))
            d['message'] = []
            if d['status'] != pfwdefs.PF_EXIT_SUCCESS:
                # remember the unsuccessful tasks so their messages can be fetched
                get_messages.append(d['task_id'])
            jobinfo[d['task_id']] = d

        # only query for messages when some task actually needs them
        if get_messages:
            qdbh = qcfdb.QCFDB(connection=self)
            qcmsg = qdbh.get_all_qcf_messages_by_task_id(get_messages, level=3)
            for tid, val in qcmsg.items():
                jobinfo[tid]['message'] = val

        return jobinfo