コード例 #1
0
def run_post_steps(filelist, config, fmobj):
    """ Performs steps necessary for each file

        Parameters
        ----------
        filelist : list
            Full names of the delivered files to process
        config : dict-like
            Configuration containing the dts_filetype_class_* mappings
        fmobj : object
            File-management/DB handle used for the registration check and
            for committing after successful post steps
    """

    #print config.keys()
    # all files in the list are assumed to share one filetype; determine it
    # from the first entry only
    firstname = miscutils.parse_fullname(filelist[0], miscutils.CU_PARSE_FILENAME)
    filetype = dtsutils.determine_filetype(firstname)
    miscutils.fwdebug(3, "DTSFILEHANDLER_DEBUG", "filetype = %s" % filetype)

    # dynamically load class specific to filetype
    classkey = 'dts_filetype_class_' + filetype
    filetype_class = miscutils.dynamically_load_class(config[classkey])
    valdict = fmutils.get_config_vals({}, config, filetype_class.requested_config_vals())
    ftobj = filetype_class(dbh=fmobj, config=valdict)

    for fullname in filelist:
        filename = miscutils.parse_fullname(fullname, miscutils.CU_PARSE_FILENAME)
        miscutils.fwdebug(3, "DTSFILEHANDLER_DEBUG", "filename = %s" % filename)

        if dtsutils.check_already_registered(filename, fmobj):
            ftobj.post_steps(fullname)  # e.g., Rasicam

            # if success
            fmobj.commit()
        else:
            # fixed: parenthesized print so this line is valid under both
            # Python 2 and Python 3 (was a py2-only print statement)
            print("File must already be registered in order to run post_steps")
コード例 #2
0
def run_post_steps(filelist, config, fmobj):
    """ Performs steps necessary for each file

        Parameters
        ----------
        filelist : list
            Full names of the delivered files to process
        config : dict-like
            Configuration containing the dts_filetype_class_* mappings
        fmobj : object
            File-management/DB handle used for the registration check and
            for committing after successful post steps
    """

    #print config.keys()
    # all files in the list are assumed to share one filetype; determine it
    # from the first entry only
    firstname = miscutils.parse_fullname(filelist[0], miscutils.CU_PARSE_FILENAME)
    filetype = dtsutils.determine_filetype(firstname)
    miscutils.fwdebug(3, "DTSFILEHANDLER_DEBUG", "filetype = %s" % filetype)

    # dynamically load class specific to filetype
    classkey = 'dts_filetype_class_' + filetype
    filetype_class = miscutils.dynamically_load_class(config[classkey])
    valdict = fmutils.get_config_vals({}, config, filetype_class.requested_config_vals())
    ftobj = filetype_class(dbh=fmobj, config=valdict)

    for fullname in filelist:
        filename = miscutils.parse_fullname(fullname, miscutils.CU_PARSE_FILENAME)
        miscutils.fwdebug(3, "DTSFILEHANDLER_DEBUG", "filename = %s" % filename)

        if dtsutils.check_already_registered(filename, fmobj):
            ftobj.post_steps(fullname)  # e.g., Rasicam

            # if success
            fmobj.commit()
        else:
            # fixed: parenthesized print so this line is valid under both
            # Python 2 and Python 3 (was a py2-only print statement)
            print("File must already be registered in order to run post_steps")
コード例 #3
0
    def load_filename_gtt(self, filelist):
        """ insert filenames into filename global temp table

            Parameters
            ----------
            filelist : list
                List of strings of the file names, or of dictionaries describing the file names

            Returns
            -------
            str
                The temp table name
        """
        # returns filename GTT table name

        # make sure table is empty before loading it
        self.empty_gtt(dmdbdefs.DB_GTT_FILENAME)

        colmap = [dmdbdefs.DB_COL_FILENAME, dmdbdefs.DB_COL_COMPRESSION]
        rows = []
        for _file in filelist:
            fname = None
            comp = None
            if isinstance(_file, str):
                # plain string entry: split it into base filename and
                # compression extension
                (fname, comp) = miscutils.parse_fullname(
                    _file,
                    miscutils.CU_PARSE_FILENAME | miscutils.CU_PARSE_EXTENSION)
            elif isinstance(
                    _file,
                    dict) and (dmdbdefs.DB_COL_FILENAME in _file
                               or dmdbdefs.DB_COL_FILENAME.lower() in _file):
                # dict entry: column keys may be upper- or lower-case; an
                # explicitly supplied compression value takes precedence over
                # parsing it out of the filename
                if dmdbdefs.DB_COL_COMPRESSION in _file:
                    # NOTE(review): assumes the filename key uses the same
                    # case as the compression key — a lower-case filename key
                    # combined with an upper-case compression key would raise
                    # KeyError here; confirm against callers
                    fname = _file[dmdbdefs.DB_COL_FILENAME]
                    comp = _file[dmdbdefs.DB_COL_COMPRESSION]
                elif dmdbdefs.DB_COL_COMPRESSION.lower() in _file:
                    fname = _file[dmdbdefs.DB_COL_FILENAME.lower()]
                    comp = _file[dmdbdefs.DB_COL_COMPRESSION.lower()]
                elif dmdbdefs.DB_COL_FILENAME in _file:
                    # no compression supplied: derive it from the filename
                    (fname, comp) = miscutils.parse_fullname(
                        _file[dmdbdefs.DB_COL_FILENAME],
                        miscutils.CU_PARSE_FILENAME
                        | miscutils.CU_PARSE_EXTENSION)
                else:
                    # lower-case filename key, no compression supplied
                    (fname, comp) = miscutils.parse_fullname(
                        _file[dmdbdefs.DB_COL_FILENAME.lower()],
                        miscutils.CU_PARSE_FILENAME
                        | miscutils.CU_PARSE_EXTENSION)
            else:
                raise ValueError(f"Invalid entry filelist({_file})")
            rows.append({
                dmdbdefs.DB_COL_FILENAME: fname,
                dmdbdefs.DB_COL_COMPRESSION: comp
            })
        self.insert_many(dmdbdefs.DB_GTT_FILENAME, colmap, rows)
        return dmdbdefs.DB_GTT_FILENAME
コード例 #4
0
    def write_outputwcl(self, outfilename=None):
        """ Write output wcl to file

            Parameters
            ----------
            outfilename : str, optional
                Name of the output wcl file to write.  Default is ``None``,
                meaning the name is taken from the input wcl
                (``wrapper.outputwcl``).
        """
        # destination defaults to the path recorded in the input wcl
        if outfilename is None:
            outfilename = self.inputwcl['wrapper']['outputwcl']

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("outfilename = %s" % outfilename, WRAPPER_OUTPUT_PREFIX)

        # make sure the directory that will hold the output wcl exists
        wcl_dir = miscutils.parse_fullname(outfilename, miscutils.CU_PARSE_PATH)
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("outwcldir = %s" % wcl_dir, WRAPPER_OUTPUT_PREFIX)
        miscutils.coremakedirs(wcl_dir)

        with open(outfilename, 'w') as wclfh:
            self.outputwcl.write(wclfh, True)
コード例 #5
0
    def _gather_metadata_from_config(self, fullname, metakeys):
        """ Get values from config

            Parameters
            ----------
            fullname : str
                The name of the file to gather data about

            metakeys : list
                List of keys to look for

            Returns
            -------
            dict
                The metadata
        """
        metadata = OrderedDict()

        for key_path in metakeys:
            # only the last component of a dotted wcl key is significant here
            shortkey = key_path.split('.')[-1]
            if shortkey == 'fullname':
                metadata['fullname'] = fullname
            elif shortkey == 'filename':
                metadata['filename'] = miscutils.parse_fullname(fullname,
                                                                miscutils.CU_PARSE_FILENAME)
            elif shortkey == 'filetype':
                metadata['filetype'] = self.filetype
            else:
                # any other key is looked up in the configuration, using the
                # full dotted path
                (exists, val) = self.config.search(key_path)
                if exists:
                    metadata[shortkey] = val

        return metadata
コード例 #6
0
def get_single_file_disk_info(fname, save_md5sum=False, archive_root=None):
    """ Method to get disk info for a single file

        Parameters
        ----------
        fname : str
            The name of the file

        save_md5sum : bool, optional
            Whether to calculate the md5sum (True) or not (False).
            Default is False.

        archive_root : str, optional
            The archive root path used to compute relative paths.
            Default is None.

        Returns
        -------
        dict
            filename, compression, path, filesize and, when applicable,
            md5sum / relpath / rel_filename
    """
    if miscutils.fwdebug_check(3, "DISK_UTILS_LOCAL_DEBUG"):
        miscutils.fwdebug_print(f"fname={fname}, save_md5sum={save_md5sum}, archive_root={archive_root}")

    parsemask = miscutils.CU_PARSE_PATH | miscutils.CU_PARSE_FILENAME | miscutils.CU_PARSE_COMPRESSION

    (path, filename, compress) = miscutils.parse_fullname(fname, parsemask)
    if miscutils.fwdebug_check(3, "DISK_UTILS_LOCAL_DEBUG"):
        # fixed: this debug line printed a literal placeholder instead of the
        # parsed filename
        miscutils.fwdebug_print(f"path={path}, filename={filename}, compress={compress}")

    fdict = {'filename' : filename,
             'compression': compress,
             'path': path,
             'filesize': os.path.getsize(fname)
             }

    if save_md5sum:
        fdict['md5sum'] = get_md5sum_file(fname)

    # only absolute paths can be made relative to the archive root
    if archive_root and path.startswith('/'):
        fdict['relpath'] = path[len(archive_root)+1:]

        if compress is None:
            compext = ""
        else:
            compext = compress

        # fixed: rel_filename previously dropped the filename component and
        # emitted a literal placeholder
        fdict['rel_filename'] = f"{fdict['relpath']}/{filename}{compext}"

    return fdict
コード例 #7
0
    def ingest_contents(self, listfullnames, **kwargs):
        """ Ingest data into non-metadata table - rasicam_decam

            Parameters
            ----------
            listfullnames : list
                Full names of the exposure files to ingest

            Keyword Args
            ------------
            prihdr : primary FITS header to use instead of reading the file
            hdulist : already-open HDU list whose first header is used

            Raises
            ------
            OSError
                If an exposure file does not exist
            Exception
                If no RASICAM keywords could be extracted from the header
        """

        assert isinstance(listfullnames, list)

        dbtable = 'rasicam_decam'

        for fullname in listfullnames:
            if not os.path.isfile(fullname):
                raise OSError(f"Exposure file not found: '{fullname}'")

            filename = miscutils.parse_fullname(fullname,
                                                miscutils.CU_PARSE_FILENAME)

            # prefer a caller-supplied header over re-reading the file
            primary_hdr = None
            if 'prihdr' in kwargs:
                primary_hdr = kwargs['prihdr']
            elif 'hdulist' in kwargs:
                hdulist = kwargs['hdulist']
                primary_hdr = hdulist[0].header
            else:
                primary_hdr = fits.getheader(fullname, 0)

            row = get_vals_from_header(primary_hdr)

            # fixed: test before adding the constant columns — previously the
            # row always contained filename/source/analyst, so the error
            # branch was unreachable; also report the actual filename
            if not row:
                raise Exception(
                    f"No RASICAM header keywords identified for {filename}")

            row['filename'] = filename
            row['source'] = 'HEADER'
            row['analyst'] = 'DTS.ingest'

            self.dbh.basic_insert_row(dbtable, row)
コード例 #8
0
def generate_provenance(fullname):
    """ Generate provenance wcl """
    # record the base filename, keeping any compression extension
    flags = miscutils.CU_PARSE_FILENAME | miscutils.CU_PARSE_EXTENSION
    fname, compression = miscutils.parse_fullname(fullname, flags)
    if compression is not None:
        fname += compression
    return {'was_generated_by': {'exec_1': fname}}
コード例 #9
0
    def has_contents_ingested(self, listfullnames):
        """ Check if file has contents ingested """

        assert isinstance(listfullnames, list)

        # assume uncompressed and compressed files have same metadata
        # choosing either doesn't matter
        byfilename = {miscutils.parse_fullname(fname, miscutils.CU_PARSE_FILENAME): fname
                      for fname in listfullnames}

        self.dbh.load_filename_gtt(list(byfilename.keys()))

        dbq = f"select m.manifest_filename from MANIFEST_EXPOSURE m, {dmdbdefs.DB_GTT_FILENAME} g where m.manifest_filename=g.filename"
        curs = self.dbh.cursor()
        curs.execute(dbq)

        # files found in the manifest table are ingested ...
        results = {byfilename[row[0]]: True for row in curs}

        # ... everything else is not
        for fname in listfullnames:
            results.setdefault(fname, False)

        return results
コード例 #10
0
def read_fullnames_from_listfile(listfile, linefmt, colstr):
    """ Read a list file returning fullnames from the list

        Parameters
        ----------
        listfile : str
            Name of the list file to read
        linefmt : str
            Line format: 'textcsv', 'texttab' or 'textsp'
            ('config'/'wcl' are not yet supported)
        colstr : str
            Column description string; columns ending in '.fullname'
            are collected

        Returns
        -------
        dict
            Mapping of file section name to list of full names
    """

    if miscutils.fwdebug_check(3, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print('colstr=%s' % colstr)

    columns = convert_col_string_to_list(colstr, False)

    if miscutils.fwdebug_check(3, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print('columns=%s' % columns)

    # map column position -> file section for every '*.fullname' column
    fullnames = {}
    pos2fsect = {}
    for pos, col in enumerate(columns):
        lcol = col.lower()
        if lcol.endswith('.fullname'):
            filesect = lcol[:-9]
            pos2fsect[pos] = filesect
            fullnames[filesect] = []
        # else a data column instead of a filename

    if miscutils.fwdebug_check(3, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print('pos2fsect=%s' % pos2fsect)

    if linefmt in ('config', 'wcl'):
        miscutils.fwdie(
            'Error:  wcl list format not currently supported (%s)' % listfile,
            1)
    else:
        # parse mask is loop-invariant; compute it once (was rebuilt for
        # every column of every line)
        parsemask = miscutils.CU_PARSE_PATH | miscutils.CU_PARSE_FILENAME | \
                    miscutils.CU_PARSE_COMPRESSION
        with open(listfile, 'r') as listfh:
            for line in listfh:
                line = line.strip()

                # convert line into python list
                lineinfo = []
                if linefmt == 'textcsv':
                    lineinfo = miscutils.fwsplit(line, ',')
                elif linefmt == 'texttab':
                    lineinfo = miscutils.fwsplit(line, '\t')
                elif linefmt == 'textsp':
                    lineinfo = miscutils.fwsplit(line, ' ')
                else:
                    miscutils.fwdie('Error:  unknown linefmt (%s)' % linefmt,
                                    1)

                # save each fullname in line
                for pos in pos2fsect:
                    # use common routine to parse actual fullname (e.g., remove [0])
                    (path, filename, compression) = miscutils.parse_fullname(
                        lineinfo[pos], parsemask)
                    fname = "%s/%s" % (path, filename)
                    if compression is not None:
                        fname += compression
                    fullnames[pos2fsect[pos]].append(fname)

    if miscutils.fwdebug_check(6, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print('fullnames = %s' % fullnames)
    return fullnames
コード例 #11
0
    def get_file_archive_info(self,
                              filelist,
                              arname,
                              compress_order=fmdefs.FM_PREFER_COMPRESSED):
        """ Return archive location info for the given files.

            Parameters
            ----------
            filelist : list
                Filenames (no path) to look up
            arname : str
                Archive name defined in the config 'archive' section
            compress_order : list
                Compression extensions (including None) in order of
                preference

            Returns
            -------
            dict
                Mapping filename -> info dict (filename, compression,
                filesize, path, rel_filename)
        """
        # sanity checks
        if 'archive' not in self.config:
            miscutils.fwdie('Error: Missing archive section in config', 1)

        if arname not in self.config['archive']:
            miscutils.fwdie(f'Error: Invalid archive name ({arname})', 1)

        if 'root' not in self.config['archive'][arname]:
            miscutils.fwdie(
                f"Error: Missing root in archive def ({self.config['archive'][arname]})",
                1)

        if not isinstance(compress_order, list):
            miscutils.fwdie(
                'Error:  Invalid compress_order.  It must be a list of compression extensions (including None)',
                1)

        # walk archive to get all files, grouped by compression extension
        fullnames = {p: {} for p in compress_order}

        root = self.config['archive'][arname]['root']
        root = root.rstrip("/")  # canonicalize - remove trailing / to ensure

        for (dirpath, _, filenames) in os.walk(root, followlinks=True):
            for fname in filenames:
                d = {}
                # NOTE(review): 3 is a magic parse mask — presumably
                # filename|compression; replace with the named CU_PARSE_*
                # constants once confirmed
                (d['filename'],
                 d['compression']) = miscutils.parse_fullname(fname, 3)
                if d['compression'] not in fullnames:
                    # fixed: skip compressions the caller did not ask for
                    # instead of raising KeyError below
                    continue
                d['filesize'] = os.path.getsize(f"{dirpath}/{fname}")
                d['path'] = dirpath[len(root) + 1:]
                if d['compression'] is None:
                    compext = ""
                else:
                    compext = d['compression']
                d['rel_filename'] = f"{d['path']}/{d['filename']}{compext}"
                fullnames[d['compression']][d['filename']] = d

        # fixed: use .get() so these debug counts cannot KeyError when
        # compress_order does not include None or '.fz'
        print("uncompressed:", len(fullnames.get(None, {})))
        print("compressed:", len(fullnames.get('.fz', {})))

        # go through given list of filenames and find archive location and compression
        archiveinfo = {}
        for name in filelist:
            for p in compress_order:  # follow compression preference
                if name in fullnames[p]:
                    archiveinfo[name] = fullnames[p][name]
                    break

        print("archiveinfo = ", archiveinfo)
        return archiveinfo
コード例 #12
0
    def get_metadata(self, fullname):
        """ Build and cache minimal metadata for a snmanifest file. """
        fname = miscutils.parse_fullname(fullname, miscutils.CU_PARSE_FILENAME)
        # cache the single-file metadata entry on the instance
        self.filemeta = {'filename': fname, 'filetype': 'snmanifest'}
        return self.filemeta
コード例 #13
0
    def insert_rasicam(self, fullname):
        """ Extract RASICAM keywords from an exposure's primary header and
            insert them into the rasicam_decam table.

            Parameters
            ----------
            fullname : str
                Full path of the exposure file

            Raises
            ------
            Exception
                If the exposure does not exist or no RASICAM keywords are
                found in the primary header
        """
        dbtable = 'rasicam_decam'

        #  Keyword list needed to update the database.
        #     i=int, f=float, b=bool, s=str, date=date
        keylist = {'EXPNUM': 'i',
                   'INSTRUME': 's',
                   'SKYSTAT': 'b',
                   'SKYUPDAT': 'date',
                   'GSKYPHOT': 'b',
                   'LSKYPHOT': 'b',
                   'GSKYVAR': 'f',
                   'GSKYHOT': 'f',
                   'LSKYVAR': 'f',
                   'LSKYHOT': 'f',
                   'LSKYPOW': 'f'}

        if not os.path.isfile(fullname):
            raise Exception("Exposure not found: '%s'" % fullname)

        filename = miscutils.parse_fullname(fullname, miscutils.CU_PARSE_FILENAME)

        row = {'filename': filename,
               'source': 'HEADER',
               'analyst': 'DTS.ingest'}

        # fixed: close the FITS file when done (handle was leaked)
        hdulist = pyfits.open(fullname)
        try:
            primary_hdr = hdulist[0].header

            numkey_found = 0
            for key, ktype in keylist.items():
                # keylist keys are already upper-case, no .upper() needed
                if key in primary_hdr:
                    numkey_found += 1
                    value = primary_hdr[key]
                    if key == 'SKYUPDAT':  # entry_time is time exposure taken
                        row['ENTRY_TIME'] = datetime.strptime(value, "%Y-%m-%dT%H:%M:%S")
                    elif key == 'INSTRUME':
                        row['CAMSYM'] = wrapfuncs.func_camsym(fullname)
                    elif ktype == 'b':
                        row[key] = 'T' if value else 'F'
                    elif ktype == 'i':
                        if value != 'NaN':
                            row[key] = int(value)
                    else:
                        if value != 'NaN':
                            row[key] = float(value)
        finally:
            hdulist.close()

        if numkey_found > 0:
            self.dbh.basic_insert_row(dbtable, row)
        else:
            raise Exception("No RASICAM header keywords identified for %s" % filename)
コード例 #14
0
    def get_metadata(self, fullname):
        """ Return (and cache) metadata for a snmanifest file. """
        # only the bare filename is recorded; filetype is fixed
        meta = {'filename': miscutils.parse_fullname(fullname,
                                                     miscutils.CU_PARSE_FILENAME),
                'filetype': 'snmanifest'}
        self.filemeta = meta
        return self.filemeta
コード例 #15
0
    def has_contents_ingested(self, listfullnames):
        """ Check if file has contents ingested """

        assert isinstance(listfullnames, list)

        # one ingestion check per file, keyed by the original full name
        return {
            fname: dfiutils.is_ingested(
                miscutils.parse_fullname(fname, miscutils.CU_PARSE_FILENAME),
                self.tablename,
                self.dbh)
            for fname in listfullnames
        }
コード例 #16
0
def get_single_file_disk_info(fname, save_md5sum=False, archive_root=None):
    """ Method to get disk info for a single file

        Parameters
        ----------
        fname : str
            The name of the file

        save_md5sum : bool
            Whether to calculate the md5sum (True) or no (False), default is False

        archive_root : str
            The archive root path to prepend to the output data, default is None
    """
    if miscutils.fwdebug_check(3, "DISK_UTILS_LOCAL_DEBUG"):
        miscutils.fwdebug_print("fname=%s, save_md5sum=%s, archive_root=%s" % \
                                (fname, save_md5sum, archive_root))

    mask = (miscutils.CU_PARSE_PATH | miscutils.CU_PARSE_FILENAME |
            miscutils.CU_PARSE_COMPRESSION)
    path, filename, compress = miscutils.parse_fullname(fname, mask)
    if miscutils.fwdebug_check(3, "DISK_UTILS_LOCAL_DEBUG"):
        miscutils.fwdebug_print("path=%s, filename=%s, compress=%s" %
                                (path, filename, compress))

    info = {'filename': filename,
            'compression': compress,
            'path': path,
            'filesize': os.path.getsize(fname)}

    if save_md5sum:
        info['md5sum'] = get_md5sum_file(fname)

    # only absolute paths can be made relative to the archive root
    if archive_root and path.startswith('/'):
        relpath = path[len(archive_root) + 1:]
        info['relpath'] = relpath
        compext = compress if compress is not None else ""
        info['rel_filename'] = "%s/%s%s" % (relpath, filename, compext)

    return info
コード例 #17
0
def check_single_valid(keywords, fullname,
                       verbose):  # should raise exception if not valid
    """ Check whether the given file is a valid raw file

        Parameters
        ----------
        keywords : keyword requirements passed to check_header_keywords
        fullname : str
            Full path of the FITS exposure to validate
        verbose : int
            Verbosity level; > 1 also reports missing-requested/extra keywords

        Returns
        -------
        bool
            True when valid

        Raises
        ------
        ValueError
            On bad filename, unknown instrument, wrong hdu count, or
            missing required keywords
    """

    # fixed: context manager closes the FITS file even when a validation
    # check raises (handle was leaked before, on every error path)
    with fits.open(fullname) as hdulist:
        prihdr = hdulist[0].header

        # check exposure has correct filename (sometimes get NOAO-science-archive renamed exposures)
        correct_filename = prihdr['FILENAME']
        actual_filename = miscutils.parse_fullname(fullname,
                                                   miscutils.CU_PARSE_FILENAME)
        if actual_filename != correct_filename:
            raise ValueError(f'Error: invalid filename ({actual_filename})')

        instrume = prihdr['INSTRUME'].lower()

        req_num_hdus = -1
        if instrume == 'decam':
            req_num_hdus = 71
        else:
            raise ValueError(f'Error:  Unknown instrume ({instrume})')

        # check # hdus
        num_hdus = len(hdulist)
        if num_hdus != req_num_hdus:
            raise ValueError(f'Error:  Invalid number of hdus ({num_hdus})')

        # check keywords in every hdu
        for hdunum in range(0, num_hdus):
            hdr = hdulist[hdunum].header
            (req, want, extra) = check_header_keywords(keywords, hdunum, hdr)

            if verbose > 1:
                if want is not None and want:
                    print(f"HDU #{hdunum:02d} Missing requested keywords: {want}")
                if extra is not None and extra:
                    print(f"HDU #{hdunum:02d} Extra keywords: {extra}")

            if req is not None and req:
                raise ValueError(
                    f'Error: HDU #{hdunum:02d} Missing required keywords ({req})')

    return True
コード例 #18
0
def datafile_ingest_main(dbh, filetype, fullname, tablename, didatadefs):
    """ Control process for ingesting data from a file """

    #sections_wanted = get_sections_for_filetype(filetype, dbh)
    sections_wanted = list(didatadefs.keys())

    # XML files may carry several sections; FITS ingestion handles only one hdu
    if 'xml' in filetype:
        datadict = Xmlslurper(fullname, sections_wanted).gettables()
    elif len(sections_wanted) > 1:
        raise ValueError("Multiple hdus not yet supported\n")
    else:
        datadict = get_fits_data(fullname, sections_wanted[0])

    filename = miscutils.parse_fullname(fullname, miscutils.CU_PARSE_FILENAME)

    return ingest_datafile_contents(filename, filetype, tablename,
                                    didatadefs, datadict, dbh)
コード例 #19
0
    def has_metadata_ingested(self, listfullnames):
        """ Check if file has row in metadata table """

        assert isinstance(listfullnames, list)

        # assume uncompressed and compressed files have same metadata
        # choosing either doesn't matter
        byfilename = {miscutils.parse_fullname(fname, miscutils.CU_PARSE_FILENAME): fname
                      for fname in listfullnames}

        if miscutils.fwdebug_check(3, 'FTMGMT_DEBUG'):
            miscutils.fwdebug_print(
                f"Loading filename_gtt with: {list(byfilename.keys())}")
        self.dbh.load_filename_gtt(list(byfilename.keys()))

        # the metadata table for this filetype comes from the config;
        # 'genfile' is an alias for desfile
        metadata_table = self.config['filetype_metadata'][
            self.filetype]['metadata_table']
        if metadata_table.lower() == 'genfile':
            metadata_table = 'desfile'

        dbq = f"select m.filename from {metadata_table} m, {dmdbdefs.DB_GTT_FILENAME} g where m.filename=g.filename"
        curs = self.dbh.cursor()
        if miscutils.fwdebug_check(3, 'FTMGMT_DEBUG'):
            miscutils.fwdebug_print(f"Metadata check query: {dbq}")
        curs.execute(dbq)

        # files found by the join have metadata; everything else does not
        results = {byfilename[row[0]]: True for row in curs}
        for fname in listfullnames:
            results.setdefault(fname, False)

        if miscutils.fwdebug_check(3, 'FTMGMT_DEBUG'):
            miscutils.fwdebug_print(f"Metadata check results: {results}")
        return results
コード例 #20
0
    def write_outputwcl(self, outfilename=None):
        """ Write the output wcl to disk, creating its directory if needed. """

        # destination defaults to the path recorded in the input wcl
        if outfilename is None:
            outfilename = self.inputwcl['wrapper']['outputwcl']

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(f"outfilename = {outfilename}", WRAPPER_OUTPUT_PREFIX)

        # create output wcl directory if needed
        wcl_dir = miscutils.parse_fullname(outfilename, miscutils.CU_PARSE_PATH)
        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(f"outwcldir = {wcl_dir}", WRAPPER_OUTPUT_PREFIX)
        miscutils.coremakedirs(wcl_dir)

        with open(outfilename, 'w') as wclfh:
            self.outputwcl.write(wclfh, True)
コード例 #21
0
    def _gather_metadata_from_config(self, fullname, metakeys):
        """ Get values from config """
        metadata = collections.OrderedDict()

        for key_path in metakeys:
            # only the last component of a dotted wcl key matters here
            shortkey = key_path.split('.')[-1]
            if shortkey == 'fullname':
                metadata['fullname'] = fullname
            elif shortkey == 'filename':
                metadata['filename'] = miscutils.parse_fullname(fullname,
                                                                miscutils.CU_PARSE_FILENAME)
            elif shortkey == 'filetype':
                metadata['filetype'] = self.filetype
            else:
                if miscutils.fwdebug_check(6, 'FTMGMT_DEBUG'):
                    miscutils.fwdebug_print(f"INFO: wclkey={key_path}")
                # any other key is searched for in the configuration
                exists, val = self.config.search(key_path)
                if exists:
                    metadata[shortkey] = val

        return metadata
コード例 #22
0
def list_missing_archive(filemgmt, filelist, archive_name):
    """ Return list of files from given list which are not listed in archive """

    print("\tChecking which files are already registered in archive",
          flush=True)
    starttime = time.time()
    existing = filemgmt.is_file_in_archive(filelist, archive_name)
    endtime = time.time()
    print(f"({endtime - starttime:0.2f} secs)", flush=True)

    # map basename -> full name so results can be reported as full names
    filenames = {miscutils.parse_fullname(full, miscutils.CU_PARSE_BASENAME): full
                 for full in filelist}

    misslist = [filenames[base] for base in set(filenames) - set(existing)]

    print(f"\t\t{len(existing):0d} file(s) already in archive", flush=True)
    print(f"\t\t{len(misslist):0d} file(s) still to be registered to archive",
          flush=True)
    return misslist
コード例 #23
0
File: gonline.py  Project: astro-friedel/FileMgmt
    def get_file_disk_info(self, filelist, endpoint):
        """ get info on the files

            Parameters
            ----------
            filelist : list
                List of files to look at

            endpoint : str
                The endpoint to use

            Returns
            -------
            dict of the results
        """
        # endpoint_ls currently only does ls for directories not single files

        # group the requested files by their directory
        filebypath = {}
        for fname in filelist:
            (path, _) = miscutils.parse_fullname(
                fname, miscutils.CU_PARSE_PATH | miscutils.CU_PARSE_FILENAME)
            filebypath.setdefault(path, {})[fname] = True

        # list each directory on the endpoint and keep only the files
        # that were requested
        diskinfo = {}
        _ = self.endpoint_activate(endpoint)
        for path, wanted in filebypath.items():
            dirlist = self.get_directory_listing(path, endpoint, False)
            for fullname, finfo in dirlist.items():
                if fullname in wanted:
                    diskinfo[fullname] = finfo

        return diskinfo
コード例 #24
0
def handle_file(notify_file, delivery_fullname, config, filemgmt, task_id):
    """ Performs steps necessary for each file """

    filetype = None
    metadata = None
    disk_info = None
    prov = None

    # read values from notify file
    notifydict = read_notify_file(notify_file)

    # use dts_md5sum from notify_file
    dts_md5sum = None
    if 'md5sum' in notifydict:
        dts_md5sum = notifydict['md5sum']

    print "%s: dts md5sum = %s" % (delivery_fullname, dts_md5sum)

    #print config.keys()
    try: 
        filename = miscutils.parse_fullname(delivery_fullname, miscutils.CU_PARSE_FILENAME)
        miscutils.fwdebug(0, "DTSFILEHANDLER_DEBUG", "filename = %s" % filename)

        if not os.path.exists(delivery_fullname):
            print "Warning:  delivered file does not exist:"
            print "\tnotification file: %s" % notify_file
            print "\tdelivered file: %s" % delivery_fullname
            print "\tRemoving notification file and continuing"
            os.unlink(notify_file)
            return
            
        if dts_md5sum is not None:
            starttime = datetime.now()
            fileinfo_before_move = diskutils.get_single_file_disk_info(deliver_fullname, True, None) 
            endtime = datetime.now()
            print "%s: md5sum before move %s (%0.2f secs)" % (delivery_fullname, 
                                                              fileinfo_before_move['md5sum'], 
                                                              endtime-starttime)
            if fileinfo_before_move['md5sum'] != dts_md5sum:
                print "%s: dts md5sum = %s" % (delivery_fullname, dts_md5sum)
                print "%s: py  md5sum = %s" % (delivery_fullname, fileinfo_before_move['md5sum'])
                raise Exception("Error: md5sum in delivery dir not the same as DTS-provided md5sum")

        if not dtsutils.check_already_registered(filename, filemgmt):
            filetype = dtsutils.determine_filetype(filename)
            miscutils.fwdebug(3, "DTSFILEHANDLER_DEBUG", "filetype = %s" % filetype)

            # dynamically load class specific to filetype
            classkey = 'dts_filetype_class_' + filetype
            filetype_class = miscutils.dynamically_load_class(config[classkey]) 
            valDict = fmutils.get_config_vals({}, config, filetype_class.requested_config_vals())
            filetypeObj = filetype_class(dbh=filemgmt, config=valDict)

            metadata = filetypeObj.get_metadata(delivery_fullname)
            metadata['filename'] = filename 
            metadata['filetype'] = filetype
            miscutils.fwdebug(3, "DTSFILEHANDLER_DEBUG", 'len(metadata) = %s' % len(metadata))
            miscutils.fwdebug(6, "DTSFILEHANDLER_DEBUG", 'metadata = %s' % metadata)

            filetypeObj.check_valid(delivery_fullname)  # should raise exception if not valid
            prov = generate_provenance(delivery_fullname)

            miscutils.fwdebug(3, "DTSFILEHANDLER_DEBUG", 'archive_rel_path = %s' % archive_rel_path)
            miscutils.fwdebug(3, "DTSFILEHANDLER_DEBUG", 'prov = %s' % prov)

            archive_rel_path = filetypeObj.get_archive_path(delivery_fullname)
            disk_info = move_file_to_archive(config, delivery_fullname, archive_rel_path, dts_md5sum)

            save_data_db(filemgmt, task_id, {'file_1': metadata}, disk_info, prov)

            filetypeObj.post_steps(disk_info['fullname'])  # e.g., Rasicam

            # if success
            filemgmt.commit()
            os.unlink(notify_file)
        else:
            handle_bad_file(config, notify_file, delivery_fullname, filemgmt, 
                            filetype, metadata, disk_info, prov, 
                            "already registered")
    except Exception as err:
        (type, value, trback) = sys.exc_info()
        print "******************************"
        print "Error: %s" % delivery_fullname
        traceback.print_exception(type, value, trback, file=sys.stdout)
        print "******************************"

        handle_bad_file(config, notify_file, delivery_fullname, filemgmt, 
                        filetype, metadata, disk_info, prov, 
                        "Exception: %s" % err)
    except SystemExit:   # Wrappers code calls exit if cannot find header value
        handle_bad_file(config, notify_file, delivery_fullname, filemgmt, 
                        filetype, metadata, disk_info, prov, 
                        "SystemExit: Probably missing header value.  Check log for error msg.")
        
    filemgmt.commit()
コード例 #25
0
    def _gather_metadata_from_filename(self, fullname, metakeys):
        """ Parse a filename against self.filepat to extract metadata values.

            Parameters
            ----------
            fullname : str
                The name of the file to gather data about

            metakeys : list
                List of keys to look for

            Returns
            -------
            dict
                The metadata parsed from the filename

            Raises
            ------
            TypeError
                If no filepat is defined for this filetype.
            ValueError
                If the filename does not match the generated pattern.
        """
        if self.filepat is None:
            raise TypeError("None filepat for filetype %s" % self.filetype)

        # translate the wcl-style file pattern (${var} / ${var:N}) into a
        # regular expression, remembering each variable name in group order
        regex_pat = copy.deepcopy(self.filepat)
        var_pat = r"\$\{([^$}]+:\d+)\}|\$\{([^$}]+)\}"
        var_names = []
        while True:
            match = re.search(var_pat, regex_pat)
            if match is None:
                break
            if match.group(1) is not None:
                # ${var:N} form - value is 0-padded to N digits
                padmatch = re.search(r'([^:]+):(\d+)', match.group(1))
                var_names.append(padmatch.group(1))

                # capture exactly N digits (strips the 0-padding semantics)
                regex_pat = re.sub(r"\${%s}" % (match.group(1)),
                                   r'(\d{%s})' % padmatch.group(2), regex_pat)
            else:
                # plain ${var} form - capture any non-whitespace run
                regex_pat = re.sub(r"\${%s}" % (match.group(2)), r'(\S+)',
                                   regex_pat)
                var_names.append(match.group(2))

        # now that we have a re pattern, parse the bare filename for values
        filename = miscutils.parse_fullname(fullname,
                                            miscutils.CU_PARSE_FILENAME)

        match = re.search(regex_pat, filename)
        if match is None:
            miscutils.fwdebug_print("INFO: newfilepat = %s" % regex_pat)
            miscutils.fwdebug_print("INFO: filename = %s" % filename)
            raise ValueError("Pattern (%s) did not match filename (%s)" %
                             (regex_pat, filename))

        # only save values parsed from filename that were requested per metakeys
        mddict = {}
        for idx, varname in enumerate(var_names):
            if varname in metakeys:
                mddict[varname] = match.group(idx + 1)

        return mddict
コード例 #26
0
    def read_json_single(self, json_file, allMandatoryExposureKeys, debug):
        """ Parse a single SN manifest json file into a dict of exposure data.

            Parameters
            ----------
            json_file : str
                Path to the manifest file (one json document per line).

            allMandatoryExposureKeys : list
                Keys every exposure entry must provide.

            debug : bool
                Unused here; kept for interface compatibility.

            Returns
            -------
            dict
                Mapping of column name -> list of values, one per good
                (acttime > 0) exposure.

            Raises
            ------
            Exception
                If the manifest contains zero exposures.
            ValueError
                If the field parsed from set_type does not start with 'SN-'.
            KeyError
                If a mandatory key is missing from an exposure entry.
        """
        miscutils.fwdebug(3, 'DTSSNMANIFEST_DEBUG', "reading file %s" % json_file)

        allExposures = []

        my_header = {}
        numseq = {}
        all_exposures = dict()
        with open(json_file) as my_json:
            for line in my_json:
                all_data = json.loads(line)

                for key, value in all_data.items():
                    if key == 'header':
                        # read the values for the header (date and set_type are here)
                        my_head = value

                        allExposures.append(str(my_head['set_type']))
                        allExposures.append(str(my_head['createdAt']))

                    if key == 'exposures':
                        # read all the exposures that were taken for the set_type in header
                        my_header = value

                        # Total Number of exposures in manifest file
                        tot_exposures = len(my_header)

                        if tot_exposures == 0:
                            raise Exception("0 SN exposures parsed from json file")

                        for i in range(tot_exposures):
                            numseq = my_header[i]['sequence']
                            mytime = my_header[i]['acttime']
                            # NOTE(review): first_expnum is recorded but never
                            # used in this method -- confirm before removing
                            if mytime > 10 and numseq['seqnum'] == 2:
                                first_expnum = my_header[i]['expid']

                            # Validate if acttime has a meaningful value.
                            # If acttime = 0.0, it's a bad exposure; skip it.
                            if mytime == 0.0:
                                continue

                            try:
                                for mandatoryExposureKey in allMandatoryExposureKeys:
                                    miscutils.fwdebug(3, 'DTSSNMANIFEST_DEBUG', "mandatory key %s" % mandatoryExposureKey)
                                    key = str(mandatoryExposureKey)

                                    if my_header[i][mandatoryExposureKey]:
                                        miscutils.fwdebug(3, 'DTSSNMANIFEST_DEBUG', "mandatory key '%s' found %s" % (mandatoryExposureKey, my_header[i][mandatoryExposureKey]))
                                        miscutils.fwdebug(6, 'DTSSNMANIFEST_DEBUG', "allExposures in for: %s" % allExposures)

                                        # translate manifest key names to DB column names
                                        try:
                                            if key == 'acttime':
                                                key = 'EXPTIME'
                                                all_exposures[key].append(my_header[i][mandatoryExposureKey])
                                            elif key == 'filter':
                                                key = 'BAND'
                                                all_exposures[key].append(str(my_header[i][mandatoryExposureKey]))
                                            elif key == 'expid':
                                                key = 'EXPNUM'
                                                all_exposures[key].append(my_header[i][mandatoryExposureKey])
                                            else:
                                                all_exposures[key].append(my_header[i][mandatoryExposureKey])
                                        except KeyError:
                                            # first value seen for this column
                                            all_exposures[key] = [my_header[i][mandatoryExposureKey]]

                            except KeyError:
                                miscutils.fwdebug(0, 'DTSSNMANIFEST_DEBUG', "keyError: missing key %s in json entity: %s " % (mandatoryExposureKey, line))
                                raise

                        timestamp = all_exposures['date'][0]
                        nite = dtsutils.convert_UTCstr_to_nite(timestamp)

                        # get field by parsing set_type (first 5 chars, e.g. SN-X1)
                        myfield = my_head['set_type']
                        if len(myfield) > 5:
                            newfield = myfield[:5]
                        else:
                            newfield = myfield

                        camsym = 'D'   # no way to currently tell CAMSYM/INSTRUME from manifest file

                        if not newfield.startswith('SN-'):
                            raise ValueError("Invalid field (%s).  set_type = '%s'" % (newfield, my_head['set_type']))

                        # if json_file contains a path or compression extension,
                        # cut it down to only the filename
                        jsonFile = miscutils.parse_fullname(json_file, miscutils.CU_PARSE_FILENAME)

                        for i in range(tot_exposures):
                            if my_header[i]['acttime'] == 0.0:
                                continue
                            # bug fix: previously keyed on "i == 0", which
                            # raised KeyError on append when the first
                            # exposure was skipped for acttime == 0.0
                            if 'FIELD' not in all_exposures:
                                all_exposures['FIELD'] = [newfield]
                                all_exposures['CREATEDAT'] = [str(my_head['createdAt'])]
                                all_exposures['MANIFEST_FILENAME'] = [jsonFile]
                                all_exposures['NITE'] = [nite]
                                all_exposures['SEQNUM'] = [1]
                                all_exposures['CAMSYM'] = [camsym]
                            else:
                                all_exposures['FIELD'].append(newfield)
                                all_exposures['CREATEDAT'].append(str(my_head['createdAt']))
                                all_exposures['MANIFEST_FILENAME'].append(jsonFile)
                                all_exposures['NITE'].append(nite)
                                all_exposures['SEQNUM'].append(1)
                                all_exposures['CAMSYM'].append(camsym)

        # bug fix: format string was missing its %s placeholder
        miscutils.fwdebug(6, 'DTSSNMANIFEST_DEBUG', "allExposures %s" % (all_exposures))

        return all_exposures
コード例 #27
0
    def load_artifact_gtt(self, filelist):
        """ insert file artifact information into global temp table

            Parameters
            ----------
            filelist : list
                List of dictionaries, one for each file, giving the file
                metadata to store.  Filename/compression may appear under
                the upper- or lower-case column-name keys, or together as
                a single 'fullname' entry; md5sum and filesize are optional.

            Returns
            -------
            str
                The name of the temp table
        """
        # filelist is list of file dictionaries
        # returns artifact GTT table name

        # mask used whenever a combined name must be split into
        # (filename, compression extension)
        parsemask = miscutils.CU_PARSE_FILENAME | miscutils.CU_PARSE_EXTENSION

        # make sure table is empty before loading it
        self.empty_gtt(dmdbdefs.DB_GTT_ARTIFACT)

        colmap = [
            dmdbdefs.DB_COL_FILENAME, dmdbdefs.DB_COL_COMPRESSION,
            dmdbdefs.DB_COL_MD5SUM, dmdbdefs.DB_COL_FILESIZE
        ]
        rows = []
        for _file in filelist:
            miscutils.fwdebug(3, 'DESDBI_DEBUG', f"file = {_file}")
            fname = None
            comp = None
            md5sum = None
            filesize = None
            # filename column present under either key case
            # NOTE(review): if only the lower-case filename key exists while
            # the upper-case compression key also exists, the first branch
            # below would KeyError on the upper-case filename key --
            # presumably callers use a consistent key case; confirm.
            if dmdbdefs.DB_COL_FILENAME in _file or dmdbdefs.DB_COL_FILENAME.lower(
            ) in _file:
                if dmdbdefs.DB_COL_COMPRESSION in _file:
                    fname = _file[dmdbdefs.DB_COL_FILENAME]
                    comp = _file[dmdbdefs.DB_COL_COMPRESSION]
                elif dmdbdefs.DB_COL_COMPRESSION.lower() in _file:
                    fname = _file[dmdbdefs.DB_COL_FILENAME.lower()]
                    comp = _file[dmdbdefs.DB_COL_COMPRESSION.lower()]
                elif dmdbdefs.DB_COL_FILENAME in _file:
                    # no compression key given: split it off the filename value
                    (fname, comp) = miscutils.parse_fullname(
                        _file[dmdbdefs.DB_COL_FILENAME], parsemask)
                else:
                    (fname, comp) = miscutils.parse_fullname(
                        _file[dmdbdefs.DB_COL_FILENAME.lower()], parsemask)
                miscutils.fwdebug(3, 'DESDBI_DEBUG',
                                  f"fname={fname}, comp={comp}")
            elif 'fullname' in _file:
                # only a fullname given: parse filename + compression from it
                (fname,
                 comp) = miscutils.parse_fullname(_file['fullname'], parsemask)
                miscutils.fwdebug(
                    3, 'DESDBI_DEBUG',
                    f"parse_fullname: fname={fname}, comp={comp}")
            else:
                miscutils.fwdebug(3, 'DESDBI_DEBUG', f"file={_file}")
                raise ValueError(f"Invalid entry filelist({_file})")

            # optional columns: accept either key case, default stays None
            if dmdbdefs.DB_COL_FILESIZE in _file:
                filesize = _file[dmdbdefs.DB_COL_FILESIZE]
            elif dmdbdefs.DB_COL_FILESIZE.lower() in _file:
                filesize = _file[dmdbdefs.DB_COL_FILESIZE.lower()]

            if dmdbdefs.DB_COL_MD5SUM in _file:
                md5sum = _file[dmdbdefs.DB_COL_MD5SUM]
            elif dmdbdefs.DB_COL_MD5SUM.lower() in _file:
                md5sum = _file[dmdbdefs.DB_COL_MD5SUM.lower()]

            miscutils.fwdebug(
                3, 'DESDBI_DEBUG',
                f"row: fname={fname}, comp={comp}, filesize={filesize}, md5sum={md5sum}"
            )
            rows.append({
                dmdbdefs.DB_COL_FILENAME: fname,
                dmdbdefs.DB_COL_COMPRESSION: comp,
                dmdbdefs.DB_COL_FILESIZE: filesize,
                dmdbdefs.DB_COL_MD5SUM: md5sum
            })

        self.insert_many(dmdbdefs.DB_GTT_ARTIFACT, colmap, rows)
        return dmdbdefs.DB_GTT_ARTIFACT
コード例 #28
0
    def register_file_in_archive(self, filelist, archive_name):
        """ Saves filesystem information about files, like relative path
            in archive and compression extension, into FILE_ARCHIVE_INFO.

            Parameters
            ----------
            filelist : list or str
                Fullnames (str) or per-file dicts; a single fullname str
                is also accepted.  Dict keys must be lowercase column
                names and missing data must be None.

            archive_name : str
                Archive whose root the stored paths are relative to.

            Raises
            ------
            ValueError
                If a file has no matching desfile id.
        """
        # assumes files have already been declared to database (i.e., metadata)
        # caller of program must have already verified given filelist matches given archive
        # if giving fullnames, must include archive root

        archivedict = self.config['archive'][archive_name]
        archiveroot = archivedict['root']

        origfilelist = filelist
        if isinstance(origfilelist, str):
            filelist = [origfilelist]

        if filelist:
            # get id from desfile table
            gtt_name = self.load_filename_gtt(filelist)
            idsql = f"""select d.filename, d.compression, d.id
                       from desfile d, {gtt_name} g
                       where d.filename=g.filename and
                       nullcmp(d.compression, g.compression) = 1"""
            ids = {}
            curs = self.cursor()
            curs.execute(idsql)
            for row in curs:
                # bug fix: previously each filename's dict was overwritten,
                # losing ids when the same filename exists with more than
                # one compression
                ids.setdefault(row[0], {})[row[1]] = row[2]

            # create dict of info to insert into file_archive_info
            insfilelist = []
            for onefile in filelist:
                nfiledict = {'archive_name': archive_name}
                if isinstance(onefile, dict):
                    if 'filename' in onefile and 'path' in onefile and 'compression' in onefile:
                        nfiledict['filename'] = onefile['filename']
                        nfiledict['compression'] = onefile['compression']
                        path = onefile['path']
                    elif 'fullname' in onefile:
                        parsemask = miscutils.CU_PARSE_PATH | \
                                    miscutils.CU_PARSE_FILENAME | \
                                    miscutils.CU_PARSE_COMPRESSION
                        (path, nfiledict['filename'], nfiledict['compression']) = miscutils.parse_fullname(onefile['fullname'], parsemask)
                    else:
                        miscutils.fwdie(f"Error:   Incomplete info for a file to register.   Given {onefile}", 1)
                elif isinstance(onefile, str):  # fullname
                    parsemask = miscutils.CU_PARSE_PATH | miscutils.CU_PARSE_FILENAME | miscutils.CU_PARSE_COMPRESSION
                    (path, nfiledict['filename'], nfiledict['compression']) = miscutils.parse_fullname(onefile, parsemask)
                else:
                    # bug fix: previously fell through with path/filename
                    # undefined, causing a confusing NameError/KeyError later
                    miscutils.fwdie(f"Error:   Invalid entry type for a file to register.   Given {onefile}", 1)

                # make sure compression starts with .
                if nfiledict['compression'] is not None and not re.match(r'^\.', nfiledict['compression']):
                    nfiledict['compression'] = '.' + nfiledict['compression']

                # get matching desfile id
                if nfiledict['filename'] in ids:
                    if nfiledict['compression'] in ids[nfiledict['filename']]:
                        nfiledict['desfile_id'] = int(ids[nfiledict['filename']][nfiledict['compression']])
                    else:
                        raise ValueError(f'Missing desfile id for file - no matching compression ({onefile})')
                else:
                    raise ValueError(f'Missing desfile id for file - no matching filename ({onefile})')

                if re.match(r'^/', path):   # if path is absolute
                    # get rid of the archive root from the path to store
                    if re.match(fr'^{archiveroot}/', path):
                        nfiledict['path'] = path[len(archiveroot) + 1:]
                    else:
                        # retry with symlinks and '..' resolved on both sides
                        canon_archroot = os.path.realpath(archiveroot)
                        canon_path = os.path.realpath(path)

                        if re.match(fr'^{canon_archroot}/', canon_path):
                            nfiledict['path'] = canon_path[len(canon_archroot) + 1:]
                        else:
                            miscutils.fwdie((f"Error: file's absolute path ({path}) does not " +
                                             f"contain the archive root ({archiveroot}) (filedict:{nfiledict})"), 1)
                else:
                    nfiledict['path'] = path # assume only contains the relative path within the archive

                insfilelist.append(nfiledict)

            colnames = ['desfile_id', 'filename', 'compression', 'path', 'archive_name']
            try:
                self.insert_many_indiv('FILE_ARCHIVE_INFO', colnames, insfilelist)
            except Exception:
                # dump the attempted insert before re-raising for diagnosis
                print("Error from insert_many_indiv in register_file_archive")
                print("colnames =", colnames)
                print("filelist =", insfilelist)
                raise
コード例 #29
0
    def save_provenance(self, execsect, exwcl, infiles, outfiles, exitcode):
        """ Create provenance wcl for one exec section.

            Records 'used' (inputs) and 'was_derived_from' (parent/child
            file pairs declared in the exec wcl's derivation list) into
            self.outputwcl's provenance section.

            Parameters
            ----------
            execsect : str
                Name of the exec section being recorded.
            exwcl : dict
                The exec section's wcl (may contain a derivation list).
            infiles : dict
                Label -> list of input fullnames.
            outfiles : dict
                Label -> list of output fullnames that actually exist.
            exitcode : int
                Exit code of the wrapped program; non-zero relaxes
                missing-output checks.

            Returns
            -------
            dict
                The provenance section of the output wcl.
        """
        #pylint: disable=unbalanced-tuple-unpacking
        self.start_exec_task('save_provenance')

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print("INFO: Beg", WRAPPER_OUTPUT_PREFIX)
        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(f"INFO: infiles = {infiles}",
                                    WRAPPER_OUTPUT_PREFIX)
            miscutils.fwdebug_print(f"INFO: outfiles = {outfiles}",
                                    WRAPPER_OUTPUT_PREFIX)

        num_errs = 0

        # convert probably fullnames in outexist to filename+compression
        new_outfiles = collections.OrderedDict()
        for exlabel, exlist in outfiles.items():
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(
                    f"INFO: exlabel={exlabel} exlist={exlist}",
                    WRAPPER_OUTPUT_PREFIX)
            newlist = []
            for fullname in exlist:
                basename = miscutils.parse_fullname(
                    fullname, miscutils.CU_PARSE_BASENAME)
                newlist.append(basename)
            if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                miscutils.fwdebug_print(f"INFO: newlist={newlist}",
                                        WRAPPER_OUTPUT_PREFIX)

            new_outfiles[exlabel] = newlist

        prov = self.outputwcl[intgdefs.OW_PROV_SECT]

        # used: record all input basenames under this exec section,
        # keeping a per-label copy for parent lookups below
        new_infiles = {}
        if infiles:
            all_infiles = []
            for key, sublist in infiles.items():
                new_infiles[key] = []
                for fullname in sublist:
                    basename = miscutils.parse_fullname(
                        fullname, miscutils.CU_PARSE_BASENAME)
                    all_infiles.append(basename)
                    new_infiles[key].append(basename)
            prov[provdefs.PROV_USED][execsect] = provdefs.PROV_DELIM.join(
                all_infiles)

        # was_generated_by - done by PFW when saving metadata

        # was_derived_from: each derivation entry is "parent_sect:child_sect"
        if intgdefs.IW_DERIVATION in exwcl:
            wdf = prov[provdefs.PROV_WDF]
            derived_pairs = miscutils.fwsplit(exwcl[intgdefs.IW_DERIVATION],
                                              provdefs.PROV_DELIM)
            for dpair in derived_pairs:
                if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print(f"INFO: dpair = {dpair}",
                                            WRAPPER_OUTPUT_PREFIX)
                (parent_sect, child_sect) = miscutils.fwsplit(dpair, ':')[:2]
                if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print(
                        f"INFO: parent_sect = {parent_sect}",
                        WRAPPER_OUTPUT_PREFIX)
                    miscutils.fwdebug_print(f"INFO: child_sect = {child_sect}",
                                            WRAPPER_OUTPUT_PREFIX)

                optout = self.get_optout(child_sect)
                #parent_key = miscutils.fwsplit(parent_sect, '.')[-1]
                #child_key = miscutils.fwsplit(child_sect, '.')[-1]

                if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                    #miscutils.fwdebug_print("INFO: parent_key = %s" % parent_key,
                    #                        WRAPPER_OUTPUT_PREFIX)
                    #miscutils.fwdebug_print("INFO: child_key = %s" % child_key,
                    #                        WRAPPER_OUTPUT_PREFIX)
                    miscutils.fwdebug_print(f"INFO: optout = {optout}",
                                            WRAPPER_OUTPUT_PREFIX)
                    miscutils.fwdebug_print(
                        f"INFO: new_outfiles.keys = {list(new_outfiles.keys())}",
                        WRAPPER_OUTPUT_PREFIX)
                    miscutils.fwdebug_print(
                        f"INFO: new_outfiles = {new_outfiles}",
                        WRAPPER_OUTPUT_PREFIX)

                # a missing child output is only an error when the output is
                # not optional and the wrapped program exited cleanly
                if child_sect not in new_outfiles or \
                        new_outfiles[child_sect] is None or \
                        not new_outfiles[child_sect]:
                    if optout:
                        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                            miscutils.fwdebug_print(
                                f"INFO: skipping missing optional output {parent_sect}:{child_sect}",
                                WRAPPER_OUTPUT_PREFIX)
                    elif exitcode != 0:
                        if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                            miscutils.fwdebug_print(
                                f"INFO: skipping missing output due to non-zero exit code {parent_sect}:{child_sect}",
                                WRAPPER_OUTPUT_PREFIX)
                    else:
                        miscutils.fwdebug_print(
                            f"ERROR: Missing child output files in wdf tuple ({parent_sect}:{child_sect})",
                            WRAPPER_OUTPUT_PREFIX)
                        num_errs += 1
                else:
                    # derived_N keys accumulate across calls via instance state
                    self.last_num_derived += 1
                    key = 'derived_%d' % self.last_num_derived
                    if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                        miscutils.fwdebug_print(f"INFO: key = {key}",
                                                WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            f"INFO: before wdf = {prov[provdefs.PROV_WDF]}",
                            WRAPPER_OUTPUT_PREFIX)

                    if parent_sect not in infiles and parent_sect not in new_outfiles:
                        miscutils.fwdebug_print(f"parent_sect = {parent_sect}",
                                                WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            f"infiles.keys() = {list(infiles.keys())}",
                            WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            f"outfiles.keys() = {list(outfiles.keys())}",
                            WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            f"used = {exwcl[intgdefs.IW_INPUTS]}",
                            WRAPPER_OUTPUT_PREFIX)
                        miscutils.fwdebug_print(
                            f"ERROR: Could not find parent files for {dpair}",
                            WRAPPER_OUTPUT_PREFIX)
                        num_errs += 1
                    else:
                        wdf[key] = collections.OrderedDict()
                        wdf[key][
                            provdefs.PROV_CHILDREN] = provdefs.PROV_DELIM.join(
                                new_outfiles[child_sect])
                        if parent_sect in infiles:
                            wdf[key][provdefs.
                                     PROV_PARENTS] = provdefs.PROV_DELIM.join(
                                         new_infiles[parent_sect])
                        elif parent_sect in new_outfiles:
                            # this output was generated within same
                            #   program/wrapper from other output files
                            parents = []
                            for outparent in outfiles[parent_sect]:
                                parents.append(
                                    miscutils.parse_fullname(
                                        outparent,
                                        miscutils.CU_PARSE_FILENAME))
                            wdf[key][provdefs.
                                     PROV_PARENTS] = provdefs.PROV_DELIM.join(
                                         parents)

                if miscutils.fwdebug_check(6, 'BASICWRAP_DEBUG'):
                    miscutils.fwdebug_print(
                        f"INFO: after wdf = {prov[provdefs.PROV_WDF]}",
                        WRAPPER_OUTPUT_PREFIX)
            # drop an empty was_derived_from section entirely
            if not wdf:
                del prov[provdefs.PROV_WDF]

        if miscutils.fwdebug_check(3, 'BASICWRAP_DEBUG'):
            miscutils.fwdebug_print(f"INFO: End (num_errs = {num_errs:d})",
                                    WRAPPER_OUTPUT_PREFIX)

        self.end_exec_task(num_errs)
        return prov
コード例 #30
0
def restore_files(util, args, data):
    """ Method to restore file to the file system

        Parameters
        ----------
        util : Util instance
        args : dict
            Command line arguments
        data : dict
            Data on the files to restore
    """
    tape_tar = tarfile.open(args['tape'], mode='r')
    names = tape_tar.getnames()
    if args['unit'] not in names:
        raise Exception('Unit tar %s not found in tape tar %s, this should not happen' % (args['unit'], args['tape']))
    unit_tar = tarfile.open(fileobj=tape_tar.extractfile(args['unit']))

    root_path = '.'
    if args['restore']:
        root_path = data['archive']
    else:
        args['update_fai'] = False

    if args['filename'] or args['path']:
        if args['filename']:
            regex = re.compile(r'%s\Z' % (args['filename']))
        else:
            regex = re.compile(r'\A%s' % (args['path']))
        allnames = [m for m in unit_tar.getnames() if regex.search(m)]
        unit_tar.extractall(path=root_path, members=[m for m in unit_tar.getmembers() if regex.search(m.name)])
    else:
        unit_tar.extractall(path=root_path)
        allnames = unit_tar.getnames()
    if args['update_fai']:
        # get only the file names
        files = [m for m in allnames if unit_tar.getmember(m).isfile()]
        full_listing = {}
        for fln in files:
            full_filename = fln.split('/')[-1]
            direct = fln.replace('/' + full_filename, '')
            (filename, compression) = miscutils.parse_fullname(full_filename, miscutils.CU_PARSE_FILENAME | miscutils.CU_PARSE_COMPRESSION)
            full_listing[full_filename] = {'filename': filename,
                                           'compression': compression,
                                           'path': direct,
                                           'desfile_id': None,
                                           'archive': args['archive']}
        gtt = util.load_gtt_filename(full_listing.values())
        cur = util.cursor()
        cur.execute('select df.id, df.filename, df.compression from desfile df, %s gtt where gtt.filename=df.filename and gtt.compression=df.compression' % (gtt))
        results = cur.fetchall()
        desfile_ids = []
        for res in results:
            full_listing[res[1] + res[2]]['desfile_id'] = res[0]
            desfile_ids.append(res[0])
        # find any files not resgistered in desfile
        bad_files = {}
        if len(desfile_ids) != len(full_listing):
            for key, value in full_listing.iteritems():
                if not value['desfile_id']:
                    bad_files[key] = value
            full_listing = {key:full_listing[key] for key in full_listing if key not in bad_files.keys()}

        #gttid = util.conn.load_id_gtt(desfile_ids)
        # get files which are alread in file_archive_info
        #cur.execute('select desfile_id from file_archive_info fai, %s gtt where gtt.id=fai.desfile_id' % (gttid))
        #results = cur.fetchall()
        #loaded_ids = []
        #for res in results:
        #    loaded_ids.append(res[0])

        cur.prepare("merge into file_archive_info fai using dual on (fai.desfile_id=:desfile_id) when matched then update set path=:path,archive_name=:archive when not matched then insert (filename, archive_name, path, compression, desfile_id) values (:filename, :archive, :path, :compression, :desfile_id)")

        cur.executemany(None, full_listing)
        util.commit()
        if bad_files:
            print "WARNING: The following files we not added to FILE_ARCHIVE_INFO because they do not have entries"
            print "in DESFILE. They will need to be manually ingested with register_files.py"
            for key, value in bad_files.iteritems():
                print os.path.join(value['path'], key)
            print ''
        # DO CHECK
    if args['verify'] and args['archive']:
        print "Starting integrity check of files..."
        comp_args = {'dbh': util,
                     'des_services': args['des_services'],
                     'section': args['section'],
                     'archive': args['archive'],
                     'md5sum': True,
                     'verbose': args['verbose'],
                     'silent': False}
        if args['pfwid']:
            comp_args['pfwid'] = args['pfwid']
        elif args['reqnum']:
            comp_args['reqnum'] = args['reqnum']
            comp_args['unitname'] = args['unitname']
            comp_args['attnum'] = args['attnum']
        else:
            if args['filename']:
                fullpath = allnames[0]
                args['path'] = fullpath[:fullpath.rfind('/')]
            comp_args['relpath'] = args['path']

        cu.compare(**comp_args)
コード例 #31
0
def gather_metadata_file(hdulist, fullname, metadata_defs, extra_info):
    """ Gather metadata for a single FITS file.

        Parameters
        ----------
        hdulist : HDUList or None
            Already-open FITS file; opened from fullname when None.
        fullname : str or None
            Full path of the file; taken from hdulist when None.
        metadata_defs : dict
            Which metadata to collect.  Supported sections: 'wcl' (values
            pulled from extra_info) and 'headers' (values pulled from FITS
            headers, keyed by HDU).
        extra_info : dict
            Any info needed for wcl metadata; computed values are already
            created and stored keyed by wcl key.

        Returns
        -------
        dict
            The collected metadata, always including 'fullname'.
    """
    if hdulist is None:
        # BUGFIX: astropy.io.fits.open expects mode 'readonly', not 'r'
        hdulist = fits.open(fullname, 'readonly')
    elif fullname is None:
        fullname = hdulist.filename()

    if miscutils.fwdebug_check(3, 'FM_METAUTILS_DEBUG'):
        miscutils.fwdebug_print(f"INFO: Beg file={fullname}")
    if miscutils.fwdebug_check(6, 'FM_METAUTILS_DEBUG'):
        miscutils.fwdebug_print(f"INFO: metadata_defs={metadata_defs}")
        miscutils.fwdebug_print(f"INFO: extra_info={extra_info}")

    metadata = {'fullname': fullname}

    if 'wcl' in metadata_defs:
        if miscutils.fwdebug_check(6, 'FM_METAUTILS_DEBUG'):
            miscutils.fwdebug_print(f"INFO: wcl={metadata_defs['wcl']}")

        # accept either a comma-separated string or an already-split list
        if isinstance(metadata_defs['wcl'], str):
            wcllist = miscutils.fwsplit(metadata_defs['wcl'], ',')
        else:
            wcllist = metadata_defs['wcl']
        for wclkey in wcllist:
            metakey = wclkey.split('.')[-1]
            if metakey == 'fullname':
                metadata['fullname'] = fullname
            elif metakey == 'filename':
                metadata['filename'] = miscutils.parse_fullname(
                    fullname, miscutils.CU_PARSE_FILENAME)
            else:
                if miscutils.fwdebug_check(3, 'FM_METAUTILS_DEBUG'):
                    miscutils.fwdebug_print(f"INFO: wclkey={wclkey}")
                metadata[metakey] = extra_info[wclkey]

    if 'headers' in metadata_defs:
        if miscutils.fwdebug_check(3, 'FM_METAUTILS_DEBUG'):
            miscutils.fwdebug_print(
                f"INFO: headers={metadata_defs['headers']}")
        for hdu, keys in metadata_defs['headers'].items():
            if miscutils.fwdebug_check(6, 'FM_METAUTILS_DEBUG'):
                miscutils.fwdebug_print(f"INFO: hdu={hdu}, keys={keys}")
            # BUGFIX: was isinstance(metadata_defs['wcl'], str) — a
            # copy/paste error that tested the wrong section and raised
            # KeyError when no 'wcl' section existed
            if isinstance(keys, str):
                keylist = miscutils.fwsplit(keys, ',')
            else:
                keylist = keys
            for key in keylist:
                try:
                    metadata[key] = fitsutils.get_hdr_value(
                        hdulist, key.upper(), hdu)
                except KeyError:
                    # missing header keywords are non-fatal here
                    if miscutils.fwdebug_check(3, 'FM_METAUTILS_DEBUG'):
                        miscutils.fwdebug_print(
                            f"INFO: didn't find key {key} in {hdu} header of file {fullname}"
                        )

    if miscutils.fwdebug_check(3, 'FM_METAUTILS_DEBUG'):
        miscutils.fwdebug_print("INFO: end")
    return metadata
コード例 #32
0
def read_fullnames_from_listfile(listfile, linefmt, colstr):
    """ Read a list file returning fullnames from the list

        Parameters
        ----------
        listfile : str
            The file to read

        linefmt : str
            The format of the lines. Acceptable formats are

            * 'textcsv' - a csv style file
            * 'texttab' - a tab separated style file
            * 'textsp' - a space separated style file

        colstr : str
            A string representation of the column headers.

        Returns
        -------
        dict
            Dictionary mapping each file section name to the list of
            fullnames found for it.
    """

    if miscutils.fwdebug_check(3, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print('colstr=%s' % colstr)

    columns = convert_col_string_to_list(colstr, False)

    if miscutils.fwdebug_check(3, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print('columns=%s' % columns)

    # map column position -> file section for every '<sect>.fullname' column
    fullnames = {}
    pos2fsect = {}
    for pos, col in enumerate(columns):
        lcol = col.lower()
        if lcol.endswith('.fullname'):
            filesect = lcol[:-9]
            pos2fsect[pos] = filesect
            fullnames[filesect] = []
        # else a data column instead of a filename

    if miscutils.fwdebug_check(3, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print('pos2fsect=%s' % pos2fsect)

    if linefmt in ('config', 'wcl'):
        miscutils.fwdie(
            'Error:  wcl list format not currently supported (%s)' % listfile,
            1)
    else:
        # parse mask is loop-invariant; build it once
        parsemask = miscutils.CU_PARSE_PATH | miscutils.CU_PARSE_FILENAME | \
                    miscutils.CU_PARSE_COMPRESSION
        with open(listfile, 'r') as listfh:
            for line in listfh:
                line = line.strip()

                # convert line into python list
                if linefmt == 'textcsv':
                    lineinfo = miscutils.fwsplit(line, ',')
                elif linefmt == 'texttab':
                    lineinfo = miscutils.fwsplit(line, '\t')
                elif linefmt == 'textsp':
                    lineinfo = miscutils.fwsplit(line, ' ')
                else:
                    miscutils.fwdie('Error:  unknown linefmt (%s)' % linefmt,
                                    1)

                # save each fullname in line
                for pos in pos2fsect:
                    # use common routine to parse actual fullname (e.g., remove [0])
                    (path, filename, compression) = miscutils.parse_fullname(
                        lineinfo[pos], parsemask)
                    fname = "%s/%s" % (path, filename)
                    if compression is not None:
                        fname += compression
                    fullnames[pos2fsect[pos]].append(fname)

    if miscutils.fwdebug_check(6, 'INTGMISC_DEBUG'):
        miscutils.fwdebug_print('fullnames = %s' % fullnames)
    return fullnames
コード例 #33
0
    def transfer_directory(self, relpath):
        """ Transfer a directory between two archives via Globus Online.

            Parameters
            ----------
            relpath : str
                The directory (relative to the archive roots) to transfer

            Returns
            -------
            dict
                Per-filename transfer results; entries missing from the
                destination carry an 'err' key.
        """
        if miscutils.fwdebug_check(0, "ARCHIVE_TRANSFER_GLOBUSONLINE"):
            miscutils.fwdebug_print("\trelpath: %s" % relpath)

        # absolute paths inside the source and destination archives
        src_dir = "%s/%s" % (self.src_archive_info['root'], relpath)
        dst_dir = "%s/%s" % (self.dst_archive_info['root'], relpath)

        # locate the X509 proxy credential: config wins over environment
        if X509_USER_PROXY in self.config:
            credfile = self.config[X509_USER_PROXY]
        elif 'X509_USER_PROXY' in os.environ:
            credfile = os.environ['X509_USER_PROXY']
        else:
            credfile = None

        if credfile is None:
            miscutils.fwdie(
                'Error:  Cannot determine location of X509 proxy.  Either set in config or environment.',
                1)

        # how long the delegated proxy stays valid (hours)
        proxy_valid_hrs = 12
        if PROXY_VALID_HRS in self.config:
            proxy_valid_hrs = self.config[PROXY_VALID_HRS]

        if GO_USER not in self.config:
            miscutils.fwdie('Error:  Missing %s in config' % GO_USER, 1)

        client = globonline.DESGlobusOnline(self.src_archive_info,
                                            self.dst_archive_info, credfile,
                                            self.config[GO_USER],
                                            proxy_valid_hrs)
        _ = client.transfer_directory(src_dir, dst_dir)

        results = {}

        # files present at the destination after the transfer succeeded
        dst_listing = client.get_directory_listing(
            dst_dir, self.dst_archive_info['endpoint'], True)
        for full, info in dst_listing.items():
            base = miscutils.parse_fullname(full,
                                            miscutils.CU_PARSE_FILENAME)
            if info is None:  # framework requires these labels per file
                continue
            if info['type'] == 'file':
                results[base] = info
                results[base]['filesize'] = info['size']
                results[base]['fullname'] = full

        # anything in the source listing but absent at the destination failed
        src_listing = client.get_directory_listing(
            src_dir, self.src_archive_info['endpoint'], True)
        for full, info in src_listing.items():
            base = miscutils.parse_fullname(full,
                                            miscutils.CU_PARSE_FILENAME)
            if info is None or info['type'] != 'file' or base in results:
                continue
            results[base] = info
            results[base]['filesize'] = info['size']
            results[base]['fullname'] = full
            results[base]['err'] = 'Unknown error'

        return results
コード例 #34
0
def check_single_valid(keywords, fullname,
                       verbose):  # should raise exception if not valid
    """ Check whether the given file is a valid raw file

        Parameters
        ----------
        keywords : dict
            Keywords to look for

        fullname : str
            The name of the file

        verbose : bool
            Whether or not to print out extra info to stdout

        Returns
        -------
        bool
    """

    # check fits file
    hdulist = pyfits.open(fullname)
    prihdr = hdulist[0].header

    # check exposure has correct filename (sometimes get NOAO-science-archive renamed exposures)
    correct_filename = prihdr['FILENAME']
    actual_filename = miscutils.parse_fullname(fullname,
                                               miscutils.CU_PARSE_FILENAME)
    if actual_filename != correct_filename:
        raise ValueError('Error: invalid filename (%s)' % actual_filename)

    instrume = prihdr['INSTRUME'].lower()

    req_num_hdus = -1
    if instrume == 'decam':
        req_num_hdus = 71
    else:
        raise ValueError('Error:  Unknown instrume (%s)' % instrume)

    # check # hdus
    num_hdus = len(hdulist)
    if num_hdus != req_num_hdus:
        raise ValueError('Error:  Invalid number of hdus (%s)' % num_hdus)

    # check keywords
    for hdunum in range(0, num_hdus):
        hdr = hdulist[hdunum].header
        (req, want, extra) = check_header_keywords(keywords, hdunum, hdr)

        if verbose > 1:
            if want is not None and want:
                print "HDU #%02d Missing requested keywords: %s" % (hdunum,
                                                                    want)
            if extra is not None and extra:
                print "HDU #%02d Extra keywords: %s" % (hdunum, extra)

        if req is not None and req:
            raise ValueError(
                'Error: HDU #%02d Missing required keywords (%s)' %
                (hdunum, req))

    return True
コード例 #35
0
    def get_file_archive_info_path(self, path, arname, compress_order=fmdefs.FM_PREFER_COMPRESSED):
        """ Get the archive info of a directory

            Parameters
            ----------
            path : str
                The path to probe

            arname : str
                Name of the archive to look in

            compress_order : list
                What order to look for the file in, compressed first or uncompressed first.
                Default is filemgmt_defs.FM_PREFER_COMPRESSED

            Returns
            -------
            dict
                The files and their info
        """
        # sanity checks
        if 'archive' not in self.config:
            miscutils.fwdie('Error: Missing archive section in config', 1)

        if arname not in self.config['archive']:
            miscutils.fwdie('Error: Invalid archive name (%s)' % arname, 1)

        if 'root' not in self.config['archive'][arname]:
            miscutils.fwdie('Error: Missing root in archive def (%s)' % self.config['archive'][arname], 1)

        if not isinstance(compress_order, list):
            miscutils.fwdie('Error:  Invalid compress_order.  It must be a list of compression extensions (including None)', 1)

        # walk archive to get all files
        fullnames = {}
        for p in compress_order:
            fullnames[p] = {}

        root = self.config['archive'][arname]['root']
        root = root.rstrip("/")  # canonicalize - remove trailing / to ensure

        list_by_name = {}
        for (dirpath, _, filenames) in os.walk(root + '/' + path):
            for fname in filenames:
                d = {}
                (d['filename'], d['compression']) = miscutils.parse_fullname(fname, 3)
                d['filesize'] = os.path.getsize("%s/%s" % (dirpath, fname))
                d['path'] = dirpath[len(root)+1:]
                if d['compression'] is None:
                    compext = ""
                else:
                    compext = d['compression']
                d['rel_filename'] = "%s/%s%s" % (d['path'], d['filename'], compext)
                fullnames[d['compression']][d['filename']] = d
                list_by_name[d['filename']] = True

        print "uncompressed:", len(fullnames[None])
        print "compressed:", len(fullnames['.fz'])

        # go through given list of filenames and find archive location and compreesion
        archiveinfo = {}
        for name in list_by_name.keys():
            #print name
            for p in compress_order:    # follow compression preference
                #print "p = ", p
                if name in fullnames[p]:
                    archiveinfo[name] = fullnames[p][name]
                    break

        print "archiveinfo = ", archiveinfo
        return archiveinfo