Code example #1
def read_archfile(i, f, filetype, row, colnames, archfiles, db):
    """Read filename ``f`` with index ``i`` (position within list of filenames).  The
    file has type ``filetype`` and will be added to MSID file at row index ``row``.
    ``colnames`` is the list of column names for the content type (not used here).
    """
    # Check if filename is already in archfiles.  If so then abort further processing.
    filename = os.path.basename(f)
    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?',
                   (filename, )):
        logger.verbose(
            'File %s already in archfiles - unlinking and skipping' % f)
        os.unlink(f)
        return None, None

    # Read FITS archive file and accumulate data into dats list and header into headers dict
    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    hdus = pyfits.open(f)
    hdu = hdus[1]

    try:
        dat = converters.convert(hdu.data, filetype['content'])

    except converters.NoValidDataError:
        # When creating files allow NoValidDataError
        hdus.close()
        logger.warning(
            'WARNING: no valid data in data file {}'.format(filename))
        return None, None

    except converters.DataShapeError as err:
        hdus.close()
        logger.warning(
            'WARNING: skipping file {} with bad data shape: ASCDSVER={} {}'.
            format(filename, hdu.header['ASCDSVER'], err))
        return None, None

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    archfiles_row = dict(
        (x, hdu.header.get(x.upper())) for x in archfiles_hdr_cols)
    archfiles_row['checksum'] = hdu.header.get('checksum') or hdu._checksum
    archfiles_row['rowstart'] = row
    archfiles_row['rowstop'] = row + len(dat)
    archfiles_row['filename'] = filename
    archfiles_row['filetime'] = int(
        re.search(r'(\d+)', archfiles_row['filename']).group(1))
    filedate = DateTime(archfiles_row['filetime']).date
    year, doy = (int(x)
                 for x in re.search(r'(\d\d\d\d):(\d\d\d)', filedate).groups())
    archfiles_row['year'] = year
    archfiles_row['doy'] = doy
    hdus.close()

    return dat, archfiles_row
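The filetime/year/doy bookkeeping above compresses several steps into a few lines. Here is a minimal standalone sketch of what that parsing does, assuming a CXC-style archive filename that embeds the file time in seconds (the filename below is hypothetical):

import re
from Chandra.Time import DateTime

filename = 'acisf123456789N001_eng0.fits.gz'  # hypothetical archive filename
# The first run of digits in the name is taken as the file time (CXC seconds).
filetime = int(re.search(r'(\d+)', filename).group(1))
# DateTime(...).date gives a 'YYYY:DOY:HH:MM:SS.sss' string; the year and
# day-of-year are pulled back out of it for the archfiles table.
filedate = DateTime(filetime).date
year, doy = (int(x) for x in re.search(r'(\d\d\d\d):(\d\d\d)', filedate).groups())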
Code example #2
File: update_archive.py, project: sot/eng_archive
def read_archfile(i, f, filetype, row, colnames, archfiles, db):
    """Read filename ``f`` with index ``i`` (position within list of filenames).  The
    file has type ``filetype`` and will be added to MSID file at row index ``row``.
    ``colnames`` is the list of column names for the content type (not used here).
    """
    # Check if filename is already in archfiles.  If so then abort further processing.
    filename = os.path.basename(f)
    if db.fetchall('SELECT filename FROM archfiles WHERE filename=?', (filename,)):
        logger.verbose('File %s already in archfiles - unlinking and skipping' % f)
        os.unlink(f)
        return None, None

    # Read FITS archive file and accumulate data into dats list and header into headers dict
    logger.info('Reading (%d / %d) %s' % (i, len(archfiles), filename))
    hdus = pyfits.open(f, character_as_bytes=True)
    hdu = hdus[1]

    try:
        dat = converters.convert(hdu.data, filetype['content'])

    except converters.NoValidDataError:
        # When creating files allow NoValidDataError
        hdus.close()
        logger.warning('WARNING: no valid data in data file {}'.format(filename))
        return None, None

    except converters.DataShapeError as err:
        hdus.close()
        logger.warning('WARNING: skipping file {} with bad data shape: ASCDSVER={} {}'
                       .format(filename, hdu.header['ASCDSVER'], err))
        return None, None

    # Accumulate relevant info about archfile that will be ingested into
    # MSID h5 files.  Commit info before h5 ingest so if there is a failure
    # the needed info will be available to do the repair.
    archfiles_row = dict((x, hdu.header.get(x.upper())) for x in archfiles_hdr_cols)
    archfiles_row['checksum'] = hdu.header.get('checksum') or hdu._checksum
    archfiles_row['rowstart'] = row
    archfiles_row['rowstop'] = row + len(dat)
    archfiles_row['filename'] = filename
    archfiles_row['filetime'] = int(re.search(r'(\d+)', archfiles_row['filename']).group(1))
    filedate = DateTime(archfiles_row['filetime']).date
    year, doy = (int(x) for x in re.search(r'(\d\d\d\d):(\d\d\d)', filedate).groups())
    archfiles_row['year'] = year
    archfiles_row['doy'] = doy
    hdus.close()

    return dat, archfiles_row
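For context, a minimal sketch (not from the source) of how read_archfile() might be driven over a list of fetched files, committing each archfiles_row before the h5 ingest as the comment above describes. The loop variables and the Ska.DBI-style db.insert()/db.commit() calls are assumptions:

dats = []
for i, f in enumerate(archfiles):
    dat, archfiles_row = read_archfile(i, f, filetype, row, colnames, archfiles, db)
    if dat is None:
        continue  # file already ingested or had no usable data
    dats.append(dat)
    db.insert(archfiles_row, 'archfiles')  # assumed Ska.DBI-style insert
    db.commit()  # commit bookkeeping before touching the h5 files
    row += len(dat)  # next file starts where this one stopped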
Code example #3
    fitsdir = os.path.abspath(os.path.join(outroot, content))

    if os.path.exists(os.path.join('data', content)):
        print "Skipping", filetype
        continue
    print filetype

    # If files are already in the final cxc archive location:
    # fitsfiles = sorted(glob.glob('/data/cosmos2/eng_archive/data/acisdeahk/arch/????/???/*.fits.gz'))
    fitsfiles = sorted(glob.glob(os.path.join(fitsdir, filetype['fileglob'])))
    if not fitsfiles:
        print 'No files'
        continue

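    # Read the newest FITS file to establish the column names/dtypes and the median
    # sample spacing, then pre-size the h5 column files for roughly 12 years of data.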
    dat = Ska.Table.read_fits_table(fitsfiles[-1])
    dat = converters.convert(dat, filetype['content'])
    dt = np.median(dat['TIME'][1:] - dat['TIME'][:-1])
    print 'dt=', dt
    n_rows = int(86400 * 365 * 12 / dt)
    colnames = set(dat.dtype.names)
    colnames_all = set(dat.dtype.names)
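    # Drop columns whose data is multi-dimensional; only 1-D (one value per row)
    # columns get their own h5 column file.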
    for colname in colnames_all:
        if len(dat[colname].shape) > 1:
            print 'Removing column', colname
            colnames.remove(colname)
    for colname in colnames:
        make_h5_col_file(dat, content, colname, n_rows)

    headers = dict()
    max_size = 1e8
    dats_size = 0
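make_h5_col_file() itself is not shown in these excerpts. As a rough sketch only, it could create one extensible HDF5 array per column with PyTables, pre-sized by n_rows; the file layout, names, and use of a bare EArray below are assumptions, not the project's actual implementation:

import os
import tables

def make_h5_col_file(dat, content, colname, n_rows):
    """Create an extensible HDF5 file for one column (illustrative sketch only)."""
    filename = os.path.join('data', content, colname + '.h5')  # assumed layout
    h5 = tables.open_file(filename, mode='w', title=colname)
    atom = tables.Atom.from_dtype(dat[colname].dtype)
    # Extensible along the first axis; data gets appended during ingest.
    h5.create_earray(h5.root, 'data', atom=atom, shape=(0,),
                     title=colname, expectedrows=n_rows)
    h5.close()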