Example #1
def make_abscal_hdf(offsets_file, hdf_out, dataset='abscal'):
    if hdf_out is None:
        # Try to open offsets_file as hdf ...
        hdf_out = offsets_file
        offsets_file = None
    # Read offsets ...
    data = moby2.util.StructDB.from_column_file(offsets_file,
                                                [('obs:obs_id', 0), ('dx', 5),
                                                 ('dy', 6), ('gamma', 6)])
    data['gamma'] = 0.

    with h5py.File(hdf_out, 'a') as h:
        rs = metadata.ResultSet(data.dtype.names)
        rs.rows.extend(list(data))
        io.metadata.write_dataset(rs, h, dataset)

    scheme = metadata.ManifestScheme()\
             .add_range_match('obs:timestamp')\
             .add_data_field('loader')\
             .add_data_field('dataset')
    man = metadata.ManifestDb(scheme=scheme)
    man.add_entry(
        {
            'obs:timestamp': (0, 2e9),
            'dataset': dataset,
            'loader': 'actpol_pointofs'
        },
        hdf_out,
        commit=False)
    man.conn.commit()
    return man
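
A minimal usage sketch for the builder above; the file names are hypothetical, and the returned ManifestDb still needs to be written out with to_file:

# Hypothetical inputs: a plain-text offsets file and an HDF5 output path.
man = make_abscal_hdf('abscal_offsets.txt', 'abscal.h5', dataset='abscal')
man.to_file('abscal.sqlite')
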
Example #2
    def setUp(self):
        # Init.
        scheme = metadata.ManifestScheme()
        scheme.add_exact_match('array')
        scheme.add_range_match('time', dtype='float')
        scheme.add_data_field('also_data')
        self.scheme = scheme
        self.manifest = metadata.ManifestDb(scheme=scheme)
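
A later test method might then add an entry against this scheme and look it up, roughly as below; the array name, time range, and filename are illustrative, and ManifestDb.match is assumed to return the matching entry (or None):

    def test_match(self):
        # Illustrative only: one entry for array 'ar1' covering a time range.
        self.manifest.add_entry({'array': 'ar1', 'time': (1000., 2000.),
                                 'also_data': 'hello'},
                                filename='results_ar1.h5')
        # An exact array name plus a time inside the range should resolve.
        match = self.manifest.match({'array': 'ar1', 'time': 1500.})
        self.assertIsNotNone(match)
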
Example #3
def _cuts_and_cal_helper(root_dir, loader, restrictions, db_in, re_suffix,
                         source_prefix):
    """Scan a depot for cuts or cal results, and create or update a
    ManifestDb.

    Args:
      root_dir (str): Base directory to scan.  Any file in this tree
        matching the pattern will be kept.
      loader (str): String to set 'loader' field to in the output db.
      restrictions (dict): Additional restrictions to set on the
        result.  This will affect the scheme (additional exact match
        fields are added for each item).  If you want anything here
        you probably want {'dets:band': 'f150'}.
      db_in (ManifestDb or None): If this is passed in, it will get
        updated by this scan.  Any items already in the Db will not be
        updated.  If this is passed in as None, a new Db is created.
      re_suffix (str): The suffix to look for when matching results.
        Probably '.cuts' or '.cal'.
      source_prefix (str): Having found files in root_dir, this prefix
        is prepended to the results before storing them in the Db.
        (This is used so that paths are relative to some other
        interesting thing, such as the ManifestDb.)

    Returns:
      The updated (or newly created) ManifestDb.

    """
    scheme = metadata.ManifestScheme()\
             .add_exact_match('obs:obs_id')\
             .add_data_field('loader')
    # Additional restrictions...
    for k in restrictions:
        scheme.add_data_field(k)
    if db_in is None:
        db = metadata.ManifestDb(scheme=scheme)
    else:
        db = db_in
    product_re = re.compile('(%s)%s' % (TOD_ID_PAT, re_suffix))
    entry = dict(restrictions)
    entry['loader'] = loader
    for root, dirs, files in os.walk(root_dir):
        for f in files:
            m = product_re.fullmatch(f)
            if m is None:
                continue
            entry['obs:obs_id'] = m.group(1)
            db.add_entry(entry,
                         filename=os.path.join(source_prefix, root, f),
                         replace=True)
    return db
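
A sketch of calling this helper to index a depot of cuts results; the directory and output names are invented, while the loader string and the band restriction follow the docstring and the other examples here:

# Hypothetical depot: files named like <tod_id>.cuts somewhere under cuts_depot/.
db = _cuts_and_cal_helper('cuts_depot/', loader='actpol_cuts',
                          restrictions={'dets:band': 'f150'},
                          db_in=None, re_suffix='.cuts',
                          source_prefix='')
db.to_file('cuts.sqlite')
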
Example #4
def make_pointofs_hdf(offsets_file,
                      hdf_out,
                      dataset='pointofs',
                      obs_list=None,
                      hdf_relout=None):
    # Read offsets ...
    data = moby2.util.StructDB.from_column_file(offsets_file,
                                                [('obs:obs_id', 0), ('dx', 5),
                                                 ('dy', 6), ('gamma', 6)])
    data['gamma'] = 0.

    if obs_list is not None:
        # Restrict ...
        idx = data.select_inner({'obs:obs_id': obs_list})
        assert np.all(idx >= 0)
        data = data[idx]

    if hdf_relout is None:
        hdf_relout = hdf_out
    with h5py.File(hdf_out, 'a') as h:
        rs = metadata.ResultSet(data.dtype.names)
        rs.rows.extend(list(data))
        io.metadata.write_dataset(rs, h, dataset)

    scheme = metadata.ManifestScheme()\
             .add_range_match('obs:timestamp')\
             .add_data_field('loader')\
             .add_data_field('dataset')
    man = metadata.ManifestDb(scheme=scheme)
    man.add_entry(
        {
            'obs:timestamp': (0, 2e9),
            'dataset': dataset,
            'loader': 'actpol_pointofs'
        },
        hdf_relout,
        commit=False)
    man.conn.commit()
    return man
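
A sketch of calling this builder with an observation restriction; the offsets file and TOD id are invented. hdf_relout lets the path recorded in the ManifestDb (here just the basename) differ from the path actually written, as in the 'pointofs' branch of the command-line tool below:

# Hypothetical call: keep only the listed TOD, record the HDF5 file by basename.
man = make_pointofs_hdf('pointing_offsets.txt', 'out/pointofs.h5',
                        dataset='pointofs',
                        obs_list=['1234567890.1234567890.ar5'],
                        hdf_relout='pointofs.h5')
man.to_file('out/pointofs.sqlite')
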
Example #5
def main(args=None):
    if args is None:
        args = sys.argv[1:]
    parser = get_parser()
    args = parser.parse_args(args=args)

    if args.module is None:
        parser.error('Select a submodule.')

    elif args.module == 'obsdb':
        fn1 = _checkfile('obsdb.sqlite', args, parser=parser)
        cat = get_obs_catalog(args.catalog)
        if args.tod_list:
            tods = load_tod_list(args.tod_list)
            print(
                f'Restricting TOD list to {len(tods)} items from {args.tod_list}...'
            )
            idx = cat.select_inner({'tod_id': tods})
            n_bad = (idx < 0).sum()
            if n_bad:
                print(
                    ' -- warning, did not match %i of %i tods from input list.'
                    % (n_bad, len(idx)))
                idx = idx[idx >= 0]
            cat = cat[idx]
        socompat.make_obsdb(cat=cat).to_file(fn1)

    elif args.module == 'obsfiledb':
        fn1 = _checkfile('obsfiledb.sqlite', args)
        cat = get_obs_catalog(args.catalog)
        if args.tod_list:
            tods = load_tod_list(args.tod_list)
            print(
                f'Restricting TOD list to {len(tods)} items from {args.tod_list}...'
            )
            idx = cat.select_inner({'tod_id': tods})
            n_bad = (idx < 0).sum()
            if n_bad:
                print(
                    ' -- warning, did not match %i of %i tods from input list.'
                    % (n_bad, len(idx)))
                idx = idx[idx >= 0]
            cat = cat[idx]
        socompat.make_obsfiledb(cat=cat).to_file(fn1)

    elif args.module == 'detdb':
        fn1 = _checkfile('detdb.sqlite', args)
        socompat.make_detdb().to_file(fn1)

    elif args.module == 'scan-hdf':
        fn1 = _checkfile(args.db_file, args, parser=parser, updatable=True)

        if args.tod_list:
            tod_list = load_tod_list(args.tod_list)
        else:
            tod_list = None

        if os.path.exists(fn1):
            db = metadata.ManifestDb.from_file(fn1)
        else:
            scheme = metadata.ManifestScheme()\
                             .add_data_field('dataset')\
                             .add_exact_match('obs:obs_id')
            db = metadata.ManifestDb(scheme=scheme)

        for source_file in args.sources:
            print(f'Scanning {source_file}...')
            with h5py.File(source_file, 'r') as h:
                n_added = 0
                for k in h.keys():
                    if tod_list is None or k in tod_list:
                        db.add_entry({
                            'dataset': k,
                            'obs:obs_id': k
                        },
                                     source_file,
                                     replace=True)
                        n_added += 1
                print(f' ... found {n_added} entries to keep')
        db.to_file(fn1)

    elif args.module == 'pointofs':
        if args.tod_list:
            tods = load_tod_list(args.tod_list)
        else:
            tods = None
        # Clean this up ... what if hdf already exists, elsewhere, etc.
        fn1 = args.infile
        fn2 = os.path.join(args.output_dir, args.h5file)
        fn3 = _checkfile(args.db_file, args, parser=parser)

        proddb = socompat.make_pointofs_hdf(fn1,
                                            fn2,
                                            dataset=args.dataset,
                                            obs_list=tods,
                                            hdf_relout=args.h5file)
        proddb.to_file(fn3)

    elif args.module == 'abscal':
        fn1 = _checkfile(args.db_file, args, parser=parser)
        proddb = socompat.metadata.get_abscal_proddb(args.h5file,
                                                     dataset=args.dataset)
        proddb.to_file(fn1)

    elif args.module == 'timeconst':
        fn1 = _checkfile(args.db_file, args, parser=parser)
        if args.tod_list:
            tod_list = load_tod_list(args.tod_list)
        else:
            tod_list = None
        # Scan some directories.
        if len(args.scan) == 0:
            parser.error('No directories specified (use --scan)')

        scheme = metadata.ManifestScheme()\
                 .add_exact_match('obs:obs_id')\
                 .add_data_field('loader')\
                 .add_data_field('pa_hint')
        db = metadata.ManifestDb(scheme=scheme)
        entry = {'loader': 'actpol_timeconst'}
        TOD_ID_PAT = r'[0-9]{10}\.[0-9]{10}\.ar.'
        product_re = re.compile(r'(%s)\.tau' % (TOD_ID_PAT, ))
        for root_dir in args.scan:
            print(f'Working on {root_dir} ...')
            for root, dirs, files in os.walk(root_dir):
                if len(files):
                    print(f'  looking at {len(files)} in {root}')
                for f in files:
                    m = product_re.fullmatch(f)
                    if m is None:
                        continue
                    entry['obs:obs_id'] = m.group(1)
                    entry['pa_hint'] = 'pa' + m.group(1)[-1]
                    if tod_list is None or entry['obs:obs_id'] in tod_list:
                        db.add_entry(entry, filename=os.path.join(root, f))
        db.to_file(fn1)

    elif args.module == 'focalplane':
        fn1 = _checkfile(args.db_file, args, parser=parser)
        # For a bit of generality, request a map from array and
        # time_range to offset and polarization files.
        spec = yaml.safe_load(open(args.spec_file, 'rb'))
        # Prepare the output database...
        scheme = metadata.ManifestScheme()\
                         .add_data_field('dataset')\
                         .add_range_match('obs:timestamp')\
                         .add_exact_match('obs:pa')
        db = metadata.ManifestDb(scheme=scheme)
        # Write results to hdf5
        hdf_out = os.path.join(args.output_dir, args.h5file)
        with h5py.File(hdf_out, 'a') as h:
            for row in spec['table']:
                pa, t0, t1, pos_file, pol_file = row
                dset = f'{pa}_{t0}_{t1}'
                aman = socompat.metadata.load_detoffsets_file(
                    os.path.join(spec['prefix'], pos_file),
                    os.path.join(spec['prefix'], pol_file),
                    pa=pa)
                # Convert to ResultSet and write out.
                rs = metadata.ResultSet(
                    keys=['dets:readout_id', 'xi', 'eta', 'gamma'])
                for i, d in enumerate(aman.dets.vals):
                    rs.rows.append(
                        [d, aman['xi'][i], aman['eta'][i], aman['gamma'][i]])
                io.metadata.write_dataset(rs, h, dset, overwrite=args.force)
                db.add_entry(
                    {
                        'dataset': dset,
                        'obs:pa': pa,
                        'obs:timestamp': (t0, t1)
                    }, args.h5file)
        db.to_file(fn1)

    elif args.module == 'cuts_release':
        socompat.process_cuts_release(args.release_file,
                                      output_dir=args.output_dir)

    elif args.module == 'cuts_dir':
        src_dir, src_prefix = relativify_paths(args.src_dir, args.output_dir)
        if src_prefix != '':
            print(f'output_dir and src_dir are both relative, so target files '
                  f'will be prefixed with {src_prefix}')

        fn1 = _checkfile(args.db_file, args, parser=parser, updatable=True)
        db = None
        if os.path.exists(fn1):
            db = metadata.ManifestDb.from_file(fn1)
        subset = dict(args.subset)
        db = socompat.make_cuts_db(src_dir,
                                   db_in=db,
                                   source_prefix=src_prefix,
                                   restrictions=subset)
        db.to_file(fn1)

    elif args.module == 'cal_dir':
        src_dir, src_prefix = relativify_paths(args.src_dir, args.output_dir)
        if src_prefix != '':
            print(f'output_dir and src_dir are both relative, so target files '
                  f'will be prefixed with {src_prefix}')
        fn1 = _checkfile(args.db_file, args, parser=parser, updatable=True)
        db = None
        if os.path.exists(fn1):
            db = metadata.ManifestDb.from_file(fn1)
        subset = dict(args.subset)
        db = socompat.make_cal_db(src_dir,
                                  db_in=db,
                                  source_prefix=src_prefix,
                                  restrictions=subset)
        db.to_file(fn1)

    elif args.module == 'context':
        fn1 = _checkfile('context.yaml', args)
        socompat.write_context(fn1)

    else:
        parser.error(f'Module "{args.module}" not implemented.')
Example #6
def process_cuts_release(release_filename,
                         temp_dir='temp/',
                         output_dir='./',
                         output_pattern='metadata_{category}.sqlite'):
    """
    Process a release file from cutslib.
    """
    if isinstance(release_filename, dict):
        cutsc = release_filename
    else:
        cutsc = yaml.safe_load(open(release_filename).read())

    if not os.path.exists(temp_dir):
        os.makedirs(temp_dir)

    db_files = {
        k: os.path.join(output_dir, output_pattern).format(category=k)
        for k in ['cal', 'cuts', 'pcuts']
    }

    cuts_map = {}
    for k in cutsc['tags'].keys():
        #pa4_f150_s17_c11
        pa, fcode, scode, x = k.split('_', 3)
        key = f'{pa}_{scode}'
        cuts_map[key] = cuts_map.get(key, [])
        cuts_map[key].append((fcode, k))

    # Make temporary dbs -- this efficiently walks each tag tree.
    print('Making temporary dbs for each item...')
    for key, cuts in cuts_map.items():
        for fcode, k in cuts:
            print(f'  {fcode} {k}')
            for t in ['tag_out', 'tag_planet']:
                print(f'    {t}')
                temp_db_file = os.path.join(temp_dir, '_%s_%s.sqlite' % (k, t))
                if os.path.exists(temp_db_file):
                    print(f'        skipping because {temp_db_file} exists')
                    continue
                _tag = cutsc['tags'][k][t]
                base = '{depot}/TODCuts/{tag}/'.format(depot=cutsc['depot'],
                                                       tag=_tag)
                db = socompat.make_cuts_db(base)
                db.to_file(temp_db_file)
            for t in ['tag_cal']:
                print(f'    {t}')
                temp_db_file = os.path.join(temp_dir, '_%s_%s.sqlite' % (k, t))
                if os.path.exists(temp_db_file):
                    print(f'        skipping because {temp_db_file} exists')
                    continue
                _tag = cutsc['tags'][k][t]
                base = '{depot}/Calibration/{tag}/'.format(
                    depot=cutsc['depot'], tag=_tag)
                db = socompat.make_cal_db(base)
                db.to_file(temp_db_file)

    # Join them together.
    scheme = metadata.ManifestScheme() \
                     .add_exact_match('obs:obs_id') \
                     .add_data_field('dets:band') \
                     .add_data_field('loader')
    scutsdb = metadata.ManifestDb(scheme=scheme)
    pcutsdb = metadata.ManifestDb(scheme=scheme)
    caldb = metadata.ManifestDb(scheme=scheme)

    print()
    print('Joining temp dbs together')
    for key, cuts in cuts_map.items():
        for fcode, k in cuts:
            print(f'  {fcode} {k}')
            for t, db, loader in [('tag_out', scutsdb, 'actpol_cuts'),
                                  ('tag_planet', pcutsdb, 'actpol_cuts'),
                                  ('tag_cal', caldb, 'actpol_cal')]:
                db_in = metadata.ManifestDb(
                    os.path.join(temp_dir, '_%s_%s.sqlite' % (k, t)))
                c_in = db_in.conn.execute(
                    'select `obs:obs_id`,name from map join files on map.file_id=files.id'
                )
                c = db.conn.cursor()
                for row in tqdm(c_in):
                    obs_id, filename = row
                    _ = c.execute('insert into files (name) values (?)',
                                  (filename, ))
                    fid = c.lastrowid
                    _ = c.execute(
                        'insert into map (`obs:obs_id`, `dets:band`, `loader`, `file_id`) '
                        'values (?,?,?,?)', (obs_id, fcode, loader, fid))
                    # Cursors are expensive, so rows are inserted directly
                    # above rather than via the equivalent add_entry call:
    #                db.add_entry({'obs:obs_id': obs_id,
    #                              'dets:band': fcode,
    #                              'loader': loader},
    #                             filename=filename, commit=False)

    scutsdb.to_file(db_files['cuts'])
    pcutsdb.to_file(db_files['pcuts'])
    caldb.to_file(db_files['cal'])
    return db_files
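
Given how the tags are parsed above, the release spec is expected to look roughly like the sketch below (the depot path and tag names are invented); a dict can also be passed directly in place of the YAML filename, as the isinstance check at the top of the function allows:

# Invented example of a release spec with a single tag.
release = {
    'depot': '/path/to/depot',
    'tags': {
        'pa4_f150_s17_c11': {
            'tag_out': 'pa4_f150_s17_c11_cuts',
            'tag_planet': 'pa4_f150_s17_c11_planet',
            'tag_cal': 'pa4_f150_s17_c11_cal',
        },
    },
}
db_files = process_cuts_release(release, output_dir='./')
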
Example #7
    def test_010_dbs(self):
        """Test metadata detdb/obsdb resolution system

        This tests one of the more complicated cases:

        - The ManifestDb includes restrictions on dets:band, so f090
          is to be loaded from one dataset and f150 is to be loaded
          from another.

        - The two datasets both provide values for f090 and f150, so
          the code has to know to ignore the ones that weren't asked
          for.

        """
        hdf_fn = os.path.join(self.tempdir.name, '_test_010_dbs.h5')
        mandb_fn = os.path.join(self.tempdir.name, '_test_010_dbs.sqlite')

        # Add two datasets to the HDF file.  They are called
        # "timeconst_early" and "timeconst_late" but there is no
        # specific time range associated with each.  Each dataset
        # contains a value for bands f090 and f150.  The "early" set
        # has TBAD for f150 and the "late" set has TBAD for f090.
        T090, T150, TBAD = 90e-3, 150e-3, 1e0
        with h5py.File(hdf_fn, 'a') as fout:
            # First test.
            for label, tau1, tau2 in [('early', T090, TBAD),
                                      ('late', TBAD, T150)]:
                rs = metadata.ResultSet(keys=['dets:band', 'timeconst'])
                rs.append({'dets:band': 'f090', 'timeconst': tau1})
                rs.append({'dets:band': 'f150', 'timeconst': tau2})
                write_dataset(rs, fout, 'timeconst_%s' % label, overwrite=True)

        # To match the early/late example we need DetDb and ObsDb.
        detdb = metadata.DetDb()
        detdb.create_table('base', ["`band` str", "`polcode` str"])
        detdb.add_props('base', 'det1', band='f090', polcode='A')
        detdb.add_props('base', 'det2', band='f090', polcode='B')
        detdb.add_props('base', 'det3', band='f150', polcode='A')
        detdb.add_props('base', 'det4', band='f150', polcode='B')

        obsdb = metadata.ObsDb()
        t_pivot = 2000010000
        obsdb.add_obs_columns(['timestamp float'])
        obsdb.update_obs('obs_00', {'timestamp': t_pivot - 10000})
        obsdb.update_obs('obs_01', {'timestamp': t_pivot + 10000})

        # Test 1 -- ManifestDb and Stored datasets both have "band" rules.
        scheme = metadata.ManifestScheme() \
                         .add_range_match('obs:timestamp') \
                         .add_data_field('dets:band') \
                         .add_data_field('dataset')
        mandb = metadata.ManifestDb(scheme=scheme)
        for band, this_pivot in [('f090', t_pivot + 1e6),
                                 ('f150', t_pivot - 1e6)]:
            mandb.add_entry(
                {
                    'dataset': 'timeconst_early',
                    'dets:band': band,
                    'obs:timestamp': (0, this_pivot)
                },
                filename=hdf_fn)
            mandb.add_entry(
                {
                    'dataset': 'timeconst_late',
                    'dets:band': band,
                    'obs:timestamp': (this_pivot, 4e9)
                },
                filename=hdf_fn)
        mandb.to_file(mandb_fn)

        # The SuperLoader is where the logic lives to combine multiple
        # results and pull out the right information in the right
        # order.  It should leave us with no TBAD values.
        loader = metadata.SuperLoader(obsdb=obsdb, detdb=detdb)
        spec_list = [{'db': mandb_fn, 'name': 'tau&timeconst'}]
        mtod = loader.load(spec_list, {'obs:obs_id': 'obs_00'})
        self.assertCountEqual(mtod['tau'], [T090, T090, T150, T150])

        # Test 2: ManifestDb specifies polcode, which crosses with
        # dataset band.
        scheme = metadata.ManifestScheme() \
                         .add_range_match('obs:timestamp') \
                         .add_data_field('dets:polcode') \
                         .add_data_field('dataset')
        mandb = metadata.ManifestDb(scheme=scheme)
        for polcode, this_pivot in [('A', t_pivot + 1e6),
                                    ('B', t_pivot - 1e6)]:
            mandb.add_entry(
                {
                    'dataset': 'timeconst_early',
                    'dets:polcode': polcode,
                    'obs:timestamp': (0, this_pivot)
                },
                filename=hdf_fn)
            mandb.add_entry(
                {
                    'dataset': 'timeconst_late',
                    'dets:polcode': polcode,
                    'obs:timestamp': (this_pivot, 4e9)
                },
                filename=hdf_fn)
        mandb.to_file(mandb_fn)

        # Now we expect only f090 A and f150 B to resolve to non-bad vals.
        # Make sure you reinit the loader, to avoid cached dbs.
        loader = metadata.SuperLoader(obsdb=obsdb, detdb=detdb)
        mtod = loader.load(spec_list, {'obs:obs_id': 'obs_00'})
        self.assertCountEqual(mtod['tau'], [T090, TBAD, TBAD, T150])