def make_abscal_hdf(offsets_file, hdf_out, dataset='abscal'):
    if hdf_out is None:
        # Try to open offsets_file as hdf ...
        hdf_out = offsets_file
        offsets_file = None
    # Read offsets ...
    data = moby2.util.StructDB.from_column_file(
        offsets_file, [('obs:obs_id', 0), ('dx', 5), ('dy', 6), ('gamma', 6)])
    data['gamma'] = 0.
    with h5py.File(hdf_out, 'a') as h:
        rs = metadata.ResultSet(data.dtype.names)
        rs.rows.extend(list(data))
        io.metadata.write_dataset(rs, h, dataset)
    scheme = metadata.ManifestScheme()\
        .add_range_match('obs:timestamp')\
        .add_data_field('loader')\
        .add_data_field('dataset')
    man = metadata.ManifestDb(scheme=scheme)
    man.add_entry({'obs:timestamp': (0, 2e9),
                   'dataset': dataset,
                   'loader': 'actpol_pointofs'},
                  hdf_out, commit=False)
    man.conn.commit()
    return man
def setUp(self):
    # Init.
    scheme = metadata.ManifestScheme()
    scheme.add_exact_match('array')
    scheme.add_range_match('time', dtype='float')
    scheme.add_data_field('also_data')
    self.scheme = scheme
    self.manifest = metadata.ManifestDb(scheme=scheme)
def _cuts_and_cal_helper(root_dir, loader, restrictions, db_in,
                         re_suffix, source_prefix):
    """Scan a depot for cuts or cal results, and create or update a
    ManifestDb.

    Args:
      root_dir (str): Base directory to scan.  Any file in this tree
        matching the pattern will be kept.
      loader (str): String to set the 'loader' field to in the output db.
      restrictions (dict): Additional restrictions to set on the result.
        This will affect the scheme (an additional data field is added
        for each item).  If you want anything here, you probably want
        {'dets:band': 'f150'}.
      db_in (ManifestDb or None): If this is passed in, it will get
        updated by this scan.  Any items already in the Db will not be
        updated.  If this is None, a new Db is created.
      re_suffix (str): The suffix to look for when matching results.
        Probably '.cuts' or '.cal'.
      source_prefix (str): Having found files in root_dir, this prefix is
        prepended to the results before storing them in the Db.  (This is
        used so that paths are relative to some other interesting thing,
        such as the ManifestDb.)

    Returns:
      The updated (or newly created) ManifestDb.

    """
    scheme = metadata.ManifestScheme()\
        .add_exact_match('obs:obs_id')\
        .add_data_field('loader')
    # Additional restrictions...
    for k in restrictions:
        scheme.add_data_field(k)
    if db_in is None:
        db = metadata.ManifestDb(scheme=scheme)
    else:
        db = db_in
    product_re = re.compile('(%s)%s' % (TOD_ID_PAT, re_suffix))
    entry = dict(restrictions)
    entry['loader'] = loader
    for root, dirs, files in os.walk(root_dir):
        for f in files:
            m = product_re.fullmatch(f)
            if m is None:
                continue
            entry['obs:obs_id'] = m.group(1)
            db.add_entry(entry, filename=os.path.join(source_prefix, root, f),
                         replace=True)
    return db
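
# A minimal usage sketch for the helper above (illustrative only: the depot
# path, band restriction, and output filename are assumptions, and TOD_ID_PAT
# is assumed to be defined at module level, as the regex construction implies):
#
#   db = _cuts_and_cal_helper(
#       root_dir='/path/to/depot/TODCuts/mytag',   # hypothetical depot tree
#       loader='actpol_cuts',
#       restrictions={'dets:band': 'f150'},
#       db_in=None,                                # start a fresh ManifestDb
#       re_suffix=r'\.cuts',
#       source_prefix='')
#   db.to_file('cuts_local.sqlite')                # hypothetical output name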
def make_pointofs_hdf(offsets_file, hdf_out, dataset='pointofs',
                      obs_list=None, hdf_relout=None):
    # Read offsets ...
    data = moby2.util.StructDB.from_column_file(
        offsets_file, [('obs:obs_id', 0), ('dx', 5), ('dy', 6), ('gamma', 6)])
    data['gamma'] = 0.
    if obs_list is not None:
        # Restrict ...
        idx = data.select_inner({'obs:obs_id': obs_list})
        # select_inner returns -1 for entries with no match; index 0 is valid.
        assert np.all(idx >= 0)
        data = data[idx]
    if hdf_relout is None:
        hdf_relout = hdf_out
    with h5py.File(hdf_out, 'a') as h:
        rs = metadata.ResultSet(data.dtype.names)
        rs.rows.extend(list(data))
        io.metadata.write_dataset(rs, h, dataset)
    scheme = metadata.ManifestScheme()\
        .add_range_match('obs:timestamp')\
        .add_data_field('loader')\
        .add_data_field('dataset')
    man = metadata.ManifestDb(scheme=scheme)
    man.add_entry({'obs:timestamp': (0, 2e9),
                   'dataset': dataset,
                   'loader': 'actpol_pointofs'},
                  hdf_relout, commit=False)
    man.conn.commit()
    return man
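
# A minimal usage sketch (illustrative only: the input offsets file, HDF
# output name, and obs_id below are assumptions):
#
#   man = make_pointofs_hdf('planet_offsets.txt', 'pointofs.h5',
#                           dataset='pointofs',
#                           obs_list=['1234567890.1234567890.ar4'])
#   man.to_file('pointofs.sqlite')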
def main(args=None):
    if args is None:
        args = sys.argv[1:]
    parser = get_parser()
    args = parser.parse_args(args=args)

    if args.module is None:
        parser.error('Select a submodule.')

    elif args.module == 'obsdb':
        fn1 = _checkfile('obsdb.sqlite', args, parser=parser)
        cat = get_obs_catalog(args.catalog)
        if args.tod_list:
            tods = load_tod_list(args.tod_list)
            print(f'Restricting TOD list to {len(tods)} items '
                  f'from {args.tod_list}...')
            idx = cat.select_inner({'tod_id': tods})
            n_bad = (idx < 0).sum()
            if n_bad:
                print(' -- warning, did not match %i of %i tods from input list.'
                      % (n_bad, len(idx)))
            idx = idx[idx >= 0]
            cat = cat[idx]
        socompat.make_obsdb(cat=cat).to_file(fn1)

    elif args.module == 'obsfiledb':
        fn1 = _checkfile('obsfiledb.sqlite', args)
        cat = get_obs_catalog(args.catalog)
        if args.tod_list:
            tods = load_tod_list(args.tod_list)
            print(f'Restricting TOD list to {len(tods)} items '
                  f'from {args.tod_list}...')
            idx = cat.select_inner({'tod_id': tods})
            n_bad = (idx < 0).sum()
            if n_bad:
                print(' -- warning, did not match %i of %i tods from input list.'
                      % (n_bad, len(idx)))
            idx = idx[idx >= 0]
            cat = cat[idx]
        socompat.make_obsfiledb(cat=cat).to_file(fn1)

    elif args.module == 'detdb':
        fn1 = _checkfile('detdb.sqlite', args)
        socompat.make_detdb().to_file(fn1)

    elif args.module == 'scan-hdf':
        fn1 = _checkfile(args.db_file, args, parser=parser, updatable=True)
        if args.tod_list:
            tod_list = load_tod_list(args.tod_list)
        else:
            tod_list = None
        if os.path.exists(fn1):
            db = metadata.ManifestDb.from_file(fn1)
        else:
            scheme = metadata.ManifestScheme()\
                .add_data_field('dataset')\
                .add_exact_match('obs:obs_id')
            db = metadata.ManifestDb(scheme=scheme)
        for source_file in args.sources:
            print(f'Scanning {source_file}...')
            with h5py.File(source_file, 'r') as h:
                n_added = 0
                for k in h.keys():
                    if tod_list is None or k in tod_list:
                        db.add_entry({'dataset': k, 'obs:obs_id': k},
                                     source_file, replace=True)
                        n_added += 1
                print(f' ... found {n_added} entries to keep')
        db.to_file(fn1)

    elif args.module == 'pointofs':
        if args.tod_list:
            tods = load_tod_list(args.tod_list)
        else:
            tods = None
        # Clean this up ... what if hdf already exists, elsewhere, etc.
        fn1 = args.infile
        fn2 = os.path.join(args.output_dir, args.h5file)
        fn3 = _checkfile(args.db_file, args, parser=parser)
        proddb = socompat.make_pointofs_hdf(fn1, fn2, dataset=args.dataset,
                                            obs_list=tods,
                                            hdf_relout=args.h5file)
        proddb.to_file(fn3)

    elif args.module == 'abscal':
        fn1 = _checkfile(args.db_file, args, parser=parser)
        proddb = socompat.metadata.get_abscal_proddb(args.h5file,
                                                     dataset=args.dataset)
        proddb.to_file(fn1)

    elif args.module == 'timeconst':
        fn1 = _checkfile(args.db_file, args, parser=parser)
        if args.tod_list:
            tod_list = load_tod_list(args.tod_list)
        else:
            tod_list = None
        # Scan some directories.
        if len(args.scan) == 0:
            parser.error('No directories specified (use --scan)')
        scheme = metadata.ManifestScheme()\
            .add_exact_match('obs:obs_id')\
            .add_data_field('loader')\
            .add_data_field('pa_hint')
        db = metadata.ManifestDb(scheme=scheme)
        entry = {'loader': 'actpol_timeconst'}
        TOD_ID_PAT = r'[0-9]{10}\.[0-9]{10}\.ar.'
        product_re = re.compile(r'(%s)\.tau' % (TOD_ID_PAT, ))
        for root_dir in args.scan:
            print(f'Working on {root_dir} ...')
            for root, dirs, files in os.walk(root_dir):
                if len(files):
                    print(f' looking at {len(files)} in {root}')
                for f in files:
                    m = product_re.fullmatch(f)
                    if m is None:
                        continue
                    entry['obs:obs_id'] = m.group(1)
                    entry['pa_hint'] = 'pa' + m.group(1)[-1]
                    if tod_list is None or entry['obs:obs_id'] in tod_list:
                        db.add_entry(entry, filename=os.path.join(root, f))
        db.to_file(fn1)

    elif args.module == 'focalplane':
        fn1 = _checkfile(args.db_file, args, parser=parser)
        # For a bit of generality, request a map from array and
        # time_range to offset and polarization files.  (An illustrative
        # spec layout is sketched after this function.)
        spec = yaml.safe_load(open(args.spec_file, 'rb'))
        # Prepare the output database...
        scheme = metadata.ManifestScheme()\
            .add_data_field('dataset')\
            .add_range_match('obs:timestamp')\
            .add_exact_match('obs:pa')
        db = metadata.ManifestDb(scheme=scheme)
        # Write results to hdf5
        hdf_out = os.path.join(args.output_dir, args.h5file)
        with h5py.File(hdf_out, 'a') as h:
            for row in spec['table']:
                pa, t0, t1, pos_file, pol_file = row
                dset = f'{pa}_{t0}_{t1}'
                aman = socompat.metadata.load_detoffsets_file(
                    os.path.join(spec['prefix'], pos_file),
                    os.path.join(spec['prefix'], pol_file), pa=pa)
                # Convert to ResultSet and write out.
                rs = metadata.ResultSet(
                    keys=['dets:readout_id', 'xi', 'eta', 'gamma'])
                for i, d in enumerate(aman.dets.vals):
                    rs.rows.append(
                        [d, aman['xi'][i], aman['eta'][i], aman['gamma'][i]])
                io.metadata.write_dataset(rs, h, dset, overwrite=args.force)
                db.add_entry({'dataset': dset,
                              'obs:pa': pa,
                              'obs:timestamp': (t0, t1)},
                             args.h5file)
        db.to_file(fn1)

    elif args.module == 'cuts_release':
        socompat.process_cuts_release(args.release_file,
                                      output_dir=args.output_dir)

    elif args.module == 'cuts_dir':
        src_dir, src_prefix = relativify_paths(args.src_dir, args.output_dir)
        if src_prefix != '':
            print(f'output_dir and src_dir are both relative, so target files '
                  f'will be prefixed with {src_prefix}')
        fn1 = _checkfile(args.db_file, args, parser=parser, updatable=True)
        db = None
        if os.path.exists(fn1):
            db = metadata.ManifestDb.from_file(fn1)
        subset = dict(args.subset)
        db = socompat.make_cuts_db(src_dir, db_in=db,
                                   source_prefix=src_prefix,
                                   restrictions=subset)
        db.to_file(fn1)

    elif args.module == 'cal_dir':
        src_dir, src_prefix = relativify_paths(args.src_dir, args.output_dir)
        if src_prefix != '':
            print(f'output_dir and src_dir are both relative, so target files '
                  f'will be prefixed with {src_prefix}')
        fn1 = _checkfile(args.db_file, args, parser=parser, updatable=True)
        db = None
        if os.path.exists(fn1):
            db = metadata.ManifestDb.from_file(fn1)
        subset = dict(args.subset)
        db = socompat.make_cal_db(src_dir, db_in=db,
                                  source_prefix=src_prefix,
                                  restrictions=subset)
        db.to_file(fn1)

    elif args.module == 'context':
        fn1 = _checkfile('context.yaml', args)
        socompat.write_context(fn1)

    else:
        parser.error(f'Module "{args.module}" not implemented.')
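
# Illustrative layout for the focalplane spec file consumed by the
# 'focalplane' branch above (values are made up; the structure -- a 'prefix'
# plus a 'table' of [pa, t0, t1, position_file, polarization_file] rows -- is
# what the code reads):
#
#   prefix: /path/to/detoffsets
#   table:
#     - [pa4, 1500000000, 1600000000, pa4_positions.txt, pa4_polangles.txt]
#     - [pa5, 1500000000, 1600000000, pa5_positions.txt, pa5_polangles.txt]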
def process_cuts_release(release_filename, temp_dir='temp/', output_dir='./',
                         output_pattern='metadata_{category}.sqlite'):
    """Process a release file from cutslib.  (An illustrative release file
    layout is sketched after this function.)

    """
    if isinstance(release_filename, dict):
        cutsc = release_filename
    else:
        cutsc = yaml.safe_load(open(release_filename).read())

    if not os.path.exists(temp_dir):
        os.makedirs(temp_dir)

    db_files = {
        k: os.path.join(output_dir, output_pattern).format(category=k)
        for k in ['cal', 'cuts', 'pcuts']}

    cuts_map = {}
    for k in cutsc['tags'].keys():
        # e.g. pa4_f150_s17_c11
        pa, fcode, scode, x = k.split('_', 3)
        key = f'{pa}_{scode}'
        cuts_map.setdefault(key, []).append((fcode, k))

    # Make temporary dbs -- this efficiently walks each tag tree.
    print('Making temporary dbs for each item...')
    for key, cuts in cuts_map.items():
        for fcode, k in cuts:
            print(f'  {fcode} {k}')
            for t in ['tag_out', 'tag_planet']:
                print(f'    {t}')
                temp_db_file = os.path.join(temp_dir, '_%s_%s.sqlite' % (k, t))
                if os.path.exists(temp_db_file):
                    print(f'    skipping because {temp_db_file} exists')
                    continue
                _tag = cutsc['tags'][k][t]
                base = '{depot}/TODCuts/{tag}/'.format(
                    depot=cutsc['depot'], tag=_tag)
                db = socompat.make_cuts_db(base)
                db.to_file(temp_db_file)
            for t in ['tag_cal']:
                print(f'    {t}')
                temp_db_file = os.path.join(temp_dir, '_%s_%s.sqlite' % (k, t))
                if os.path.exists(temp_db_file):
                    print(f'    skipping because {temp_db_file} exists')
                    continue
                _tag = cutsc['tags'][k][t]
                base = '{depot}/Calibration/{tag}/'.format(
                    depot=cutsc['depot'], tag=_tag)
                db = socompat.make_cal_db(base)
                db.to_file(temp_db_file)

    # Join them together.
    scheme = metadata.ManifestScheme() \
        .add_exact_match('obs:obs_id') \
        .add_data_field('dets:band') \
        .add_data_field('loader')
    scutsdb = metadata.ManifestDb(scheme=scheme)
    pcutsdb = metadata.ManifestDb(scheme=scheme)
    caldb = metadata.ManifestDb(scheme=scheme)

    print()
    print('Joining temp dbs together')
    for key, cuts in cuts_map.items():
        for fcode, k in cuts:
            print(f'  {fcode} {k}')
            for t, db, loader in [('tag_out', scutsdb, 'actpol_cuts'),
                                  ('tag_planet', pcutsdb, 'actpol_cuts'),
                                  ('tag_cal', caldb, 'actpol_cal')]:
                db_in = metadata.ManifestDb(
                    os.path.join(temp_dir, '_%s_%s.sqlite' % (k, t)))
                c_in = db_in.conn.execute(
                    'select `obs:obs_id`,name from map '
                    'join files on map.file_id=files.id')
                c = db.conn.cursor()
                for row in tqdm(c_in):
                    obs_id, filename = row
                    # Insert directly with raw SQL; the commented-out
                    # db.add_entry equivalent below is much slower because
                    # cursors are expensive.
                    _ = c.execute('insert into files (name) values (?)',
                                  (filename, ))
                    fid = c.lastrowid
                    _ = c.execute(
                        'insert into map (`obs:obs_id`, `dets:band`, '
                        '`loader`, `file_id`) values (?,?,?,?)',
                        (obs_id, fcode, loader, fid))
                    # db.add_entry({'obs:obs_id': obs_id,
                    #               'dets:band': fcode,
                    #               'loader': loader},
                    #              filename=filename, commit=False)

    scutsdb.to_file(db_files['cuts'])
    pcutsdb.to_file(db_files['pcuts'])
    caldb.to_file(db_files['cal'])
    return db_files
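
# Illustrative layout for the cutslib release file consumed above (tag names
# and depot path are made up; the structure -- a 'depot' path and, for each
# tag key, 'tag_out', 'tag_planet' and 'tag_cal' entries -- is what the code
# reads):
#
#   depot: /path/to/actpol_depot
#   tags:
#     pa4_f150_s17_c11:
#       tag_out:    pa4_f150_s17_c11_v0
#       tag_planet: pa4_f150_s17_c11_v0_planet
#       tag_cal:    pa4_f150_s17_c11_v0_cal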
def test_010_dbs(self):
    """Test metadata detdb/obsdb resolution system.

    This tests one of the more complicated cases:

    - The ManifestDb includes restrictions on dets:band, so f090 is to be
      loaded from one dataset and f150 is to be loaded from another.
    - The two datasets both provide values for f090 and f150, so the code
      has to know to ignore the ones that weren't asked for.

    """
    hdf_fn = os.path.join(self.tempdir.name, '_test_010_dbs.h5')
    mandb_fn = os.path.join(self.tempdir.name, '_test_010_dbs.sqlite')

    # Add two datasets to the HDF file.  They are called
    # "timeconst_early" and "timeconst_late" but there is no specific
    # time range associated with each.  Each dataset contains a value
    # for bands f090 and f150.  The "early" set has TBAD for f150 and
    # the "late" set has TBAD for f090.
    T090, T150, TBAD = 90e-3, 150e-3, 1e0
    with h5py.File(hdf_fn, 'a') as fout:
        # First test.
        for label, tau1, tau2 in [('early', T090, TBAD),
                                  ('late', TBAD, T150)]:
            rs = metadata.ResultSet(keys=['dets:band', 'timeconst'])
            rs.append({'dets:band': 'f090', 'timeconst': tau1})
            rs.append({'dets:band': 'f150', 'timeconst': tau2})
            write_dataset(rs, fout, 'timeconst_%s' % label, overwrite=True)

    # To match the early/late example we need DetDb and ObsDb.
    detdb = metadata.DetDb()
    detdb.create_table('base', ["`band` str", "`polcode` str"])
    detdb.add_props('base', 'det1', band='f090', polcode='A')
    detdb.add_props('base', 'det2', band='f090', polcode='B')
    detdb.add_props('base', 'det3', band='f150', polcode='A')
    detdb.add_props('base', 'det4', band='f150', polcode='B')

    obsdb = metadata.ObsDb()
    t_pivot = 2000010000
    obsdb.add_obs_columns(['timestamp float'])
    obsdb.update_obs('obs_00', {'timestamp': t_pivot - 10000})
    obsdb.update_obs('obs_01', {'timestamp': t_pivot + 10000})

    # Test 1 -- ManifestDb and stored datasets both have "band" rules.
    scheme = metadata.ManifestScheme() \
        .add_range_match('obs:timestamp') \
        .add_data_field('dets:band') \
        .add_data_field('dataset')
    mandb = metadata.ManifestDb(scheme=scheme)
    for band, this_pivot in [('f090', t_pivot + 1e6),
                             ('f150', t_pivot - 1e6)]:
        mandb.add_entry({'dataset': 'timeconst_early',
                         'dets:band': band,
                         'obs:timestamp': (0, this_pivot)},
                        filename=hdf_fn)
        mandb.add_entry({'dataset': 'timeconst_late',
                         'dets:band': band,
                         'obs:timestamp': (this_pivot, 4e9)},
                        filename=hdf_fn)
    mandb.to_file(mandb_fn)

    # The SuperLoader is where the logic lives to combine multiple
    # results and pull out the right information in the right order.
    # It should leave us with no TBAD values.
    loader = metadata.SuperLoader(obsdb=obsdb, detdb=detdb)
    spec_list = [{'db': mandb_fn, 'name': 'tau&timeconst'}]
    mtod = loader.load(spec_list, {'obs:obs_id': 'obs_00'})
    self.assertCountEqual(mtod['tau'], [T090, T090, T150, T150])

    # Test 2: ManifestDb specifies polcode, which crosses with dataset
    # band.
    scheme = metadata.ManifestScheme() \
        .add_range_match('obs:timestamp') \
        .add_data_field('dets:polcode') \
        .add_data_field('dataset')
    mandb = metadata.ManifestDb(scheme=scheme)
    for polcode, this_pivot in [('A', t_pivot + 1e6),
                                ('B', t_pivot - 1e6)]:
        mandb.add_entry({'dataset': 'timeconst_early',
                         'dets:polcode': polcode,
                         'obs:timestamp': (0, this_pivot)},
                        filename=hdf_fn)
        mandb.add_entry({'dataset': 'timeconst_late',
                         'dets:polcode': polcode,
                         'obs:timestamp': (this_pivot, 4e9)},
                        filename=hdf_fn)
    mandb.to_file(mandb_fn)

    # Now we expect only f090 A and f150 B to resolve to non-bad vals.
    # Make sure you reinit the loader, to avoid cached dbs.
    loader = metadata.SuperLoader(obsdb=obsdb, detdb=detdb)
    mtod = loader.load(spec_list, {'obs:obs_id': 'obs_00'})
    self.assertCountEqual(mtod['tau'], [T090, TBAD, TBAD, T150])