def test_asdf_open(tmpdir, dtype):
    """Exercise the top-level asdf.open entry point on an ASDF-in-FITS file."""
    path = os.path.join(str(tmpdir), 'test.fits')

    # Serialize an AsdfInFits object as a FITS file with an ASDF extension.
    original = create_asdf_in_fits(dtype)
    original.write_to(path)

    # Open directly from the URI, using a context manager.
    with asdf_open(path) as af:
        compare_asdfs(original, af)

    # Open and close explicitly, without a context manager.
    af = asdf_open(path)
    compare_asdfs(original, af)
    af.close()

    # Open from an already-opened binary file handle.
    with open(path, 'rb') as fd:
        with asdf_open(fd) as af:
            compare_asdfs(original, af)

    # Open the file as FITS first and hand the HDUList to asdf.open.
    with fits.open(path) as hdus:
        with asdf_open(hdus) as af:
            compare_asdfs(original, af)
def test_asdf_open(tmpdir):
    """Exercise the top-level asdf.open entry point on an ASDF-in-FITS file."""
    path = os.path.join(str(tmpdir), 'test.fits')

    # Serialize an AsdfInFits object as a FITS file with an ASDF extension.
    original = create_asdf_in_fits()
    original.write_to(path)

    # Open directly from the URI, using a context manager.
    with asdf_open(path) as af:
        compare_asdfs(original, af)

    # Open and close explicitly, without a context manager.
    af = asdf_open(path)
    compare_asdfs(original, af)
    af.close()

    # Open from an already-opened binary file handle.
    with open(path, 'rb') as fd:
        with asdf_open(fd) as af:
            compare_asdfs(original, af)

    # Open the file as FITS first and hand the HDUList to asdf.open.
    with fits.open(path) as hdus:
        with asdf_open(hdus) as af:
            compare_asdfs(original, af)
def test_bad_input(tmpdir):
    """Make sure these functions behave properly with bad input"""
    path = os.path.join(str(tmpdir), 'test.txt')

    # A plain text file is not valid ASDF and must be rejected.
    with open(path, 'w') as out:
        out.write('I <3 ASDF!!!!!')

    with pytest.raises(ValueError):
        asdf_open(path)
def tensorize(self) -> None:
    """Index and then tensorize every parsed ``*.asdf`` file.

    Runs two passes over ``self.parse_dir``: a sequential pre-tensorizing
    pass that feeds each file's nodes into ``self.instance.index``, then a
    parallel pass that maps ``self._tensorize_worker`` over the same files
    with a ``Pool`` of ``self.n_workers`` processes.  A ``finished`` marker
    file under ``self.tensor_dir`` short-circuits the work on re-runs.

    NOTE(review): as reconstructed here, ``self.pickles`` is only populated
    when the marker is absent — confirm callers handle the already-finished
    case.
    """
    try:
        # Cannot use multiprocessing if parser is an attribute of self.
        # Hence this hack with try, finally and a backup/restore of self.parser.
        parser = self.parser
        del self.parser
        finished_marker = self.tensor_dir / "finished"
        if not finished_marker.is_file():
            self._logger.info(f"Pre-tensorizing {self.canonical_name}")
            # Sequential indexing pass: build up instance-level vocabularies
            # before the parallel tensorizing pass — presumably the workers
            # need the fully-built index; TODO confirm.
            for file_path in self.parse_dir.rglob("*.asdf"):
                with asdf_open(str(file_path)) as af:
                    nodes_instance = Nodes.from_tree(af.tree["nodes"])
                    self.instance.index(nodes_instance)
            self._logger.info(f"Pre-tensorized {self.canonical_name}")
            self._logger.info(f"Tensorizing {self.canonical_name}")
            # Workers receive paths relative to parse_dir; self.parser has
            # been detached above so self pickles cleanly across processes.
            with Pool(self.n_workers) as pool:
                pool.map(
                    self._tensorize_worker,
                    [
                        p.relative_to(self.parse_dir)
                        for p in self.parse_dir.rglob("*.asdf")
                    ],
                )
            self._logger.info(f"Tensorized {self.canonical_name}")
            # Collect worker outputs in a deterministic (string) order.
            self.pickles = list(
                sorted((self.tensor_dir / "pickle").rglob("*.pickle.bz2"), key=str))
            # Only mark as finished after all outputs are in place.
            finished_marker.touch()
    finally:
        # Always restore the parser, even if indexing/tensorizing failed.
        self.parser = parser
def _load_config_file_filesystem(config_file):
    """Load a config file from the local filesystem.

    The file is first tried as ASDF; when that fails to parse, it is
    re-read as a plain ConfigObj file.
    """
    if not os.path.isfile(config_file):
        raise ValueError("Config file {0} not found.".format(config_file))

    try:
        with asdf_open(config_file) as af:
            return _config_obj_from_asdf(af)
    except (AsdfValidationError, ValueError):
        # Not ASDF after all — fall back to the legacy ConfigObj format.
        logger.debug(
            'Config file did not parse as ASDF. Trying as ConfigObj: %s',
            config_file)

    return ConfigObj(config_file, raise_errors=True)
def _tensorize_worker(self, file_path: Path) -> None:
    """Tensorize one parsed ASDF file and write the result as a bz2 pickle."""
    self._logger.debug(f"Tensorizing {file_path}")

    source = self.parse_dir / file_path
    with asdf_open(str(source)) as af:
        nodes = Nodes.from_tree(af.tree["nodes"])
        tensors = self.instance.tensorize(nodes)

    # Mirror the input's relative location under tensor_dir/pickle.
    target_dir = (self.tensor_dir / "pickle" / file_path).parent
    target_dir.mkdir(parents=True, exist_ok=True)
    target = (target_dir / file_path.name).with_suffix(".pickle.bz2")
    with bz2_open(target, "wb") as fh:
        dump(tensors, fh, protocol=self.pickle_protocol)

    self._logger.debug(f"Tensorized {file_path}")
def _compare_trees(name_without_ext, expect_warnings=False):
    """Compare the tree of ``<name>.asdf`` against its ``<name>.yaml`` reference."""
    with asdf_open(name_without_ext + ".asdf") as actual:
        actual.resolve_and_inline()
        with asdf_open(name_without_ext + ".yaml") as expected:

            def check():
                assert_tree_match(actual.tree, expected.tree,
                                  funcname='assert_allclose')

            if expect_warnings:
                # Make sure to only suppress warnings when they are expected.
                # However, there's still a chance of missing warnings that we
                # actually care about here.
                with pytest.warns(RuntimeWarning):
                    check()
            else:
                check()
def _load_config_file_filesystem(config_file):
    """Load a config file from the local filesystem.

    The file is first parsed as ASDF; if that fails, it is re-read as a
    ConfigObj file.

    Parameters
    ----------
    config_file : str
        Path to the config file.

    Returns
    -------
    ConfigObj

    Raises
    ------
    ValueError
        If ``config_file`` does not exist.
    """
    if not os.path.isfile(config_file):
        raise ValueError("Config file {0} not found.".format(config_file))
    try:
        # Use a context manager so the ASDF handle is always closed; the
        # original left the asdf_open() result dangling (resource leak) and
        # a ValueError raised by _config_obj_from_asdf escaped the fallback.
        # This also matches the sibling filesystem/S3 loaders in this file.
        with asdf_open(config_file) as asdf_file:
            return _config_obj_from_asdf(asdf_file)
    except (AsdfValidationError, ValueError):
        logger.debug(
            'Config file did not parse as ASDF. Trying as ConfigObj: %s',
            config_file)
        return ConfigObj(config_file, raise_errors=True)
def _load_config_file_s3(config_file):
    """Load a config file stored on S3.

    The object's content is first parsed as ASDF; when that fails, the
    stream is rewound and parsed as a ConfigObj file instead.
    """
    if not s3_utils.object_exists(config_file):
        raise ValueError("Config file {0} not found.".format(config_file))

    body = s3_utils.get_object(config_file)
    try:
        with asdf_open(body) as af:
            return _config_obj_from_asdf(af)
    except (AsdfValidationError, ValueError):
        logger.debug(
            'Config file did not parse as ASDF. Trying as ConfigObj: %s',
            config_file)
        # The ASDF attempt consumed the stream; rewind before re-parsing.
        body.seek(0)
        return ConfigObj(body, raise_errors=True)
def index(
    *,
    uasts_dir: str,
    instance_file: str,
    configs_dir: str,
    encoder_edge_types: List[str],
    max_length: int,
    log_level: str,
) -> None:
    """Index UASTs with respect to some fields.

    Walks every ``*.asdf`` file under ``uasts_dir``, feeds its nodes, CodRep
    label and file path into a freshly built ``Instance``, then saves the
    instance to ``instance_file``.  The invocation arguments are recorded as
    ``index.json`` under ``configs_dir``.
    """
    # locals() here captures exactly the keyword arguments (first statement).
    Config.from_arguments(locals(), ["uasts_dir", "instance_file"], "configs_dir").save(
        Path(configs_dir) / "index.json"
    )
    logger = setup_logging(__name__, log_level)

    uasts_dir_path = Path(uasts_dir).expanduser().resolve()
    instance_file_path = Path(instance_file).expanduser().resolve()

    instance = Instance(
        fields=[
            TypedDGLGraphField(
                name="typed_dgl_graph", type="graph", edge_types=encoder_edge_types
            ),
            MetadataField(name="metadata", type="metadata"),
            BinaryLabelsField(name="label", type="label"),
            IndexesField(name="indexes", type="indexes"),
            InternalTypeField(name="internal_type", type="input"),
            RolesField(name="roles", type="input"),
            LengthField(name="max_length", type="input", max_length=max_length),
        ]
    )

    # Plain %-style lazy formatting: the original used f-strings with no
    # placeholders (f"Indexing %s"), making the f prefix dead weight.
    logger.info("Indexing %s", uasts_dir_path)
    for file_path in uasts_dir_path.rglob("*.asdf"):
        with asdf_open(str(file_path)) as af:
            instance.index(
                {
                    Nodes: Nodes.from_tree(af.tree["nodes"]),
                    CodRepLabel: CodRepLabel.from_tree(af.tree["codrep_label"]),
                    str: af.tree["filepath"],
                }
            )
    instance.save(instance_file_path)
    logger.info("Indexed %s", uasts_dir_path)
def test_create_in_tree_first(tmpdir, dtype):
    """A FITS file whose tree is built first still opens through asdf.open."""
    extensions = ('sci', 'dq', 'err')
    tree = {
        'model': {
            name: {'data': np.arange(512, dtype=dtype), 'wcs': 'WCS info'}
            for name in extensions
        }
    }

    hdulist = fits.HDUList()
    for name in extensions:
        hdulist.append(fits.ImageHDU(tree['model'][name]['data']))

    tmpfile = os.path.join(str(tmpdir), 'test.fits')
    with fits_embed.AsdfInFits(hdulist, tree) as ff:
        ff.write_to(tmpfile)

    plain_path = os.path.join(str(tmpdir), 'plain.asdf')
    with asdf.AsdfFile(tree) as ff:
        ff.write_to(plain_path)

    with asdf.open(plain_path) as ff:
        assert_array_equal(ff.tree['model']['sci']['data'],
                           np.arange(512, dtype=dtype))

    # This tests the changes that allow FITS files with ASDF extensions to be
    # opened directly by the top-level asdf.open API
    with asdf_open(tmpfile) as ff:
        assert_array_equal(ff.tree['model']['sci']['data'],
                           np.arange(512, dtype=dtype))
def test_create_in_tree_first(tmpdir):
    """A FITS file whose tree is built first still opens through asdf.open."""
    # np.float was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # float is the documented replacement and yields the same float64 dtype.
    tree = {
        'model': {
            'sci': {
                'data': np.arange(512, dtype=float),
                'wcs': 'WCS info'
            },
            'dq': {
                'data': np.arange(512, dtype=float),
                'wcs': 'WCS info'
            },
            'err': {
                'data': np.arange(512, dtype=float),
                'wcs': 'WCS info'
            }
        }
    }

    hdulist = fits.HDUList()
    hdulist.append(fits.ImageHDU(tree['model']['sci']['data']))
    hdulist.append(fits.ImageHDU(tree['model']['dq']['data']))
    hdulist.append(fits.ImageHDU(tree['model']['err']['data']))

    tmpfile = os.path.join(str(tmpdir), 'test.fits')
    with fits_embed.AsdfInFits(hdulist, tree) as ff:
        ff.write_to(tmpfile)

    with asdf.AsdfFile(tree) as ff:
        ff.write_to(os.path.join(str(tmpdir), 'plain.asdf'))

    with asdf.open(os.path.join(str(tmpdir), 'plain.asdf')) as ff:
        assert_array_equal(ff.tree['model']['sci']['data'],
                           np.arange(512, dtype=float))

    # This tests the changes that allow FITS files with ASDF extensions to be
    # opened directly by the top-level asdf.open API
    with asdf_open(tmpfile) as ff:
        assert_array_equal(ff.tree['model']['sci']['data'],
                           np.arange(512, dtype=float))
def _tensorize_worker(
    file_path: Path,
    instance: Instance,
    logger: Logger,
    uasts_dir_path: Path,
    output_dir_path: Path,
    pickle_protocol: int,
) -> None:
    """Tensorize one UAST file and persist the tensors as a bz2 pickle."""
    logger.debug(f"Tensorizing {file_path}")

    with asdf_open(str(uasts_dir_path / file_path)) as af:
        tree = af.tree
        tensors = instance.tensorize(
            {
                Nodes: Nodes.from_tree(tree["nodes"]),
                CodRepLabel: CodRepLabel.from_tree(tree["codrep_label"]),
                str: tree["filepath"],
            }
        )

    # Mirror the input's relative location under the output directory.
    destination_dir = (output_dir_path / file_path).parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    destination = (destination_dir / file_path.name).with_suffix(".pickle.bz2")
    with bz2_open(destination, "wb") as fh:
        pickle_dump(tensors, fh, protocol=pickle_protocol)

    logger.debug(f"Tensorized {file_path}")