def _nddata_to_glue_data(ndd, data_label):
    """Yield one glue ``Data`` object per populated attribute of ``ndd``.

    The ``data``, ``mask`` and ``uncertainty`` attributes are visited in
    that order; any attribute that is ``None`` is skipped.

    Parameters
    ----------
    ndd : object
        Object exposing ``data``, ``mask``, ``uncertainty``, ``meta``,
        ``wcs`` and ``unit`` attributes.
    data_label : str
        Base label; each dataset is labelled ``'<data_label>[<ATTRIB>]'``.

    Yields
    ------
    tuple
        ``(data, label)`` for each populated attribute.

    Raises
    ------
    ValueError
        If ``ndd.data`` is not 2-dimensional. Because this is a generator,
        the error surfaces when iteration starts.
    """
    if ndd.data.ndim != 2:
        raise ValueError(f'Imviz cannot load this NDData with ndim={ndd.data.ndim}')

    for attrib in ('data', 'mask', 'uncertainty'):
        values = getattr(ndd, attrib)
        if values is None:
            continue

        comp_label = attrib.upper()
        cur_label = f'{data_label}[{comp_label}]'

        cur_data = Data(label=cur_label)
        cur_data.meta.update(ndd.meta)
        if ndd.wcs is not None:
            cur_data.coords = ndd.wcs

        # Work out which raw array and which unit string back this attribute.
        if attrib == 'data':
            payload, bunit = values, ndd.unit or ''
        elif attrib == 'uncertainty':
            # the uncertainty object wraps its values in ``.array``
            payload, bunit = values.array, values.unit or ''
        else:
            payload, bunit = values, ''

        cur_data.add_component(
            component=Component.autotyped(payload, units=bunit),
            label=comp_label)
        yield cur_data, cur_label
def merge(self, *data, **kwargs):
    """
    Merge two or more datasets into a single new dataset.

    A new :class:`~glue.core.data.Data` object is created, appended to
    this collection and given the coordinates of the first argument.
    Every component of every argument, apart from its pixel and world
    components, is added to the new dataset, and each argument is then
    removed from the collection. A component labelled ``'PRIMARY'`` is
    renamed to the label of its dataset, and label conflicts are
    disambiguated.

    :note: All arguments must have the same shape

    :param data: Two or more :class:`~glue.core.data.Data` instances.
    :param label: Optional keyword; label of the merged dataset
                  (defaults to the label of the first argument).
    :raises ValueError: if fewer than two datasets are given or their
                        shapes differ.

    :returns: self
    """
    if len(data) < 2:
        raise ValueError("merge requires 2 or more arguments")

    reference_shape = data[0].shape
    if any(d.shape != reference_shape for d in data):
        raise ValueError("All arguments must have the same shape")

    merged = Data(label=kwargs.get('label', data[0].label))
    self.append(merged)
    merged.coords = data[0].coords

    for dataset in data:
        coordinate_ids = (dataset.pixel_component_ids +
                          dataset.world_component_ids)
        for cid in dataset.components:
            # skip coordinate components and anything already present
            # in the merged dataset (via a link)
            if cid in coordinate_ids or cid in merged.components:
                continue

            # recomputed each time because ``merged`` grows as we go
            used_labels = [existing.label for existing in merged.components]

            # Special-case 'PRIMARY', rename to data label
            new_label = dataset.label if cid.label == 'PRIMARY' else cid.label

            # First-pass disambiguation, try component_data
            if new_label in used_labels:
                new_label = '%s_%s' % (new_label, dataset.label)

            cid._label = disambiguate(new_label, used_labels)
            merged.add_component(dataset.get_component(cid), cid)
        self.remove(dataset)

    return self
def new_data():
    """Create and register the ``Data`` object for the current HDU.

    Relies on ``label_base``, ``hdu_name``, ``coords``, ``shape``,
    ``groups`` and ``extension_by_shape`` from the enclosing scope. The
    new dataset is stored in ``groups`` under the HDU name, and the HDU
    name is recorded in ``extension_by_shape`` under the current shape.
    """
    dataset = Data(label='{0}[{1}]'.format(label_base, hdu_name))
    dataset.coords = coords

    # bookkeeping used by the caller to find datasets by name and by shape
    groups[hdu_name] = dataset
    extension_by_shape[shape] = hdu_name
    return dataset
def panda_process(indf):
    """
    Build a data set from a table using pandas.

    This attempts to respect categorical data input by letting
    pandas.read_csv infer the type.

    Object and boolean columns are first coerced to numbers. If the
    coercion changes the dtype and fewer than 40% of the coerced values
    are null, the numeric values are used; otherwise the column becomes a
    ``CategoricalComponent``. All other columns are used as they are.

    Parameters
    ----------
    indf : pandas.DataFrame
        The table to convert.

    Returns
    -------
    Data
        A dataset with one component per column of ``indf``.
    """
    # Local import so this block does not depend on a file-level alias.
    import pandas as pd

    result = Data()
    # ``items`` replaces ``iteritems``, which was removed in pandas 2.0.
    for name, column in indf.items():
        # The builtin ``object``/``bool`` replace the removed ``np.object``
        # and ``np.bool`` aliases; the dtype comparison is unchanged.
        if column.dtype == object or column.dtype == bool:
            # try to salvage numerical data (``convert_objects`` no longer
            # exists in pandas; ``to_numeric`` is its replacement)
            coerced = pd.to_numeric(column, errors='coerce')
            if (coerced.dtype != column.dtype) and coerced.isnull().mean() < 0.4:
                c = Component(coerced.values)
            else:
                # pandas has a 'special' nan implementation and this doesn't
                # play well with np.unique
                c = CategoricalComponent(column.fillna(''))
        else:
            c = Component(column.values)

        # convert header to string - in some cases if the first row contains
        # numbers, these are cast to numerical types, so we want to change
        # that here.
        if not isinstance(name, six.string_types):
            name = str(name)

        # strip off leading #
        name = name.strip()
        if name.startswith('#'):
            name = name[1:].strip()

        result.add_component(c, name)

    return result
def test_image():
    """Return a dataset holding a 25x25 array of ones and one of zeros.

    The components are labelled ``'test_1'`` (ones) and ``'test_2'``
    (zeros); the dataset is labelled ``"Test Image"``.
    """
    shape = (25, 25)
    image = Data(label="Test Image")
    image.add_component(Component(np.ones(shape)), 'test_1')
    image.add_component(Component(np.zeros(shape)), 'test_2')
    return image
def astropy_tabular_data(*args, **kwargs):
    """
    Build a data set from a table.

    We restrict ourselves to tables with 1D columns. All arguments are
    forwarded to ``astropy_table_read``. The table metadata becomes the
    dataset metadata, and each column becomes one auto-typed component
    carrying the column's unit.
    """
    table = astropy_table_read(*args, **kwargs)

    result = Data()
    result.meta = table.meta

    # One component per column
    for column_name in table.columns:
        column = table[column_name]

        # columns expose the unit as either ``unit`` or ``units``
        if hasattr(column, 'unit'):
            unit = column.unit
        else:
            unit = column.units

        if table.masked:
            # fill array for now
            try:
                column = column.filled(fill_value=np.nan)
            except (ValueError, TypeError):
                # assigning nan to integer dtype
                column = column.filled(fill_value=-1)

        result.add_component(Component.autotyped(column, units=unit),
                             column_name)

    return result
def npy_npz_reader(filename, format='auto', auto_merge=False, **kwargs):
    """
    Read in Numpy arrays saved to a .npy or .npz file.

    Parameters
    ----------
    filename : str
        The pathname to the Numpy save file.
    format, auto_merge, **kwargs
        Accepted but not used by this function.

    Returns
    -------
    list
        One ``Data`` object per loaded array, in sorted order of array
        name. A structured array contributes one component per field; any
        other array contributes a single component labelled ``'array'``.
    """
    import numpy as np

    loaded = np.load(filename)
    if isinstance(loaded, np.ndarray):
        # a bare array is handled as a single group whose name is None
        loaded = {None: loaded}

    datasets = []
    for groupname in sorted(loaded):
        dataset = Data(label=groupname)
        arr = loaded[groupname]

        field_names = arr.dtype.names
        if field_names is None:
            dataset.add_component(Component.autotyped(arr), label='array')
        else:
            for name in field_names:
                dataset.add_component(Component.autotyped(arr[name]),
                                      label=name)

        datasets.append(dataset)

    return datasets
def test_histogram_data():
    """Return a dataset with 500 uniform and 500 normal random samples.

    The components are labelled ``'uniform'`` and ``'normal'``; the
    dataset is labelled ``"Test Data"``.
    """
    n_samples = 500
    # draw the uniform sample first, then the normal one
    uniform = Component(np.random.uniform(size=n_samples))
    normal = Component(np.random.normal(size=n_samples))

    dataset = Data(label="Test Data")
    dataset.add_component(uniform, 'uniform')
    dataset.add_component(normal, 'normal')
    return dataset
def npz_reader(filename, format='auto', auto_merge=False, **kwargs):
    """
    Read in Numpy structured arrays saved to a .npz file.

    Parameters
    ----------
    filename : str
        The pathname to the Numpy save file.
    format, auto_merge, **kwargs
        Accepted but not used by this function.

    Returns
    -------
    list
        One ``Data`` object per array in the archive, in sorted order of
        array name, each with one component per field of the array.

    Raises
    ------
    ValueError
        If an array in the archive is not a structured array.
    """
    import numpy as np

    npy_data = np.load(filename)

    groups = []
    for groupname in sorted(npy_data.files):
        arr = npy_data[groupname]

        # ``dtype.names`` always exists and is None for unstructured
        # arrays, so test its value rather than its presence.
        if arr.dtype.names is None:
            raise ValueError("Numpy save file loading currently only supports structured"
                             " arrays, e.g., with specified names.")

        d = Data(label=groupname)
        for name in arr.dtype.names:
            comp = Component.autotyped(arr[name])
            d.add_component(comp, label=name)

        groups.append(d)

    return groups
def load_dendro(filename):
    """
    Load a dendrogram saved by the astrodendro package

    :param filename: Path to a dendrogram file
    :returns: A list of 2 glue Data objects: the dendrogram, followed by
              the original dataset.
    """
    label = data_label(filename)

    dg = Dendrogram.load_from(filename)
    structure_ids = np.arange(len(dg))

    # Per-structure properties; a structure without a parent gets -1.
    parent = np.array([
        -1 if dg[i].parent is None else dg[i].parent.idx
        for i in structure_ids
    ])
    height = np.array([dg[i].height for i in structure_ids])
    pk = np.array([dg[i].get_peak(True)[1] for i in structure_ids])

    dendro = Data(parent=parent, height=height, peak=pk,
                  label="{} [dendrogram]".format(label))
    im = Data(intensity=dg.data, structure=dg.index_map,
              label="{} [data]".format(label))

    # join the image's 'structure' component to the dendrogram's first
    # pixel component
    im.join_on_key(dendro, 'structure', dendro.pixel_component_ids[0])
    return [dendro, im]
def vaex_reader(source):
    """
    Read a vaex hdf5 file, or every vaex file found in a directory.

    :param source: Path to a vaex file, or to a directory holding vaex
                   files.
    :returns: A list of ``DataVaex`` objects: a single entry when
              ``source`` is a file, otherwise one entry per vaex file in
              the directory, in sorted filename order.
    :raises Exception: if ``source`` is a directory that contains no
                       vaex files.
    """
    if not os.path.isdir(source):
        return [DataVaex(vaex.open(source))]

    # NOTE(review): this branch used to open each file without keeping the
    # result, so it always ended in the "No vaex files found" error. Each
    # file is now wrapped the same way as the single-file case - confirm
    # this is the intended behaviour for directories.
    datasets = []
    for filename in sorted(glob.glob(os.path.join(source, '*'))):
        if is_vaex_file(filename):
            logger.info("Reading vaex data from {0}".format(filename))
            datasets.append(DataVaex(vaex.open(filename)))
        else:
            logger.info("Not a vaex file: {0}".format(filename))

    if not datasets:
        raise Exception(
            "No vaex files found in directory: {0}".format(source))

    return datasets
def _load_data(rec, context):
    """Rebuild a ``Data`` object from its serialized record.

    ``rec`` supplies the ``'label'``, ``'coords'``, ``'components'`` and
    ``'subsets'`` entries; ``context.object`` turns each stored reference
    back into an object.
    """
    result = Data(label=rec['label'])
    result.coords = context.object(rec['coords'])

    # we manually rebuild pixel/world components, so
    # we override this function. This is pretty ugly
    result._create_pixel_and_world_components = lambda: None

    def _is_derived_or_coordinate(pair):
        return isinstance(pair[1], (DerivedComponent, CoordinateComponent))

    comps = [[context.object(cid), context.object(comp)]
             for cid, comp in rec['components']]
    # stable sort: derived and coordinate components are added last
    comps.sort(key=_is_derived_or_coordinate)

    for cid, comp in comps:
        if isinstance(comp, CoordinateComponent):
            comp._data = result
        result.add_component(comp, cid)

    assert result._world_component_ids == []

    coord_pairs = [pair for pair in comps
                   if isinstance(pair[1], CoordinateComponent)]
    coord_pairs.sort(key=lambda pair: pair[1])
    coord = [pair[0] for pair in coord_pairs]

    assert len(coord) == result.ndim * 2

    # first half of the sorted ids are world ids, second half pixel ids
    half = len(coord) // 2
    result._world_component_ids = coord[:half]
    result._pixel_component_ids = coord[half:]

    for subset_rec in rec['subsets']:
        result.add_subset(context.object(subset_rec))

    return result
def __init__(self, cubeviz_layout):
    """Initialise overlay bookkeeping and wire up the overlay UI widgets.

    Parameters
    ----------
    cubeviz_layout : object
        Layout object exposing ``cubes`` and ``ui``; ``ui`` provides the
        ``overlay_image_combo``, ``overlay_colormap_combo`` and
        ``alpha_slider`` widgets.
    """
    self._cv_layout = cubeviz_layout
    self._cubes = cubeviz_layout.cubes
    ui = cubeviz_layout.ui

    self._overlays = Data('Overlays')
    # This is a list of overlay objects that are currently displayed
    self._active_overlays = []
    # Maps overlays to the data sets they represent
    self._overlay_map = {}
    self._overlay_colorbar_axis = []
    self._colormap_index = DEFAULT_GLUE_COLORMAP_INDEX

    self._overlay_image_combo = ui.overlay_image_combo
    self._overlay_colormap_combo = ui.overlay_colormap_combo
    self._alpha_slider = ui.alpha_slider

    # Image combo: populate first, then listen for changes.
    image_combo = self._overlay_image_combo
    image_combo.addItem("No Overlay")
    image_combo.currentIndexChanged.connect(self._on_overlay_change)

    # Colormap combo: select the default entry, then listen for changes.
    colormap_combo = self._overlay_colormap_combo
    colormap_combo.setCurrentIndex(self._colormap_index)
    colormap_combo.currentIndexChanged.connect(self._on_colormap_change)

    self._alpha_slider.valueChanged.connect(self._on_alpha_change)
def merge(self, *data, **kwargs):
    """
    Merge two or more datasets into a single new dataset.

    A new :class:`~glue.core.data.Data` object is created and appended to
    this collection. Its coordinates come from the first argument whose
    coordinates are ``WCSCoordinates``, or from the first argument if
    there is none. Every component not already present in the new
    dataset is added to it and re-parented, and each argument is then
    removed from the collection. Labels that occur more than once among
    the visible components of the arguments get a ``" [<data label>]"``
    suffix.

    :note: All arguments must have the same shape

    :param data: Two or more :class:`~glue.core.data.Data` instances.
    :param label: Optional keyword; label of the merged dataset
                  (defaults to the label of the first argument).
    :raises ValueError: if fewer than two datasets are given or their
                        shapes differ.

    :returns: self
    """
    if len(data) < 2:
        raise ValueError("merge requires 2 or more arguments")

    reference_shape = data[0].shape
    if any(d.shape != reference_shape for d in data):
        raise ValueError("All arguments must have the same shape")

    merged = Data(label=kwargs.get('label', data[0].label))
    self.append(merged)

    # Default to the first dataset's coordinates, then prefer the first
    # dataset that carries WCS coordinates.
    merged.coords = data[0].coords
    for candidate in data:
        if isinstance(candidate.coords, WCSCoordinates):
            merged.coords = candidate.coords
            break

    # Find ambiguous components (ones which have labels in more than one
    # dataset)
    from collections import Counter
    label_counts = Counter([cid.label
                            for dataset in data
                            for cid in dataset.visible_components])

    for dataset in data:
        for cid in dataset.components:
            if cid in merged.components:
                # already present (via a link)
                continue

            new_label = cid.label
            if label_counts[new_label] > 1:
                new_label = new_label + " [{0}]".format(dataset.label)

            cid._label = new_label
            cid.parent = merged
            merged.add_component(dataset.get_component(cid), cid)

        self.remove(dataset)

    return self
def _ndarray_to_glue_data(arr, data_label):
    """Yield a single glue ``Data`` object wrapping a 2D array.

    Parameters
    ----------
    arr : array
        The array to load; must have ``ndim == 2``.
    data_label : str
        Label given to the dataset.

    Yields
    ------
    tuple
        ``(data, data_label)``, where ``data`` has one auto-typed
        component labelled ``'DATA'``.

    Raises
    ------
    ValueError
        If ``arr`` is not 2-dimensional. Because this is a generator, the
        error surfaces when iteration starts.
    """
    if arr.ndim != 2:
        raise ValueError(f'Imviz cannot load this array with ndim={arr.ndim}')

    dataset = Data(label=data_label)
    dataset.add_component(component=Component.autotyped(arr), label='DATA')
    yield dataset, data_label
def setup_method(self, method):
    """Build a histogram client on a mocked figure, then call ``connect``.

    ``self.data`` is created here but is not added to the (empty)
    collection by this method.
    """
    self.data = Data(x=[1, 2, 3, 2, 2, 3, 1])

    mock_figure = MagicMock()
    self.collect = DataCollection()
    self.client = HistogramClient(self.collect, mock_figure)

    # shortcuts used by the tests
    self.axes = self.client.axes
    self.hub = self.collect.hub

    self.connect()
def setup_class(self):
    """Create a 5-dimensional dataset and a subset state selecting x >= 1200.

    ``x`` counts up from 0 to 2519 and ``y`` is its negation, both with
    shape ``(3, 4, 5, 6, 7)``.
    """
    shape = (3, 4, 5, 6, 7)
    x = +np.arange(2520).reshape(shape)
    y = -np.arange(2520).reshape(shape)

    self.data = Data(x=x, y=y, label='Test data')
    self.x_id, self.y_id = self.data.main_components

    self.subset_state = self.x_id >= 1200
def setup_method(self, method):
    """Create a two-component dataset, a subset and a histogram client.

    The canvas ``draw`` method of the shared ``FIGURE`` is replaced by a
    fresh mock so tests can count redraws.
    """
    x_values = [0, 0, 0, 1, 2, 3, 3, 10, 20]
    y_values = [-1, -1, -1, -2, -2, -2, -3, -5, -7]

    self.data = Data(x=x_values, y=y_values)
    self.subset = self.data.new_subset()
    self.collect = DataCollection(self.data)
    self.client = HistogramClient(self.collect, FIGURE)
    self.axes = self.client.axes

    FIGURE.canvas.draw = MagicMock()
    assert FIGURE.canvas.draw.call_count == 0
def new_data():
    """Build the ``Data`` object for the current HDU and record it.

    Uses ``label_base``, ``hdu_name``, ``coords``, ``shape``, ``groups``
    and ``extension_by_shape`` from the enclosing scope: the dataset is
    labelled ``'<label_base>[<hdu_name>]'``, stored in ``groups`` under
    the HDU name, and the HDU name is stored in ``extension_by_shape``
    under the current shape.
    """
    hdu_label = '{0}[{1}]'.format(label_base, hdu_name)

    created = Data(label=hdu_label)
    created.coords = coords

    groups[hdu_name] = created
    extension_by_shape[shape] = hdu_name

    return created
def setup_method(self, method):
    """Create a dataset with a categorical ``x`` and a histogram client.

    The canvas ``draw`` method of the shared ``FIGURE`` is replaced by a
    fresh mock so tests can count redraws.
    """
    categories = ['a', 'a', 'a', 'b', 'c', 'd', 'd', 'e', 'f']

    self.data = Data(y=[-1, -1, -1, -2, -2, -2, -3, -5, -7])
    self.data.add_component(CategoricalComponent(categories), 'x')

    self.subset = self.data.new_subset()
    self.collect = DataCollection(self.data)
    self.client = HistogramClient(self.collect, FIGURE)
    self.axes = self.client.axes

    FIGURE.canvas.draw = MagicMock()
    assert FIGURE.canvas.draw.call_count == 0
def merge(self, *data, **kwargs):
    """
    Merge two or more datasets into a single new dataset.

    A new :class:`~glue.core.data.Data` object is created and appended to
    this collection. Every component of every argument, apart from its
    pixel and world components, is added to the new dataset, and each
    argument is then removed from the collection. A component labelled
    ``'PRIMARY'`` is renamed to the label of its dataset, and label
    conflicts are disambiguated.

    :note: All arguments must have the same shape

    :param data: Two or more :class:`~glue.core.data.Data` instances.
    :param label: Optional keyword; label of the merged dataset
                  (defaults to the label of the first argument).
    :raises ValueError: if fewer than two datasets are given or their
                        shapes differ.

    :returns: self
    """
    if len(data) < 2:
        raise ValueError("merge requires 2 or more arguments")

    expected_shape = data[0].shape
    for candidate in data:
        if candidate.shape != expected_shape:
            raise ValueError("All arguments must have the same shape")

    combined = Data(label=kwargs.get('label', data[0].label))
    self.append(combined)

    for source in data:
        coordinate_ids = (source.pixel_component_ids +
                          source.world_component_ids)

        for cid in source.components:
            if cid in coordinate_ids:
                continue
            if cid in combined.components:
                # already present (via a link)
                continue

            # labels in use right now; ``combined`` grows as we go
            taken = [present.label for present in combined.components]

            # Special-case 'PRIMARY', rename to data label
            candidate_label = cid.label
            if candidate_label == 'PRIMARY':
                candidate_label = source.label

            # First-pass disambiguation, try component_data
            if candidate_label in taken:
                candidate_label = '%s_%s' % (candidate_label, source.label)

            cid._label = disambiguate(candidate_label, taken)
            combined.add_component(source.get_component(cid), cid)

        self.remove(source)

    return self
def test_links(self):
    """Link ``d1.x`` to ``d2.y`` through ``doubler`` and clone the app.

    After the link is added, ``d1['y']`` must equal ``[2, 4, 6]``; the
    application built on the collection is then passed to
    ``self.check_clone``.
    """
    d1 = Data(label='x', x=[1, 2, 3])
    d2 = Data(label='y', y=[3, 4, 8])
    collection = DataCollection([d1, d2])

    collection.add_link(ComponentLink([d1.id['x']], d2.id['y'], doubler))

    np.testing.assert_array_equal(d1['y'], [2, 4, 6])

    self.check_clone(GlueApplication(collection))
def read_header(header):
    """Convert each stream of ``header`` into a ``Data`` object.

    Every dataset is labelled ``'<stream>_<uid>'`` using the ``'uid'``
    entry of ``header.start``, and receives one component per column of
    ``header.table(stream, fill=True)``.

    Returns a list with one dataset per entry of ``header.stream_names``.
    """
    datasets = []
    for stream in header.stream_names:
        dataset = Data(label="{stream}_{uid}".format(stream=stream,
                                                     uid=header.start['uid']))
        table = header.table(stream, fill=True)
        for column in table.columns:
            # component labels must be strings
            dataset.add_component(table[column], str(column))
        datasets.append(dataset)
    return datasets
def make_test_data():
    """Return a dataset with three 4-element integer components.

    The components are labelled ``'x_gal'``, ``'y_gal'`` and ``'z_gal'``;
    the dataset is labelled ``"Test Cat Data 1"``.
    """
    x_gal = Component(np.array([4, 5, 6, 3]))
    y_gal = Component(np.array([1, 2, 3, 2]))
    z_gal = Component(np.array([2, 3, 4, 1]))

    dataset = Data(label="Test Cat Data 1")
    for component, label in ((x_gal, 'x_gal'),
                             (y_gal, 'y_gal'),
                             (z_gal, 'z_gal')):
        dataset.add_component(component, label)
    return dataset
def test_limits_inf(self):
    """Infinite values must be ignored when computing visible limits.

    ``x`` contains ``+inf`` and ``y`` contains ``-inf``; the expected
    limits are taken from the finite values only.
    """
    d = Data()
    # ``np.inf`` is used because the ``np.infty`` alias was removed in
    # NumPy 2.0.
    x = Component(np.array([[1, 2], [np.inf, 4]]))
    y = Component(np.array([[2, 4], [-np.inf, 8]]))
    xid = d.add_component(x, 'x')
    yid = d.add_component(y, 'y')
    self.collect.append(d)
    self.client.add_layer(d)
    self.client.xatt = xid
    self.client.yatt = yid

    assert self.client._visible_limits(0) == (1, 4)
    assert self.client._visible_limits(1) == (2, 8)
def test_suggest_merge(self):
    """Accepting the dialog shown by ``add_datasets`` merges ``z`` with ``x``.

    ``x`` and ``z`` both have three elements while ``y`` has four; after
    the dialog is accepted the first dataset in the collection must
    expose both ``x`` and ``z`` and the second must still hold ``y``.
    """
    x = Data(x=[1, 2, 3], label='x')
    y = Data(y=[4, 5, 6, 7], label='y')
    z = Data(z=[8, 9, 10], label='z')

    collection = self.app.data_collection
    collection.append(x)
    collection.append(y)

    with process_dialog(delay=500, accept=True):
        self.app.add_datasets(self.app.data_collection, z)

    expectations = ((0, 'x', [1, 2, 3]),
                    (0, 'z', [8, 9, 10]),
                    (1, 'y', [4, 5, 6, 7]))
    for index, label, values in expectations:
        np.testing.assert_equal(self.app.data_collection[index][label],
                                values)
def test_new_data_defaults(self):
    """The default viewer offered depends on the dimensionality of the data.

    With ``pick_class`` patched out, 1D data must default to
    ``ScatterViewer`` and 2D data to ``ImageViewer``.
    """
    with patch('glue.app.qt.application.pick_class') as pick:
        pick.return_value = None

        two_d = Data(x=np.array([[1, 2, 3], [4, 5, 6]]))
        one_d = Data(x=np.array([1, 2, 3]))

        self.app.choose_new_data_viewer(data=one_d)
        _, call_kwargs = pick.call_args
        assert call_kwargs['default'] is ScatterViewer

        self.app.choose_new_data_viewer(data=two_d)
        _, call_kwargs = pick.call_args
        assert call_kwargs['default'] is ImageViewer
def hdf5_reader(filename, format='auto', auto_merge=False, **kwargs):
    """
    Read in all datasets from an HDF5 file

    Parameters
    ----------
    filename : str
        The pathname to the HDF5 file.
    format : str, optional
        Accepted but not used by this function.
    auto_merge : bool, optional
        If True, float/integer arrays that share a shape are added as
        components of the same ``Data`` object.

    Returns
    -------
    list
        The ``Data`` objects that were created, in creation order.
        Float/integer datasets become array components; every other
        dataset is read as a table, with one component per 1D column.
    """
    import h5py
    from astropy.table import Table

    label_base = os.path.basename(filename).rpartition('.')[0]
    if not label_base:
        label_base = os.path.basename(filename)

    data_by_shape = {}
    groups = OrderedDict()

    # The context manager closes the file even if reading fails part-way.
    with h5py.File(filename, 'r') as file_handle:

        # Read in all datasets
        datasets = extract_hdf5_datasets(file_handle)

        for key in datasets:
            label = '{0}[{1}]'.format(label_base, key)
            dataset = datasets[key]

            if dataset.dtype.kind in ('f', 'i'):
                # Read the array once. ``dataset[()]`` replaces the
                # ``.value`` attribute, which was removed in h5py 3.
                array = dataset[()]
                if auto_merge and array.shape in data_by_shape:
                    data = data_by_shape[array.shape]
                else:
                    data = Data(label=label)
                    data_by_shape[array.shape] = data
                    groups[label] = data
                data.add_component(component=array, label=key)
            else:
                table = Table.read(dataset, format='hdf5')
                data = Data(label=label)
                groups[label] = data
                for column_name in table.columns:
                    column = table[column_name]
                    if column.ndim == 1:
                        component = Component(column, units=column.unit)
                        data.add_component(component=component,
                                           label=column_name)
                    else:
                        warnings.warn(
                            "HDF5: Ignoring vector column {0}".format(column_name))

    return list(groups.values())
def newView(self, type="scatter", components=None, title="New View", **kwargs):
    """Create a new plot view and register it with this manager.

    Parameters
    ----------
    type : str
        Kind of plot, forwarded to ``self.factory.createGluePlot``.
    components : list, optional
        Component labels to plot. Defaults to a new empty list.
    title : str
        Title of the view.
    **kwargs
        Forwarded to ``createGluePlot``. ``only_view`` (default False)
        makes the plot use a copy of the current selection instead of
        ``self.data``. ``modal``, ``debug`` and ``mode`` are filled in
        with defaults when the caller does not supply them.

    Returns
    -------
    object or None
        The created plot, or None when ``only_view`` is set and the
        selection is empty.
    """
    # A fresh list per call: the list is stored in ``self.views`` below, so
    # a shared default would be aliased between views.
    if components is None:
        components = []

    only_view = kwargs.get('only_view', False)
    if self.parent is not None:
        kwargs.setdefault('modal', self.parent.modal)
    if self.debug is not None:
        kwargs.setdefault('debug', self.debug)

    # The first view splits the layout; later ones are added as tabs.
    mode = "tab-after" if self.active_views else "split-bottom"
    kwargs.setdefault('mode', mode)

    gp = None
    if only_view is False:
        gp = self.factory.createGluePlot(type, self.data, components, title, **kwargs)
    else:
        # Build a stand-alone dataset restricted to the current selection.
        data = Data(label=self.data.label)
        for c in components:
            data.add_component(self.data[c, self.selection], label=c)
            data.get_component(c).color = self.data.get_component(c).color

        if data.size > 0:
            gp = self.factory.createGluePlot(type, data, components, title, **kwargs)

    if gp is not None:
        if only_view is False:
            gp.setParent(self)
        key = id(gp.window)
        self.active_views[key] = gp
        self.views[key] = {
            'type': type,
            'components': components,
            'title': title,
            'kwargs': kwargs
        }
        if isinstance(gp.window, Floatview):
            gp.window.observe(
                lambda changes: GlueManager.removeViewIfDisposed(
                    self, gp.window), 'uid')
        # ``parent`` is optional (see the check above), so guard the call.
        if self.parent is not None:
            self.parent.updateHistory()

    return gp
def _add_to_table(app, data, comp_label):
    """
    Add a column to the MOS table, creating the table first if none
    exists in the application data collection.

    Parameters
    ----------
    app : `~jdaviz.app.Application`
        The JDAViz application instance.
    data : array-list
        The set of data to be added as a table (e.g. column) component.
    comp_label : str
        The label used to describe the data. Also used as the column
        header.
    """
    collection = app.data_collection
    table_is_new = 'MOS Table' not in collection

    if table_is_new:
        table_data = Data(label="MOS Table")
        collection.append(table_data)

    # Add data to the mos viz table object
    collection['MOS Table'].add_component(data, comp_label)

    if table_is_new:
        # a new table is handed to the viewer after its first column exists
        app.get_viewer("table-viewer").add_data(table_data)
def test_load_data_auto_assigns_label():
    """A dataset returned with an empty label is labelled after the file.

    The factory mock returns a dataset whose label is ``''``; after
    ``load_data('test.fits', factory)`` the label must be ``'test'``.
    """
    unlabelled = Data(x=[1, 2, 3], label='')
    factory = MagicMock()
    factory.return_value = unlabelled

    loaded = df.load_data('test.fits', factory)

    factory.assert_called_once_with('test.fits')
    assert loaded.label == 'test'
def setup_class(self):
    """Create 5-dimensional datasets, with and without affine coordinates.

    ``x`` counts up from 0 to 2519 and ``y`` is its negation, both with
    shape ``(3, 4, 5, 6, 7)``. The coordinate matrix is random except for
    its last row, which is set to ``[0, 0, 0, 0, 0, 1]``.
    """
    shape = (3, 4, 5, 6, 7)
    x = +np.arange(2520).reshape(shape)
    y = -np.arange(2520).reshape(shape)

    self.data = Data(x=x, y=y, label='Test data')
    self.x_id, self.y_id = self.data.main_components

    matrix = np.random.random((6, 6)) - 0.5
    matrix[-1] = [0, 0, 0, 0, 0, 1]
    coords = AffineCoordinates(matrix=matrix)
    self.data_with_coords = Data(x=x, y=y, label='Test data', coords=coords)

    self.subset_state = self.x_id >= 1200
def test_new_data_defaults(self):
    """The default viewer offered depends on the dimensionality of the data.

    With ``pick_class`` patched out, the ``default`` index passed to it
    must point at ``ScatterWidget`` for 1D data and at ``ImageWidget``
    for 2D data in ``qt_client.members``.
    """
    from glue.config import qt_client

    with patch('glue.app.qt.application.pick_class') as pick:
        pick.return_value = None

        two_d = Data(x=np.array([[1, 2, 3], [4, 5, 6]]))
        one_d = Data(x=np.array([1, 2, 3]))

        self.app.choose_new_data_viewer(data=one_d)
        _, call_kwargs = pick.call_args
        assert qt_client.members[call_kwargs['default']] == ScatterWidget

        self.app.choose_new_data_viewer(data=two_d)
        _, call_kwargs = pick.call_args
        assert qt_client.members[call_kwargs['default']] == ImageWidget
def setup_2d_data(self):
    """Add a 2x2 dataset to the client and select its x/y attributes.

    Returns the dataset that was created.
    """
    dataset = Data(x=[[1, 2], [3, 4]], y=[[2, 4], [6, 8]])

    self.collect.append(dataset)
    self.client.add_layer(dataset)

    self.client.xatt = dataset.id['x']
    self.client.yatt = dataset.id['y']
    return dataset
def test_subset_facet(self):
    """Regression test for 335: the facet action runs with data loaded.

    The dialog's ``exec_`` is patched out so the action does not block.
    """
    facet_action = self.app._layer_widget._actions['facet']
    self.app.data_collection.append(Data(x=[1, 2, 3]))

    with patch('glue.dialogs.subset_facet.qt.SubsetFacet.exec_'):
        facet_action._do_action()
def _hdu2data(hdu, data_label, hdulist, include_wcs=True):
    """Convert one HDU into a glue ``Data`` object.

    Parameters
    ----------
    hdu : object
        HDU exposing ``header``, ``name``, ``ver`` and ``data``.
    data_label : str
        Base label; the dataset is labelled
        ``'<data_label>[<NAME>,<ver>]'``.
    hdulist : object
        Passed to ``WCS`` together with the HDU header.
    include_wcs : bool
        If True, the dataset coordinates are set from the header.

    Returns
    -------
    tuple
        ``(data, new_data_label)``.
    """
    # Use the header unit only if present and accepted by the validator.
    bunit = ''
    if 'BUNIT' in hdu.header and _validate_bunit(hdu.header['BUNIT'],
                                                 raise_error=False):
        bunit = hdu.header['BUNIT']

    comp_label = f'{hdu.name.upper()},{hdu.ver}'
    new_data_label = f'{data_label}[{comp_label}]'

    dataset = Data(label=new_data_label)
    if include_wcs:
        dataset.coords = WCS(hdu.header, hdulist)

    dataset.add_component(
        component=Component.autotyped(hdu.data, units=bunit),
        label=comp_label)
    return dataset, new_data_label
def _load_data(rec, context):
    """Rebuild a ``Data`` object from its serialized record.

    ``rec`` supplies the ``'label'``, ``'coords'``, ``'components'`` and
    ``'subsets'`` entries; ``context.object`` turns each stored reference
    back into an object. Pixel component ids that are not
    ``PixelComponentID`` instances are upgraded while loading.
    """
    result = Data(label=rec['label'])
    result.coords = context.object(rec['coords'])

    # we manually rebuild pixel/world components, so
    # we override this function. This is pretty ugly
    result._create_pixel_and_world_components = lambda: None

    comps = [[context.object(cid), context.object(comp)]
             for cid, comp in rec['components']]

    for index, (cid, comp) in enumerate(comps):
        if isinstance(comp, CoordinateComponent):
            comp._data = result

            # For backward compatibility, we need to check for cases where
            # the component ID for the pixel components was not a
            # PixelComponentID and upgrade it to one. This can be removed
            # once we no longer support pre-v0.8 session files.
            needs_upgrade = not (comp.world or
                                 isinstance(cid, PixelComponentID))
            if needs_upgrade:
                cid = PixelComponentID(comp.axis, cid.label,
                                       hidden=cid.hidden,
                                       parent=cid.parent)
                comps[index] = (cid, comp)

        result.add_component(comp, cid)

    assert result._world_component_ids == []

    coordinate_pairs = [pair for pair in comps
                        if isinstance(pair[1], CoordinateComponent)]
    coordinate_pairs.sort(key=lambda pair: pair[1])
    coord = [pair[0] for pair in coordinate_pairs]

    assert len(coord) == result.ndim * 2

    # first half of the sorted ids are world ids, second half pixel ids
    half = len(coord) // 2
    result._world_component_ids = coord[:half]
    result._pixel_component_ids = coord[half:]

    for subset_rec in rec['subsets']:
        result.add_subset(context.object(subset_rec))

    return result
def casalike_cube(filename, **kwargs):
    """
    This provides special support for 4D CASA FITS - like cubes,
    which have 2 spatial axes, a spectral axis, and a stokes axis
    in that order.

    Each stokes cube is split out as a separate component, labelled
    ``'STOKES <i>'``. Keyword arguments are passed to ``fits.open``; the
    array and header are taken from the first HDU.
    """
    from astropy.io import fits

    result = Data()

    with fits.open(filename, **kwargs) as hdulist:
        primary = hdulist[0]
        array = primary.data
        header = primary.header

    result.coords = coordinates_from_header(header)

    n_stokes = array.shape[0]
    for index in range(n_stokes):
        # indexing with a list keeps the leading axis (length 1)
        result.add_component(array[[index]], label='STOKES %i' % index)

    return result
def setup_method(self, method):
    """Prepare a dataset with a categorical ``x`` plus a histogram client.

    ``FIGURE.canvas.draw`` is swapped for a fresh mock so that redraws
    can be counted by the tests.
    """
    y_values = [-1, -1, -1, -2, -2, -2, -3, -5, -7]
    x_categories = CategoricalComponent(
        ['a', 'a', 'a', 'b', 'c', 'd', 'd', 'e', 'f'])

    self.data = Data(y=y_values)
    self.data.add_component(x_categories, 'x')
    self.subset = self.data.new_subset()

    self.collect = DataCollection(self.data)
    self.client = HistogramClient(self.collect, FIGURE)
    self.axes = self.client.axes

    FIGURE.canvas.draw = MagicMock()
    assert FIGURE.canvas.draw.call_count == 0
def test_ticks_go_back_after_changing(self):
    """
    If you change to a categorical axis and then change back to a
    numeric one, the axis ticks should fix themselves properly.
    """
    n_points = 100
    data = Data()
    data.add_component(Component(np.arange(n_points)), 'y')
    data.add_component(CategoricalComponent(['a'] * 50 + ['b'] * 50),
                       'xcat')
    data.add_component(Component(2 * np.arange(n_points)), 'xcont')

    self.add_data(data=data)
    self.client.yatt = data.find_component_id('y')

    # categorical x axis
    self.client.xatt = data.find_component_id('xcat')
    self.check_ticks(self.client.axes.xaxis, False, True)
    self.check_ticks(self.client.axes.yaxis, False, False)

    # back to a numeric x axis
    self.client.xatt = data.find_component_id('xcont')
    self.check_ticks(self.client.axes.yaxis, False, False)
    self.check_ticks(self.client.axes.xaxis, False, False)
def npy_reader(filename, format='auto', auto_merge=False, **kwargs):
    """
    Read in a Numpy structured array saved to a .npy file.

    Parameters
    ----------
    filename : str
        The pathname to the Numpy save file.
    format, auto_merge, **kwargs
        Accepted but not used by this function.

    Returns
    -------
    Data
        A dataset with one component per field of the structured array.

    Raises
    ------
    ValueError
        If the loaded array is not a structured array.
    """
    import numpy as np

    npy_data = np.load(filename)

    # ``dtype.names`` always exists and is None for unstructured arrays,
    # so test its value rather than its presence.
    if npy_data.dtype.names is None:
        raise ValueError("Numpy save file loading currently only supports structured"
                         " arrays, e.g., with specified names.")

    d = Data()
    for name in npy_data.dtype.names:
        comp = Component(npy_data[name])
        d.add_component(comp, label=name)

    return d
def img_data(file_name):
    """Load common image files into a Glue data object

    The image is flipped vertically after loading. Arrays whose third
    axis has length 3 or 4 are split into ``'red'``, ``'green'``,
    ``'blue'`` (and ``'alpha'``) components; any other array becomes a
    single ``'PRIMARY'`` component. If AVM metadata can be read from the
    file, it is used to set the coordinates.

    :param file_name: Path of the image file.
    :returns: The populated ``Data`` object.
    """
    result = Data()

    data = img_loader(file_name)
    data = np.flipud(data)
    shp = data.shape

    comps = []
    labels = []

    # split 3 color images into each color plane
    if len(shp) == 3 and shp[2] in [3, 4]:
        comps.extend([data[:, :, 0], data[:, :, 1], data[:, :, 2]])
        labels.extend(['red', 'green', 'blue'])
        if shp[2] == 4:
            comps.append(data[:, :, 3])
            labels.append('alpha')
    else:
        comps = [data]
        labels = ['PRIMARY']

    # look for AVM coordinate metadata
    try:
        from pyavm import AVM
        avm = AVM(str(file_name))  # avoid unicode
        wcs = avm.to_wcs()
    except Exception:
        # Best effort: pyavm may be missing or the file may carry no usable
        # AVM data. ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    else:
        result.coords = coordinates_from_wcs(wcs)

    for component, label in zip(comps, labels):
        result.add_component(component, label)

    return result
def new_data(suffix=True):
    """Create and register the ``Data`` object for the current HDU.

    Relies on ``label_base``, ``hdu_name``, ``hdu``, ``coords``,
    ``shape``, ``groups`` and ``extension_by_shape`` from the enclosing
    scope. The HDU header is copied into the dataset metadata.

    :param suffix: If True the label is ``'<label_base>[<hdu_name>]'``,
                   otherwise it is just ``label_base``.
    :returns: The new ``Data`` object.
    """
    label = '{0}[{1}]'.format(label_base, hdu_name) if suffix else label_base

    data = Data(label=label)
    data.coords = coords

    # We need to be careful here because some header values are special
    # objects that we should convert to strings
    for key, value in hdu.header.items():
        if key in ('COMMENT', 'HISTORY'):
            # these keys can repeat, so their values are collected in a list
            if key in data.meta:
                data.meta[key].append(str(value))
            else:
                data.meta[key] = [str(value)]
        elif isinstance(value, (string_types, int, float, bool)):
            data.meta[key] = value
        else:
            data.meta[key] = str(value)

    groups[hdu_name] = data
    extension_by_shape[shape] = hdu_name
    return data
def casalike_cube(filename, **kwargs):
    """
    This provides special support for 4D CASA FITS - like cubes,
    which have 2 spatial axes, a spectral axis, and a stokes axis
    in that order.

    Each stokes cube is split out as a separate auto-typed component,
    labelled ``'STOKES <i>'`` and carrying the header's ``BUNIT`` value
    as its units. Keyword arguments are passed to ``fits.open``, with
    ``ignore_missing_end`` defaulting to True.
    """
    from astropy.io import fits

    result = Data()

    kwargs.setdefault('ignore_missing_end', True)

    with fits.open(filename, **kwargs) as hdulist:
        primary = hdulist[0]
        array = primary.data
        header = primary.header

    result.coords = coordinates_from_header(header)

    for index in range(array.shape[0]):
        units = header.get('BUNIT')
        # indexing with a list keeps the leading axis (length 1)
        plane = Component.autotyped(array[[index]], units=units)
        result.add_component(plane, label='STOKES %i' % index)

    return result
def _load_data(rec, context):
    """Restore a ``Data`` object from the record ``rec``.

    The label, coordinates, components and subsets are read from
    ``rec``, and ``context.object`` resolves each stored reference.
    Coordinate components are re-attached to the new dataset, and pixel
    component ids that are not ``PixelComponentID`` instances are
    replaced by ones that are.
    """
    label = rec['label']
    result = Data(label=label)
    result.coords = context.object(rec['coords'])

    # we manually rebuild pixel/world components, so
    # we override this function. This is pretty ugly
    result._create_pixel_and_world_components = lambda: None

    comps = []
    for stored_cid, stored_comp in rec['components']:
        comps.append(list(map(context.object, [stored_cid, stored_comp])))

    for position, (cid, comp) in enumerate(comps):
        is_coordinate = isinstance(comp, CoordinateComponent)
        if is_coordinate:
            comp._data = result

            # For backward compatibility, we need to check for cases where
            # the component ID for the pixel components was not a
            # PixelComponentID and upgrade it to one. This can be removed
            # once we no longer support pre-v0.8 session files.
            if not comp.world and not isinstance(cid, PixelComponentID):
                cid = PixelComponentID(comp.axis, cid.label,
                                       hidden=cid.hidden,
                                       parent=cid.parent)
                comps[position] = (cid, comp)

        result.add_component(comp, cid)

    assert result._world_component_ids == []

    coord = sorted((entry for entry in comps
                    if isinstance(entry[1], CoordinateComponent)),
                   key=lambda entry: entry[1])
    coord = [entry[0] for entry in coord]

    assert len(coord) == result.ndim * 2

    # the sorted ids split evenly: world ids first, then pixel ids
    midpoint = len(coord) // 2
    result._world_component_ids = coord[:midpoint]
    result._pixel_component_ids = coord[midpoint:]

    for stored_subset in rec['subsets']:
        result.add_subset(context.object(stored_subset))

    return result
def load_dendro(file):
    """
    Load a dendrogram saved by the astrodendro package

    :param file: Path to a dendrogram file
    :returns: A list of 2 glue Data objects: the dendrogram (labelled
              ``'Dendrogram'``), followed by the original dataset.
    """
    dg = Dendrogram.load_from(file)
    structure_ids = np.arange(len(dg))

    # Per-structure properties; a structure without a parent gets -1.
    parent = np.array([
        -1 if dg[i].parent is None else dg[i].parent.idx
        for i in structure_ids
    ])
    height = np.array([dg[i].height for i in structure_ids])
    pk = np.array([dg[i].get_peak(True)[1] for i in structure_ids])

    dendro = Data(parent=parent, height=height, peak=pk, label='Dendrogram')
    im = Data(intensity=dg.data, structure=dg.index_map)

    # join the image's 'structure' component to the dendrogram's first
    # pixel component
    im.join_on_key(dendro, 'structure', dendro.pixel_component_ids[0])
    return [dendro, im]
def gridded_data(filename, format='auto', **kwargs):
    """Load gridded data from a FITS or HDF5 file into a ``Data`` object.

    :param filename: Path of the file to read.
    :param format: Format name; when ``'auto'`` it is looked up with
                   ``file_format``. It is only used in the error message:
                   the reader is chosen by ``is_fits`` / ``is_hdf5``.
    :param kwargs: Passed to the array extraction function.
    :returns: A ``Data`` object with one auto-typed component per
              extracted array; FITS files also set the coordinates from
              the header.
    :raises Exception: if the file is neither FITS nor HDF5.
    """
    result = Data()

    # Try and automatically find the format if not specified
    if format == 'auto':
        format = file_format(filename)

    # Read in the data
    fits_input = is_fits(filename)
    if not fits_input and not is_hdf5(filename):
        raise Exception("Unkonwn format: %s" % format)

    if fits_input:
        from astropy.io import fits
        arrays = extract_data_fits(filename, **kwargs)
        header = fits.getheader(filename)
        result.coords = coordinates_from_header(header)
    else:
        arrays = extract_data_hdf5(filename, **kwargs)

    for component_name in arrays:
        result.add_component(Component.autotyped(arrays[component_name]),
                             component_name)

    return result
def test_data():
    """Return two small datasets with two integer components each.

    The first holds components ``'a'`` and ``'b'``, the second ``'c'``
    and ``'d'``; every component has three elements.
    """
    first = Data(label="Test Data 1")
    second = Data(label="Teset Data 2")

    comp_a = Component(np.array([1, 2, 3]))
    comp_b = Component(np.array([1, 2, 3]))
    comp_c = Component(np.array([2, 4, 6]))
    comp_d = Component(np.array([1, 3, 5]))

    for target, component, label in ((first, comp_a, 'a'),
                                     (first, comp_b, 'b'),
                                     (second, comp_c, 'c'),
                                     (second, comp_d, 'd')):
        target.add_component(component, label)

    return first, second
def hdf5_reader(filename, auto_merge=False, memmap=True, **kwargs):
    """
    Read in all datasets from an HDF5 file

    Parameters
    ----------
    filename : str
        The filename of the HDF5 file
    auto_merge : bool, optional
        If True, arrays that share a shape are added as components of the
        same ``Data`` object.
    memmap : bool, optional
        Whether to use memory mapping

    Returns
    -------
    list
        The ``Data`` objects that were created, in creation order.
    """
    from astropy.table import Table

    # Read in all datasets
    datasets = extract_hdf5_datasets(filename, memmap=memmap)

    base = os.path.basename(filename)
    label_base = base.rpartition('.')[0]
    if not label_base:
        label_base = os.path.basename(filename)

    data_by_shape = {}
    groups = OrderedDict()

    for key in datasets:
        label = '{0}[{1}]'.format(label_base, key)
        array = datasets[key]

        if not isinstance(array, Table):
            # Plain array: reuse a dataset of the same shape if allowed.
            if auto_merge and array.shape in data_by_shape:
                data = data_by_shape[datasets[key].shape]
            else:
                data = Data(label=label)
                data_by_shape[array.shape] = data
                groups[label] = data
            # the first character of the key is dropped from the label
            data.add_component(component=datasets[key], label=key[1:])
            continue

        # Table: one dataset per table, one component per 1D column.
        data = Data(label=label)
        groups[label] = data
        for column_name in array.columns:
            column = array[column_name]
            if column.ndim == 1:
                component = Component.autotyped(column, units=column.unit)
                data.add_component(component=component, label=column_name)
            else:
                warnings.warn("HDF5: Ignoring vector column {0}".format(column_name))

    return list(groups.values())
def test_categorical_data():
    """Return two datasets, each with a categorical and a numeric component.

    The first holds ``'x1'`` (categorical) and ``'y1'``, the second
    ``'x2'`` (categorical) and ``'y2'``; every component has three
    elements.
    """
    first = Data(label="Test Cat Data 1")
    second = Data(label="Teset Cat Data 2")

    comp_x1 = CategoricalComponent(np.array(['a', 'a', 'b']))
    comp_y1 = Component(np.array([1, 2, 3]))
    comp_x2 = CategoricalComponent(np.array(['c', 'a', 'b']))
    comp_y2 = Component(np.array([1, 3, 5]))

    for target, component, label in ((first, comp_x1, 'x1'),
                                     (first, comp_y1, 'y1'),
                                     (second, comp_x2, 'x2'),
                                     (second, comp_y2, 'y2')):
        target.add_component(component, label)

    return first, second
def test_high_cardinatility_timing(self):
    """Setting a categorical x attribute with 50000 categories is fast.

    A single call to ``_set_xydata`` must take less than 3 seconds.
    """
    card = 50000
    categories = [str(num) for num in range(card)]

    data = Data()
    data.add_component(Component(np.arange(card * 5)), 'y')
    data.add_component(CategoricalComponent(np.repeat([categories], 5)),
                       'xcat')
    self.add_data(data)

    xcat_id = data.find_component_id('xcat')
    elapsed = timeit(partial(self.client._set_xydata, 'x', xcat_id),
                     number=1)

    assert elapsed < 3  # this is set for Travis speed
def _parse_data_dataframe(data, label):
    """Wrap the columns of a data frame in a single ``Data`` object.

    :param data: Object exposing ``columns`` and column lookup by name.
    :param label: Label for the dataset; a falsy label becomes ``'Data'``.
    :returns: A one-element list holding the new ``Data`` object, which
              has one component per column, labelled ``str(column)``.
    """
    if not label:
        label = 'Data'

    dataset = Data(label=label)
    for column in data.columns:
        dataset.add_component(data[column], str(column))
    return [dataset]