def read(self):
    """
    Given an ISIS store, read the underlying ISIS3 compatible control
    network and return an IsisControlNetwork dataframe.
    """
    pvl_header = pvl.load(self._path)
    header_start_byte = find_in_dict(pvl_header, 'HeaderStartByte')
    header_bytes = find_in_dict(pvl_header, 'HeaderBytes')
    point_start_byte = find_in_dict(pvl_header, 'PointsStartByte')
    version = find_in_dict(pvl_header, 'Version')

    if version == 2:
        self.point_attrs = [i for i in cnf._CONTROLPOINTFILEENTRYV0002.fields_by_name if i != 'measures']
        self.measure_attrs = [i for i in cnf._CONTROLPOINTFILEENTRYV0002_MEASURE.fields_by_name]

        # Read the protobuf header that stores the per-point message sizes
        self._handle.seek(header_start_byte)
        pbuf_header = cnf.ControlNetFileHeaderV0002()
        pbuf_header.ParseFromString(self._handle.read(header_bytes))

        self._handle.seek(point_start_byte)
        cp = cnf.ControlPointFileEntryV0002()
        pts = []
        for s in pbuf_header.pointMessageSizes:
            cp.ParseFromString(self._handle.read(s))
            pt = [getattr(cp, i) for i in self.point_attrs if i != 'measures']
            for measure in cp.measures:
                meas = pt + [getattr(measure, j) for j in self.measure_attrs]
                pts.append(meas)
    elif version == 5:
        self.point_attrs = [i for i in cnp5._CONTROLPOINTFILEENTRYV0005.fields_by_name if i != 'measures']
        self.measure_attrs = [i for i in cnp5._CONTROLPOINTFILEENTRYV0005_MEASURE.fields_by_name]

        self._handle.seek(header_start_byte)
        pbuf_header = cnh5.ControlNetFileHeaderV0005()
        pbuf_header.ParseFromString(self._handle.read(header_bytes))

        # Version 5 length-prefixes each point message with a 4-byte size
        self._handle.seek(point_start_byte)
        cp = cnp5.ControlPointFileEntryV0005()
        pts = []
        byte_count = 0
        while byte_count < find_in_dict(pvl_header, 'PointsBytes'):
            message_size = struct.unpack('I', self._handle.read(4))[0]
            cp.ParseFromString(self._handle.read(message_size))
            pt = [getattr(cp, i) for i in self.point_attrs if i != 'measures']
            for measure in cp.measures:
                meas = pt + [getattr(measure, j) for j in self.measure_attrs]
                pts.append(meas)
            byte_count += 4 + message_size

    cols = self.point_attrs + self.measure_attrs
    df = IsisControlNetwork(pts, columns=cols)
    df.header = pvl_header
    return df

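# A self-contained sketch of the length-prefixed framing the version 5 branch
# reads: each record is preceded by a 4-byte unsigned size in native byte
# order, matching struct.unpack('I', ...) above.  The payloads here are plain
# bytes standing in for serialized protobuf messages.
import io
import struct

messages = [b'point-one', b'point-two-longer']
buf = io.BytesIO()
for m in messages:
    buf.write(struct.pack('I', len(m)))
    buf.write(m)

buf.seek(0)
total = buf.getbuffer().nbytes
byte_count = 0
while byte_count < total:
    size = struct.unpack('I', buf.read(4))[0]
    print(buf.read(size))
    byte_count += 4 + size
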
def test_cnet_read(cnet_file):
    df = io_controlnetwork.from_isis(cnet_file)
    assert len(df) == find_in_dict(df.header, 'NumberOfMeasures')
    assert isinstance(df, io_controlnetwork.IsisControlNetwork)
    assert len(df.groupby('id')) == find_in_dict(df.header, 'NumberOfPoints')
    # Colliding protobuf field names are mangled on read, so only the
    # mangled names should appear as columns.
    for proto_field, mangled_field in io_controlnetwork.IsisStore.point_field_map.items():
        assert proto_field not in df.columns
        assert mangled_field in df.columns
    for proto_field, mangled_field in io_controlnetwork.IsisStore.measure_field_map.items():
        assert proto_field not in df.columns
        assert mangled_field in df.columns

def test_create_pvl_header(self):
    pvl_header = pvl.load('test.net')

    npoints = find_in_dict(pvl_header, 'NumberOfPoints')
    self.assertEqual(5, npoints)

    mpoints = find_in_dict(pvl_header, 'NumberOfMeasures')
    self.assertEqual(10, mpoints)

    points_bytes = find_in_dict(pvl_header, 'PointsBytes')
    self.assertEqual(675, points_bytes)

    points_start_byte = find_in_dict(pvl_header, 'PointsStartByte')
    self.assertEqual(self.point_start_byte, points_start_byte)

def test_create_pvl_header(cnet_dataframe, tmpdir):
    with open(tmpdir.join('test.net'), 'rb') as f:
        pvl_header = pvl.load(f)

    npoints = find_in_dict(pvl_header, 'NumberOfPoints')
    assert npoints == 5

    mpoints = find_in_dict(pvl_header, 'NumberOfMeasures')
    assert mpoints == 10

    points_bytes = find_in_dict(pvl_header, 'PointsBytes')
    assert points_bytes == 796

    points_start_byte = find_in_dict(pvl_header, 'PointsStartByte')
    assert points_start_byte == cnet_dataframe.point_start_byte

def get_isis_translation(label):
    """
    Find the ISIS translation for a given image using the input cube or
    the label extracted from the cube.

    Parameters
    ----------
    label : dict or str
        A PVL dict object or file name to extract the PVL object from

    Returns
    -------
    translation : dict
        A PVLModule object containing the extracted translation file
    """
    # Instantiate a DB session if not already instantiated
    if not hasattr(plio, 'data_session'):
        plio.data_session = setup_db_session(get_data('data.db'))

    # Read the label if it has not already been read
    if not isinstance(label, PVLModule):
        label = pvl.load(label)

    # Grab the spacecraft name and run it through the ISIS lookup
    spacecraft_name = find_in_dict(label, 'SpacecraftName')
    for row in plio.data_session.query(StringToMission).filter(StringToMission.key == spacecraft_name):
        spacecraft_name = row.value.lower()

    # Try to pull an instrument identifier
    try:
        instrumentid = find_in_dict(label, 'InstrumentId').capitalize()
    except:
        instrumentid = None

    translation = None
    # Grab the translation PVL object using the lookup
    for row in plio.data_session.query(Translations).filter(Translations.mission == spacecraft_name,
                                                            Translations.instrument == instrumentid):
        # Convert the JSON back to a PVL object
        translation = PVLModule(row.translation)
    return translation

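# Hypothetical usage of get_isis_translation; 'image.cub' is a placeholder
# path, not a real file.
translation = get_isis_translation('image.cub')
if translation is not None:
    print(translation)
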
def footprint(self):
    if not hasattr(self, '_footprint'):
        try:
            polygon_pvl = find_in_dict(self.metadata, 'Polygon')
            start_polygon_byte = find_in_dict(polygon_pvl, 'StartByte')
            num_polygon_bytes = find_in_dict(polygon_pvl, 'Bytes')
            # I too dislike the additional open here.  Not sure there is a good option.
            with open(self.file_name, 'r') as f:
                f.seek(start_polygon_byte - 1)
                # Sloppy unicode to string because GDAL pukes on unicode
                stream = str(f.read(num_polygon_bytes))
                self._footprint = ogr.CreateGeometryFromWkt(stream)
        except:
            self._footprint = None
    return self._footprint

def test_create_pvl_header(cnet_dataframe, tmpdir):
    # Write the cnet
    io_controlnetwork.to_isis(cnet_dataframe, tmpdir.join('test.net'), mode='wb', targetname='Moon')

    with open(tmpdir.join('test.net'), 'rb') as stream:
        pvl_header = pvl.load(stream, grammar=pvl.grammar.ISISGrammar())

    npoints = find_in_dict(pvl_header, 'NumberOfPoints')
    assert npoints == 5

    mpoints = find_in_dict(pvl_header, 'NumberOfMeasures')
    assert mpoints == 10

    points_bytes = find_in_dict(pvl_header, 'PointsBytes')
    assert points_bytes == 796

    points_start_byte = find_in_dict(pvl_header, 'PointsStartByte')
    assert points_start_byte == cnet_dataframe.point_start_byte

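# A minimal sketch of the kind of PVL lookup the assertions above perform.
# The document below is illustrative, not the real control network header
# layout; find_in_dict is assumed to be plio.utils.utils.find_in_dict, which
# searches nested PVL groups for a key.
import pvl
from plio.utils.utils import find_in_dict

doc = """
Object = ControlNetworkInfo
  NumberOfPoints   = 5
  NumberOfMeasures = 10
End_Object
End
"""
header = pvl.loads(doc)
assert find_in_dict(header, 'NumberOfPoints') == 5
assert find_in_dict(header, 'NumberOfMeasures') == 10
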
def footprint(self):
    if not hasattr(self, '_footprint'):
        # Try to get the footprint from the image
        try:
            polygon_pvl = find_in_dict(self.metadata, 'Polygon')
            start_polygon_byte = find_in_dict(polygon_pvl, 'StartByte')
            num_polygon_bytes = find_in_dict(polygon_pvl, 'Bytes')
            # I too dislike the additional open here.  Not sure there is a good option.
            with open(self.file_name, 'r') as f:
                f.seek(start_polygon_byte - 1)
                # Sloppy unicode to string because GDAL pukes on unicode
                stream = str(f.read(num_polygon_bytes))
                self._footprint = ogr.CreateGeometryFromWkt(stream)
        except:
            self._footprint = None

        # If the image does not have an embedded footprint, try building one
        # from the projected corner coordinates
        if self._footprint is None:
            try:
                # Get the lat lon corners
                lat = [i[0] for i in self.latlon_corners]
                lon = [i[1] for i in self.latlon_corners]

                # Compute an ogr geometry for the image which
                # provides leverage for overlaps
                ring = ogr.Geometry(ogr.wkbLinearRing)
                for point in zip(lon, lat):
                    ring.AddPoint(*point)
                ring.AddPoint(lon[0], lat[0])
                poly = ogr.Geometry(ogr.wkbPolygon)
                poly.AddGeometry(ring)
                poly.FlattenTo2D()
                self._footprint = poly
            except:
                self._footprint = None
    return self._footprint

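# A minimal sketch of why the footprint is kept as an ogr geometry: two
# footprints can be tested for overlap directly.  The WKT strings here are
# illustrative, not taken from any real image.
from osgeo import ogr

a = ogr.CreateGeometryFromWkt('POLYGON ((0 0, 4 0, 4 4, 0 4, 0 0))')
b = ogr.CreateGeometryFromWkt('POLYGON ((2 2, 6 2, 6 6, 2 6, 2 2))')
print(a.Intersects(b))                   # True
print(a.Intersection(b).ExportToWkt())   # the shared overlap as WKT
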
def read(self):
    """
    Given an ISIS store, read the underlying ISIS3 compatible control
    network and return an IsisControlNetwork dataframe.
    """
    pvl_header = pvl.load(self._path)
    header_start_byte = find_in_dict(pvl_header, 'HeaderStartByte')
    header_bytes = find_in_dict(pvl_header, 'HeaderBytes')
    point_start_byte = find_in_dict(pvl_header, 'PointsStartByte')
    version = find_in_dict(pvl_header, 'Version')

    if version == 2:
        point_attrs = [i for i in cnf._CONTROLPOINTFILEENTRYV0002.fields_by_name if i != 'measures']
        measure_attrs = [i for i in cnf._CONTROLPOINTFILEENTRYV0002_MEASURE.fields_by_name]
        cols = point_attrs + measure_attrs

        self._handle.seek(header_start_byte)
        pbuf_header = cnf.ControlNetFileHeaderV0002()
        pbuf_header.ParseFromString(self._handle.read(header_bytes))

        self._handle.seek(point_start_byte)
        cp = cnf.ControlPointFileEntryV0002()
        pts = []
        for s in pbuf_header.pointMessageSizes:
            cp.ParseFromString(self._handle.read(s))
            pt = [getattr(cp, i) for i in point_attrs if i != 'measures']
            for measure in cp.measures:
                meas = pt + [getattr(measure, j) for j in measure_attrs]
                pts.append(meas)

    df = IsisControlNetwork(pts, columns=cols)
    df.header = pvl_header
    return df

def campt_header(outcube):
    """
    Compute the incidence angle at the center of the image and the local
    solar time.  These are required by the Davinci processing pipeline to
    determine what processing to perform.
    """
    workingpath, fname = os.path.split(outcube)
    fname = os.path.splitext(fname)[0]

    header = pvl.load(outcube)
    samples = find_in_dict(header, 'Samples')
    lines = find_in_dict(header, 'Lines')

    coordinatelist = os.path.join(workingpath, 'coordinatelist.lis')
    with open(coordinatelist, 'w') as f:
        f.write('{},{}\n'.format(samples / 2, lines / 2))
        f.write('1,1\n')  # Upper left
        f.write('{},{}\n'.format(samples - 1, lines - 1))  # Lower right

    campt = pvl.loads(isis.campt(from_=outcube,
                                 to=os.path.join(workingpath, fname + '_campt.pvl'),
                                 usecoordlist='yes',
                                 coordlist=coordinatelist,
                                 coordtype='image'))
    for j, g in enumerate(campt.items()):
        if j == 0:
            # Incidence at the center of the image
            try:
                incidence = g[1]['Incidence'].value
            except:
                incidence = g[1]['Incidence']
        elif j == 1:
            # Upper left corner pixel
            stoplocaltime = g[1]['LocalSolarTime'].value
        elif j == 2:
            # Lower right corner pixel
            startlocaltime = g[1]['LocalSolarTime'].value
    return incidence, stoplocaltime, startlocaltime

def extract_keywords(header, *args):
    """
    For a given header, find all of the requested keys and return an
    unnested dict.
    """
    try:
        header = pvl.load(header)
    except:
        header = pvl.loads(header)

    res = {}
    # Iterate through all of the requested keys
    for a in args:
        try:
            res[a] = find_in_dict(header, a)
        except:
            res[a] = None
    return res

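# Hypothetical usage of extract_keywords; the file name and keywords are
# placeholders for illustration only.
meta = extract_keywords('my_image.cub', 'StartTime', 'TargetName', 'NotARealKey')
# meta -> {'StartTime': ..., 'TargetName': ..., 'NotARealKey': None}
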
def spatial_reference(self):
    if not getattr(self, '_srs', None):
        self._srs = osr.SpatialReference()
        proj = self.dataset.GetProjection()
        if proj:
            self._srs.ImportFromWkt(proj)
        else:
            target = find_in_dict(self.metadata, 'TargetName')
            self._srs.ImportFromWkt(DEFAULT_PROJECTIONS[target.lower()])
        try:
            self._srs.MorphToESRI()
            self._srs.MorphFromESRI()
        except:
            pass  # pragma: no cover
        # Setup the GCS
        self._gcs = self._srs.CloneGeogCS()
    return self._srs

def data_from_cube(header):
    """
    Take an ISIS Cube header and normalize back to PVL keywords.
    """
    instrument_name = 'CONTEXT CAMERA'
    data = pvl.PVLModule([
        ('START_TIME', find_in_dict(header, 'StartTime')),
        ('SPACECRAFT_NAME', find_in_dict(header, 'SpacecraftName').upper()),
        ('INSTRUMENT_NAME', instrument_name),
        ('SAMPLING_FACTOR', find_in_dict(header, 'SpatialSumming')),
        ('SAMPLE_FIRST_PIXEL', find_in_dict(header, 'SampleFirstPixel')),
        ('TARGET_NAME', find_in_dict(header, 'TargetName').upper()),
        ('LINE_EXPOSURE_DURATION', find_in_dict(header, 'LineExposureDuration')),
        ('SPACECRAFT_CLOCK_START_COUNT', find_in_dict(header, 'SpacecraftClockCount')),
        ('IMAGE', {'LINES': find_in_dict(header, 'Lines'),
                   'LINE_SAMPLES': find_in_dict(header, 'Samples')})
    ])
    return data

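# A small sketch showing that the returned PVLModule round-trips through the
# pvl serializer; the values here are hypothetical stand-ins, not data from
# any real cube.
import pvl

data = pvl.PVLModule([('START_TIME', '2008-09-17T05:08:10.820'),
                      ('INSTRUMENT_NAME', 'CONTEXT CAMERA')])
print(pvl.dumps(data))
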
def __init__(self, input_data, cleaned=True, qa_threshold=2000):
    """
    Read the .spc file, parse the label, and extract the spectra

    Parameters
    ----------
    input_data : string
        The PATH to the input .spc file
    cleaned : boolean
        If True, mask the data based on the QA array.
    qa_threshold : int
        The threshold value over which observations are masked as noise
        if cleaned is True.

    Attributes
    ----------
    nspectra : int
        The number of spectra in the given data file
    """
    label_dtype_map = {'IEEE_REAL': 'f',
                       'MSB_INTEGER': 'i',
                       'MSB_UNSIGNED_INTEGER': 'u'}

    label = pvl.load(input_data)
    self.label = label
    self.input_data = input_data
    with open(input_data, 'rb') as indata:
        # Extract and handle the ancillary data
        ancillary_data = find_in_dict(label, "ANCILLARY_AND_SUPPLEMENT_DATA")
        self.nspectra = nrows = ancillary_data['ROWS']
        ncols = ancillary_data['COLUMNS']
        rowbytes = ancillary_data['ROW_BYTES']

        columns = []
        bytelengths = []
        datatypes = []
        ancillary_data_offset = find_in_dict(label, "^ANCILLARY_AND_SUPPLEMENT_DATA").value
        indata.seek(ancillary_data_offset - 1)
        for i in ancillary_data.items():
            if i[0] == 'COLUMN':
                entry = i[1]
                # Level 2B2 PVL has entries with 0 bytes, e.g. omitted.
                if entry['BYTES'] > 0:
                    columns.append(str(entry['NAME']))
                    datatypes.append(label_dtype_map[entry['DATA_TYPE']])
                    bytelengths.append(entry['BYTES'])
                else:
                    ncols -= 1
        strbytes = map(str, bytelengths)
        # Build a big-endian structured dtype, one field per ancillary column
        rowdtype = list(zip(columns, map(''.join, zip(['>'] * ncols, datatypes, strbytes))))
        d = np.fromstring(indata.read(rowbytes * nrows), dtype=rowdtype, count=nrows)
        self.ancillary_data = pd.DataFrame(d, columns=columns, index=np.arange(nrows))

        assert ncols == len(columns)

        keys = []
        array_offsets = []
        for d in ['WAV', 'RAW', 'REF', 'REF1', 'REF2', 'DAR', 'QA']:
            search_key = '^SP_SPECTRUM_{}'.format(d)
            result = find_in_dict(label, search_key)
            if result:
                array_offsets.append(result.value)
                keys.append('SP_SPECTRUM_{}'.format(d))

        offsets = dict(zip(keys, array_offsets))

        arrays = {}
        for k, offset in offsets.items():
            indata.seek(offset - 1)
            newk = k.split('_')[-1]
            d = find_in_dict(label, k)
            unit = d['UNIT']
            lines = d['LINES']
            scaling_factor = d['SCALING_FACTOR']
            arr = np.fromstring(indata.read(lines * 296 * 2), dtype='>H').astype(np.float64)
            arr = arr.reshape(lines, -1)
            # If the data is scaled, apply the scaling factor
            if isinstance(scaling_factor, float):
                arr *= scaling_factor
            arrays[newk] = arr

        self.wavelengths = pd.Series(arrays['WAV'][0])

        self.spectra = {}
        for i in range(nrows):
            self.spectra[i] = pd.DataFrame(index=self.wavelengths)
            for k in keys:
                k = k.split('_')[-1]
                if k == 'WAV':
                    continue
                self.spectra[i][k] = arrays[k][i]
            if cleaned:
                self.spectra[i] = self.spectra[i][self.spectra[i]['QA'] < qa_threshold]

        self.spectra = pd.Panel(self.spectra)

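# A minimal sketch of the structured-dtype trick above: column names are
# zipped with big-endian type codes ('>' + kind + byte length) so that each
# ancillary row unpacks in a single numpy call.  The columns and values here
# are made up for illustration.
import numpy as np
import pandas as pd

columns = ['TEMP', 'COUNT']
datatypes = ['f', 'u']      # IEEE_REAL, MSB_UNSIGNED_INTEGER
bytelengths = [4, 2]
rowdtype = list(zip(columns, map(''.join, zip(['>'] * 2, datatypes, map(str, bytelengths)))))
# rowdtype == [('TEMP', '>f4'), ('COUNT', '>u2')]

raw = np.array([(1.5, 7), (2.5, 9)], dtype=rowdtype).tobytes()
d = np.frombuffer(raw, dtype=rowdtype, count=2)
print(pd.DataFrame(d, columns=columns))
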
def __init__(self, input_data, cleaned=True, qa_threshold=2000):
    """
    Read the .spc file, parse the label, and extract the spectra

    Parameters
    ----------
    input_data : string
        The PATH to the input .spc file
    cleaned : boolean
        If True, mask the data based on the QA array.
    qa_threshold : int
        The threshold value over which observations are masked as noise
        if cleaned is True.

    Attributes
    ----------
    nspectra : int
        The number of spectra in the given data file
    """
    label_dtype_map = {'IEEE_REAL': 'f',
                       'MSB_INTEGER': 'i',
                       'MSB_UNSIGNED_INTEGER': 'u'}

    label = pvl.load(input_data)
    self.label = label
    with open(input_data, 'rb') as indata:
        # Extract and handle the ancillary data
        ancillary_data = find_in_dict(label, "ANCILLARY_AND_SUPPLEMENT_DATA")
        self.nspectra = nrows = ancillary_data['ROWS']
        ncols = ancillary_data['COLUMNS']
        rowbytes = ancillary_data['ROW_BYTES']

        columns = []
        bytelengths = []
        datatypes = []
        ancillary_data_offset = find_in_dict(label, "^ANCILLARY_AND_SUPPLEMENT_DATA").value
        indata.seek(ancillary_data_offset - 1)
        for i in ancillary_data.items():
            if i[0] == 'COLUMN':
                entry = i[1]
                # Level 2B2 PVL has entries with 0 bytes, e.g. omitted.
                if entry['BYTES'] > 0:
                    columns.append(str(entry['NAME']))
                    datatypes.append(label_dtype_map[entry['DATA_TYPE']])
                    bytelengths.append(entry['BYTES'])
                else:
                    ncols -= 1
        strbytes = map(str, bytelengths)
        rowdtype = list(zip(columns, map(''.join, zip(['>'] * ncols, datatypes, strbytes))))
        d = np.fromstring(indata.read(rowbytes * nrows), dtype=rowdtype, count=nrows)
        self.ancillary_data = pd.DataFrame(d, columns=columns, index=np.arange(nrows))

        assert ncols == len(columns)

        keys = []
        array_offsets = []
        for d in ['WAV', 'RAW', 'REF', 'REF1', 'REF2', 'DAR', 'QA']:
            search_key = '^SP_SPECTRUM_{}'.format(d)
            result = find_in_dict(label, search_key)
            if result:
                array_offsets.append(result.value)
                keys.append('SP_SPECTRUM_{}'.format(d))

        offsets = dict(zip(keys, array_offsets))

        arrays = {}
        for k, offset in offsets.items():
            indata.seek(offset - 1)
            newk = k.split('_')[-1]
            d = find_in_dict(label, k)
            unit = d['UNIT']
            lines = d['LINES']
            scaling_factor = d['SCALING_FACTOR']
            arr = np.fromstring(indata.read(lines * 296 * 2), dtype='>H').astype(np.float64)
            arr = arr.reshape(lines, -1)
            # If the data is scaled, apply the scaling factor
            if isinstance(scaling_factor, float):
                arr *= scaling_factor
            arrays[newk] = arr

        self.wavelengths = pd.Series(arrays['WAV'][0])

        self.spectra = {}
        for i in range(nrows):
            self.spectra[i] = pd.DataFrame(index=self.wavelengths)
            for k in keys:
                k = k.split('_')[-1]
                if k == 'WAV':
                    continue
                self.spectra[i][k] = arrays[k][i]
            if cleaned:
                self.spectra[i] = self.spectra[i][self.spectra[i]['QA'] < qa_threshold]

        self.spectra = pd.Panel(self.spectra)

def read(self):
    """
    Given an ISIS store, read the underlying ISIS3 compatible control
    network and return an IsisControlNetwork dataframe.
    """
    pvl_header = pvl.load(self._path)
    header_start_byte = find_in_dict(pvl_header, 'HeaderStartByte')
    header_bytes = find_in_dict(pvl_header, 'HeaderBytes')
    point_start_byte = find_in_dict(pvl_header, 'PointsStartByte')
    version = find_in_dict(pvl_header, 'Version')

    if version == 2:
        self.point_attrs = [i for i in cnf._CONTROLPOINTFILEENTRYV0002.fields_by_name if i != 'measures']
        self.measure_attrs = [i for i in cnf._CONTROLPOINTFILEENTRYV0002_MEASURE.fields_by_name]

        self._handle.seek(header_start_byte)
        pbuf_header = cnf.ControlNetFileHeaderV0002()
        pbuf_header.ParseFromString(self._handle.read(header_bytes))

        self._handle.seek(point_start_byte)
        cp = cnf.ControlPointFileEntryV0002()
        pts = []
        for s in pbuf_header.pointMessageSizes:
            cp.ParseFromString(self._handle.read(s))
            pt = [getattr(cp, i) for i in self.point_attrs if i != 'measures']
            for measure in cp.measures:
                meas = pt + [getattr(measure, j) for j in self.measure_attrs]
                pts.append(meas)
    elif version == 5:
        self.point_attrs = [i for i in cnp5._CONTROLPOINTFILEENTRYV0005.fields_by_name if i != 'measures']
        self.measure_attrs = [i for i in cnp5._CONTROLPOINTFILEENTRYV0005_MEASURE.fields_by_name]

        self._handle.seek(header_start_byte)
        pbuf_header = cnh5.ControlNetFileHeaderV0005()
        pbuf_header.ParseFromString(self._handle.read(header_bytes))

        self._handle.seek(point_start_byte)
        cp = cnp5.ControlPointFileEntryV0005()
        pts = []
        byte_count = 0
        while byte_count < find_in_dict(pvl_header, 'PointsBytes'):
            message_size = struct.unpack('I', self._handle.read(4))[0]
            cp.ParseFromString(self._handle.read(message_size))
            pt = [getattr(cp, i) for i in self.point_attrs if i != 'measures']
            for measure in cp.measures:
                meas = pt + [getattr(measure, j) for j in self.measure_attrs]
                pts.append(meas)
            byte_count += 4 + message_size

    # Some point and measure fields have the same name, so mangle them
    # as point_ and measure_
    point_cols = [self.point_field_map[attr] if attr in self.point_field_map else attr
                  for attr in self.point_attrs]
    measure_cols = [self.measure_field_map[attr] if attr in self.measure_field_map else attr
                    for attr in self.measure_attrs]
    cols = point_cols + measure_cols
    df = IsisControlNetwork(pts, columns=cols)
    # Convert the (0.5, 0.5) origin pixels back to (0, 0) origin pixels
    df['line'] -= 0.5
    df['sample'] -= 0.5
    df.header = pvl_header
    return df

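# The half-pixel shift above forms an exact inverse pair: a hedged sketch of
# the round trip, assuming the writer (to_isis) applies the matching +0.5
# shift.  The coordinate values here are made up.
import pandas as pd

df = pd.DataFrame({'line': [10.0, 22.5], 'sample': [3.0, 7.25]})
on_disk = df + 0.5       # what the writer would presumably store
restored = on_disk - 0.5
assert restored.equals(df)
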
def __init__(self, input_data, label=None, cleaned=True, qa_threshold=2000):
    """
    Read the .spc file, parse the label, and extract the spectra

    Parameters
    ----------
    input_data : string
        The PATH to the input .spc file
    label : string
        The PATH to an optional detached label associated with the .spc
    cleaned : boolean
        If True, mask the data based on the QA array.
    qa_threshold : int
        The threshold value over which observations are masked as noise
        if cleaned is True.

    Attributes
    ----------
    nspectra : int
        The number of spectra in the given data file
    """
    label_dtype_map = {'IEEE_REAL': 'f',
                       'MSB_INTEGER': 'i',
                       'MSB_UNSIGNED_INTEGER': 'u'}

    if label:
        label = pvl.load(label)
    else:
        label = pvl.load(input_data)
    self.label = label
    self.input_data = input_data
    with open(input_data, 'rb') as indata:
        # Extract and handle the ancillary data
        ancillary_data = find_in_dict(label, "ANCILLARY_AND_SUPPLEMENT_DATA")
        self.nspectra = nrows = ancillary_data['ROWS']
        ncols = ancillary_data['COLUMNS']
        rowbytes = ancillary_data['ROW_BYTES']

        columns = []
        bytelengths = []
        datatypes = []
        try:
            ancillary_data_offset = find_in_dict(self.label, "^ANCILLARY_AND_SUPPLEMENT_DATA").value
        except:
            ancillary_data_offset = find_in_dict(self.label, "^ANCILLARY_AND_SUPPLEMENT_DATA")[1].value
        indata.seek(ancillary_data_offset - 1)
        for i in ancillary_data.items():
            if i[0] == 'COLUMN':
                entry = i[1]
                # Level 2B2 PVL has entries with 0 bytes, e.g. omitted.
                if entry['BYTES'] > 0:
                    columns.append(str(entry['NAME']))
                    datatypes.append(label_dtype_map[entry['DATA_TYPE']])
                    bytelengths.append(entry['BYTES'])
                else:
                    ncols -= 1
        strbytes = map(str, bytelengths)
        rowdtype = list(zip(columns, map(''.join, zip(['>'] * ncols, datatypes, strbytes))))
        d = np.frombuffer(indata.read(rowbytes * nrows), dtype=rowdtype, count=nrows)
        self.ancillary_data = pd.DataFrame(d, columns=columns, index=np.arange(nrows))

        # Flatten the remaining label keywords into extra ancillary columns
        keys = []
        vals = []
        for k, v in label.items():
            if k in ["ANCILLARY_AND_SUPPLEMENT_DATA", "L2D_RESULT_ARRAY",
                     "SP_SPECTRUM_QA", "SP_SPECTRUM_REF1", "SP_SPECTRUM_RAD",
                     "SP_SPECTRUM_REF2", "SP_SPECTRUM_RAW", "SP_SPECTRUM_WAV",
                     "^ANCILLARY_AND_SUPPLEMENT_DATA", "^SP_SPECTRUM_WAV",
                     "^SP_SPECTRUM_RAW", "^SP_SPECTRUM_REF2", "^SP_SPECTRUM_RAD",
                     "^SP_SPECTRUM_REF1", "^SP_SPECTRUM_QA", "^L2D_RESULT_ARRAY"]:
                continue
            if isinstance(v, pvl._collections.Units):
                k = "{}_{}".format(k, v.units)
                v = v.value
            keys.append(k)
            vals.append(v)
        vals = [vals] * len(self.ancillary_data)
        new_anc = pd.DataFrame(vals, index=self.ancillary_data.index, columns=keys)
        self.ancillary_data = self.ancillary_data.join(new_anc, how='inner')

        assert ncols == len(columns)

        keys = []
        array_offsets = []
        for d in ['WAV', 'RAW', 'REF', 'REF1', 'REF2', 'DAR', 'QA', 'RAD']:
            search_key = '^SP_SPECTRUM_{}'.format(d)
            result = find_in_dict(label, search_key)
            if result:
                try:
                    array_offsets.append(result.value)
                except:
                    array_offsets.append(result[1].value)  # 2C V3.0
                keys.append('SP_SPECTRUM_{}'.format(d))

        offsets = dict(zip(keys, array_offsets))

        arrays = {}
        for k, offset in offsets.items():
            indata.seek(offset - 1)
            newk = k.split('_')[-1]
            d = find_in_dict(label, k)
            unit = d['UNIT']
            lines = d['LINES']
            scaling_factor = d['SCALING_FACTOR']
            arr = np.frombuffer(indata.read(lines * 296 * 2), dtype='>H').astype(np.float64)
            arr = arr.reshape(lines, -1)
            # If the data is scaled, apply the scaling factor
            if isinstance(scaling_factor, float):
                arr *= scaling_factor
            arrays[newk] = arr

        self.wavelengths = pd.Series(arrays['WAV'][0])

        self.spectra = {}
        for i in range(nrows):
            self.spectra[i] = pd.DataFrame(index=self.wavelengths)
            for k in keys:
                k = k.split('_')[-1]
                if k == 'WAV':
                    continue
                self.spectra[i][k] = arrays[k][i]
            if cleaned:
                mask = self.spectra[i]['QA'] < qa_threshold
                self.spectra[i] = self.spectra[i][mask]

        # If the spectra have been cleaned, the wavelength ids also need
        # to be cleaned.
        if cleaned:
            self.wavelengths = self.wavelengths[mask.values].values

        dfs = [v for k, v in self.spectra.items()]
        self.spectra = pd.concat(dfs, axis=1, keys=range(nrows))

def read(self):
    """
    Given an ISIS store, read the underlying ISIS3 compatible control
    network and return an IsisControlNetwork dataframe.
    """
    pvl_header = pvl.load(self._path)
    header_start_byte = find_in_dict(pvl_header, 'HeaderStartByte')
    header_bytes = find_in_dict(pvl_header, 'HeaderBytes')
    point_start_byte = find_in_dict(pvl_header, 'PointsStartByte')
    version = find_in_dict(pvl_header, 'Version')

    if version == 2:
        self.point_attrs = [i for i in cnf._CONTROLPOINTFILEENTRYV0002.fields_by_name if i != 'measures']
        self.measure_attrs = [i for i in cnf._CONTROLPOINTFILEENTRYV0002_MEASURE.fields_by_name]

        self._handle.seek(header_start_byte)
        pbuf_header = cnf.ControlNetFileHeaderV0002()
        pbuf_header.ParseFromString(self._handle.read(header_bytes))

        self._handle.seek(point_start_byte)
        cp = cnf.ControlPointFileEntryV0002()
        pts = []
        for s in pbuf_header.pointMessageSizes:
            cp.ParseFromString(self._handle.read(s))
            pt = [getattr(cp, i) for i in self.point_attrs if i != 'measures']
            for measure in cp.measures:
                meas = pt + [getattr(measure, j) for j in self.measure_attrs]
                pts.append(meas)
    elif version == 5:
        self.point_attrs = [i for i in cnp5._CONTROLPOINTFILEENTRYV0005.fields_by_name if i != 'measures']
        self.measure_attrs = [i for i in cnp5._CONTROLPOINTFILEENTRYV0005_MEASURE.fields_by_name]

        self._handle.seek(header_start_byte)
        pbuf_header = cnh5.ControlNetFileHeaderV0005()
        pbuf_header.ParseFromString(self._handle.read(header_bytes))

        self._handle.seek(point_start_byte)
        cp = cnp5.ControlPointFileEntryV0005()
        pts = []
        byte_count = 0
        while byte_count < find_in_dict(pvl_header, 'PointsBytes'):
            message_size = struct.unpack('I', self._handle.read(4))[0]
            cp.ParseFromString(self._handle.read(message_size))
            pt = [getattr(cp, i) for i in self.point_attrs if i != 'measures']
            for measure in cp.measures:
                meas = pt + [getattr(measure, j) for j in self.measure_attrs]
                pts.append(meas)
            byte_count += 4 + message_size

    # 'jigsawRejected' appears in both the point and measure messages, so
    # rename the point field to avoid a duplicate column
    self.point_attrs = [i if i != 'jigsawRejected' else 'pointJigsawRejected'
                        for i in self.point_attrs]

    cols = self.point_attrs + self.measure_attrs
    df = IsisControlNetwork(pts, columns=cols)
    df.header = pvl_header
    return df

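# Why the rename matters: a small demo with made-up data.  Duplicate column
# labels make pandas return a DataFrame instead of a Series on selection.
import pandas as pd

df = pd.DataFrame([[True, False]], columns=['jigsawRejected', 'jigsawRejected'])
print(df['jigsawRejected'])        # both columns come back, not one
print(df['jigsawRejected'].shape)  # (1, 2)
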
def test_cnet_read(apollo_cnet):
    df = io_controlnetwork.from_isis(apollo_cnet)
    assert len(df) == find_in_dict(df.header, 'NumberOfMeasures')
    assert isinstance(df, io_controlnetwork.IsisControlNetwork)
    assert len(df.groupby('id')) == find_in_dict(df.header, 'NumberOfPoints')

def __init__(self, input_data, label=None, cleaned=True, qa_threshold=2000):
    """
    Read the .spc file, parse the label, and extract the spectra

    Parameters
    ----------
    input_data : string
        The PATH to the input .spc file
    label : string
        The PATH to an optional detached label associated with the .spc
    cleaned : boolean
        If True, mask the data based on the QA array.
    qa_threshold : int
        The threshold value over which observations are masked as noise
        if cleaned is True.

    Attributes
    ----------
    nspectra : int
        The number of spectra in the given data file
    """
    label_dtype_map = {'IEEE_REAL': 'f',
                       'MSB_INTEGER': 'i',
                       'MSB_UNSIGNED_INTEGER': 'u'}

    if label:
        label = pvl.load(label)
    else:
        label = pvl.load(input_data)
    self.label = label
    self.input_data = input_data
    with open(input_data, 'rb') as indata:
        # Extract and handle the ancillary data
        ancillary_data = find_in_dict(label, "ANCILLARY_AND_SUPPLEMENT_DATA")
        self.nspectra = nrows = ancillary_data['ROWS']
        ncols = ancillary_data['COLUMNS']
        rowbytes = ancillary_data['ROW_BYTES']

        columns = []
        bytelengths = []
        datatypes = []
        try:
            ancillary_data_offset = find_in_dict(self.label, "^ANCILLARY_AND_SUPPLEMENT_DATA").value
        except:
            ancillary_data_offset = find_in_dict(self.label, "^ANCILLARY_AND_SUPPLEMENT_DATA")[1].value
        indata.seek(ancillary_data_offset - 1)
        for i in ancillary_data.items():
            if i[0] == 'COLUMN':
                entry = i[1]
                # Level 2B2 PVL has entries with 0 bytes, e.g. omitted.
                if entry['BYTES'] > 0:
                    columns.append(str(entry['NAME']))
                    datatypes.append(label_dtype_map[entry['DATA_TYPE']])
                    bytelengths.append(entry['BYTES'])
                else:
                    ncols -= 1
        strbytes = map(str, bytelengths)
        rowdtype = list(zip(columns, map(''.join, zip(['>'] * ncols, datatypes, strbytes))))
        d = np.frombuffer(indata.read(rowbytes * nrows), dtype=rowdtype, count=nrows)
        self.ancillary_data = pd.DataFrame(d, columns=columns, index=np.arange(nrows))

        # Flatten the remaining label keywords into extra ancillary columns
        keys = []
        vals = []
        for k, v in label.items():
            if k in ["ANCILLARY_AND_SUPPLEMENT_DATA", "L2D_RESULT_ARRAY",
                     "SP_SPECTRUM_QA", "SP_SPECTRUM_REF1", "SP_SPECTRUM_RAD",
                     "SP_SPECTRUM_REF2", "SP_SPECTRUM_RAW", "SP_SPECTRUM_WAV",
                     "^ANCILLARY_AND_SUPPLEMENT_DATA", "^SP_SPECTRUM_WAV",
                     "^SP_SPECTRUM_RAW", "^SP_SPECTRUM_REF2", "^SP_SPECTRUM_RAD",
                     "^SP_SPECTRUM_REF1", "^SP_SPECTRUM_QA", "^L2D_RESULT_ARRAY"]:
                continue
            if isinstance(v, pvl.collections.Quantity):
                k = "{}_{}".format(k, v.units)
                v = v.value
            keys.append(k)
            vals.append(v)
        vals = [vals] * len(self.ancillary_data)
        new_anc = pd.DataFrame(vals, index=self.ancillary_data.index, columns=keys)
        self.ancillary_data = self.ancillary_data.join(new_anc, how='inner')

        assert ncols == len(columns)

        keys = []
        array_offsets = []
        for d in ['WAV', 'RAW', 'REF', 'REF1', 'REF2', 'DAR', 'QA', 'RAD']:
            search_key = '^SP_SPECTRUM_{}'.format(d)
            result = find_in_dict(label, search_key)
            if result:
                try:
                    array_offsets.append(result.value)
                except:
                    array_offsets.append(result[1].value)  # 2C V3.0
                keys.append('SP_SPECTRUM_{}'.format(d))

        offsets = dict(zip(keys, array_offsets))

        arrays = {}
        for k, offset in offsets.items():
            indata.seek(offset - 1)
            newk = k.split('_')[-1]
            d = find_in_dict(label, k)
            unit = d['UNIT']
            lines = d['LINES']
            scaling_factor = d['SCALING_FACTOR']
            arr = np.frombuffer(indata.read(lines * 296 * 2), dtype='>H').astype(np.float64)
            arr = arr.reshape(lines, -1)
            # If the data is scaled, apply the scaling factor
            if isinstance(scaling_factor, float):
                arr *= scaling_factor
            arrays[newk] = arr

        self.wavelengths = pd.Series(arrays['WAV'][0])

        self.spectra = {}
        for i in range(nrows):
            self.spectra[i] = pd.DataFrame(index=self.wavelengths)
            for k in keys:
                k = k.split('_')[-1]
                if k == 'WAV':
                    continue
                self.spectra[i][k] = arrays[k][i]
            if cleaned:
                mask = self.spectra[i]['QA'] < qa_threshold
                self.spectra[i] = self.spectra[i][mask]

        # If the spectra have been cleaned, the wavelength ids also need
        # to be cleaned.
        if cleaned:
            self.wavelengths = self.wavelengths[mask.values].values

        dfs = [v for k, v in self.spectra.items()]
        self.spectra = pd.concat(dfs, axis=1, keys=range(nrows))

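# Hedged usage sketch for the reader above.  The class name
# Spectral_Profiler and the module path are assumptions (this __init__
# appears to come from plio's Kaguya Spectral Profiler reader), and
# 'observation.spc' is a placeholder file.
from plio.io.io_spectral_profiler import Spectral_Profiler

spc = Spectral_Profiler('observation.spc', cleaned=True)
print(spc.nspectra)
print(spc.ancillary_data.head())
# After the concat, columns are keyed by (observation index, band name)
print(spc.spectra[0]['REF1'].head())
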