def test_merge_masks(self):
    '''
    Check merge_masks against an empty input and a table of stacked masks.

    An empty list is expected to raise IndexError. For every other case the
    merged result is compared via ``tolist()``: stacks with no set values
    are expected to yield the scalar ``False``, everything else a flat list
    holding the element-wise OR of the rows.
    '''
    # Merging nothing at all is an error.
    self.assertRaises(IndexError, merge_masks, [])

    # Each entry pairs the stacked input rows with the expected merged
    # result (after tolist()).
    cases = (
        ([[0, 0, 0]], False),
        ([[0, 1, 0]], [0, 1, 0]),
        ([[1, 1, 1]], [1, 1, 1]),
        ([[0, 0, 0], [0, 0, 0]], False),
        ([[0, 1, 0], [0, 1, 0]], [0, 1, 0]),
        ([[0, 1, 0], [0, 1, 1]], [0, 1, 1]),
        ([[1, 1, 1], [1, 1, 1]], [1, 1, 1]),
        ([[1, 0, 0], [0, 1, 0], [0, 0, 1]], [1, 1, 1]),
    )
    for rows, expected in cases:
        stacked = np.array(rows, dtype=np.bool_)
        merged = merge_masks(stacked)
        self.assertEqual(merged.tolist(), expected)
def combine_submasks(self):
    '''
    Collapse all of this object's submasks into a single OR'd mask.

    When no submasks are held, the mask of ``self.array`` is returned
    unchanged instead.

    :returns: Merged submasks, or the array's own mask if there are none.
    :rtype: np.array
    '''
    if not self.submasks:
        # Nothing to merge: fall back to the mask carried by the array.
        return self.array.mask

    submask_arrays = list(self.submasks.values())
    return merge_masks(submask_arrays)
def combine_submasks(self):
    '''
    Collapse all of this object's submasks into a single OR'd mask.

    When no submasks are held, the mask of ``self.array`` is returned
    unchanged instead.

    :returns: Merged submasks, or the array's own mask if there are none.
    :rtype: np.array
    '''
    if not self.submasks:
        # Nothing to merge: fall back to the mask carried by the array.
        return self.array.mask

    submask_arrays = list(self.submasks.values())
    return merge_masks(submask_arrays)
def test_merge_masks(self):
    '''
    Check merge_masks against an empty input and a table of stacked masks.

    An empty list is expected to raise IndexError. For every other case the
    merged result is compared via ``tolist()``: stacks with no set values
    are expected to yield the scalar ``False``, everything else a flat list
    holding the element-wise OR of the rows.
    '''
    # Merging nothing at all is an error.
    self.assertRaises(IndexError, merge_masks, [])

    # Each entry pairs the stacked input rows with the expected merged
    # result (after tolist()).
    cases = (
        ([[0, 0, 0]], False),
        ([[0, 1, 0]], [0, 1, 0]),
        ([[1, 1, 1]], [1, 1, 1]),
        ([[0, 0, 0], [0, 0, 0]], False),
        ([[0, 1, 0], [0, 1, 0]], [0, 1, 0]),
        ([[0, 1, 0], [0, 1, 1]], [0, 1, 1]),
        ([[1, 1, 1], [1, 1, 1]], [1, 1, 1]),
        ([[1, 0, 0], [0, 1, 0], [0, 0, 1]], [1, 1, 1]),
    )
    for rows, expected in cases:
        stacked = np.array(rows, dtype=np.bool_)
        merged = merge_masks(stacked)
        self.assertEqual(merged.tolist(), expected)
def write_segment(source, segment, dest, boundary, submasks=None):
    '''
    Write a segment of the HDF file at ``source`` to a new file at ``dest``.

    Expects the HDF file to contain whole superframes. Assumes "data" and
    "mask" are present.

    Any existing file at ``dest`` is removed first. If the boundaries
    computed by ``segment_boundaries`` cover the whole file (start of 0 and
    no stop, or a duration equal to the source duration) the source file is
    simply copied to ``dest``.

    Otherwise a new HDF file is created. Every top-level group other than
    'series' and the top-level attributes are copied across, the duration is
    overwritten with the segment duration, and each parameter is read for
    the superframe-aligned slice, masked outside of the requested segment
    and written to the new file.

    The source file used to be copied to the destination and then modified
    inplace. Since it is impossible to fully reclaim the space of deleted
    datasets, a new hdf file is created instead and groups, attributes and
    parameters are copied into it, resulting in smaller segment sizes.

    :param source: file path of the source hdf file.
    :type source: str
    :param segment: segment of flight to write in seconds. step is
        disregarded.
    :type segment: slice
    :param dest: destination path for output file containing segment.
    :type dest: str
    :param boundary: passed straight to ``segment_boundaries`` together with
        ``segment`` to derive the superframe-aligned and in-array limits.
        NOTE(review): previously documented as a bool named
        ``supf_boundary``; confirm the expected type against
        ``segment_boundaries``.
    :param submasks: Collection of submask names to keep. The default value
        of None leaves every parameter's submasks and mask untouched. When a
        parameter has submasks and only some of them are named here, its
        submasks are reduced to that subset and its array mask is rebuilt
        from them. When none of a parameter's submasks are named (which
        includes an empty collection), that parameter is left as loaded.
        NOTE(review): this previously stated that an empty collection
        results in no submasks being written; the code in this function
        does not do that.
    :type submasks: collection (tuple/list/set) or None
    :raises ValueError: if a parameter's ``hz * 64`` is not a whole number.
    :return: path to output hdf file containing specified segment.
    :rtype: str

    TODO: Support segmenting parameter masks. Q: Does this mean copying the
    mask along with data? If so, this is already done.
    '''
    # Start from a clean destination; an existing file is replaced.
    if os.path.isfile(dest):
        logging.warning(
            "File '%s' already exists, write_segments will delete file.",
            dest)
        os.remove(dest)

    # supf_* are the limits used to slice the source file, array_* are the
    # offsets (in seconds) used below to mask data within that slice.
    supf_start_secs, supf_stop_secs, array_start_secs, array_stop_secs = segment_boundaries(
        segment, boundary)

    if supf_start_secs == 0 and supf_stop_secs is None:
        # Nothing would be trimmed, so a plain file copy is sufficient.
        logging.debug(
            "Write Segment: Segment is not being sliced, file will be copied.")
        shutil.copy(source, dest)
        return dest

    with hdf_file(source) as source_hdf:
        # An open-ended segment runs to the end of the source file.
        if supf_stop_secs is None:
            supf_stop_secs = source_hdf.duration

        segment_duration = supf_stop_secs - supf_start_secs

        if source_hdf.duration == segment_duration:
            # The slice spans the entire file, so copy instead of rewriting.
            logging.debug("Write Segment: Segment duration is equal to whole "
                          "duration, file will be copied.")
            shutil.copy(source, dest)
            return dest

        with hdf_file(dest, create=True) as dest_hdf:
            logging.debug("Write Segment: Duration %.2fs to be written to %s",
                          segment_duration, dest)

            # Copy top-level groups.
            for group_name in source_hdf.hdf.keys():
                if group_name == 'series':
                    # Avoid copying parameter datasets; parameters are
                    # written one by one further down.
                    continue
                source_hdf.hdf.copy(group_name, dest_hdf.hdf)
                logging.debug("Copied group '%s' between '%s' and '%s'.",
                              group_name, source, dest)

            # Copy top-level attrs.
            _copy_attrs(source_hdf.hdf, dest_hdf.hdf)

            # Q: Could this be too short if we change the start and stop a
            # bit further down???
            # Overwrite the duration copied from the source attributes.
            dest_hdf.duration = segment_duration

            supf_slice = slice(supf_start_secs, supf_stop_secs)

            for param_name in source_hdf.keys():
                # Q: Why not always pad masked values to the next superframe
                param = source_hdf.get_param(param_name, _slice=supf_slice,
                                             load_submasks=True)
                # hz * 64 must be a whole number; per the error message this
                # is the number of values recorded every superframe.
                if ((param.hz * 64) % 1) != 0:
                    raise ValueError(
                        "Parameter '%s' does not record a consistent number of "
                        "values every superframe. Check the LFL definition."
                        % param_name)

                if submasks is not None and param.submasks:
                    # if param does not have submasks, or no submasks match,
                    # write the original mask
                    mask_subset = {
                        k: v for k, v in param.submasks.items()
                        if k in submasks
                    }
                    # Only rebuild the mask when the subset is non-empty and
                    # actually drops at least one submask.
                    if mask_subset and len(param.submasks) != len(mask_subset):
                        submask_arrays = list(six.itervalues(mask_subset))
                        if 'padding' in submasks and 'padding' not in param.submasks:
                            # padding submask from initial processing does not
                            # exist in this case include the original array
                            # mask
                            submask_arrays += [param.array.mask]
                        param.array.mask = merge_masks(submask_arrays)
                        param.submasks = mask_subset

                # NOTE(review): the nesting of this statement is
                # reconstructed from a flattened source; it is assumed to
                # apply to every parameter rather than only to those whose
                # submasks were filtered above. Confirm against the original
                # file. The meaning of raw_array is not visible here.
                param.array = param.raw_array

                # Sample indices (within the superframe slice) between which
                # data is kept unmasked.
                param_start_index = int(array_start_secs * param.hz)
                param_stop_index = int(
                    len(param.array) - (array_stop_secs * param.hz))

                # Number of samples the written segment is expected to hold.
                array_size = int(segment_duration * param.hz)
                if param.array.size < array_size:
                    # There's not enough data in the input
                    # The input data was not aligned to 4s or 64s
                    # we need to pad the arrays to have the expected number of
                    # samples
                    param_stop_index = param.array.size
                    padding_size = array_size - param_stop_index
                    # Padded samples are zeros; they are masked by the
                    # "outside of split" masking below.
                    param.array = np.ma.concatenate(
                        (param.array,
                         np.ma.zeros(padding_size, dtype=param.array.dtype)))
                    # Extend each submask with ones for the padded samples.
                    for sub_name, submask in param.submasks.items():
                        param.submasks[sub_name] = np.ma.concatenate(
                            (submask,
                             np.ma.ones(padding_size, dtype=submask.dtype)))

                # Mask data outside of split.
                param.array[:param_start_index] = np.ma.masked
                param.array[param_stop_index:] = np.ma.masked
                # Set the same ranges in every submask.
                for submask in param.submasks.values():
                    submask[:param_start_index] = True
                    submask[param_stop_index:] = True

                # save modified parameter back to file
                dest_hdf[param_name] = param

    #logging.debug("Finished writing segment: %s", dest_hdf)
    return dest
def write_segment(source, segment, dest, boundary, submasks=None):
    '''
    Write a segment of the HDF file at ``source`` to a new file at ``dest``.

    Expects the HDF file to contain whole superframes. Assumes "data" and
    "mask" are present.

    Any existing file at ``dest`` is removed first. If the boundaries
    computed by ``segment_boundaries`` cover the whole file (start of 0 and
    no stop, or a duration equal to the source duration) the source file is
    simply copied to ``dest``.

    Otherwise a new HDF file is created. Every top-level group other than
    'series' and the top-level attributes are copied across, the duration is
    overwritten with the segment duration, and each parameter is read for
    the superframe-aligned slice, masked outside of the requested segment
    and written to the new file.

    The source file used to be copied to the destination and then modified
    inplace. Since it is impossible to fully reclaim the space of deleted
    datasets, a new hdf file is created instead and groups, attributes and
    parameters are copied into it, resulting in smaller segment sizes.

    :param source: file path of the source hdf file.
    :type source: str
    :param segment: segment of flight to write in seconds. step is
        disregarded.
    :type segment: slice
    :param dest: destination path for output file containing segment.
    :type dest: str
    :param boundary: passed straight to ``segment_boundaries`` together with
        ``segment`` to derive the superframe-aligned and in-array limits.
        NOTE(review): previously documented as a bool named
        ``supf_boundary``; confirm the expected type against
        ``segment_boundaries``.
    :param submasks: Collection of submask names to keep. The default value
        of None leaves every parameter's submasks and mask untouched. When a
        parameter has submasks and only some of them are named here, its
        submasks are reduced to that subset and its array mask is rebuilt
        from them. When none of a parameter's submasks are named (which
        includes an empty collection), that parameter is left as loaded.
        NOTE(review): this previously stated that an empty collection
        results in no submasks being written; the code in this function
        does not do that.
    :type submasks: collection (tuple/list/set) or None
    :raises ValueError: if a parameter's ``hz * 64`` is not a whole number.
    :return: path to output hdf file containing specified segment.
    :rtype: str

    TODO: Support segmenting parameter masks. Q: Does this mean copying the
    mask along with data? If so, this is already done.
    '''
    # Start from a clean destination; an existing file is replaced.
    if os.path.isfile(dest):
        logging.warning(
            "File '%s' already exists, write_segments will delete file.",
            dest)
        os.remove(dest)

    # supf_* are the limits used to slice the source file, array_* are the
    # offsets (in seconds) used below to mask data within that slice.
    supf_start_secs, supf_stop_secs, array_start_secs, array_stop_secs = segment_boundaries(
        segment, boundary)

    if supf_start_secs == 0 and supf_stop_secs is None:
        # Nothing would be trimmed, so a plain file copy is sufficient.
        logging.debug(
            "Write Segment: Segment is not being sliced, file will be copied.")
        shutil.copy(source, dest)
        return dest

    with hdf_file(source) as source_hdf:
        # An open-ended segment runs to the end of the source file.
        if supf_stop_secs is None:
            supf_stop_secs = source_hdf.duration

        segment_duration = supf_stop_secs - supf_start_secs

        if source_hdf.duration == segment_duration:
            # The slice spans the entire file, so copy instead of rewriting.
            logging.debug("Write Segment: Segment duration is equal to whole "
                          "duration, file will be copied.")
            shutil.copy(source, dest)
            return dest

        with hdf_file(dest, create=True) as dest_hdf:
            logging.debug("Write Segment: Duration %.2fs to be written to %s",
                          segment_duration, dest)

            # Copy top-level groups.
            for group_name in source_hdf.hdf.keys():
                if group_name == 'series':
                    # Avoid copying parameter datasets; parameters are
                    # written one by one further down.
                    continue
                source_hdf.hdf.copy(group_name, dest_hdf.hdf)
                logging.debug("Copied group '%s' between '%s' and '%s'.",
                              group_name, source, dest)

            # Copy top-level attrs.
            _copy_attrs(source_hdf.hdf, dest_hdf.hdf)

            # Q: Could this be too short if we change the start and stop a
            # bit further down???
            # Overwrite the duration copied from the source attributes.
            dest_hdf.duration = segment_duration

            supf_slice = slice(supf_start_secs, supf_stop_secs)

            for param_name in source_hdf.keys():
                # Q: Why not always pad masked values to the next superframe
                param = source_hdf.get_param(param_name, _slice=supf_slice,
                                             load_submasks=True)
                # hz * 64 must be a whole number; per the error message this
                # is the number of values recorded every superframe.
                if ((param.hz * 64) % 1) != 0:
                    raise ValueError(
                        "Parameter '%s' does not record a consistent number of "
                        "values every superframe. Check the LFL definition."
                        % param_name)

                if submasks is not None and param.submasks:
                    # if param does not have submasks, or no submasks match,
                    # write the original mask
                    mask_subset = {
                        k: v for k, v in param.submasks.items()
                        if k in submasks
                    }
                    # Only rebuild the mask when the subset is non-empty and
                    # actually drops at least one submask.
                    if mask_subset and len(param.submasks) != len(mask_subset):
                        submask_arrays = list(six.itervalues(mask_subset))
                        if 'padding' in submasks and 'padding' not in param.submasks:
                            # padding submask from initial processing does not
                            # exist in this case include the original array
                            # mask
                            submask_arrays += [param.array.mask]
                        param.array.mask = merge_masks(submask_arrays)
                        param.submasks = mask_subset

                # NOTE(review): the nesting of this statement is
                # reconstructed from a flattened source; it is assumed to
                # apply to every parameter rather than only to those whose
                # submasks were filtered above. Confirm against the original
                # file. The meaning of raw_array is not visible here.
                param.array = param.raw_array

                # Sample indices (within the superframe slice) between which
                # data is kept unmasked.
                param_start_index = int(array_start_secs * param.hz)
                param_stop_index = int(
                    len(param.array) - (array_stop_secs * param.hz))

                # Number of samples the written segment is expected to hold.
                array_size = int(segment_duration * param.hz)
                if param.array.size < array_size:
                    # There's not enough data in the input
                    # The input data was not aligned to 4s or 64s
                    # we need to pad the arrays to have the expected number of
                    # samples
                    param_stop_index = param.array.size
                    padding_size = array_size - param_stop_index
                    # Padded samples are zeros; they are masked by the
                    # "outside of split" masking below.
                    param.array = np.ma.concatenate(
                        (param.array,
                         np.ma.zeros(padding_size, dtype=param.array.dtype)))
                    # Extend each submask with ones for the padded samples.
                    for sub_name, submask in param.submasks.items():
                        param.submasks[sub_name] = np.ma.concatenate(
                            (submask,
                             np.ma.ones(padding_size, dtype=submask.dtype)))

                # Mask data outside of split.
                param.array[:param_start_index] = np.ma.masked
                param.array[param_stop_index:] = np.ma.masked
                # Set the same ranges in every submask.
                for submask in param.submasks.values():
                    submask[:param_start_index] = True
                    submask[param_stop_index:] = True

                # save modified parameter back to file
                dest_hdf[param_name] = param

    #logging.debug("Finished writing segment: %s", dest_hdf)
    return dest