def test_write_segment__all_data(self):
        '''
        Tests that writing the whole file (the slice has neither start nor
        stop) reproduces the source: the datasets within the path matching
        'series/<Param Name>/data', the DME mask, the parameter attributes and
        the file duration must all equal the fixture values held on the test
        case.
        The write is performed with boundary=4 and again with boundary=64.
        '''
        def verify_output(path):
            # Re-open the written file read-only and compare it against the
            # fixture values stored on the test case.
            with h5py.File(path, 'r') as written:
                # 'IVV' - 1Hz parameter.
                ivv = written['series']['IVV']
                self.assertEqual(ivv.attrs['frequency'], self.ivv_frequency)
                self.assertEqual(ivv.attrs['supf_offset'],
                                 self.ivv_supf_offset)
                ivv_matches = ivv['data'][:] == self.ivv_data
                self.assertTrue(all(ivv_matches))
                # 'WOW' - 4Hz parameter.
                wow = written['series']['WOW']
                self.assertEqual(wow.attrs['frequency'], self.wow_frequency)
                wow_matches = wow['data'][:] == self.wow_data
                self.assertTrue(all(wow_matches))
                # 'DME' - 0.25Hz parameter.
                dme = written['series']['DME']
                self.assertEqual(dme.attrs['frequency'], self.dme_frequency)
                dme_matches = dme['data'][:] == self.dme_data
                self.assertTrue(all(dme_matches))
                # Test mask is written.
                dme_mask_matches = dme['mask'][:] == self.dme_mask
                self.assertTrue(all(dme_mask_matches))
                self.assertEqual(written.attrs['duration'], self.data_secs)

        whole_file = slice(None)
        # Same expectations apply for both boundary sizes.
        for boundary in (4, 64):
            written_path = write_segment(self.hdf_path, whole_file,
                                         self.out_path, boundary=boundary)
            self.assertEqual(written_path, self.out_path)
            verify_output(written_path)
    def test_write_segment__all_data(self):
        '''
        Tests that writing the whole file (the slice has neither start nor
        stop) reproduces the source: the datasets within the path matching
        'series/<Param Name>/data', the DME mask, the parameter attributes and
        the file duration must all equal the fixture values held on the test
        case.
        The write is performed with supf_boundary=False and again with
        supf_boundary=True.
        '''
        def verify_output(path):
            # Re-open the written file read-only and compare it against the
            # fixture values stored on the test case.
            with h5py.File(path, 'r') as written:
                # 'IVV' - 1Hz parameter.
                ivv = written['series']['IVV']
                self.assertEqual(ivv.attrs['frequency'], self.ivv_frequency)
                self.assertEqual(ivv.attrs['supf_offset'],
                                 self.ivv_supf_offset)
                ivv_matches = ivv['data'][:] == self.ivv_data
                self.assertTrue(all(ivv_matches))
                # 'WOW' - 4Hz parameter.
                wow = written['series']['WOW']
                self.assertEqual(wow.attrs['frequency'], self.wow_frequency)
                wow_matches = wow['data'][:] == self.wow_data
                self.assertTrue(all(wow_matches))
                # 'DME' - 0.25Hz parameter.
                dme = written['series']['DME']
                self.assertEqual(dme.attrs['frequency'], self.dme_frequency)
                dme_matches = dme['data'][:] == self.dme_data
                self.assertTrue(all(dme_matches))
                # Test mask is written.
                dme_mask_matches = dme['mask'][:] == self.dme_mask
                self.assertTrue(all(dme_mask_matches))
                self.assertEqual(written.attrs['duration'], self.data_secs)

        whole_file = slice(None)
        # Same expectations apply with and without superframe boundaries.
        for on_superframe in (False, True):
            written_path = write_segment(self.hdf_path, whole_file,
                                         self.out_path,
                                         supf_boundary=on_superframe)
            self.assertEqual(written_path, self.out_path)
            verify_output(written_path)
 def test_write_segment__start_only(self):
     '''
     Tests that the correct segment of the dataset within the path matching
     'series/<Param Name>/data' defined by the slice has been written to the
     destination file while other datasets and attributes are unaffected.
     Slice has a start only (no stop).
     '''
     segment = slice(50, None)
     frame_start = 48  # 48 is nearest frame boundary rounded down
     dest = write_segment(self.hdf_path, segment, self.out_path,
                          boundary=4)
     self.assertEqual(dest, self.out_path)

     with h5py.File(dest, 'r') as hdf_file:
         # 'IVV' - 1Hz parameter.
         ivv_group = hdf_file['series']['IVV']
         self.assertEqual(ivv_group.attrs['frequency'],
                          self.ivv_frequency)
         self.assertEqual(ivv_group.attrs['supf_offset'],
                          self.ivv_supf_offset)
         ivv_result = np.ma.masked_array(ivv_group['data'][:],
                                         mask=ivv_group['mask'][:])
         # ``float`` replaces the ``np.float`` alias, which newer NumPy
         # releases no longer provide; the resulting dtype is the same.
         ivv_expected_result = np.arange(
             frame_start * self.ivv_frequency,
             self.data_secs * self.ivv_frequency,
             dtype=float)
         # The two samples between the frame boundary (48) and the requested
         # start (50) are expected to be masked.
         ivv_expected_result = np.ma.masked_array(
             ivv_expected_result, mask=[True] * 2 + [False] * 78)
         self.assertEqual(ivv_result.tolist(), ivv_expected_result.tolist())
         # 'WOW' - 4Hz parameter.
         wow_group = hdf_file['series']['WOW']
         self.assertEqual(wow_group.attrs['frequency'],
                          self.wow_frequency)
         wow_result = wow_group['data'][:]
         wow_expected_result = np.arange(
             frame_start * self.wow_frequency,
             self.data_secs * self.wow_frequency,
             dtype=float)
         self.assertEqual(wow_result.tolist(), wow_expected_result.tolist())
         # 'DME' - 0.25Hz parameter.
         dme_group = hdf_file['series']['DME']
         self.assertEqual(dme_group.attrs['frequency'],
                          self.dme_frequency)
         dme_result = dme_group['data'][:]
         dme_expected_result = np.arange(12, 32, dtype=float)
         self.assertEqual(dme_result.tolist(), dme_expected_result.tolist())
         self.assertEqual(hdf_file.attrs['duration'], 80)
# Example #4
# 0
 def test_write_segment__start_only(self):
     '''
     Tests that the correct segment of the dataset within the path matching
     'series/<Param Name>/data' defined by the slice has been written to the
     destination file while other datasets and attributes are unaffected.
     Slice has a start only (no stop).
     '''
     segment = slice(50, None)
     frame_start = 48  # 48 is nearest frame boundary rounded down
     dest = write_segment(self.hdf_path, segment, self.out_path,
                          boundary=4)
     self.assertEqual(dest, self.out_path)

     with h5py.File(dest, 'r') as hdf_file:
         # 'IVV' - 1Hz parameter.
         ivv_group = hdf_file['series']['IVV']
         self.assertEqual(ivv_group.attrs['frequency'],
                          self.ivv_frequency)
         self.assertEqual(ivv_group.attrs['supf_offset'],
                          self.ivv_supf_offset)
         ivv_result = np.ma.masked_array(ivv_group['data'][:],
                                         mask=ivv_group['mask'][:])
         # ``float`` replaces the ``np.float`` alias, which newer NumPy
         # releases no longer provide; the resulting dtype is the same.
         ivv_expected_result = np.arange(
             frame_start * self.ivv_frequency,
             self.data_secs * self.ivv_frequency,
             dtype=float)
         # The two samples between the frame boundary (48) and the requested
         # start (50) are expected to be masked.
         ivv_expected_result = np.ma.masked_array(
             ivv_expected_result, mask=[True] * 2 + [False] * 78)
         self.assertEqual(ivv_result.tolist(), ivv_expected_result.tolist())
         # 'WOW' - 4Hz parameter.
         wow_group = hdf_file['series']['WOW']
         self.assertEqual(wow_group.attrs['frequency'],
                          self.wow_frequency)
         wow_result = wow_group['data'][:]
         wow_expected_result = np.arange(
             frame_start * self.wow_frequency,
             self.data_secs * self.wow_frequency,
             dtype=float)
         self.assertEqual(wow_result.tolist(), wow_expected_result.tolist())
         # 'DME' - 0.25Hz parameter.
         dme_group = hdf_file['series']['DME']
         self.assertEqual(dme_group.attrs['frequency'],
                          self.dme_frequency)
         dme_result = dme_group['data'][:]
         dme_expected_result = np.arange(12, 32, dtype=float)
         self.assertEqual(dme_result.tolist(), dme_expected_result.tolist())
         self.assertEqual(hdf_file.attrs['duration'], 80)
 def test_write_segment__start_only(self):
     '''
     Tests that the correct segment of the dataset within the path matching
     'series/<Param Name>/data' defined by the slice has been written to the
     destination file while other datasets and attributes are unaffected.
     Slice has a start only (no stop); written with supf_boundary=False.
     '''
     segment = slice(50, None)
     dest = write_segment(self.hdf_path, segment, self.out_path,
                          supf_boundary=False)
     self.assertEqual(dest, self.out_path)

     # The output is expected to begin at 48 rather than the requested 50.
     frame_boundary_segment = slice(48, None)

     with h5py.File(dest, 'r') as hdf_file:
         # 'IVV' - 1Hz parameter.
         ivv_group = hdf_file['series']['IVV']
         self.assertEqual(ivv_group.attrs['frequency'],
                          self.ivv_frequency)
         self.assertEqual(ivv_group.attrs['supf_offset'],
                          self.ivv_supf_offset)
         ivv_result = ivv_group['data'][:]
         # ``float`` replaces the ``np.float`` alias, which newer NumPy
         # releases no longer provide; the resulting dtype is the same.
         ivv_expected_result = np.arange(
             frame_boundary_segment.start * self.ivv_frequency,
             self.data_secs * self.ivv_frequency,
             dtype=float)
         self.assertTrue(all(ivv_result == ivv_expected_result))
         # 'WOW' - 4Hz parameter.
         wow_group = hdf_file['series']['WOW']
         self.assertEqual(wow_group.attrs['frequency'],
                          self.wow_frequency)
         wow_result = wow_group['data'][:]
         wow_expected_result = np.arange(
             frame_boundary_segment.start * self.wow_frequency,
             self.data_secs * self.wow_frequency,
             dtype=float)
         self.assertTrue(all(wow_result == wow_expected_result))
         # 'DME' - 0.25Hz parameter.
         dme_group = hdf_file['series']['DME']
         self.assertEqual(dme_group.attrs['frequency'],
                          self.dme_frequency)
         dme_result = dme_group['data'][:]
         dme_expected_result = np.arange(12, 32, dtype=float)
         self.assertTrue(all(dme_result == dme_expected_result))
         self.assertEqual(hdf_file.attrs['duration'], 80)
# Example #6
# 0
 def test_write_segment__stop_only(self):
     '''
     Tests that the correct segment of the dataset within the path matching
     'series/<Param Name>/data' defined by the slice has been written to the
     destination file while other datasets and attributes are unaffected.
     Slice has a stop only (no start).
     '''
     segment = slice(None, 70)
     dest = write_segment(self.hdf_path, segment, self.out_path,
                          boundary=4)
     self.assertEqual(dest, self.out_path)

     # The output is expected to end at 72 rather than the requested 70.
     frame_boundary_segment = slice(None, 72)

     with h5py.File(dest, 'r') as hdf_file:
         # 'IVV' - 1Hz parameter.
         ivv_group = hdf_file['series']['IVV']
         self.assertEqual(ivv_group.attrs['frequency'],
                          self.ivv_frequency)
         self.assertEqual(ivv_group.attrs['supf_offset'],
                          self.ivv_supf_offset)
         ivv_result = ivv_group['data'][:]
         # ``float`` replaces the ``np.float`` alias, which newer NumPy
         # releases no longer provide; the resulting dtype is the same.
         ivv_expected_result = np.arange(
             0, frame_boundary_segment.stop * self.ivv_frequency,
             dtype=float)
         self.assertTrue(all(ivv_result == ivv_expected_result))
         # 'WOW' - 4Hz parameter.
         wow_group = hdf_file['series']['WOW']
         self.assertEqual(wow_group.attrs['frequency'],
                          self.wow_frequency)
         wow_result = wow_group['data'][:]
         wow_expected_result = np.arange(
             0, frame_boundary_segment.stop * self.wow_frequency,
             dtype=float)
         # assertEqual, not assertTrue: assertTrue would treat the expected
         # list as the failure message and never compare the two.
         self.assertEqual(list(wow_result), list(wow_expected_result))
         # 'DME' - 0.25Hz parameter.
         dme_group = hdf_file['series']['DME']
         self.assertEqual(dme_group.attrs['frequency'],
                          self.dme_frequency)
         dme_result = dme_group['data'][:]
         dme_expected_result = np.arange(0, 18, dtype=float)
         self.assertEqual(list(dme_result), list(dme_expected_result))
         self.assertEqual(hdf_file.attrs['duration'], 72)
def split_hdf_to_segments(hdf_path, aircraft_info, fallback_dt=None,
                          fallback_relative_to_start=True,
                          draw=False, dest_dir=None):
    """
    Main method - analyses an HDF file for flight segments and splits each
    flight into a new segment appropriately.

    :param hdf_path: path to HDF file
    :type hdf_path: string
    :param aircraft_info: Information which identify the aircraft, specifically
        with the keys 'Tail Number', 'MSN'...
    :type aircraft_info: Dict
    :param fallback_dt: A datetime which is as close to the end of the data
        file as possible. Used to replace elements of datetimes which are not
        available in the hdf file (e.g. YEAR not being recorded)
    :type fallback_dt: datetime
    :param fallback_relative_to_start: Forwarded unchanged to
        calculate_fallback_dt. Presumably states whether fallback_dt refers
        to the start of the data rather than the end - confirm against that
        helper.
    :type fallback_relative_to_start: Boolean
    :param draw: Whether to use matplotlib to plot the flight
    :type draw: Boolean
    :param dest_dir: Destination directory, if None, the source file directory
        is used
    :type dest_dir: str
    :returns: List of Segments
    :rtype: List of Segment recordtypes ('slice type part duration path hash')
    """
    logger.info("Processing file: %s", hdf_path)

    if dest_dir is None:
        dest_dir = os.path.dirname(hdf_path)

    if draw:
        # Imported lazily so plotting dependencies are only needed when
        # drawing is requested.
        from analysis_engine.plot_flight import plot_essential
        plot_essential(hdf_path)

    with hdf_file(hdf_path) as hdf:
        superframe_present = hdf.superframe_present

        # Confirm aircraft tail for the entire datafile
        logger.info("Validating aircraft matches that recorded in data")
        validate_aircraft(aircraft_info, hdf)

        # now we know the Aircraft is correct, go and do the PRE FILE ANALYSIS
        hook = hooks.PRE_FILE_ANALYSIS
        if hook:
            # 'func_name' only exists on Python 2 functions; fall back to
            # '__name__' and finally repr() so that logging the hook can
            # never raise AttributeError.
            hook_name = (getattr(hook, 'func_name', None) or
                         getattr(hook, '__name__', repr(hook)))
            logger.info("Performing PRE_FILE_ANALYSIS analysis: %s",
                        hook_name)
            hook(hdf, aircraft_info)
        else:
            logger.info("No PRE_FILE_ANALYSIS actions to perform")

        fallback_dt = calculate_fallback_dt(hdf, fallback_dt,
                                            fallback_relative_to_start)
        segment_tuples = split_segments(hdf)

    # ARINC 717 data has frames or superframes. ARINC 767 will be split
    # on a minimum boundary of 4 seconds for the analyser.
    boundary = 64 if superframe_present else 4

    # Stem shared by every split file name; it does not change per segment.
    dest_stem = os.path.splitext(os.path.basename(hdf_path))[0]

    # process each segment (into a new file) having closed original hdf_path
    segments = []
    previous_stop_dt = None
    for part, (segment_type, segment_slice) in enumerate(segment_tuples,
                                                         start=1):
        # write segment to new split file (.001)
        dest_basename = dest_stem + '.%03d.hdf5' % part
        dest_path = os.path.join(dest_dir, dest_basename)
        logger.debug("Writing segment %d: %s", part, dest_path)

        write_segment(hdf_path, segment_slice, dest_path,
                      boundary=boundary)
        segment = append_segment_info(dest_path, segment_type, segment_slice,
                                      part, fallback_dt=fallback_dt)

        if previous_stop_dt and segment.start_dt < previous_stop_dt:
            # In theory, this should not happen - but be warned of superframe
            # padding?
            logger.warning(
                "Segment start_dt '%s' comes before the previous segment "
                "ended '%s'", segment.start_dt, previous_stop_dt)
        previous_stop_dt = segment.stop_dt

        if fallback_dt:
            # move the fallback_dt on to be relative to start of next segment
            # NOTE(review): only the segment duration is added; no extra gap
            # between flights is applied here.
            fallback_dt += segment.stop_dt - segment.start_dt
        segments.append(segment)
        if draw:
            plot_essential(dest_path)

    if draw:
        # show all figures together
        from matplotlib.pyplot import show
        show()

    return segments
def split_hdf_to_segments(hdf_path,
                          aircraft_info,
                          fallback_dt=None,
                          validation_dt=None,
                          fallback_relative_to_start=True,
                          draw=False,
                          dest_dir=None,
                          pre_file_kwargs=None):
    """
    Main method - analyses an HDF file for flight segments and splits each
    flight into a new segment appropriately.

    :param hdf_path: path to HDF file
    :type hdf_path: string
    :param aircraft_info: Information which identify the aircraft, specifically
        with the keys 'Tail Number', 'MSN'...
    :type aircraft_info: Dict
    :param fallback_dt: A datetime which is as close to the end of the data
        file as possible. Used to replace elements of datetimes which are not
        available in the hdf file (e.g. YEAR not being recorded)
    :type fallback_dt: datetime
    :param validation_dt: Forwarded unchanged to calculate_fallback_dt and
        append_segment_info.
    :type validation_dt: presumably datetime - confirm against those helpers
    :param fallback_relative_to_start: Forwarded unchanged to
        calculate_fallback_dt.
    :type fallback_relative_to_start: Boolean
    :param draw: Whether to use matplotlib to plot the flight
    :type draw: Boolean
    :param dest_dir: Destination directory, if None, the source file directory
        is used
    :type dest_dir: str
    :param pre_file_kwargs: Pre-file analysis keyword arguments. None (the
        default) is treated as an empty dict.
    :type pre_file_kwargs: dict
    :returns: List of Segments
    :rtype: List of Segment recordtypes ('slice type part duration path hash')
    """
    logger.debug("Processing file: %s", hdf_path)

    # A None default avoids sharing one mutable dict between calls.
    if pre_file_kwargs is None:
        pre_file_kwargs = {}

    if dest_dir is None:
        dest_dir = os.path.dirname(hdf_path)

    if draw:
        # Imported lazily so plotting dependencies are only needed when
        # drawing is requested.
        from analysis_engine.plot_flight import plot_essential
        plot_essential(hdf_path)

    with hdf_file(hdf_path) as hdf:

        # Confirm aircraft tail for the entire datafile
        logger.debug("Validating aircraft matches that recorded in data")
        validate_aircraft(aircraft_info, hdf)

        # now we know the Aircraft is correct, go and do the PRE FILE ANALYSIS
        hook = hooks.PRE_FILE_ANALYSIS
        if hook:
            # Prefer Python 2's 'func_name', then '__name__', then repr().
            # Every lookup carries a default so that logging the hook can
            # never raise AttributeError (e.g. for functools.partial objects).
            hook_name = (getattr(hook, 'func_name', None) or
                         getattr(hook, '__name__', repr(hook)))
            logger.debug(
                "Performing PRE_FILE_ANALYSIS action '%s' with options: %s",
                hook_name, pre_file_kwargs)
            hook(hdf, aircraft_info, **pre_file_kwargs)
        else:
            logger.info("No PRE_FILE_ANALYSIS actions to perform")

        # ARINC 717 data has frames or superframes. ARINC 767 will be split
        # on a minimum boundary of 4 seconds for the analyser.
        boundary = 64 if hdf.superframe_present else 4

        segment_tuples = split_segments(hdf, aircraft_info)
        frame_doubled = aircraft_info.get('Frame Doubled', False)

        fallback_dt = calculate_fallback_dt(hdf, fallback_dt, validation_dt,
                                            fallback_relative_to_start,
                                            frame_doubled)

    # Stem shared by every split file name; it does not change per segment.
    dest_stem = os.path.splitext(os.path.basename(hdf_path))[0]

    # process each segment (into a new file) having closed original hdf_path
    segments = []
    previous_stop_dt = None
    for part, (segment_type, segment_slice,
               start_padding) in enumerate(segment_tuples, start=1):
        # write segment to new split file (.001)
        dest_basename = dest_stem + '.%03d.hdf5' % part
        dest_path = os.path.join(dest_dir, dest_basename)
        logger.debug("Writing segment %d: %s", part, dest_path)

        write_segment(hdf_path,
                      segment_slice,
                      dest_path,
                      boundary,
                      submasks=('arinc', 'invalid_states', 'padding',
                                'saturation'))

        # adjust fallback time to account for any padding added at start of
        # segment. Without a fallback datetime there is nothing to adjust;
        # pass it through unchanged instead of raising TypeError on
        # ``None - timedelta``.
        if fallback_dt:
            segment_start_dt = fallback_dt - timedelta(seconds=start_padding)
        else:
            segment_start_dt = fallback_dt

        segment = append_segment_info(dest_path,
                                      segment_type,
                                      segment_slice,
                                      part,
                                      fallback_dt=segment_start_dt,
                                      validation_dt=validation_dt,
                                      aircraft_info=aircraft_info)

        # Overlaps of up to 4 seconds with the previous segment are
        # tolerated without a warning.
        if previous_stop_dt and segment.start_dt < previous_stop_dt - timedelta(
                seconds=4):
            # In theory, this should not happen - but be warned of superframe
            # padding?
            logger.warning(
                "Segment start_dt '%s' comes before the previous segment "
                "ended '%s'", segment.start_dt, previous_stop_dt)
        previous_stop_dt = segment.stop_dt

        if fallback_dt:
            # move the fallback_dt on to be relative to start of next segment slice
            fallback_dt += timedelta(seconds=(segment_slice.stop -
                                              segment_slice.start))
        segments.append(segment)
        if draw:
            plot_essential(dest_path)

    if draw:
        # show all figures together
        from matplotlib.pyplot import show
        show()

    return segments
    def test_write_segment__start_and_stop(self):
        '''
        Tests that the correct segment of the dataset within the path matching
        'series/<Param Name>/data' defined by the slice has been written to the
        destination file while other datasets and attributes are unaffected.
        Slice has a start and stop.
        '''
        segment = slice(10, 17)
        dest = write_segment(self.hdf_path, segment, self.out_path, boundary=4)
        self.assertEqual(dest, self.out_path)

        # The output is expected to span 8-20 rather than the requested 10-17.
        frame_boundary_segment = slice(8, 20)

        with h5py.File(dest, 'r') as hdf_file:
            # 'IVV' - 1Hz parameter.
            ivv_group = hdf_file['series']['IVV']
            self.assertEqual(ivv_group.attrs['frequency'], self.ivv_frequency)
            self.assertEqual(ivv_group.attrs['supf_offset'],
                             self.ivv_supf_offset)
            ivv_result = np.ma.masked_array(ivv_group['data'][:],
                                            mask=ivv_group['mask'][:])
            # ``float`` replaces the ``np.float`` alias, which newer NumPy
            # releases no longer provide; the resulting dtype is the same.
            ivv_expected_result = np.arange(
                frame_boundary_segment.start * self.ivv_frequency,
                frame_boundary_segment.stop * self.ivv_frequency,
                dtype=float)
            # Samples outside the requested 10-17 range (two before, three
            # after) are expected to be masked.
            ivv_expected_result = np.ma.masked_array(ivv_expected_result,
                                                     mask=[True] * 2 +
                                                     [False] * 7 + [True] * 3)
            self.assertEqual(ivv_result.tolist(), ivv_expected_result.tolist())
            # 'WOW' - 4Hz parameter.
            wow_group = hdf_file['series']['WOW']
            self.assertEqual(wow_group.attrs['frequency'], self.wow_frequency)
            wow_result = wow_group['data'][:]
            wow_expected_result = np.arange(
                frame_boundary_segment.start * self.wow_frequency,
                frame_boundary_segment.stop * self.wow_frequency,
                dtype=float)
            self.assertEqual(list(wow_result), list(wow_expected_result))
            # 'DME' - 0.25Hz parameter.
            dme_group = hdf_file['series']['DME']
            self.assertEqual(dme_group.attrs['frequency'], self.dme_frequency)
            dme_result = dme_group['data'][:]  # expected: array([ 2.,  3.,  4.])
            dme_expected_result = np.arange(2, 5, dtype=float)
            self.assertEqual(list(dme_result), list(dme_expected_result))
            self.assertEqual(
                hdf_file.attrs['duration'],
                frame_boundary_segment.stop - frame_boundary_segment.start)

        # Write segment on superframe boundary.
        dest = write_segment(self.hdf_path,
                             segment,
                             self.out_path,
                             boundary=64)
        self.assertEqual(dest, self.out_path)

        with h5py.File(dest, 'r') as hdf_file:
            # 'IVV' - 1Hz parameter.
            ivv_group = hdf_file['series']['IVV']
            self.assertEqual(ivv_group.attrs['frequency'], self.ivv_frequency)
            self.assertEqual(ivv_group.attrs['supf_offset'],
                             self.ivv_supf_offset)
            ivv_result = ivv_group['data'][:]
            ivv_expected_result = np.arange(64 * self.ivv_frequency,
                                            dtype=float)
            self.assertEqual(list(ivv_result), list(ivv_expected_result))
            # 'WOW' - 4Hz parameter.
            wow_group = hdf_file['series']['WOW']
            self.assertEqual(wow_group.attrs['frequency'], self.wow_frequency)
            wow_result = wow_group['data'][:]
            wow_expected_result = np.arange(64 * self.wow_frequency,
                                            dtype=float)
            self.assertEqual(list(wow_result), list(wow_expected_result))
            # 'DME' - 0.25Hz parameter.
            dme_group = hdf_file['series']['DME']
            self.assertEqual(dme_group.attrs['frequency'], self.dme_frequency)
            dme_result = dme_group['data'][:]
            dme_expected_result = np.arange(64 * self.dme_frequency,
                                            dtype=float)
            self.assertEqual(list(dme_result), list(dme_expected_result))
            self.assertEqual(hdf_file.attrs['duration'], 64)
    def test_write_segment__start_and_stop(self):
        '''
        Tests that the correct segment of the dataset within the path matching
        'series/<Param Name>/data' defined by the slice has been written to the
        destination file while other datasets and attributes are unaffected.
        Slice has a start and stop. The write is checked with
        supf_boundary=False and then with supf_boundary=True.
        '''
        segment = slice(10, 20)
        dest = write_segment(self.hdf_path, segment, self.out_path,
                             supf_boundary=False)
        self.assertEqual(dest, self.out_path)

        # The output is expected to begin at 8 rather than the requested 10.
        frame_boundary_segment = slice(8, 20)

        with h5py.File(dest, 'r') as hdf_file:
            # 'IVV' - 1Hz parameter.
            ivv_group = hdf_file['series']['IVV']
            self.assertEqual(ivv_group.attrs['frequency'],
                             self.ivv_frequency)
            self.assertEqual(ivv_group.attrs['supf_offset'],
                             self.ivv_supf_offset)
            ivv_result = ivv_group['data'][:]
            # ``float`` replaces the ``np.float`` alias, which newer NumPy
            # releases no longer provide; the resulting dtype is the same.
            ivv_expected_result = np.arange(
                frame_boundary_segment.start * self.ivv_frequency,
                frame_boundary_segment.stop * self.ivv_frequency,
                dtype=float)
            self.assertEqual(list(ivv_result), list(ivv_expected_result))
            # 'WOW' - 4Hz parameter.
            wow_group = hdf_file['series']['WOW']
            self.assertEqual(wow_group.attrs['frequency'],
                             self.wow_frequency)
            wow_result = wow_group['data'][:]
            wow_expected_result = np.arange(
                frame_boundary_segment.start * self.wow_frequency,
                frame_boundary_segment.stop * self.wow_frequency,
                dtype=float)
            self.assertEqual(list(wow_result), list(wow_expected_result))
            # 'DME' - 0.25Hz parameter.
            dme_group = hdf_file['series']['DME']
            self.assertEqual(dme_group.attrs['frequency'],
                             self.dme_frequency)
            dme_result = dme_group['data'][:]  # array([ 2.,  3.,  4.])
            dme_expected_result = np.arange(2, 5, dtype=float)
            self.assertEqual(list(dme_result), list(dme_expected_result))
            self.assertEqual(
                hdf_file.attrs['duration'],
                frame_boundary_segment.stop - frame_boundary_segment.start)

        # Write segment on superframe boundary.
        dest = write_segment(self.hdf_path, segment, self.out_path,
                             supf_boundary=True)
        self.assertEqual(dest, self.out_path)

        with h5py.File(dest, 'r') as hdf_file:
            # 'IVV' - 1Hz parameter.
            ivv_group = hdf_file['series']['IVV']
            self.assertEqual(ivv_group.attrs['frequency'],
                             self.ivv_frequency)
            self.assertEqual(ivv_group.attrs['supf_offset'],
                             self.ivv_supf_offset)
            ivv_result = ivv_group['data'][:]
            ivv_expected_result = np.arange(64 * self.ivv_frequency,
                                            dtype=float)
            self.assertEqual(list(ivv_result), list(ivv_expected_result))
            # 'WOW' - 4Hz parameter.
            wow_group = hdf_file['series']['WOW']
            self.assertEqual(wow_group.attrs['frequency'],
                             self.wow_frequency)
            wow_result = wow_group['data'][:]
            wow_expected_result = np.arange(64 * self.wow_frequency,
                                            dtype=float)
            self.assertEqual(list(wow_result), list(wow_expected_result))
            # 'DME' - 0.25Hz parameter.
            dme_group = hdf_file['series']['DME']
            self.assertEqual(dme_group.attrs['frequency'],
                             self.dme_frequency)
            dme_result = dme_group['data'][:]
            dme_expected_result = np.arange(64 * self.dme_frequency,
                                            dtype=float)
            self.assertEqual(list(dme_result), list(dme_expected_result))
            self.assertEqual(hdf_file.attrs['duration'], 64)
# Example #11
# 0
def split_hdf_to_segments(hdf_path, aircraft_info, fallback_dt=None,
                          draw=False, dest_dir=None):
    """
    Main method - analyses an HDF file for flight segments and splits each
    flight into a new segment appropriately.

    :param hdf_path: path to HDF file
    :type hdf_path: string
    :param aircraft_info: Information which identify the aircraft, specifically
        with the keys 'Tail Number', 'MSN'...
    :type aircraft_info: Dict
    :param fallback_dt: A datetime which is as close to the end of the data
        file as possible. Used to replace elements of datetimes which are not
        available in the hdf file (e.g. YEAR not being recorded). It is
        reduced by the file duration so that it becomes relative to the start
        of the data.
    :type fallback_dt: datetime
    :param draw: Whether to use matplotlib to plot the flight
    :type draw: Boolean
    :param dest_dir: Destination directory, if None, the source file directory
        is used
    :type dest_dir: str
    :returns: List of Segments
    :rtype: List of Segment recordtypes ('slice type part duration path hash')
    """
    logger.info("Processing file: %s", hdf_path)

    if dest_dir is None:
        dest_dir = os.path.dirname(hdf_path)

    if draw:
        # Imported lazily so plotting dependencies are only needed when
        # drawing is requested.
        from analysis_engine.plot_flight import plot_essential
        plot_essential(hdf_path)

    with hdf_file(hdf_path) as hdf:
        superframe_present = hdf.superframe_present

        # Confirm aircraft tail for the entire datafile
        logger.info("Validating aircraft matches that recorded in data")
        validate_aircraft(aircraft_info, hdf)

        # now we know the Aircraft is correct, go and do the PRE FILE ANALYSIS
        hook = hooks.PRE_FILE_ANALYSIS
        if hook:
            # 'func_name' only exists on Python 2 functions; fall back to
            # '__name__' and finally repr() so that logging the hook can
            # never raise AttributeError.
            hook_name = (getattr(hook, 'func_name', None) or
                         getattr(hook, '__name__', repr(hook)))
            logger.info("Performing PRE_FILE_ANALYSIS analysis: %s",
                        hook_name)
            hook(hdf, aircraft_info)
        else:
            logger.info("No PRE_FILE_ANALYSIS actions to perform")

        segment_tuples = split_segments(hdf)
        if fallback_dt:
            # fallback_dt is relative to the end of the data; remove the data
            # duration to make it relative to the start of the data
            secs = hdf.duration
            fallback_dt -= timedelta(seconds=secs)
            logger.info("Reduced fallback_dt by %ddays %dhr %dmin to %s",
                        secs // 86400, secs % 86400 // 3600,
                        secs % 86400 % 3600 // 60, fallback_dt)

    # Stem shared by every split file name; it does not change per segment.
    dest_stem = os.path.splitext(os.path.basename(hdf_path))[0]

    # process each segment (into a new file) having closed original hdf_path
    segments = []
    previous_stop_dt = None
    for part, (segment_type, segment_slice) in enumerate(segment_tuples,
                                                         start=1):
        # write segment to new split file (.001)
        dest_basename = dest_stem + '.%03d.hdf5' % part
        dest_path = os.path.join(dest_dir, dest_basename)
        logger.debug("Writing segment %d: %s", part, dest_path)
        write_segment(hdf_path, segment_slice, dest_path,
                      supf_boundary=superframe_present)
        segment = append_segment_info(dest_path, segment_type, segment_slice,
                                      part, fallback_dt=fallback_dt)

        if previous_stop_dt and segment.start_dt < previous_stop_dt:
            # In theory, this should not happen - but be warned of superframe
            # padding?
            logger.warning(
                "Segment start_dt '%s' comes before the previous segment "
                "ended '%s'", segment.start_dt, previous_stop_dt)
        previous_stop_dt = segment.stop_dt

        if fallback_dt:
            # move the fallback_dt on to be relative to start of next segment
            fallback_dt += segment.stop_dt - segment.start_dt
        segments.append(segment)
        if draw:
            plot_essential(dest_path)

    if draw:
        # show all figures together
        from matplotlib.pyplot import show
        show()

    return segments