    def get_tracking_id(self):
        """ Returns a dictionary of tracking-id key/value pairs.
        """
        self.assert_open()
        tracking = self.handle[self.global_key + 'tracking_id'].attrs.items()
        tracking = {key: _clean(value) for key, value in tracking}
        return tracking
    def _parse_attribute_tree(self, group):
        data = {}
        folders = self.handle[group].keys()
        for folder in folders:
            path = '{}/{}'.format(group, folder)
            attr = self.handle[path].attrs.items()
            data[folder] = {key: _clean(value) for key, value in attr}
        return data
    def get_context_tags(self):
        """ Returns a dictionary of context tag key/value pairs.
        """
        self.assert_open()
        if self.has_context_tags:
            tags = self.handle[self.global_key + 'context_tags'].attrs.items()
            return {key: _clean(value) for key, value in tags}
        return {}
    def get_channel_info(self):
        """ Returns a dictionary of channel information key/value pairs.
        """
        self.assert_open()
        channel_info = self.handle[self.global_key +
                                   'channel_id'].attrs.items()
        channel_info = {key: _clean(value) for key, value in channel_info}
        channel_info['channel_number'] = int(channel_info['channel_number'])
        return channel_info
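A minimal usage sketch for the accessors above, assuming they are methods of a fast5 reader class that wraps an h5py handle; the class name Fast5File and the open()/close() calls are assumptions for illustration, not part of the snippet:

    # Hypothetical usage; Fast5File, open() and close() are assumed names.
    fh = Fast5File('example_read.fast5')
    fh.open()
    try:
        tracking = fh.get_tracking_id()   # e.g. run and flow cell identifiers
        tags = fh.get_context_tags()      # {} when the file has no context tags
        channel = fh.get_channel_info()   # 'channel_number' is coerced to int
        print(tracking, tags, channel['channel_number'])
    finally:
        fh.close()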
    def test__clean(self):
        self.assertEqual(_clean(1), 1)
        self.assertEqual(_clean(b''), '')
        self.assertEqual(_clean(''), '')
        self.assertEqual(_clean('str'), 'str')
        self.assertEqual(_clean(b'str'), 'str')
        self.assertTrue(isinstance(_clean('str'), str))

        # _clean should convert byte strings into utf-8 ones
        test_str = array(b'Hello!', dtype=bytes)
        self.assertEqual(type(test_str), ndarray)
        self.assertEqual(_clean(test_str), 'Hello!')

        # _clean shouldn't do anything to python strings
        test_str = array('Hello!', dtype=str)
        self.assertEqual(type(test_str), ndarray)
        self.assertEqual(_clean(test_str), test_str)

        self.assertEqual(_clean(array([1, 2, 3])), [1, 2, 3])
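The test above assumes `from numpy import array, ndarray` at module level and exercises a `_clean` helper that is not shown here. A minimal sketch consistent with these assertions (decode byte strings to UTF-8, unwrap zero-dimensional numpy arrays, convert other arrays to lists, pass everything else through) could look like the following; it is an illustration reconstructed from the test, not the library's actual implementation:

    import numpy as np

    def _clean(value):
        # Illustrative sketch of _clean, reconstructed from the test above.
        if isinstance(value, np.ndarray):
            if value.ndim == 0:
                value = value.item()       # array(b'Hello!') -> b'Hello!'
            else:
                return value.tolist()      # array([1, 2, 3]) -> [1, 2, 3]
        if isinstance(value, bytes):
            return value.decode('utf-8')   # byte strings -> str
        return value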
    def get_analysis_attributes(self, group_name):
        """ Returns the attributes for the specified group or dataset.

        :param group_name: The path of the group or dataset, relative to the
            "Analyses" group.
        :returns: A dictionary representing the attributes (if any).
        """
        self.assert_open()
        group = 'Analyses/{}'.format(group_name)
        attr = None
        if group in self.handle:
            attr = self.handle[group].attrs.items()
            attr = {key: _clean(value) for key, value in attr}
        return attr
    def list_analyses(self, component=None):
        """ Provides a list of all analyses groups.

        :param component: Optional component name. If provided, only
            analyses of that component will be returned.
        :returns: A list of component-name/group-name pairs (tuples).
        """
        self.assert_open()
        analyses = []
        if 'Analyses' not in self.handle:
            return analyses
        ana_groups = self.handle['Analyses'].keys()
        for group_name in ana_groups:
            group_attrs = self.handle['Analyses/{}'.format(group_name)].attrs
            if 'component' in group_attrs:
                comp = _clean(group_attrs['component'])
            elif group_name[:-4] in LEGACY_COMPONENT_NAMES:
                comp = LEGACY_COMPONENT_NAMES[group_name[:-4]]
            else:
                # We don't know anything about this component!
                comp = None
            if comp is not None and (component is None or comp == component):
                analyses.append((comp, group_name))
        return analyses
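Together, `list_analyses` and `get_analysis_attributes` allow walking the analysis groups of an open file; passing `component` restricts the listing to one component. A short sketch, where `fh` stands for an already-open reader object (an assumption of this example):

    # Sketch only: 'fh' is assumed to expose the two methods above.
    for component, group_name in fh.list_analyses():
        attrs = fh.get_analysis_attributes(group_name)
        if attrs is not None:   # get_analysis_attributes returns None for missing groups
            print(component, group_name, sorted(attrs))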
    def __init__(self, fname):
        """ Constructs a status object from a file.

        :param fname: Filename of fast5 file to read status from.
        """
        self.valid = True
        self.channel = None
        self.read_info = []
        self.read_number_map = {}
        self.read_id_map = {}
        try:
            with h5py.File(fname, 'r') as handle:
                if 'file_version' in handle.attrs:
                    self.version = _clean(handle.attrs['file_version'])
                    minimum_valid_version = packaging_version.Version('0.6')
                    if packaging_version.parse(str(self.version)) \
                       < minimum_valid_version:
                        self.valid = False
                else:
                    self.valid = False
                    self.version = 0.0

                # Check for required groups.
                top_groups = handle.keys()
                if 'UniqueGlobalKey' in top_groups:
                    global_keys = handle['UniqueGlobalKey'].keys()
                else:
                    # Ensure global_keys is defined even when the group is absent.
                    global_keys = []
                if ('tracking_id' not in global_keys
                        and not self._legacy_version()):
                    self.valid = False
                if 'channel_id' not in global_keys:
                    self.valid = False

                self.channel = handle['UniqueGlobalKey/channel_id'].attrs.get(
                    'channel_number')
                if self.channel is None and self._legacy_version():
                    self.valid = False

                # Get the read information.
                if 'Raw' in top_groups:
                    reads = handle['Raw/Reads'].keys()
                    for read in reads:
                        read_group_name = 'Raw/Reads/{}'.format(read)
                        read_group = handle[read_group_name]
                        read_attrs = read_group.attrs
                        read_number = _clean(read_attrs['read_number'])
                        if 'read_id' in read_attrs:
                            read_id = _clean(read_attrs['read_id'])
                        else:
                            if not self._legacy_version():
                                self.valid = False
                            # Fall back to the filename so read_id is always
                            # defined for the read_id_map below.
                            read_id = os.path.basename(fname)
                        start_time = _clean(read_attrs['start_time'])
                        duration = _clean(read_attrs['duration'])
                        mux = _clean(read_attrs.get('start_mux', 0))
                        median_before = _clean(
                            read_attrs.get('median_before', -1.0))
                        read_info = ReadInfo(read_number, read_id, start_time,
                                             duration, mux, median_before)
                        if 'Signal' in read_group:
                            read_info.has_raw_data = True
                        elif self._legacy_version():
                            if 'Data' in read_group:
                                read_info.has_raw_data = True
                            else:
                                self.valid = False
                        self.read_info.append(read_info)
                        n = len(self.read_info) - 1
                        self.read_number_map[read_number] = n
                        self.read_id_map[read_id] = n
                else:
                    if not self._legacy_version():
                        self.valid = False
                if 'Analyses' in handle:
                    analyses = sorted(handle['Analyses'].keys())
                else:
                    analyses = []
                for ana in analyses[::-1]:
                    if ana.startswith('EventDetection'):
                        reads_group_name = 'Analyses/{}/Reads'.format(ana)
                        if reads_group_name not in handle:
                            continue
                        reads = handle[reads_group_name].keys()
                        for read in reads:
                            read_group_name = '{}/{}'.format(
                                reads_group_name, read)
                            read_group = handle[read_group_name]
                            read_attrs = read_group.attrs
                            read_number = _clean(read_attrs['read_number'])
                            if 'read_id' in read_attrs:
                                read_id = _clean(read_attrs['read_id'])
                            else:
                                if not self._legacy_version():
                                    self.valid = False
                                    continue
                                else:
                                    read_id = os.path.basename(fname)
                            start_time = _clean(read_attrs['start_time'])
                            duration = _clean(read_attrs['duration'])
                            mux = _clean(read_attrs.get('start_mux', 0))
                            median_before = _clean(
                                read_attrs.get('median_before', -1.0))
                            read_info = ReadInfo(read_number, read_id,
                                                 start_time, duration, mux,
                                                 median_before)
                            if 'Events' in read_group:
                                read_info.has_event_data = True
                                read_info.event_data_count = len(
                                    read_group['Events'])
                            else:
                                read_info.has_event_data = False
                                read_info.event_data_count = 0
                            if read_number in self.read_number_map:
                                read_index = self.read_number_map[read_number]
                                existing = self.read_info[read_index]
                                existing.has_event_data = read_info.has_event_data
                                existing.event_data_count = read_info.event_data_count
                            else:
                                if not self._legacy_version():
                                    self.valid = False
                                self.read_info.append(read_info)
                                n = len(self.read_info) - 1
                                self.read_number_map[read_number] = n
                                self.read_id_map[read_id] = n
                        break
        except:
            self.valid = False
            raise

        if self._legacy_version():
            # There must be either raw data or event data (or both).
            if len(self.read_info) == 0:
                self.valid = False
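A brief usage sketch for the resulting status object; the class name Fast5Status is a placeholder, since only __init__ appears above:

    # Placeholder class name; only the constructor is shown in the snippet.
    status = Fast5Status('example_read.fast5')
    if status.valid:
        for info in status.read_info:
            print(info.read_number, info.read_id)
    else:
        print('Unsupported or incomplete fast5 file, version:', status.version)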