Beispiel #1
0
    def __getitem__(self, identifier):
        """
        Access the item with id 'identifier' in the file by iterating the xml-tree.

        The document is parsed from its beginning on every call. The position
        the file handler had on entry is restored before the method is left,
        both when an item is found and when the lookup fails.

        Arguments:
            identifier (int or str): spectra are matched by comparing the
                integer extracted from their 'id' attribute (through
                regex_patterns.SPECTRUM_ID_PATTERN) with 'identifier';
                chromatograms are matched by comparing their raw 'id'
                attribute with 'identifier'.

        Returns:
            spec.Spectrum or spec.Chromatogram: object built from the first
            matching element (with measured_precision=5e-6)

        Raises:
            StopIteration: if the end of the document is reached without
                finding a matching element
        """
        old_pos = self.file_handler.tell()
        self.file_handler.seek(0, 0)
        try:
            # Only 'end' events are requested, so every element handed out
            # here is completely parsed.
            for _event, element in iterparse(self.file_handler, events=['end']):
                if element.tag.endswith('}spectrum'):
                    id_match = regex_patterns.SPECTRUM_ID_PATTERN.search(
                        element.get('id'))
                    if int(id_match.group(0)) == identifier:
                        return spec.Spectrum(element, measured_precision=5e-6)
                elif element.tag.endswith('}chromatogram'):
                    if element.get('id') == identifier:
                        return spec.Chromatogram(element,
                                                 measured_precision=5e-6)
            # Exhausting the parser used to surface as StopIteration from
            # next(); the same exception type is kept for existing callers.
            raise StopIteration
        finally:
            # Restore the caller's position on every exit path, including
            # the "not found" case.
            self.file_handler.seek(old_pos, 0)
Beispiel #2
0
    def __getitem__(self, identifier):
        """
        Access the item with id 'identifier'.

        The lookup strategy depends on the identifier:

        * 'TIC' (case-insensitive, compared via str(identifier)): the file
          is parsed linearly until the chromatogram with id 'TIC' is found.
        * an identifier present in self.offset_dict: the stored offset is
          used to locate the element and read it directly.
        * any other string: delegated to self._search_string_identifier.
        * anything else: delegated to self._interpol_search.

        Arguments:
            identifier (str or int): native id of the item to access

        Returns:
            spec.Spectrum or spec.Chromatogram for the first two strategies;
            otherwise whatever the delegated search method returns.

        Raises:
            StopIteration: if 'TIC' is requested but the document contains
                no chromatogram with that id
        """
        #############################################################################
        # DOES NOT HOLD IF NUMBERS DONT START WITH ONE AND/OR DONT INCREASE BY ONE  #
        # TODO FIXME                                                                #
        #############################################################################

        self.file_handler.seek(0)

        if str(identifier).upper() == 'TIC':
            # Only 'end' events are requested, so each element is complete.
            for _event, element in iterparse(self.file_handler, events=['end']):
                if (element.tag.endswith('}chromatogram')
                        and element.get('id') == 'TIC'):
                    return spec.Chromatogram(element, measured_precision=5e-6)
            # Kept as StopIteration for callers relying on the old behaviour.
            raise StopIteration

        if identifier in self.offset_dict:
            offset_entry = self.offset_dict[identifier]
            # A separate binary handle is used to determine the element
            # boundaries, starting at the first item of the stored entry.
            with open(self.path, 'rb') as seeker:
                seeker.seek(offset_entry[0])
                start, end = self._read_to_spec_end(seeker)
            self.file_handler.seek(start, 0)
            data = self.file_handler.read(end - start)
            return spec.Spectrum(XML(data), measured_precision=5e-6)

        # isinstance also accepts str subclasses, which the former
        # type(identifier) == str comparison sent to the numeric search.
        if isinstance(identifier, str):
            return self._search_string_identifier(identifier)

        return self._interpol_search(identifier)
Beispiel #3
0
    def _search_string_identifier(self, search_string, chunk_size=8):
        """
        Scan the file chunk by chunk for an element matching 'search_string'.

        The file at self.path is read in binary mode in chunks of
        chunk_size * 512 bytes, each extended up to the next tag end via
        self._read_until_tag_end. Every chunk is checked for a spectrum
        opening tag whose id contains 'search_string' and, if there is none,
        for a chromatogram opening tag
        (regex_patterns.CHROMO_OPEN_PATTERN) whose first group equals
        'search_string'.

        Arguments:
            search_string (str): (part of) the native id to look for
            chunk_size (int): chunk size in units of 512 bytes

        Returns:
            spec.Spectrum or spec.Chromatogram: object built from the first
            matching element

        Raises:
            Exception: if a read yields no data before a match was found
        """
        with open(self.path, 'rb') as seeker:
            total_chunk_size = chunk_size * 512

            # NOTE: This needs to go into regex_patterns.py
            # NOTE(review): search_string is interpolated unescaped, so regex
            # metacharacters in it are interpreted as pattern syntax.
            # Raw literal: '\s' in a normal string literal is an invalid
            # escape sequence; the compiled pattern itself is unchanged.
            spectrum_pattern = re.compile(
                (
                    r'<\s*spectrum[^>]*index="[0-9]+"\sid="({0})"'
                    r'\sdefaultArrayLength="[0-9]+">'
                ).format(
                    "".join(['.*', search_string, '.*'])
                ).encode()
            )

            search_bytes = search_string.encode()

            while True:
                chunk_offset = seeker.tell()

                data = seeker.read(total_chunk_size)
                # Extend the chunk so that it does not stop inside a tag.
                tail, seeker = self._read_until_tag_end(seeker, byte_mode=True)
                data += tail

                spec_start = spectrum_pattern.search(data)
                chrom_start = regex_patterns.CHROMO_OPEN_PATTERN.search(data)
                if spec_start:
                    # Match positions are relative to the chunk; convert to
                    # an absolute file offset.
                    spec_start_offset = chunk_offset + spec_start.start()
                    if search_bytes in spec_start.group(1):
                        seeker.seek(spec_start_offset)
                        start, end = self._read_to_spec_end(seeker)
                        seeker.seek(start)
                        spec_string = seeker.read(end - start)
                        return spec.Spectrum(
                            XML(spec_string),
                            measured_precision=5e-6
                        )
                elif chrom_start:
                    chrom_start_offset = chunk_offset + chrom_start.start()
                    if search_bytes == chrom_start.group(1):
                        seeker.seek(chrom_start_offset)
                        start, end = self._read_to_spec_end(seeker)
                        seeker.seek(start)
                        chrom_string = seeker.read(end - start)
                        return spec.Chromatogram(XML(chrom_string))
                elif len(data) == 0:
                    # Nothing left to read and nothing matched.
                    raise Exception('cant find specified string')
Beispiel #4
0
    def __getitem__(self, key):
        """
        Execute a SQL request, process the data and return a spectrum object.

        Args:
            key (str or int): unique identifier for the given spectrum in the
                database. A tuple or list is passed to the query unchanged
                as the parameter sequence.

        Returns:
            spec.Spectrum or spec.Chromatogram: object built from the XML
            stored in the matching row

        Raises:
            KeyError: if no row with the given id exists
            ValueError: if the stored element is neither a spectrum nor a
                chromatogram
        """
        # The second argument of execute() is a sequence of bind values.
        # Passing a bare int is rejected and a multi-character string is
        # taken as one bind value per character, so wrap scalars in a tuple.
        params = key if isinstance(key, (tuple, list)) else (key,)
        self.cursor.execute("SELECT * FROM spectra WHERE id=?", params)

        row = self.cursor.fetchone()
        if row is None:
            # fetchone() yields None when the query matched nothing.
            raise KeyError(key)
        _row_id, element = row

        element = et.XML(element)
        if "spectrum" in element.tag:
            return spec.Spectrum(element)
        if "chromatogram" in element.tag:
            return spec.Chromatogram(element)
        raise ValueError(
            "unexpected element tag {0!r} for id {1!r}".format(element.tag, key)
        )
Beispiel #5
0
    def __getitem__(self, identifier):
        """
        Read the block for 'identifier' and wrap it in a spec object.

        The bytes returned by self.Reader.read_block are decoded as UTF-8 and
        enclosed in a hard-coded <mzML> root element, which declares the
        mzML namespace, before being parsed.

        Arguments:
            identifier (str): native id of the item to access

        Returns:
            spec.Chromatogram if the tag of the parsed element contains
            'chromatogram', otherwise spec.Spectrum
        """
        #TODO more elegant way to add NameSpace (.register_namespace maybe??)
        root_open = (
            '<mzML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
            'xsi:schemaLocation="http://psi.hupo.org/ms/mzml '
            'http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" '
            'id="test_Creinhardtii_QE_pH8" version="1.1.0" '
            'xmlns="http://psi.hupo.org/ms/mzml">'
        )
        root_close = '</mzML>'

        raw_block = self.Reader.read_block(identifier)
        document = XML(root_open + raw_block.decode('utf-8') + root_close)

        # The wrapped block is the first (index 0) child of the root.
        item = document[0]
        if 'chromatogram' in item.tag:
            wrapper = spec.Chromatogram
        else:
            wrapper = spec.Spectrum
        return wrapper(item, measured_precision=5e-6)