コード例 #1
0
class Graph(object):
    """Graph linking mass nodes whose mass differences match known parts.

    Every input item is wrapped in a ``Node`` keyed by its ``mass``.  An
    ``Edge`` is recorded between two nodes when the gap between their masses
    matches the mass of a component ("part") produced by
    ``generate_component_set`` from the configured modifications.
    """

    def __init__(self, nodes, constant_modifications=None, variable_modifications=None):
        """Build the node heap and the set of candidate parts.

        :param nodes: iterable of objects exposing a ``mass`` attribute.
        :param constant_modifications: optional list of constant modification
            rules passed to ``generate_component_set``.
        :param variable_modifications: optional iterable of variable
            modification rules; the HexNAc rule is always added to a private
            copy of it.
        """
        self.nodes = MassHeap(Node(s.mass, graph=self) for s in nodes)
        self.edges = defaultdict(list)
        self.constant_modifications = constant_modifications or []
        # Copy before appending: the original `+=` mutated the caller's list
        # in place, so the HexNAc rule accumulated across Graph instances.
        self.variable_modifications = list(variable_modifications or [])
        self.variable_modifications.append(modification.Modification("HexNAc").rule)
        self.parts = MassHeap(generate_component_set(
            self.constant_modifications,
            self.variable_modifications))
        # Cache of part heaps keyed by combination length; length 1 is the
        # plain part set.
        self.long_parts = {1: self.parts}
        self.node_map = {}

    def __iter__(self):
        """Iterate over the nodes of the graph."""
        return iter(self.nodes)

    def process(self, node, parts=None):
        """Record edges from ``node`` to the nodes above it in mass.

        For each node returned by ``self.nodes.get_higher_than(node.mass)``
        the mass gap is compared against every candidate part; a match within
        the 2e-5 ``ppm_error`` tolerance appends an ``Edge`` to
        ``self.edges`` under the unordered ``frozenset`` pair of both nodes.

        :param node: the node to extend from.
        :param parts: heap of candidate parts; defaults to ``self.parts``.
        """
        if parts is None:
            parts = self.parts
        for extent in self.nodes.get_higher_than(node.mass):
            gap_mass = extent.mass - node.mass
            for part in parts.get_lower_than(gap_mass + precursor_mass_shift + 1):
                if fabs(ppm_error(gap_mass + y_mass_shift, part.mass + y_mass_shift)) <= 2e-5:
                    self.edges[frozenset((node, extent))].append(
                        Edge(node, extent, node.mass, link_sequence=part))

    def process_all(self, length=1):
        """Run :meth:`process` for every node, then rebuild the node map.

        :param length: number of parts combined into a single link; the
            combined part heap is built on first use and cached.
        """
        parts = self.long_parts.get(length, None)
        if parts is None:
            self.build_unordered_sequences(length)
            parts = self.long_parts[length]
        for node in self:
            self.process(node, parts)
        self.build_node_map()

    def build_node_map(self):
        """Index the edges by the lowest-mass node of each pair.

        A single pass over ``self.edges`` replaces the original scan of every
        edge key for every node (O(nodes * edges)); each node receives the
        same edges in the same relative order.
        """
        node_map = defaultdict(list)
        for pair, edges in self.edges.items():
            node_map[min(pair, key=masser)].extend(edges)
        self.node_map = node_map

    def roots(self):
        """Return the nodes that have outgoing edges but no incoming edge."""
        roots = set(self.node_map)
        for node, edges in self.node_map.items():
            for edge in edges:
                roots.discard(edge.to_terminus)
            if not edges:
                roots.discard(node)
        return list(roots)

    def build_unordered_sequences(self, n=2):
        """Build and cache the heap of ``n``-part unordered combinations."""
        self.long_parts[n] = MassHeap(list(unordered_combinations(self.parts, n)))

    def get_sequence(self, node):
        """Yield ``(start, SequenceCollection, terminus)`` for each path from ``node``.

        For a node without outgoing edges the single (empty) path ends at
        ``node`` itself; the original indexed ``path[-1]`` unconditionally
        and raised ``IndexError`` in that case.
        """
        for path in self.traverse(node):
            terminus = path[-1].to_terminus if path else node
            links = [edge.link_sequence for edge in path]
            yield (node, SequenceCollection(links), terminus)

    def traverse(self, node):
        """Yield every edge path from ``node`` to a node with no outgoing edges.

        A node without outgoing edges yields one empty path.  The lookup uses
        ``dict.get`` so it neither raises ``KeyError`` on a plain dict nor
        inserts empty entries into a ``defaultdict`` while paths are walked.
        """
        edges = self.node_map.get(node)
        if not edges:
            yield []
            return
        for edge in edges:
            for path in self.traverse(edge.to_terminus):
                yield [edge] + path

    def all_paths(self):
        """Yield ``(total_length, path)`` for every path starting at a root.

        ``total_length`` is the sum of ``len(edge)`` over the path's edges.
        """
        for root in self.roots():
            for path in self.traverse(root):
                yield (sum(len(edge) for edge in path), path)

    def all_sequences(self):
        """Yield ``(sequence_length, sequence_tuple)`` for every root sequence."""
        for root in self.roots():
            for seq in self.get_sequence(root):
                yield (len(seq[1]), seq)

    def identify_node(self, node, parts=None):
        """Annotate ``node`` with the parts whose shifted mass matches it.

        Each part is tested against the node mass with ``y_mass_shift`` first
        and, only if that fails, with ``b_mass_shift``.  Matches are appended
        to ``node.composition`` and ``node.kind`` (so repeated calls
        accumulate).

        :param node: node to annotate in place.
        :param parts: iterable of candidate parts; defaults to ``self.parts``.
        :returns: ``zip(node.composition, node.kind)``.
        """
        if parts is None:
            parts = self.parts
        for part in parts:
            if fabs(ppm_error(node.mass, part.mass + y_mass_shift)) <= 2e-5:
                node.composition.append(part)
                node.kind.append('y')
            elif fabs(ppm_error(node.mass, part.mass + b_mass_shift)) <= 2e-5:
                node.composition.append(part)
                node.kind.append('b')
        return zip(node.composition, node.kind)
コード例 #2
0
def sequence_spectra(ms_spectrum, drop_mass=0, constant_modifications=None, variable_modifications=None,
                     max_running_gaps=1, max_total_gaps=2):
    """Grow candidate sequences round by round against a tandem spectrum.

    Candidate sequences are seeded from single parts that match a tandem peak
    as a 'y' or 'b' fragment (or from every part when nothing matches).  Each
    round extends every candidate by one part; an extension is kept when a
    tandem peak matches the extended mass, otherwise the candidate may be
    extended with a gap while the gap limits allow it.  The search stops when
    a round produces no candidates.

    :param ms_spectrum: object exposing ``neutral_mass`` and ``tandem_data``.
    :param drop_mass: mass subtracted from the precursor neutral mass (only
        used for the logged precursor mass).
    :param constant_modifications: optional list of constant modification
        rules passed to ``generate_component_set``.
    :param variable_modifications: optional iterable of variable modification
        rules; the HexNAc rule is always added to a private copy of it.
    :param max_running_gaps: upper bound checked against ``seq.current_gaps + 1``.
    :param max_total_gaps: upper bound checked against ``seq.total_gaps + 1``;
        also sizes the window of retained rounds (``4 * max_total_gaps``).
    :returns: ``ResultsGroup`` built from the matched sequences of the
        retained rounds and the list of parts.
    """
    constant_modifications = constant_modifications or []
    # Copy before appending: the original `+=` mutated the caller's list.
    variable_modifications = list(variable_modifications or [])
    variable_modifications.append(modification.Modification("HexNAc").rule)
    precursor_mass = ms_spectrum.neutral_mass - drop_mass
    logger.info("Precursor Mass: %f", precursor_mass)
    previous_sequences = SqliteDiskQueue()
    # Materialise the parts: they are iterated many times below, and on
    # Python 3 a bare map() iterator would be exhausted after the first loop.
    parts = [SimpleFragment(component)
             for component in generate_component_set(constant_modifications,
                                                     variable_modifications)]
    tandem = MassHeap(ms_spectrum.tandem_data)

    # Get starting component
    seeded = False
    for part in parts:
        for msms in tandem:
            if fabs(ppm_error(msms.neutral_mass, part.mass + y_mass_shift)) < 2e-5:
                previous_sequences.append(SequenceRecord(part, kind='y'))
                seeded = True
            if fabs(ppm_error(msms.neutral_mass, part.mass + b_mass_shift)) < 2e-5:
                previous_sequences.append(SequenceRecord(part, kind='b'))
                seeded = True
    if not seeded:
        # No peak matched a single part: start from every part instead.
        # NOTE(review): the positional (1, 1) presumably seeds the gap
        # counters -- confirm against SequenceRecord.
        for part in parts:
            previous_sequences.append(SequenceRecord(part, 1, 1))
    next_sequences = SqliteDiskQueue()
    # Only the most recent rounds are retained.
    solutions = deque(maxlen=4 * max_total_gaps)
    min_part = min(parts, key=lambda x: x.mass).mass
    max_part = max(parts, key=lambda x: x.mass).mass
    while len(previous_sequences) > 0:
        for seq in previous_sequences:
            matched = False
            # Mass window any one-part extension of `seq` can fall into,
            # widened by the y shift and the relative tolerance.
            lower = seq.mass + min_part
            upper = seq.mass + max_part
            lower_threshold = lower - (y_mass_shift + lower * 2e-5)
            upper_threshold = upper + (y_mass_shift + upper * 2e-5)
            for msms in reversed(list(tandem.get_higher_than(lower_threshold))):
                if msms.neutral_mass > upper_threshold:
                    break
                for part in parts:
                    y_query = part.mass + y_mass_shift + seq.mass
                    if seq.kind != 'b' and fabs(ppm_error(msms.neutral_mass, y_query)) < 2e-5:
                        ext = seq.extend(part, False)
                        ext.matches.append(msms)
                        ext.kind = 'y'
                        next_sequences.append(ext)
                        matched = True
                    # Same query with the y shift swapped for the b shift.
                    b_query = y_query + (b_mass_shift - y_mass_shift)
                    if seq.kind != 'y' and fabs(ppm_error(msms.neutral_mass, b_query)) < 2e-5:
                        ext = seq.extend(part, False)
                        # Record the matched peak as the 'y' branch does; the
                        # original omitted this, so 'b' extensions never
                        # gained a match for the solutions filter below.
                        ext.matches.append(msms)
                        ext.kind = 'b'
                        next_sequences.append(ext)
                        matched = True
            if not matched:
                # Nothing matched: extend with a gap while both limits allow.
                if seq.current_gaps + 1 <= max_running_gaps and seq.total_gaps + 1 <= max_total_gaps:
                    for part in parts:
                        next_sequences.append(seq.extend(part, True))
        logger.info("Round over, %d candidates", len(next_sequences))
        if len(next_sequences) == 0:
            return ResultsGroup([seq for batch in solutions for seq in batch], parts)
        previous_sequences = next_sequences
        # Stored lazily as a generator over this round's queue.
        # NOTE(review): this assumes iterating a SqliteDiskQueue is repeatable
        # and non-destructive, since the same queue is also iterated in the
        # next round -- confirm.
        solutions.append(seq for seq in next_sequences if len(seq.matches) > 0)
        next_sequences = SqliteDiskQueue()
    # Only reachable if there were no starting candidates at all; return an
    # empty-but-valid result rather than an implicit None.
    return ResultsGroup([seq for batch in solutions for seq in batch], parts)