def get_relevant_substituents(self, residue, monosaccharides=None):
    '''
    Retrieve the set of substituents not implicitly included
    in the base type's symbol name.

    Parameters
    ----------
    residue:
        The residue whose substituents are inspected. Substituents whose
        `_derivatize` attribute is truthy are ignored.
    monosaccharides: mapping, optional
        Reference residues, looked up under the keys "NeuAc", "NeuGc"
        and "Neu". Defaults to `self.monosaccharide_reference`.

    Returns
    -------
    The result of `zip(names, positions)`, pairing each retained
    substituent name with its position.

    Raises
    ------
    ValueError:
        When `residue` matches the "Neu" reference but has no
        substituent named "amino".
    '''
    if monosaccharides is None:
        monosaccharides = self.monosaccharide_reference

    # Walk the substituents once so names and positions stay index-aligned.
    positions = []
    substituents = []
    for position, sub in residue.substituents():
        if not sub._derivatize:
            positions.append(position)
            substituents.append(sub.name)

    def relabel(implied_name, shown_name):
        # Best effort: swap the name in place, leaving its position alone.
        # A missing substituent is tolerated; list.index signals that case
        # with ValueError, which is the only error deliberately ignored.
        try:
            substituents[substituents.index(implied_name)] = shown_name
        except ValueError:  # pragma: no cover
            pass

    if identity.is_a(residue, monosaccharides["NeuAc"], exact=False, short_circuit=True):
        relabel("n_acetyl", "acetyl")
    elif identity.is_a(residue, monosaccharides["NeuGc"], exact=False, short_circuit=True):
        relabel("n_glycolyl", "glycolyl")
    elif identity.is_a(residue, monosaccharides["Neu"], exact=False, short_circuit=True):
        # NOTE(review): unlike the two branches above, a missing "amino"
        # substituent is not tolerated here and raises ValueError --
        # confirm this asymmetry is intended.
        index = substituents.index("amino")
        del substituents[index]
        del positions[index]
    return zip(substituents, positions)
def test_identify_substituents(self):
    # Each check builds fresh Substituent objects so that no comparison
    # can succeed merely because both operands are the same instance.

    # Two substituents with the same name are identified with each other.
    same_name = identity.is_a(
        Substituent("n-acetyl"), Substituent("n-acetyl"))
    self.assertTrue(same_name)

    # Substituents with different names are not.
    different_name = identity.is_a(
        Substituent('methyl'), Substituent('n-acetyl'))
    self.assertFalse(different_name)

    # A monosaccharide and a substituent never match, in either order.
    residue_vs_substituent = identity.is_a(
        monosaccharides.Man, Substituent('n-acetyl'))
    self.assertFalse(residue_vs_substituent)

    substituent_vs_residue = identity.is_a(
        Substituent('n-acetyl'), monosaccharides.Man)
    self.assertFalse(substituent_vs_residue)
def get_relevant_substituents(residue):
    '''
    Collect the substituents of `residue` that are not already implied
    by the symbol name of its base type.

    A residue matching the "HexNAc" or "NeuAc" reference has its
    "n_acetyl" substituent left out; one matching "NeuGc" has its
    "n_glycolyl" substituent left out.

    Returns
    -------
    The result of `zip(names, positions)` over the remaining substituents.

    Raises
    ------
    ValueError:
        When the residue matches one of the references above but does
        not carry the substituent that reference implies.
    '''
    # Keep names and positions index-aligned while walking the substituents.
    positions = []
    names = []
    for position, substituent in residue.substituents():
        positions.append(position)
        names.append(substituent.name)

    # Decide which substituent, if any, the base symbol already accounts for.
    n_acetylated = (
        identity.is_a(residue, monosaccharide_reference["HexNAc"], exact=False) or
        identity.is_a(residue, monosaccharide_reference["NeuAc"], exact=False))
    if n_acetylated:
        implied = "n_acetyl"
    elif identity.is_a(residue, monosaccharide_reference["NeuGc"], exact=False):
        implied = "n_glycolyl"
    else:
        implied = None

    if implied is not None:
        index = names.index(implied)
        del names[index]
        del positions[index]
    return zip(names, positions)
def is_special_case(node):
    '''
    Report whether `node` must be drawn with a different layout scheme.

    A node qualifies when it matches one of the entries of `special_cases`
    (as judged by `identity.is_a`) and has no children.

    Returns
    -------
    bool
    '''
    # The child check is only evaluated for entries that actually match,
    # and the scan stops at the first qualifying entry.
    return any(
        identity.is_a(node, case) and len(list(node.children())) == 0
        for case in special_cases)
def is_dhex(residue):
    '''
    Test whether `residue` matches the `dhex` reference.

    The residue is cloned as a `MonosaccharideResidue` and stripped of
    derivatization before being compared with `is_a`.

    Returns
    -------
    The result of `is_a`, or False when a TypeError is raised for an
    object that is not a `MonosaccharideResidue`.

    Raises
    ------
    TypeError:
        Re-raised when it occurs for a genuine `MonosaccharideResidue`.
    '''
    try:
        as_residue = residue.clone(monosaccharide_type=MonosaccharideResidue)
        underivatized = strip_derivatization(as_residue)
        return is_a(underivatized, dhex)
    except TypeError:
        # A real residue should never fail this way, so surface the error;
        # any other kind of object simply is not a match.
        if isinstance(residue, MonosaccharideResidue):
            raise
        return False
def is_dhex(residue):
    '''
    Decide whether `residue` matches the `dhex` reference.

    A clone of the residue, made with
    `monosaccharide_type=MonosaccharideResidue` and passed through
    `strip_derivatization`, is handed to `is_a` together with `dhex`.

    Returns
    -------
    Whatever `is_a` returns; False if a TypeError occurs and `residue`
    is not a `MonosaccharideResidue`.

    Raises
    ------
    TypeError:
        Propagated unchanged when `residue` is a `MonosaccharideResidue`.
    '''
    try:
        candidate = strip_derivatization(
            residue.clone(monosaccharide_type=MonosaccharideResidue))
        matches = is_a(candidate, dhex)
    except TypeError:
        # Only swallow the error for objects of the wrong kind.
        if isinstance(residue, MonosaccharideResidue):
            raise
        return False
    return matches
def glycan_composition_to_terms(self, glycan_composition):
    '''
    Describe `glycan_composition` as a list of term dictionaries.

    When `self.resolve_gnome` finds a term for the composition, the list
    starts with an entry carrying that term's "accession", "name" and
    "cvRef". It is followed by one entry per component of the composition
    after it has been reinterpreted against `valid_monosaccharides`:
    a "monosaccharide count" entry for recognised components and an
    "unknown monosaccharide count" entry (which also records the mass)
    for the rest.

    Returns
    -------
    list of dict

    Raises
    ------
    TypeError:
        When a component is neither a `SubstituentResidue` nor a
        `MonosaccharideResidue`.
    '''
    terms = []

    gnome_term = self.resolve_gnome(glycan_composition)
    if gnome_term is not None:
        terms.append({
            "accession": gnome_term.id,
            "name": gnome_term.name,
            "cvRef": gnome_term.vocabulary.name
        })

    # Work on a copy so the caller's composition is left untouched.
    normalized = glycan_composition.clone().reinterpret(valid_monosaccharides)
    for component, count in normalized.items():
        # Substituents are tested first; the order of these checks is kept
        # in case the two residue types are related by inheritance.
        if isinstance(component, SubstituentResidue):
            known_name = inverted_substituent_map.get(
                component.name.replace("@", ""))
            if known_name is None:
                entry = {
                    "name": "unknown monosaccharide count",
                    "value": ("%s:%0.3f:%d" % (
                        component.name.replace("@", ""),
                        component.mass(),
                        count)),
                    "accession": "MS:XXXXX3",
                    "cvRef": "PSI-MS"
                }
            else:
                entry = {
                    "name": "monosaccharide count",
                    "value": ("%s:%d" % (known_name, count)),
                    "accession": "MS:XXXXX2",
                    "cvRef": "PSI-MS"
                }
            terms.append(entry)
        elif isinstance(component, MonosaccharideResidue):
            for candidate in valid_monosaccharides:
                if not identity.is_a(component, candidate):
                    continue
                # Report the first recognised reference and stop looking.
                terms.append({
                    "name": "monosaccharide count",
                    "value": ("%s:%d" % (
                        monosaccharide_to_term(candidate), count)),
                    "accession": "MS:XXXXX2",
                    "cvRef": "PSI-MS"
                })
                break
            else:
                # No reference matched: report the residue itself with its mass.
                terms.append({
                    "name": "unknown monosaccharide count",
                    "value": ("%s:%0.3f:%d" % (
                        monosaccharide_to_term(component),
                        component.mass(),
                        count)),
                    "accession": "MS:XXXXX3",
                    "cvRef": "PSI-MS"
                })
        else:
            raise TypeError("Cannot handle unexpected component of type %s" % (type(component), ))
    return terms
def monosaccharide_to_linear_code(monosaccharide, max_tolerance=3):
    '''
    Translate `monosaccharide` into a nomenclature symbol, retrying with
    an increasingly permissive match until one succeeds.

    .. note::
        Uses a multi-pass approach: every known symbol is tried at
        tolerance 0, then 1, and so on. A single pass that keeps the
        best match would be an alternative.

    Parameters
    ----------
    monosaccharide: Monosaccharide
        The residue to be translated
    max_tolerance: int
        The largest error tolerance to try while looking for a match

    Returns
    -------
    str
        The matched symbol, followed by the bracketed, comma separated
        substituent codes when there are any, followed by the anomer code.

    Raises
    ------
    LinearCodeError:
        When `monosaccharide` is generic, or when no symbol matches
        within `max_tolerance`
    KeyError:
        When the residue's anomer has no entry in `anomer_map_to`
    '''
    if identity.is_generic_monosaccharide(monosaccharide):
        raise LinearCodeError(
            "Linear Code does not support generic monosaccharide %s" % str(monosaccharide))

    tolerance = 0
    while tolerance <= max_tolerance:
        for name, symbol in monosaccharides_to.items():
            # Symbols without a reference residue cannot be compared.
            if name not in monosaccharide_reference:
                continue
            reference = monosaccharide_reference[name]
            if not identity.is_a(monosaccharide, reference, tolerance=tolerance):
                continue

            substituent_codes = [
                substituent_to_linear_code(*pair)
                for pair in get_relevant_substituents(monosaccharide)]
            if substituent_codes:
                symbol = symbol + '[{}]'.format(', '.join(substituent_codes))
            return symbol + anomer_map_to[monosaccharide.anomer]
        tolerance += 1

    raise LinearCodeError("Cannot map {} to Linear Code".format(monosaccharide))
def test_precision(self):
    # Kdn and NeuAc must not be identified with each other,
    # whichever of the two is used as the reference.
    kdn = monosaccharides.Kdn
    neuac = monosaccharides.NeuAc
    self.assertFalse(identity.is_a(kdn, neuac))
    self.assertFalse(identity.is_a(neuac, kdn))
def test_is_a_predicate(self):
    # Every registered residue must be recognised when the reference is
    # given as the key it is registered under.
    for key, residue in monosaccharides.items():
        self.assertTrue(identity.is_a(residue, key))