def getZfrags():
    """Generate the z fragments by accumulating superatoms from the end.

    The names ``N``, ``superAtoms``, ``blockedFragments``, ``lCnt`` and
    ``atomCnt2string`` are not defined here; they are looked up in the
    surrounding scope when the generator runs.

    Yields
    ------
    out : tuple
        The fragment label ('z' followed by its number), the result of
        ``atomCnt2string`` for the accumulated atom count, and the number.
    """
    accumulated = lCnt()
    for i in range(1, N):
        # Walk the superatoms backwards: indices N-1, N-2, ..., 1.
        accumulated += superAtoms[N - i]
        # Work on a copy so the running total is never handed out.
        snapshot = lCnt(accumulated)
        label = 'z{}'.format(i)
        if label in blockedFragments:
            continue
        yield (label, atomCnt2string(snapshot), i)
def getCfrags():
    """Generate the c fragments by accumulating superatoms from the start.

    The names ``N``, ``superAtoms``, ``blockedFragments``, ``lCnt`` and
    ``atomCnt2string`` are not defined here; they are looked up in the
    surrounding scope when the generator runs.

    Yields
    ------
    out : tuple
        The fragment label ('c' followed by its number), the result of
        ``atomCnt2string`` for the accumulated atom count, and the number.
    """
    # Adding one extra hydrogen to meet the definition of a c fragment.
    accumulated = lCnt({'H': 1})
    for i in range(N - 1):
        accumulated += superAtoms[i]
        # Work on a copy so the running total is never handed out.
        snapshot = lCnt(accumulated)
        label = 'c{}'.format(i)
        # The fragment numbered 0 is never reported, and neither are the
        # labels listed in blockedFragments.
        if i == 0 or label in blockedFragments:
            continue
        yield (label, atomCnt2string(snapshot), i)
def makeBricks():
    """Prepare the base counters of atoms.

    Each amino acid graph is cut into connected pieces and the elements of
    every piece are counted. These counters will later be collected into
    superatoms that finally make up the observed fragments.

    Returns
    -------
    out : dict
        Maps every amino acid key to a dict whose keys are taken from
        'L', 'C' and 'R' and whose values are the element counts of the
        corresponding piece.
    """
    amino_acids = AminoAcids().get()
    records = [
        (
            aa,
            amino_acids[aa]['graph'],
            amino_acids[aa]['NalphaIDX'],
            amino_acids[aa]['CcarboIDX'],
        )
        for aa in amino_acids
    ]

    bricks = {}
    for aa, graph, n_idx, c_idx in records:
        # Remember the vertex names before the graph is taken apart.
        n_name = graph.vs[n_idx]['name']
        c_name = graph.vs[c_idx]['name']

        # Remove every edge whose 'Roep' attribute is not None (igraph
        # keyword filter) -- presumably the backbone bonds; TODO confirm.
        graph.delete_edges(Roep_ne=None)
        components = graph.decompose()

        brick = {}
        if len(components) == 2:
            # Only two pieces are expected for 'P' alone; its center brick
            # is then left empty.
            assert aa == 'P'
            brick['C'] = lCnt()

        while components:
            component = components.pop()
            names = component.vs['name']
            if n_name in names:
                tag = 'L'
            elif c_name in names:
                tag = 'R'
            else:
                tag = 'C'
            brick[tag] = elementContent(component)

        bricks[aa] = brick
    return bricks
def standardize(modifications):
    """Standardize modifications so that they meet the internal nomenclature scheme.

    Parameters
    ----------
    modifications : dict
        Maps a tag to the atom count of the modification. A tag consists of
        letters followed by digits: the letters name the backbone atom
        ('N', 'Calpha' or 'C') and the digits give the 1-based number of
        the amino acid, e.g. 'C11'.

    Returns
    -------
    out : defaultdict
        The atomic modifications: ``out[aa_idx][brick]`` is an ``lCnt``,
        where ``aa_idx`` is the 0-based amino acid index and ``brick`` is
        one of 'L', 'C', 'R'. Missing entries default to an empty ``lCnt``.

    Raises
    ------
    KeyError
        If the letters of a tag are not one of 'N', 'Calpha', 'C'.

    Notes
    -----
    It was easier for me to think of an amino acid as if it was composed
    out of three bricks: the left one, the center one, and the right one.
    The left one corresponds to the group with nitrogen, the center one -
    to the alpha carbon (including the side chain), and the right one - to
    the other carbon atom.

    Tags that do not start with letters followed by digits are skipped.
    """
    backboneAtom2aaNomen = {'N': 'L', 'Calpha': 'C', 'C': 'R'}
    # Compiled once; the same pattern is applied to every tag.
    tag_pattern = re.compile(r"([a-z]+)([0-9]+)", re.I)
    R = defaultdict(lambda: defaultdict(lCnt))
    for tag, atomCnt in modifications.items():
        match = tag_pattern.match(tag)
        if not match:
            # Never fall through with the atom/index parsed from a previous
            # tag: a malformed tag must not overwrite another entry.
            continue
        atom, aa_no = match.groups()
        # Tags count amino acids from 1, the internal index starts at 0.
        R[int(aa_no) - 1][backboneAtom2aaNomen[atom]] = lCnt(atomCnt)
    return R
def standardize(modifications):
    """Standardize modifications so that they meet the internal nomenclature scheme.

    Every key of ``modifications`` is indexed as ``tag[0]`` (a backbone
    atom name: 'N', 'Calpha' or 'C') and ``tag[1]`` (a 1-based number).
    The result is a nested defaultdict addressed as
    ``out[number - 1][brick]`` with ``brick`` in 'L', 'C', 'R'.
    """
    atom_to_brick = {'N': 'L', 'Calpha': 'C', 'C': 'R'}
    standardized = defaultdict(lambda: defaultdict(lCnt))
    for tag, atomCnt in modifications.items():
        counts = lCnt(atomCnt)
        atom = tag[0]
        # Shift from the 1-based number in the tag to a 0-based index.
        position = tag[1] - 1
        standardized[position][atom_to_brick[atom]] = counts
    return standardized
def make_cz_fragments(fasta, modifications):
    """Prepares the precursor and the c and z fragments atom counts.

    ``fasta`` is iterated one amino acid at a time and ``modifications`` is
    indexed as ``modifications[position][brick]``. Three generator
    functions are returned: one for the precursor, one for the c
    fragments and one for the z fragments. Each yields tuples of a label,
    the ``atomCnt2string`` form of an atom count, and a number.
    """
    bricks = makeBricks()

    def modified_brick(position, aa, part):
        # Base brick of the amino acid plus the modification at this spot.
        total = bricks[aa][part] + modifications[position][part]
        if countIsNegative(total):
            print("Attention: your modification has an unexpected effect. Part of your molecule now has negative atom count. Bear that in mind while publishing your results.")
        return total

    # A superatom joins the center and right bricks of one amino acid with
    # the left brick of the next one.
    superAtoms = []
    pending = lCnt()
    for aaNo, aa in enumerate(fasta):
        pending += modified_brick(aaNo, aa, 'L')
        superAtoms.append(pending)
        pending = modified_brick(aaNo, aa, 'C') + modified_brick(aaNo, aa, 'R')
    # The last superatom is closed with an extra O and H, the first one
    # receives an extra H.
    pending += lCnt({'O': 1, 'H': 1})
    superAtoms.append(pending)
    superAtoms[0] += lCnt({'H': 1})
    N = len(superAtoms)

    def getPrecursor():
        whole = sum(superAtoms)
        yield ('precursor', atomCnt2string(whole), len(fasta))

    blockedFragments = prolineBlockedFragments(fasta)

    def getCfrags():
        # Adding one extra hydrogen to meet the definition of a c fragment.
        accumulated = lCnt({'H': 1})
        for i in range(N - 1):
            accumulated += superAtoms[i]
            snapshot = lCnt(accumulated)
            label = 'c{}'.format(i)
            # The fragment numbered 0 is never reported.
            if i == 0 or label in blockedFragments:
                continue
            yield (label, atomCnt2string(snapshot), i)

    def getZfrags():
        accumulated = lCnt()
        for i in range(1, N):
            # Walk the superatoms backwards: indices N-1, N-2, ..., 1.
            accumulated += superAtoms[N - i]
            snapshot = lCnt(accumulated)
            label = 'z{}'.format(i)
            if label in blockedFragments:
                continue
            yield (label, atomCnt2string(snapshot), i)

    return getPrecursor, getCfrags, getZfrags
def elementContent(G):
    """Count the atoms of every element that make up the graph of a molecule.

    The 'elem' attribute of each vertex of ``G`` is tallied into a fresh
    ``lCnt``, which is returned.
    """
    tally = lCnt()
    for element in G.vs['elem']:
        tally[element] = tally[element] + 1
    return tally
def make_cz_fragments(fasta, modifications):
    """Prepares the precursor and the c and z fragments atom counts.

    Parameters
    ----------
    fasta : str
        The fasta of the studied molecular species.
    modifications : mapping
        The modifications, indexed as ``modifications[aaNo][aaPart]`` where
        ``aaNo`` is the 0-based position in ``fasta`` and ``aaPart`` is one
        of 'L', 'C', 'R'. Every such lookup must succeed (presumably this
        is the nested defaultdict produced by ``standardize`` -- TODO
        confirm against the caller).

    Returns
    -------
    out : tuple
        A tuple with generators of precursors, c fragments, and z fragments.
        Each generator yields tuples of a label, the ``atomCnt2string``
        form of an atom count, and a number.
    """
    data_path = pkg_resources.resource_filename('MassTodonPy', 'Data/')
    # NOTE(review): pickle must only ever be fed trusted data; this file is
    # resolved inside the MassTodonPy package itself.
    # The context manager closes the handle as soon as loading is done.
    with open(data_path + 'amino_acids.txt', 'rb') as bricks_file:
        bricks = pickle.load(bricks_file)

    def getBrick(aaPart, aa):
        # aaNo is the loop variable of the superatom construction below.
        brick = bricks[aa][aaPart] + modifications[aaNo][aaPart]
        if countIsNegative(brick):
            print(
                "Attention: your modification has an unexpected effect. Part of your molecule now has negative atom count. Bear that in mind while publishing your results."
            )
        return brick

    # A superatom joins the center and right bricks of one amino acid with
    # the left brick of the next one.
    superAtoms = []
    sA = lCnt()
    for aaNo, aa in enumerate(fasta):
        sA += getBrick('L', aa)
        superAtoms.append(sA)
        sA = getBrick('C', aa) + getBrick('R', aa)
    # The last superatom is closed with an extra O and H, the first one
    # receives an extra H.
    sA += lCnt({'O': 1, 'H': 1})
    superAtoms.append(sA)
    superAtoms[0] += lCnt({'H': 1})
    N = len(superAtoms)

    def getPrecursor():
        precursor = sum(superAtoms)
        yield ('precursor', atomCnt2string(precursor), len(fasta))

    blockedFragments = prolineBlockedFragments(fasta)

    def getCfrags():
        # Adding one extra hydrogen to meet the definition of a c fragment.
        cFrag = lCnt({'H': 1})
        for i in range(N - 1):
            cFrag += superAtoms[i]
            cFrag_tmp = lCnt(cFrag)
            frag_type = 'c' + str(i)
            # The fragment numbered 0 is never reported.
            if frag_type not in blockedFragments and i != 0:
                yield (frag_type, atomCnt2string(cFrag_tmp), i)

    def getZfrags():
        zFrag = lCnt()
        for i in range(1, N):
            # Walk the superatoms backwards: indices N-1, N-2, ..., 1.
            zFrag += superAtoms[N - i]
            zFrag_tmp = lCnt(zFrag)
            frag_type = 'z' + str(i)
            if frag_type not in blockedFragments:
                yield (frag_type, atomCnt2string(zFrag_tmp), i)

    return getPrecursor, getCfrags, getZfrags