Ejemplo n.º 1
0
 def getZfrags():
     """Yield the z fragments that are not blocked.

     Accumulates the entries of ``superAtoms`` from the last one backwards
     and, for every ``i`` in ``1 .. N - 1``, yields the tuple
     ``('z<i>', atomCnt2string(<accumulated counts>), i)`` unless ``'z<i>'``
     is in ``blockedFragments``.

     ``N``, ``superAtoms``, ``blockedFragments``, ``lCnt`` and
     ``atomCnt2string`` are names defined outside this function.
     """
     running_total = lCnt()
     for size in range(1, N):
         # Extend the fragment by one more super-atom, counted from the end.
         running_total += superAtoms[N - size]
         name = 'z' + str(size)
         if name in blockedFragments:
             continue
         # Format a fresh lCnt built from the running total, not the
         # accumulator itself.
         yield (name, atomCnt2string(lCnt(running_total)), size)
Ejemplo n.º 2
0
 def getCfrags():
     """Yield the c fragments that are not blocked.

     Accumulates the entries of ``superAtoms`` from the first one onwards
     and, for every ``i`` in ``1 .. N - 2``, yields the tuple
     ``('c<i>', atomCnt2string(<accumulated counts>), i)`` unless ``'c<i>'``
     is in ``blockedFragments``. The entry with ``i == 0`` is accumulated
     but never yielded.

     ``N``, ``superAtoms``, ``blockedFragments``, ``lCnt`` and
     ``atomCnt2string`` are names defined outside this function.
     """
     # One extra hydrogen is added to meet the definition of a c fragment.
     running_total = lCnt({'H': 1})
     for idx in range(N - 1):
         running_total += superAtoms[idx]
         name = 'c' + str(idx)
         if name in blockedFragments or idx == 0:
             continue
         # Format a fresh lCnt built from the running total, not the
         # accumulator itself.
         yield (name, atomCnt2string(lCnt(running_total)), idx)
Ejemplo n.º 3
0
def makeBricks():
    """Prepare the base counters of atoms.

    These will be later collected into superatoms that finally make up the
    observed fragments.

    Returns
    -------
    bricks : dict
        ``bricks[aa][tag]`` holds the element content of one part of the
        amino acid ``aa``. ``tag`` is 'L' for the part containing the vertex
        at ``NalphaIDX``, 'R' for the part containing the vertex at
        ``CcarboIDX`` (when it is not also the 'L' part), and 'C' for any
        other part.

    Notes
    -----
    The graphs returned by ``AminoAcids().get()`` are modified in place:
    ``delete_edges`` is called on each of them.
    """
    amino_acids = AminoAcids().get()
    bricks = {}
    for aa in amino_acids:
        record = amino_acids[aa]
        graph = record['graph']
        # Remember the two backbone vertices by name: names stay attached to
        # the vertices after the graph is split into components.
        n_alpha_name = graph.vs[record['NalphaIDX']]['name']
        c_carbo_name = graph.vs[record['CcarboIDX']]['name']
        # NOTE(review): presumably removes every edge whose 'Roep' attribute
        # is set, so that the graph falls apart into its bricks - confirm
        # against the graph library's edge-filtering rules.
        graph.delete_edges(Roep_ne=None)
        components = graph.decompose()
        brick = {}
        if len(components) == 2:
            # Only proline is expected to split into two parts; it then gets
            # an empty central brick unless a component overrides it below.
            assert aa == 'P'
            brick['C'] = lCnt()
        # Walk the components from last to first; when two components map to
        # the same tag, the one processed later replaces the earlier one.
        for component in reversed(components):
            names = component.vs['name']
            if n_alpha_name in names:
                tag = 'L'
            elif c_carbo_name in names:
                tag = 'R'
            else:
                tag = 'C'
            brick[tag] = elementContent(component)
        bricks[aa] = brick
    return bricks
Ejemplo n.º 4
0
def standardize(modifications):
    """Standardize modifications so that they meet the internal nomenclature scheme.

    Parameters
    ----------
    modifications : dict
        Maps a tag to the atom counts of a modification. A tag is a backbone
        atom name ('N', 'Calpha' or 'C') immediately followed by an amino
        acid number, e.g. 'Calpha11'. The values are passed to ``lCnt``.

    Returns
    -------
    out : defaultdict
        The atomic modifications: ``out[aa_idx][brick]`` is an ``lCnt``,
        where ``aa_idx`` is the number from the tag decreased by one and
        ``brick`` is 'L', 'C' or 'R'. Looking up a missing entry creates
        ``lCnt()``.

    Raises
    ------
    ValueError
        If a tag does not start with letters followed by digits.
    KeyError
        If the letters of a tag are not one of 'N', 'Calpha' or 'C'.

    Notes
    -----
    It was easier for me to think of an amino acid as if it was composed out of three bricks: the left one, the center one, and the right one. The left one corresponds to the group with nitrogen, the center one - to the alpha carbon (including the side chain), and the right one - to the other carbon atom.
    """
    backboneAtom2aaNomen = {'N': 'L', 'Calpha': 'C', 'C': 'R'}
    R = defaultdict(lambda: defaultdict(lCnt))
    for tag, atomCnt in modifications.items():
        match = re.match(r"([a-z]+)([0-9]+)", tag, re.I)
        if match is None:
            # Without this guard the code below would either fail on unbound
            # names or silently reuse the atom and index of the previous tag.
            raise ValueError(
                "Cannot parse modification tag {!r}: expected an atom name "
                "followed by a number, e.g. 'Calpha11'.".format(tag))
        aa, aa_number = match.groups()
        # Tags carry the amino acid number; the index stored is one less.
        R[int(aa_number) - 1][backboneAtom2aaNomen[aa]] = lCnt(atomCnt)
    return R
Ejemplo n.º 5
0
def standardize(modifications):
    """Standardize modifications so that they meet the internal nomenclature scheme.

    Every key of ``modifications`` is indexed as ``(atom, number)``: ``atom``
    is one of 'N', 'Calpha', 'C' and is translated to the brick tag 'L', 'C'
    or 'R' respectively, while ``number`` is decreased by one. The value is
    wrapped in ``lCnt`` and stored under ``result[number - 1][brick_tag]``.

    Returns a defaultdict of defaultdicts whose missing leaves are ``lCnt()``.
    """
    brick_of_atom = {'N': 'L', 'Calpha': 'C', 'C': 'R'}
    standardized = defaultdict(lambda: defaultdict(lCnt))
    for tag in modifications:
        counts = lCnt(modifications[tag])
        per_residue = standardized[tag[1] - 1]
        per_residue[brick_of_atom[tag[0]]] = counts
    return standardized
Ejemplo n.º 6
0
def make_cz_fragments(fasta, modifications):
    """Prepare the atom counts of the precursor and of the c and z fragments.

    The amino acids of ``fasta`` are cut into "super-atoms": the first one is
    the 'L' brick of the first amino acid plus one H, every inner one is the
    'C' and 'R' bricks of one amino acid plus the 'L' brick of the next one,
    and the last one is the 'C' and 'R' bricks of the last amino acid plus
    O and H. Each brick is the base brick from ``makeBricks()`` plus
    ``modifications[position][tag]`` (position counted from 0).

    Returns a tuple of three generator functions (call them to iterate):
    one yielding the single precursor tuple, one yielding c fragments and
    one yielding z fragments, each as ``(name, atomCnt2string(counts), size)``.
    """
    bricks = makeBricks()

    def modified_brick(tag, aa, position):
        # Base brick plus the modification registered for this position.
        total = bricks[aa][tag] + modifications[position][tag]
        if countIsNegative(total):
            print("Attention: your modification has an unexpected effect. Part of your molecule now has negative atom count. Bear that in mind while publishing your results.")
        return total

    superAtoms = []
    pending = lCnt()
    for position, aa in enumerate(fasta):
        # The left brick closes the super-atom opened by the previous residue.
        pending += modified_brick('L', aa, position)
        superAtoms.append(pending)
        pending = modified_brick('C', aa, position) + modified_brick('R', aa, position)
    pending += lCnt({'O': 1, 'H': 1})
    superAtoms.append(pending)
    superAtoms[0] += lCnt({'H': 1})
    N = len(superAtoms)

    def getPrecursor():
        whole_molecule = sum(superAtoms)
        yield ('precursor', atomCnt2string(whole_molecule), len(fasta))

    blockedFragments = prolineBlockedFragments(fasta)

    def getCfrags():
        # One extra hydrogen is added to meet the definition of a c fragment.
        running_total = lCnt({'H': 1})
        for idx in range(N - 1):
            running_total += superAtoms[idx]
            name = 'c' + str(idx)
            # 'c0' is accumulated but never reported.
            if name in blockedFragments or idx == 0:
                continue
            yield (name, atomCnt2string(lCnt(running_total)), idx)

    def getZfrags():
        running_total = lCnt()
        # Visit super-atoms N-1, N-2, ..., 1; the very first one is never
        # part of a z fragment.
        for size, atoms in enumerate(reversed(superAtoms[1:]), 1):
            running_total += atoms
            name = 'z' + str(size)
            if name in blockedFragments:
                continue
            yield (name, atomCnt2string(lCnt(running_total)), size)

    return getPrecursor, getCfrags, getZfrags
Ejemplo n.º 7
0
def elementContent(G):
    """Count how many atoms of each element make up the graph of a molecule.

    Reads the 'elem' attribute of the vertices of ``G`` and returns an
    ``lCnt`` in which every value found is counted once per vertex.
    """
    tally = lCnt()
    elements = G.vs['elem']
    for symbol in elements:
        tally[symbol] += 1
    return tally
Ejemplo n.º 8
0
def make_cz_fragments(fasta, modifications):
    """Prepares the precursor and the c and z fragments atom counts.

    Parameters
    ----------
    fasta : str
        The fasta of the studied molecular species. It is iterated element
        by element and every element is used as a key into the bricks.

    modifications : mapping
        Indexed as ``modifications[position][tag]``, with ``position`` the
        0-based position in ``fasta`` and ``tag`` one of 'L', 'C', 'R'.
        Every such pair is looked up, so it must either be present or be
        supplied by a defaultdict.

    Returns
    -------
    out : tuple
        A tuple of three generator functions (call them to iterate): the
        precursor, the c fragments and the z fragments. Each yields tuples
        ``(name, atomCnt2string(counts), size)``.
    """

    data_path = pkg_resources.resource_filename('MassTodonPy', 'Data/')
    # NOTE: unpickling is only acceptable because this path is resolved inside
    # the MassTodonPy package; never point it at untrusted data.
    with open(data_path + 'amino_acids.txt', 'rb') as brick_file:
        bricks = pickle.load(brick_file)

    def getBrick(aaPart, aa, aaNo):
        # Base brick plus the modification registered for this position. The
        # position is passed in explicitly rather than read from the loop
        # variable of the enclosing function.
        brick = bricks[aa][aaPart] + modifications[aaNo][aaPart]
        if countIsNegative(brick):
            print(
                "Attention: your modification has an unexpected effect. Part of your molecule now has negative atom count. Bear that in mind while publishing your results."
            )
        return brick

    # Super-atoms: the first is the 'L' brick of the first amino acid, each
    # inner one joins the 'C' and 'R' bricks of one amino acid with the 'L'
    # brick of the next, and the last is the 'C' and 'R' bricks of the last
    # amino acid.
    superAtoms = []
    sA = lCnt()
    for aaNo, aa in enumerate(fasta):
        sA += getBrick('L', aa, aaNo)
        superAtoms.append(sA)
        sA = getBrick('C', aa, aaNo) + getBrick('R', aa, aaNo)
    sA += lCnt({'O': 1, 'H': 1})
    superAtoms.append(sA)
    superAtoms[0] += lCnt({'H': 1})
    N = len(superAtoms)

    def getPrecursor():
        precursor = sum(superAtoms)
        yield ('precursor', atomCnt2string(precursor), len(fasta))

    blockedFragments = prolineBlockedFragments(fasta)

    def getCfrags():
        # Adding one extra hydrogen to meet the definition of a c fragment.
        cFrag = lCnt({'H': 1})
        for i in range(N - 1):
            cFrag += superAtoms[i]
            cFrag_tmp = lCnt(cFrag)
            frag_type = 'c' + str(i)
            # 'c0' is accumulated but never reported.
            if frag_type not in blockedFragments and i != 0:
                yield (frag_type, atomCnt2string(cFrag_tmp), i)

    def getZfrags():
        zFrag = lCnt()
        for i in range(1, N):
            # Accumulate from the last super-atom backwards.
            zFrag += superAtoms[N - i]
            zFrag_tmp = lCnt(zFrag)
            frag_type = 'z' + str(i)
            if frag_type not in blockedFragments:
                yield (frag_type, atomCnt2string(zFrag_tmp), i)

    return getPrecursor, getCfrags, getZfrags