예제 #1
0
class CoarseGrained:
    """Lookup tables for mapping an atomistic residue list to a coarse-grained one.

    The part of the class shown here defines no methods, only class-level
    tables:

    * ``mapping``  -- residue name -> atom-name groups, one group per bead.
    * ``names``    -- residue name -> bead names, in the order of ``mapping``.
    * ``restest``  -- truncated residue name -> (test string, full name) pairs.
    * ``mass``     -- crude masses keyed by single-character strings.

    Open design notes carried over from the original comments: the class
    should get an ``__init__`` taking a residue list, atom list, PyMOL
    selection or ChemPy model; the result should be stored in a list-type
    attribute; and the class should have ``pdbstr`` and ``grostr`` methods.
    """

    # NOTE(review): the trailing '#@#' markers are kept exactly where they
    # were found; their purpose is not evident from this part of the file.

    # ------------------------------------------------------------------
    # Standard mapping groups
    # ------------------------------------------------------------------

    # Protein backbone atom names (one space-separated string).
    bb = "N CA C O H H1 H2 H3 O1 O2"  #@#

    # Lipid tails: one string of atom names per bead.
    palmitoyl1 = FUNC.nsplit(
        "C1B C1C C1D C1E",
        "C1F C1G C1H C1I",
        "C1J C1K C1L C1M",
        "C1N C1O C1P",
    )  #@#
    palmitoyl2 = FUNC.nsplit(
        "C2B C2C C2D C2E",
        "C2F C2G C2H C2I",
        "C2J C2K C2L C2M",
        "C2N C2O C2P",
    )  #@#
    oleyl1 = FUNC.nsplit(
        "C1B C1C C1D C1E",
        "C1F C1G C1H",
        "C1I C1J",
        "C1K C1L C1M C1N",
        "C1O C1P C1Q C1R",
    )  #@#
    oleyl2 = FUNC.nsplit(
        "C2B C2C C2D C2E",
        "C2F C2G C2H",
        "C2I C2J",
        "C2K C2L C2M C2N",
        "C2O C2P C2Q C2R",
    )  #@#
    # TODO: tails without a definition yet -- lauroyl1, stearoyl1,
    # arachidonoyl1, linoleyl1, hexanoyl1.

    # Lipid head groups.
    # TODO: head groups without a definition yet -- phosphatidylcholine,
    # phosphatidylserine.
    phosphatydilethanolamine = FUNC.nsplit(
        "N H1 H2 H3 CA",
        "CB P OA OB OC OD",
        "CC CD OG C2A OH",
        "CE OE C1A OF",
    )  #@#
    phosphatidylglycerol = FUNC.nsplit(
        "H1 O1 CA H2 O2 CB",
        "CC P OA OB OC OD",
        "CD CE OG C2A OH",
        "CF OE C1A OF",
    )  #@#

    # DNA backbone groups, kept as a plain tuple of strings (it is not
    # passed through FUNC.nsplit here).
    dna_bb = (
        "P OP1 OP2 O5' O3'",
        "C5' O4' C4'",
        "C3' O3' C2' C1'",
    )

    # ------------------------------------------------------------------
    # The mapping dictionary
    # ------------------------------------------------------------------
    # Each residue name maps to a list whose elements name the atoms that
    # contribute to the corresponding bead, in the standard order of the
    # coarse-grained beads for that residue.  Only atom names that also
    # occur in the residue's actual atom list are used to place a bead,
    # which lets one definition serve several states of a residue
    # (e.g., GLU/GLUH).
    # For convenience the groups are written as strings and converted
    # into a list of lists by FUNC.nsplit.
    mapping = {
        # Amino acids
        "ALA": FUNC.nsplit(bb + " CB"),
        "CYS": FUNC.nsplit(bb, "CB SG"),
        "ASP": FUNC.nsplit(bb, "CB CG OD1 OD2"),
        "GLU": FUNC.nsplit(bb, "CB CG CD OE1 OE2"),
        "PHE": FUNC.nsplit(
            bb,
            "CB CG CD1 HD1",
            "CD2 HD2 CE2 HE2",
            "CE1 HE1 CZ HZ",
        ),
        "GLY": FUNC.nsplit(bb),
        "HIS": FUNC.nsplit(
            bb,
            "CB CG",
            "CD2 HD2 NE2 HE2",
            "ND1 HD1 CE1 HE1",
        ),
        # Charged histidine.
        "HIH": FUNC.nsplit(
            bb,
            "CB CG",
            "CD2 HD2 NE2 HE2",
            "ND1 HD1 CE1 HE1",
        ),
        "ILE": FUNC.nsplit(bb, "CB CG1 CG2 CD CD1"),
        "LYS": FUNC.nsplit(bb, "CB CG CD", "CE NZ HZ1 HZ2 HZ3"),
        "LEU": FUNC.nsplit(bb, "CB CG CD1 CD2"),
        "MET": FUNC.nsplit(bb, "CB CG SD CE"),
        "ASN": FUNC.nsplit(bb, "CB CG ND1 ND2 OD1 OD2 HD11 HD12 HD21 HD22"),
        "PRO": FUNC.nsplit(bb, "CB CG CD"),
        "HYP": FUNC.nsplit(bb, "CB CG CD OD"),
        "GLN": FUNC.nsplit(
            bb,
            "CB CG CD OE1 OE2 NE1 NE2 HE11 HE12 HE21 HE22",
        ),
        "ARG": FUNC.nsplit(
            bb,
            "CB CG CD",
            "NE HE CZ NH1 NH2 HH11 HH12 HH21 HH22",
        ),
        "SER": FUNC.nsplit(bb, "CB OG HG"),
        "THR": FUNC.nsplit(bb, "CB OG1 HG1 CG2"),
        "VAL": FUNC.nsplit(bb, "CB CG1 CG2"),
        "TRP": FUNC.nsplit(
            bb,
            "CB CG CD2",
            "CD1 HD1 NE1 HE1 CE2",
            "CE3 HE3 CZ3 HZ3",
            "CZ2 HZ2 CH2 HH2",
        ),
        "TYR": FUNC.nsplit(
            bb,
            "CB CG CD1 HD1",
            "CD2 HD2 CE2 HE2",
            "CE1 HE1 CZ OH HH",
        ),
        # Lipids: head group followed by the two tails.
        "POPE": phosphatydilethanolamine + palmitoyl1 + oleyl2,
        "DOPE": phosphatydilethanolamine + oleyl1 + oleyl2,
        "DPPE": phosphatydilethanolamine + palmitoyl1 + palmitoyl2,
        "POPG": phosphatidylglycerol + palmitoyl1 + oleyl2,
        "DOPG": phosphatidylglycerol + oleyl1 + oleyl2,
        "DPPG": phosphatidylglycerol + palmitoyl1 + palmitoyl2,
        # DNA nucleotides: three backbone groups, then the base groups.
        "DA": FUNC.nsplit(
            "P OP1 OP2 O5' O3' O1P O2P",
            "C5' O4' C4'",
            "C3' C2' C1'",
            "N9 C4",
            "C8 N7 C5",
            "C6 N6 N1",
            "C2 N3",
        ),
        "DG": FUNC.nsplit(
            "P OP1 OP2 O5' O3' O1P O2P",
            "C5' O4' C4'",
            "C3' C2' C1'",
            "N9 C4",
            "C8 N7 C5",
            "C6 O6 N1",
            "C2 N2 N3",
        ),
        "DC": FUNC.nsplit(
            "P OP1 OP2 O5' O3' O1P O2P",
            "C5' O4' C4'",
            "C3' C2' C1'",
            "N1 C6",
            "C5 C4 N4",
            "N3 C2 O2",
        ),
        "DT": FUNC.nsplit(
            "P OP1 OP2 O5' O3' O1P O2P",
            "C5' O4' C4'",
            "C3' C2' C1'",
            "N1 C6",
            "C5 C4 O4 C7 C5M",
            "N3 C2 O2",
        ),
    }

    # ------------------------------------------------------------------
    # Bead names
    # ------------------------------------------------------------------

    # Generic names for side chain beads
    residue_bead_names = FUNC.spl("BB SC1 SC2 SC3 SC4")
    # Generic names for DNA beads
    residue_bead_names_dna = FUNC.spl("BB1 BB2 BB3 SC1 SC2 SC3 SC4")

    # Bead names per residue, following the order used in 'mapping'.
    # Only the two lipids below are listed explicitly; amino acids and
    # DNA residues receive the default names added afterwards.
    names = {
        "POPE":
        "NH3 PO4 GL1 GL2 C1A C2A C3A C4A C1B C2B D3B C4B C5B".split(),
        "POPG":
        "GLC PO4 GL1 GL2 C1A C2A C3A C4A C1B C2B D3B C4B C5B".split(),
    }
    # Default bead names for every amino acid listed in AA3.
    names.update(dict.fromkeys(AA3, ("BB", "SC1", "SC2", "SC3", "SC4")))
    # Default bead names for every DNA residue listed in nucleic.
    names.update(
        dict.fromkeys(
            nucleic,
            ("BB1", "BB2", "BB3", "SC1", "SC2", "SC3", "SC4"),
        ))

    # ------------------------------------------------------------------
    # Residue name disambiguation
    # ------------------------------------------------------------------
    # Lets a four-letter residue name be recovered from a three-letter
    # one, e.g. after truncation to fit the PDB format; this is handy
    # for telling lipid types apart.  Each entry is a list of
    # (test, name) pairs: 'test' is a string and 'name' is the residue
    # name to apply if eval(test) is True.  The test assumes a local or
    # global array 'atoms' holding the residue's atom names in the
    # correct order.
    restest = {
        "POP": [
            ('atoms[0] == "CA"', "POPG"),
            ('atoms[0] == "N"', "POPE"),
        ],
    }

    # ------------------------------------------------------------------
    # Masses
    # ------------------------------------------------------------------
    # Crude masses for the weighted average; united atoms are not taken
    # into account.  This probably causes only minor deviations while
    # keeping things simple.
    mass = {
        'H': 1,
        'C': 12,
        'N': 14,
        'O': 16,
        'S': 32,
        'P': 31,
        'M': 0,
    }