Ejemplo n.º 1
0
def chain2pos_scan_str(chain, pdb, mutation_set='a'):
  """
  Build the FoldX PositionScan line for one chain of a structure.

  Takes a chain ID and a model.PDBFile object, returns a string
  suitable as the PositionScan line for FoldX: a comma-separated list of
  "<one-letter aa><chain><residue number><mutation_set>" entries, one for
  every residue of the chain whose name three_to_one() can convert.

  Only the first model of the parsed structure is scanned.  An empty string
  is returned when the chain is absent or has no convertible residue.
  """
  parser = PDBParser(PERMISSIVE=1)
  pdbfn = pdb.fullpath()
  struct = parser.get_structure(pdb.uuid, pdbfn)[0]

  entries = []
  for c in Selection.unfold_entities(struct, 'C'):
    if c.id != chain:
      continue
    for r in c:
      try:
        aa = three_to_one(r.get_resname())
      except KeyError:
        # non-native amino acid or water: leave it out of the scan
        continue
      entries.append('%s%s%i%s' % (aa, chain, r.id[1], mutation_set))

  # join() yields no trailing comma, so no slicing is needed afterwards
  return ','.join(entries)
Ejemplo n.º 2
0
 def create_seqrecord(sequence, name):
     """Build a protein SeqRecord from a list of residues.

     Each item of ``sequence`` contributes one symbol: "-" for None (a gap),
     the one-letter code for a residue whose name three_to_one() knows, and
     "?" for a residue whose name it does not know.  ``name`` becomes the
     record id.
     """
     symbols = []
     for aa in sequence:
         if aa is None:
             symbols.append("-")
             continue
         try:
             symbols.append(three_to_one(aa.get_resname()))
         except KeyError:
             # residue name has no one-letter equivalent
             symbols.append("?")
     return SeqRecord(Seq("".join(symbols), generic_protein), id=name)
Ejemplo n.º 3
0
def getResidueStrings(structure):
    """Return one one-letter sequence string per chain of every model.

    Standard amino acids are converted with three_to_one(); the histidine
    variants HIE/HID map to 'H', the cysteine variants CYX/CYM map to 'C',
    and every other residue name maps to 'X'.  The strings are returned in
    model order, then chain order.
    """
    seqs = []
    for model in structure:
        for ch in model.get_chains():
            letters = []
            # Walk the residues of THIS chain only (not of the whole model),
            # so each string holds exactly one chain's sequence.
            for residue in ch.get_residues():
                resname = residue.get_resname()
                if is_aa(resname, standard=True):
                    letters.append(three_to_one(resname))
                elif resname in {'HIE', 'HID'}:
                    letters.append('H')
                elif resname in {'CYX', 'CYM'}:
                    letters.append('C')
                else:
                    letters.append('X')
            seqs.append(''.join(letters))
    return seqs
Ejemplo n.º 4
0
Archivo: itp.py Proyecto: zyxue/pybin
def print_martini_dihedrals(dihedrals, atoms, params):
    """
    Print one dihedral line per parameter set of every dihedral.

    dihedrals: iterable of 4-item sequences of atom ids
    atoms: the id:resname dict created by read_atoms
    params should be in the form:
    params = {
        # i.e. phase angle, force constant, multiplicity
        'GVPG': [[phi1, k1, n1], [ph2, k2, n2], [phi3, k3, n3] ... ],
        'VPGV': [[phi1, k1, n1], [ph2, k2, n2], [phi3, k3, n3] ... ],
        ....
        }
    The key is the one-letter residue code of the four atoms of a dihedral,
    in order.  A dihedral whose key is missing from params raises KeyError.

    Since MARTINI uses proper dih. function type 1, the function-type column
    is always printed as 1.
    """
    row = "{0:5d}{1:6d}{2:6d}{3:6d}{4:6d}{5:12.6f}{6:12.6f}{7:6d}"
    for d in dihedrals:
        dih_name = ''.join([three_to_one(atoms[di]) for di in d])
        for (phii, ki, ni) in params[dih_name]:
            # Single-argument print(...) prints the same text on Python 2
            # and Python 3.
            print(row.format(d[0], d[1], d[2], d[3], 1, phii, ki, ni))
Ejemplo n.º 5
0
print("parsing PDB")

PDB_list = glob.glob("../../../../PDBMining/*/*.ent")

p = PDBParser()
secondaryStruct = []
Valid = [False for _ in proteins]
PDBNames = []
for f in PDB_list:
    name = os.path.splitext(basename(f))[0]
    PDBNames.append(name)
    struct = p.get_structure(name,f)
    res_list = Selection.unfold_entities(struct, 'R')
    try:
        seq = [three_to_one(a.get_resname()).lower() for a in res_list]
    except (KeyError):
        seq = []
    try:
        if seq == [a for a in proteins[nameInd[name]]]:
            Valid[nameInd[name]] = True
    except KeyError:
        pass
    struct_dssp = p.get_structure(name,f)
    try:
        dssp = DSSP(struct_dssp[0], f)
    except Exception:
        Valid[nameInd[name]] = False
    a_keys = list(dssp.keys())
    sec = [dssp[a][2] for a in a_keys]
    try:
Ejemplo n.º 6
0
    def __init__(self,
                 model,
                 in_file,
                 dssp="dssp",
                 acc_array="Sander",
                 file_type='PDB'):
        """Create a DSSP object.

        Parameters
        ----------
        model : Model
            The first model of the structure
        in_file : string
            Either a PDB file or a DSSP file.
        dssp : string
            The dssp executable (ie. the argument to os.system)
        acc_array : string
            Accessible surface area (ASA) from either Miller et al. (1987),
            Sander & Rost (1994), or Wilke: Tien et al. 2013, as string
            Sander/Wilke/Miller. Defaults to Sander.
        file_type: string
            File type switch, either PDB or DSSP with PDB as default.

        Notes
        -----
        For a PDB input the executable is run once.  Only if that raises
        OSError is the alternative name tried ('dssp' <-> 'mkdssp'); any
        other executable name re-raises the error.

        The DSSP values are also stored in ``xtra`` of every matched residue.
        A residue reported by DSSP that cannot be found in the chain raises
        KeyError; an amino-acid mismatch between DSSP and the structure
        raises PDBException.

        """
        self.residue_max_acc = residue_max_acc[acc_array]

        # create DSSP dictionary
        file_type = file_type.upper()
        assert (file_type in ['PDB', 'DSSP'])
        # If the input file is a PDB file run DSSP and parse output:
        if file_type == 'PDB':
            # Newer versions of DSSP program call the binary 'mkdssp', so
            # calling 'dssp' will not work in some operating systems
            # (Debian distribution of DSSP includes a symlink for 'dssp' argument)
            try:
                dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
            except OSError:  # TODO: Use FileNotFoundError once drop Python 2
                if dssp == 'dssp':
                    dssp = 'mkdssp'
                elif dssp == 'mkdssp':
                    dssp = 'dssp'
                else:
                    raise
                # Retry with the alternative executable name.  This runs only
                # after a failure, so a successful first run is not repeated.
                dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
        # If the input file is a DSSP file just parse it directly:
        elif file_type == 'DSSP':
            dssp_dict, dssp_keys = make_dssp_dict(in_file)

        dssp_map = {}
        dssp_list = []

        def resid2code(res_id):
            """Serialize a residue's resseq and icode for easy comparison."""
            return '%s%s' % (res_id[1], res_id[2])

        # Now create a dictionary that maps Residue objects to
        # secondary structure and accessibility, and a list of
        # (residue, (secondary structure, accessibility)) tuples
        for key in dssp_keys:
            chain_id, res_id = key
            chain = model[chain_id]
            try:
                res = chain[res_id]
            except KeyError:
                # In DSSP, HET field is not considered in residue identifier.
                # Thus HETATM records may cause unnecessary exceptions.
                # (See 3jui chain A res 593.)
                # Try the lookup again with all HETATM other than water
                res_seq_icode = resid2code(res_id)
                for r in chain:
                    if r.id[0] not in (' ', 'W'):
                        # Compare resseq + icode
                        if resid2code(r.id) == res_seq_icode:
                            # Found a matching residue
                            res = r
                            break
                else:
                    raise KeyError(res_id)

            # For disordered residues of point mutations, Biopython uses the
            # last one as default, But DSSP takes the first one (alternative
            # location is blank, A or 1). See 1h9h chain E resi 22.
            # Here we select the res in which all atoms have altloc blank, A or
            # 1. If no such residues are found, simply use the first one appears
            # (as DSSP does).
            if res.is_disordered() == 2:
                for rk in res.disordered_get_id_list():
                    # All atoms in the disordered residue should have the same
                    # altloc, so it suffices to check the altloc of the first
                    # atom.
                    altloc = res.child_dict[rk].get_list()[0].get_altloc()
                    if altloc in tuple('A1 '):
                        res.disordered_select(rk)
                        break
                else:
                    # Simply select the first one
                    res.disordered_select(res.disordered_get_id_list()[0])

            # Sometimes point mutations are put into HETATM and ATOM with altloc
            # 'A' and 'B'.
            # See 3piu chain A residue 273:
            #   <Residue LLP het=H_LLP resseq=273 icode= >
            #   <Residue LYS het=  resseq=273 icode= >
            # DSSP uses the HETATM LLP as it has altloc 'A'
            # We check the altloc code here.
            elif res.is_disordered() == 1:
                # Check altloc of all atoms in the DisorderedResidue. If it
                # contains blank, A or 1, then use it.  Otherwise, look for HET
                # residues of the same seq+icode.  If not such HET residues are
                # found, just accept the current one.
                altlocs = set(a.get_altloc() for a in res.get_unpacked_list())
                if altlocs.isdisjoint('A1 '):
                    # Try again with all HETATM other than water
                    res_seq_icode = resid2code(res_id)
                    for r in chain:
                        if r.id[0] not in (' ', 'W'):
                            if resid2code(r.id) == res_seq_icode and \
                               r.get_list()[0].get_altloc() in tuple('A1 '):
                                res = r
                                break

            (aa, ss, acc, phi, psi, dssp_index, NH_O_1_relidx, NH_O_1_energy,
             O_NH_1_relidx, O_NH_1_energy, NH_O_2_relidx, NH_O_2_energy,
             O_NH_2_relidx, O_NH_2_energy) = dssp_dict[key]

            res.xtra["SS_DSSP"] = ss
            res.xtra["EXP_DSSP_ASA"] = acc
            res.xtra["PHI_DSSP"] = phi
            res.xtra["PSI_DSSP"] = psi
            res.xtra["DSSP_INDEX"] = dssp_index
            res.xtra["NH_O_1_RELIDX_DSSP"] = NH_O_1_relidx
            res.xtra["NH_O_1_ENERGY_DSSP"] = NH_O_1_energy
            res.xtra["O_NH_1_RELIDX_DSSP"] = O_NH_1_relidx
            res.xtra["O_NH_1_ENERGY_DSSP"] = O_NH_1_energy
            res.xtra["NH_O_2_RELIDX_DSSP"] = NH_O_2_relidx
            res.xtra["NH_O_2_ENERGY_DSSP"] = NH_O_2_energy
            res.xtra["O_NH_2_RELIDX_DSSP"] = O_NH_2_relidx
            res.xtra["O_NH_2_ENERGY_DSSP"] = O_NH_2_energy

            # Relative accessibility
            resname = res.get_resname()
            try:
                rel_acc = acc / self.residue_max_acc[resname]
            except KeyError:
                # Invalid value for resname
                rel_acc = 'NA'
            else:
                # Cap the ratio at 1.0
                if rel_acc > 1.0:
                    rel_acc = 1.0
            res.xtra["EXP_DSSP_RASA"] = rel_acc
            # Verify if AA in DSSP == AA in Structure
            # Something went wrong if this is not true!
            # NB: DSSP uses X often
            try:
                resname = three_to_one(resname)
            except KeyError:
                resname = 'X'
            if resname == "C":
                # DSSP renames C in C-bridges to a,b,c,d,...
                # - we rename it back to 'C'
                if _dssp_cys.match(aa):
                    aa = 'C'
            # Take care of HETATM again
            if (resname != aa) and (res.id[0] == ' ' or aa != 'X'):
                raise PDBException("Structure/DSSP mismatch at %s" % res)

            dssp_vals = (dssp_index, aa, ss, rel_acc, phi, psi, NH_O_1_relidx,
                         NH_O_1_energy, O_NH_1_relidx, O_NH_1_energy,
                         NH_O_2_relidx, NH_O_2_energy, O_NH_2_relidx,
                         O_NH_2_energy)

            dssp_map[key] = dssp_vals
            dssp_list.append(dssp_vals)

        AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys,
                                            dssp_list)
Ejemplo n.º 7
0
def get_one_letter(list_of_three):
    """Return the one-letter codes for a sequence of three-letter residue names.

    Each name is converted with three_to_one(); the result is a list in the
    same order as the input.
    """
    return [three_to_one(three_letter) for three_letter in list_of_three]
Ejemplo n.º 8
0
            print("You should not be here...")
        rowNum += 1
print("Number of proteins: {}".format(len(threeline_data)))

print(bridges.keys())
print("Number of total bridges: {}".format(len(bridges["all"])))
print("Number of membrane bridges: {}".format(len(bridges["mems"])))
print("Number of local bridges: {}".format(len(bridges["local"])))
r = 0
w = 0
index = 0
for key, bridge in bridges["local"].items():
    if key in threeline_data:
        seq = threeline_data[key][0]
        for b in bridge:
            first_aa = three_to_one(b[1])
            first_ix = b[0]
            second_aa = three_to_one(b[3])
            second_ix = b[2]
            if first_ix > len(seq):
                index += 1
                continue
            if first_aa == seq[first_ix - 1]:
                r += 1
            else:
                print(key)
                print(seq)
                print(b)
                w += 1

            # print(seq[first_ix-1], first_aa)
Ejemplo n.º 9
0
# parsing PDB files

# Collect every *.ent file one directory level below PDBMining.
PDB_list = glob.glob("../../../../PDBMining/*/*.ent")

p = PDBParser()

# Valid[i] becomes True when a parsed structure reproduces proteins[i].
# `proteins` and `nameInd` are defined outside this excerpt.
Valid = [False for _ in proteins]
PDBNames = []
for f in PDB_list:
    # The structure name is the file name without directory and extension.
    name = os.path.splitext(basename(f))[0]
    PDBNames.append(name)
    struct = p.get_structure(name,f)
    # Flatten the structure to its list of residues.
    res_list = Selection.unfold_entities(struct, 'R')
    try:
        # Lower-case one-letter code per residue; a single residue name that
        # three_to_one() does not know discards the whole sequence.
        seq = [three_to_one(a.get_resname()).lower() for a in res_list]
    except (KeyError):
        seq = []
    try:
        # NOTE(review): seq is a list, so this can only be True if
        # proteins[...] is itself a list of letters - confirm.
        if seq == proteins[nameInd[name]]:
            Valid[nameInd[name]] = True
    except KeyError:
        # name is not a key of nameInd: nothing to validate
        pass

# Map each structure name to its position in PDBNames.
PDBInd = dict((c, i) for i, c in enumerate(PDBNames))

occurences = [-1 for _ in proteins]

ind = -1
for rec in record:
    ind +=1
Ejemplo n.º 10
0
    def computeOneFileFromPDB(self, fileName, chainType):
        '''
        Gets the seq to struct mapping for a given pdb file.

        For each chain of the first model that is kept (see below) a fasta
        file and a ".seqStruMap" file are written, and the mapping is stored
        in self.seqToStruct, self.structToSeq, self.seqToStructFnames and
        self.seqsDict[chainType].

        A chain is skipped when fewer than half of its amino-acid residues
        are standard, or when it has no more than SMALL_CHAINS_LIMIT residues.

        @param fileName: str. fname to pdb file
        @param chainType: str. "l" for ligand and "r" for receptor
        '''
        self.seqsDict[chainType] = {}

        baseName = os.path.split(fileName)[-1]
        if fileName.endswith(("_r_u.pdb", "_l_u.pdb")):
            # The chain type is already part of the file name
            prefixAndChainType = baseName.split("_u.pdb")[0]
        else:
            prefixAndChainType = baseName.split(".pdb")[0] + "_" + chainType

        struct = self.parser.get_structure(prefixAndChainType, fileName)
        for chain in struct[0]:
            chainId = chain.get_id()
            if chainId == " ":
                chainId = "*"
            nResStandard = sum(
                1 for res in chain if is_aa(res, standard=True))
            # Amino-acid residues (standard or not) sorted by (resseq, icode)
            resList = [
                res for res in sorted(chain.child_list,
                                      key=lambda x: x.get_id()[1:])
                if is_aa(res, standard=False)
            ]  #New version feature
            nResAll = len(resList)
            if nResStandard < int(0.5 * nResAll):
                continue  #skip if most residues are not standard
            if nResAll <= SMALL_CHAINS_LIMIT:
                continue  #Too small chains will not be considered

            sequence = []
            resIds = []
            for i, res in enumerate(resList):
                try:
                    letter = three_to_one(res.resname)
                except KeyError:  # New version feature
                    print("Exception", res)
                    letter = "X"
                    if i == (nResAll - 1):
                        break  #This case is for TCGR....TLRX where X is GDP or other molecule
                resId = res.get_full_id()[3]
                sequence.append(letter)
                resIds.append("%d;%s;%s" % (i, letter, resId))
                self.seqToStruct[(chainType, chainId, i)] = resId
                self.structToSeq[(chainType, chainId, resId)] = i
            sequence = "".join(sequence)

            header = ">" + prefixAndChainType + "_" + chainId + "\n"

            outNameFasta = os.path.join(
                self.fastaOutDir,
                prefixAndChainType + "_" + chainId + "_u.fasta")
            # "with" closes the file even if the write fails
            with open(outNameFasta, "w") as f:
                f.write(header + sequence)

            resIds = "\n".join(resIds)

            outName = os.path.join(
                self.seqToStructDir,
                prefixAndChainType + "_" + chainId + "_u.seqStruMap")
            self.seqToStructFnames[(chainType,
                                    chainId)] = (outName, prefixAndChainType)
            with open(outName, "w") as f:
                f.write(header + resIds)

            self.seqsDict[chainType][chainId] = (sequence, outNameFasta)
Ejemplo n.º 11
0
def _add_flanking_seq_fragments(ddg_data_dict: Dict, dataset: str,
                                pdb_filename: str):
    """Fill flank and residue-type columns for the variants of one PDB file.

    The table ``ddg_data_dict[dataset]`` is modified in place.  The columns
    "left_flank", "wt_restype", "mt_restype" and "right_flank" are created
    (filled with NaN) when missing.  For every row whose "pdbid" equals the
    id derived from ``pdb_filename`` (first four characters of the file name,
    upper-cased), the variant position is looked up in the sequence of chain
    "chainid" of the first model:

    * if the wild-type letter of "variant" matches the structure, the four
      columns are filled for that row;
    * otherwise "WRONG" and the row's pdbid/variant are printed.

    Only residues named like one of the 20 names returned by
    index_to_three(0..19) are part of the chain sequences.

    Raises KeyError if the row's chain is not in the structure and ValueError
    if the residue number is not among the kept residues of that chain.
    """
    table = ddg_data_dict[dataset]
    for column in ("left_flank", "wt_restype", "mt_restype", "right_flank"):
        if column not in table.columns:
            table[column] = np.nan

    pdbid = pdb_filename.split(r"/")[-1][0:4].upper()

    from Bio.PDB.PDBParser import PDBParser

    p = PDBParser()
    model_first = p.get_structure(pdbid, pdb_filename)[0]

    # Built once instead of once per residue
    standard_resnames = frozenset(index_to_three(i) for i in range(20))

    chain_id_to_pdb_seq = {}
    chain_id_to_pdb_residue_numbers = {}
    for chain in model_first:
        pdb_seq = []
        pdb_residue_numbers = []
        for residue in chain.get_residues():
            resname = residue.resname.strip()
            if resname in standard_resnames:
                pdb_residue_numbers.append(residue.id[1])
                pdb_seq.append(three_to_one(resname))
        chain_id_to_pdb_seq[chain.id] = "".join(pdb_seq)
        chain_id_to_pdb_residue_numbers[chain.id] = pdb_residue_numbers

    for idx, row in table.iterrows():
        if row["pdbid"] != pdbid:
            continue

        variant = row["variant"]
        # variant looks like <wt letter><residue number><mt letter>
        residue_number = int(variant[1:-1])
        chain_id = row["chainid"]

        pdb_sequence = chain_id_to_pdb_seq[chain_id]
        # Position of the residue inside the chain's sequence string
        resid = chain_id_to_pdb_residue_numbers[chain_id].index(
            residue_number)

        if variant[0] == pdb_sequence[resid]:
            table.loc[idx, "left_flank"] = _trim_left_flank(
                pdb_sequence[:resid])
            table.loc[idx, "right_flank"] = _trim_right_flank(
                pdb_sequence[resid + 1:])
            table.loc[idx, "wt_restype"] = variant[0]
            table.loc[idx, "mt_restype"] = variant[-1]
        else:
            print("WRONG", row[["pdbid", "variant"]])
Ejemplo n.º 12
0
from Bio.PDB import PDBParser, PDBIO
from Bio.PDB.Polypeptide import is_aa, three_to_one
import sys

# Split a PDB file (first command-line argument) into one PDB file and one
# FASTA file per chain.
path = sys.argv[1]
# Drop the last four characters of the path (the extension, e.g. ".pdb")
code = path[:-4]

io = PDBIO()
pdb = PDBParser().get_structure(code, path)

for chain in pdb.get_chains():
    # Save the chain on its own as <structure id>_<chain id>.pdb
    io.set_structure(chain)
    io.save(pdb.get_id() + "_" + chain.get_id() + ".pdb")

    # One letter per residue; anything that is not a standard amino acid
    # is written as "X".
    seq = [
        three_to_one(residue.get_resname())
        if is_aa(residue.get_resname(), standard=True) else "X"
        for residue in chain
    ]

    # Write the chain sequence; "with" closes the file even if print fails.
    with open(code + "_" + chain.get_id() + '.fasta', 'w') as out:
        print(">Chain_" + chain.get_id() + "\n" + "".join(seq), file=out)
DSSPsequence = tplContent[5].split('= ')[-1]

name = os.path.basename(tplfile).split('.')[0]
if len(name)!=5:
	print 'the protein target name is incorrect. It must be composed of PDB ID and chain letter'
	exit(-1)

pdbfile = sys.argv[2]
parser = PDBParser()
structure = parser.get_structure(name, pdbfile)

chain=name[4]
residues = structure[0][chain].get_residues()
residueList = [ r for r in residues if is_aa(r) ]
#numResidues = len(residueList)
pdbseq = ''.join( [ three_to_one(r.get_resname()) for r in residueList ] )

#print pdbseq

### check if DSSPsequence is equivalent to pdbseq
validDSSPseq = DSSPsequence.replace('-', '')

if validDSSPseq != pdbseq:
	print 'Inconsistency between DSSPsequence in ', tplfile, ' and pdbseq in ', pdbfile
	print 'pdbseq: ', pdbseq
	print 'DSPseq: ', validDSSPseq
	diffs = [i for i in xrange(min(len(pdbseq), len(validDSSPseq) ) ) if pdbseq[i] != validDSSPseq[i] ]
	print 'inconsistent positions: ', diffs

	exit(-1)
Ejemplo n.º 14
0
        pos = frag[9]
        score = "%.2f" % (math.exp((float(frag[12].rstrip())) / 1000))
        if float(score) < 0.1 or math.isnan(float(score)):
            score = "1.0"

        try:
            pp = get_pp(
                pdb, chain, start, length, seq
            )  # Polypeptide pp now contains atomic information for fragment.
        except:
            print >> stderr, "E: failed to process", pdb, chain, start  # Cannot open file.
            fails += 1
        else:
            str1 = ""
            for res in pp[1:length + 1]:
                str1 += three_to_one(res.get_resname())
            if str(seq[0:length]) == str(str1):
                #      F   0   P    0    L   9        S   1.511    =   1fcd    A 116 R 0
                print "F ", s, " P ", pos, " L ", str(
                    length), " S ", score, " = ", pdb, " ", chain, "    ", str(
                        start)  #
                try:
                    print_pp(pp, offset=1, lng=length)
                    s += 1
                    if length > 6:
                        print "F ", s, " P ", pos, " L ", str(
                            6
                        ), " S ", score, " = ", pdb, " ", chain, "    ", str(
                            start)
                        print_pp(pp, offset=1, lng=6)
                        s += 1
def convert3to1(s):
    """Return the one-letter code for residue name ``s``, or "X" if unknown.

    "Unknown" means three_to_one() raised KeyError for the name.
    """
    try:
        one_letter = three_to_one(s)
    except KeyError:
        one_letter = "X"
    return one_letter
Ejemplo n.º 16
0
 def getSequenceFromChain(self, modelID, chainID):
     """Return the sequence of one chain as a Seq, or None if not found.

     The kind of chain is decided from the length of the residue name of
     the chain's first residue:

     * 1 character: RNA; residues other than A/C/G/U become "X".
     * 2 characters: DNA; the second character is used, residues other
       than A/C/G/T become "X".
     * 3 characters: protein; standard amino acids are converted to their
       one-letter code, everything else becomes "X".  If the chain holds no
       standard amino acid at all, the residue names are appended as well.

     Leading and trailing "X" are removed before the Seq is built.  A chain
     that is empty after trimming yields an empty Seq.

     modelID is compared with model.id, chainID with str(chain.id).
     """
     self.checkRead()
     for model in self.structure:
         if model.id != modelID:
             continue
         for chain in model:
             if str(chain.id) != chainID:
                 continue

             seq = list()
             name_length = len(chain.get_unpacked_list()[0].resname)
             if name_length == 1:
                 print("Your sequence is a nucleotide sequence ("
                       "RNA)\n")
                 for residue in chain:
                     # Keep standard RNA residues, mask the rest
                     if residue.get_resname() in ['A', 'C', 'G', 'U']:
                         seq.append(residue.get_resname())
                     else:
                         seq.append("X")
             elif name_length == 2:
                 print("Your sequence is a nucleotide sequence ("
                       "DNA)\n")
                 for residue in chain:
                     # Keep standard DNA residues, mask the rest
                     if residue.get_resname()[1] in ['A', 'C', 'G', 'T']:
                         seq.append(residue.get_resname()[1])
                     else:
                         seq.append("X")
             elif name_length == 3:
                 counter = 0
                 for residue in chain:
                     # The test checks if the amino acid is one of the 20
                     # standard amino acids.  Some proteins have "UNK" or
                     # "XXX", or other symbols for missing or unknown
                     # residues.
                     if is_aa(residue.get_resname(), standard=True):
                         seq.append(three_to_one(residue.get_resname()))
                         counter += 1
                     else:
                         seq.append("X")
                 if counter != 0:  # aminoacids
                     print("Your sequence is an aminoacid sequence")
                 else:  # HETAM
                     print("Your sequence is a HETAM sequence")
                     for residue in chain:
                         seq.append(residue.get_resname())

             # Trim masked residues at both ends.  The "seq and" guard stops
             # the loops when nothing is left, so an empty or all-"X"
             # sequence no longer raises IndexError.
             while seq and seq[-1] == "X":
                 del seq[-1]
             while seq and seq[0] == "X":
                 del seq[0]
             return Seq(str(''.join(seq)))
Ejemplo n.º 17
0
    def getModelsChains(self):
        """
        given an atomic structure returns two dictionaries:
            (1) for all models and respective chains (chainID and length of residues)
            (2) for each chain list of residues

        Both are OrderedDicts keyed by model id, holding OrderedDicts keyed by
        chain id.  (1) maps to the sequence length after leading and trailing
        "X" (unknown residues) are removed; (2) maps to a list of
        (residue number, residue name) tuples for all residues of the chain.

        The kind of chain (RNA, DNA, protein) is decided from the length of
        the stripped residue name of the chain's first residue.  A chain whose
        name length is not 1, 2 or 3 is reported with length 0 and an empty
        residue list.
        """
        self.checkRead()
        listOfChains = OrderedDict()
        listOfResidues = OrderedDict()

        for model in self.structure:
            chainDicLength = OrderedDict()
            chainDicFirstResidue = OrderedDict()
            for chain in model:
                # Fresh lists for every chain, so a chain of unrecognised
                # kind cannot reuse the previous chain's values.
                seq = list()
                seq_number = list()
                nameLength = len(
                    chain.get_unpacked_list()[0].resname.strip())
                if nameLength == 1:  # RNA
                    for residue in chain:
                        if residue.get_resname() in ['A', 'C', 'G', 'U']:
                            seq.append(residue.get_resname())
                        else:
                            seq.append("X")
                        seq_number.append(
                            (residue.get_id()[1], residue.get_resname()))
                elif nameLength == 2:  # DNA
                    for residue in chain:
                        if residue.get_resname()[1] in ['A', 'C', 'G', 'T']:
                            seq.append(residue.get_resname()[1])
                        else:
                            seq.append("X")
                        seq_number.append(
                            (residue.get_id()[1], residue.get_resname()))
                elif nameLength == 3:  # Protein
                    counter = 0
                    for residue in chain:
                        if is_aa(residue.get_resname(),
                                 standard=True):  # aminoacids
                            seq.append(three_to_one(residue.get_resname()))
                            counter += 1
                        else:
                            seq.append("X")
                        seq_number.append(
                            (residue.get_id()[1], residue.get_resname()))
                    if counter == 0:  # HETAM
                        for residue in chain:
                            seq.append(residue.get_resname())
                # Trim unknown residues at both ends; the "seq and" guard
                # avoids IndexError once the list is empty.
                while seq and seq[-1] == "X":
                    del seq[-1]
                while seq and seq[0] == "X":
                    del seq[0]
                chainDicLength[chain.id] = len(seq)
                chainDicFirstResidue[chain.id] = seq_number
            listOfChains[model.id] = chainDicLength
            listOfResidues[model.id] = chainDicFirstResidue

        return listOfChains, listOfResidues
Ejemplo n.º 18
0
def get_res_type(res_list, residue):
    """Look up the type of ``residue`` in res_type_map by its one-letter code.

    ``res_list`` is accepted but not used.
    """
    one_letter = three_to_one(residue.get_resname())
    return res_type_map[one_letter]
Ejemplo n.º 19
0
def to_label(a):
    """Return the one-letter code for residue name ``a`` as a string.

    The names HID, HIP and HIE are treated as HIS before conversion.
    """
    from Bio.PDB.Polypeptide import three_to_one
    if a in ('HID', 'HIP', 'HIE'):
        a = 'HIS'
    one_letter = three_to_one(a)
    return "%s" % (one_letter)
Ejemplo n.º 20
0
 def resn_one(self):
     """Return the one-letter code of this object's residue name (self.resn)."""
     one_letter = three_to_one(self.resn)
     return one_letter
Ejemplo n.º 21
0
def pdb_to_casp(rr_name,
                pdb_file=False,
                mmCIF_file=False,
                cutoff=16,
                confidence=0.99,
                std=1,
                method="From_structure"):
    """
    Convert a pdb/mmCIF structure to CASP rr-format.

    The amino-acid residues of all chains of the first model are numbered
    consecutively from 1.  For every pair i < j whose C-beta distance is
    below ``cutoff``, one line "i j 0 distance confidence std" is written.
    Residues without a CB atom (Glycine etc) use a virtual C-beta from
    _virtual_cb_vector().

    rr_name: path of the rr file to write
    pdb_file / mmCIF_file: path of the input structure; pdb_file wins when
        both are given.  The structure id is the path without its last four
        characters.  If neither is given a message is printed and the
        process exits via sys.exit().
    cutoff: distance threshold; only pairs closer than this are written
    confidence, std: constant values written on every contact line
    method: text written on the METHOD header line
    """

    if pdb_file:
        from Bio.PDB.PDBParser import PDBParser
        bio_parser = PDBParser(PERMISSIVE=1)
        structure_file = pdb_file
    elif mmCIF_file:
        from Bio.PDB.MMCIFParser import MMCIFParser
        bio_parser = MMCIFParser()
        structure_file = mmCIF_file
    else:
        print("No file given: one pdb or one mmCIF file has to be definied")
        sys.exit()
    structure_id = structure_file[:-4]

    line = "{i} {j} 0 {m:.5f} {c:.2f} {sd:.4f}\n"

    # Load structure; only the first model is used
    structure = bio_parser.get_structure(structure_id, structure_file)
    model = structure[0]

    # Only use real amino acids, not HET or water
    aa_residues = [
        residue for chain in model for residue in chain if is_aa(residue)
    ]
    residues = "".join(
        three_to_one(residue.get_resname()) for residue in aa_residues)

    header = '\n'.join(
        ("PFRMAT RR", "TARGET {}".format(structure_id), "AUTHOR pyconsFold",
         "METHOD {}".format(method), "MODEL 1", residues + '\n'))

    # One C-beta position per residue, computed once and reused for every
    # pair.  If the residue lacks CB (Glycine etc), create a virtual one.
    cb_vectors = [
        residue['CB'].get_vector()
        if residue.has_id('CB') else _virtual_cb_vector(residue)
        for residue in aa_residues
    ]

    # Iterate over all residue pairs i < j (1-based) and keep the contacts
    content = [header]
    n_residues = len(cb_vectors)
    for i, c1B in enumerate(cb_vectors, start=1):
        for j in range(i + 1, n_residues + 1):
            dist = (cb_vectors[j - 1] - c1B).norm()
            if dist < cutoff:
                content.append(
                    line.format(i=i, j=j, m=dist, c=confidence, sd=std))

    content.append("END\n")
    with open(rr_name, 'w') as contacts_handle:
        contacts_handle.write(''.join(content))
Ejemplo n.º 22
0
def to_label(a):
    """Build the text label for residue-like object ``a``.

    The label is the one-letter code for ``a.rName.str``.  When
    ``a.rId.serial`` is a multiple of five, the serial number is appended
    on a second line.
    """
    from  Bio.PDB.Polypeptide import three_to_one
    serial = a.rId.serial
    letter = three_to_one(a.rName.str)
    if serial % 5 != 0:
        return "%s" % letter
    return "%s\n%d" % (letter, serial)
Ejemplo n.º 23
0
    def launch(self):
        """Launches the pipeline to build a box around a selection of residues
        """
        #out_log, err_log = fu.get_logs(path=self.path, mutation=self.mutation, step=self.step)

        ##
        ## Loading and parsing reference PDB structure

        parser = Bio.PDB.PDBParser()

        # Parse input structure
        print "Loading input PDB structure %s..." % self.input_pdb_path
        structure_name = os.path.basename(self.input_pdb_path.split('.')[0])
        structPDB      = parser.get_structure(structure_name,self.input_pdb_path)[0]
        structPDB_seq = [three_to_one(res.get_resname()) for res in structPDB.get_residues() if is_aa(res.get_resname(), standard=True)]
        print structPDB_seq

        # Parse residue structure
        print "Loading residue PDB selection %s..." % self.resid_pdb_path
        resid_name   = os.path.basename(self.resid_pdb_path.split('.')[0])
        residPDB     = parser.get_structure(resid_name,self.resid_pdb_path)[0]
        residPDB_seq = [three_to_one(res.get_resname()) for res in residPDB.get_residues() if is_aa(res.get_resname(), standard=True)]
        print residPDB_seq

        ##
        ## Mapping residue structure into input structure

        # Listing residues to be selected from the residue structure
        # residPDB_res_list = []
        # p = re.compile('H_|W_')
        # for residPDB_res in residPDB.get_residues():
        #     m_het = p.match(residPDB_res.get_id()[0])
        #     if not m_het:
        #         residPDB_res_list.append(residPDB_res.get_id())
        #         binding_site_CA_list.append(residPDB_res['CA'])
        

        # Aligning
        # alignments = pairwise2.align.localxx("".join(structPDB_seq), "".join(residPDB_seq))
        # print alignments[0]
        # mappings = Bio.PDB.StructureAlignment(alignments[0], structPDB, residPDB).get_maps()
        # print mappings
        # # Mapping selected residues to input structure
        # selection_res_list   = []
        # selection_atoms_num  = 0
        # for struct_chain in structPDB:
        #     for struct_res in struct_chain:
        #         if struct_res.get_id() in residPDB_res_list:
        #             selection_res_list.append(struct_res)
        #             selection_atoms_num += len(struct_res.get_list())

         # Get AA sequence
        clusterPDB_seq = self.__get_pdb_sequence(clusterPDB)

        # Pairwise align
        aln, residue_map = self.__align_sequences(structPDB_seq,clusterPDB_seq)
        print residue_map
        print "    Matching residues to input PDB structure. Alignment is:\n    %s" % aln[1]

        # Calculate (gapless) sequence identity
        seq_identity, gap_seq_identity = self.__calculate_alignment_identity(aln[0], aln[1])
        print "    Sequence identity (%%): %s" % seq_identity
        print "    Gap less identity (%%): %s" % gap_seq_identity


        ##
        ## Selecting aligned CA atoms from first model, first chain

        struct_atoms  = []
        cluster_atoms = []

        for struct_res in residue_map:
            try:
                cluster_atoms.append(clusterPDB[residue_map[struct_res]]['CA'])
                struct_atoms.append(structPDB[struct_res]['CA'])
            except KeyError:
                print "Cannot find CA atom for residue %s  (input PDB  %s)" % (structPDB[struct_res],struct_res)
                pass

        if len(cluster_atoms)==0:
            raise Exception('Cannot find CA atoms (1st model, 1st chain) in cluster member {1} when aligning against {2}. Ignoring this member.'.format(clusterPDB_path,structure_name))
        else:
            print "    Superimposing %s aligned protein residues" % len(cluster_atoms)

        # Align against input structure
        si = Bio.PDB.Superimposer()
        si.set_atoms(struct_atoms, cluster_atoms)
        si.apply(clusterPDB.get_atoms())
        print "    RMSD: %s" %si.rms

        # Save transformed structure (and ligand)
        clusterPDB_ligand_aligned = clusterPDB[clusterPDB_ligand.get_id()]
        print "    Saving transformed ligand coordinates"

        clusterPDB_ligands_aligned.append(clusterPDB_ligand_aligned)

        if len(selection_res_list) == 0:
            raise Exception('Cannot match any of the residues listed in %s into %s' % (self.resid_pdb_path,self.input_pdb_path) )
        elif len(selection_res_list) !=  len(residPDB_res_list):
            warnings.warn('Cannot match all the residues listed in %s into %s. Found %s out of %s'  % (self.resid_pdb_path,self.input_pdb_path,len(selection_res_list),len(residPDB_res_list)))
        else:
            print "Selection residues successfully matched"


        ##
        ## Compute binding site box size

        # compute box center
        selection_box_center = numpy.sum(atom.coord for res in selection_res_list for atom in res.get_atoms()) / selection_atoms_num
        print "Binding site center (Amstrongs): %8.3f%8.3f%8.3f" % (selection_box_center[1],selection_box_center[1],selection_box_center[2])

        # compute box size
        selection_coords_max = numpy.amax([atom.coord for res in selection_res_list for atom in res.get_atoms()],axis=0)
        selection_box_size   = selection_coords_max - selection_box_center
        if self.offset:
            selection_box_size = [c + self.offset for c in selection_box_size]
        print "Binding site size (Amstrongs):   %8.3f%8.3f%8.3f" % (selection_box_size[0],selection_box_size[1],selection_box_size[2])

        vol = numpy.prod(selection_box_size) * 2**3
        print "Volume (cubic Amstrongs): %.0f" % vol

        # add box details as PDB remarks
        #remarks  = "REMARK 900\nREMARK 900 RELATED  ENTRIES\nREMARK 900 RELATED ID:%s CHAIN:%s\n" % (self.pdb_code,self.pdb_chain)
        remarks = "REMARK BOX CENTER:%8.3f%8.3f%8.3f" % (selection_box_center[1],selection_box_center[1],selection_box_center[2])
        remarks += " SIZE:%8.3f%8.3f%8.3f" % (selection_box_size[0],selection_box_size[1],selection_box_size[2])

        # add (optional) box coordinates as 8 ATOM records
        #selection_box_coords_txt  = self.get_box_coordinates(selection_box_center,selection_box_size)
        selection_box_coords_txt   = ""

        # write output pdb

        shutil.copy2(self.input_pdb_path, self.output_pdb_path)

        with open(self.output_pdb_path, 'r+') as f:
            content = f.read()
            if "END" in content:
                content = content.replace("END", selection_box_coords_txt + "END")
            else:
                content += selection_box_coords_txt
            f.seek(0, 0)
            f.write(remarks.rstrip('\r\n') + '\n' + content)


        print "Output PDB file (with box setting annotations): %s" % self.output_pdb_path
Ejemplo n.º 24
0
 if mem_len < min_mem_len:  # Only use membranes longer than 17
     continue    
 global_place = mem_data[0] + pdb_seq_offset[full_chain_id]
 mem_start = global_place + 1 
 mem_end = mem_start + mem_len
 ### bridge = [resi1, res1, resi2, res2, chain, dist]
 # print(mem)
 # print(mem_start, mem_end)
 for bridge in bridges:
     save_bridge = [bridge[0], bridge[1], bridge[2], bridge[3], bridge[4], bridge[5]]
     # print(mem[bridge[0]-mem_start], mem[bridge[2]-mem_start])
     # print(save_bridge)
     if (bridge[0] >= mem_start and bridge[0] <= mem_end) or (bridge[2] >= mem_start and bridge[2] <= mem_end):
         # print("Mem bridge")
         s = save_bridge[0]
         first_aa = three_to_one(save_bridge[1])
         e = save_bridge[2]
         second_aa = three_to_one(save_bridge[3])
         seq_first = int(save_bridge[0])-mem_start
         seq_second = int(save_bridge[2])-mem_start
         # print("**********************")
         # print(bridge)
         # print(mem_start, mem_end)
         if seq_first > -1 and seq_first < mem_len:
             if mem[seq_first] != first_aa:
                 print("Membrane bridge out of sync {}".format(full_chain_id))
                 keep_running = False
                 break
                 # print("ERROR! Membrane first")
                 # print(mem[seq_first], first_aa)
                 # print(full_chain_id, bridge, mem, mem_start)
Ejemplo n.º 25
0
    'Gly': 3.400,
    'His': 13.690,
    'Ile': 21.400,
    'Leu': 21.400,
    'Lys': 15.710,
    'Met': 16.250,
    'Phe': 19.800,
    'Pro': 17.430,
    'Ser': 9.470,
    'Thr': 15.770,
    'Trp': 21.670,
    'Tyr': 18.030,
    'Val': 21.570
}
# Same values as ``bulkiness_three``, re-keyed by one-letter code: each key
# is upper-cased and converted with three_to_one; values are unchanged.
bulkiness_one = {
    three_to_one(k.upper()): v
    for k, v in bulkiness_three.items()
}

_human_readable_pepstats = {
    'A_percent-biop': '% Ala',
    'C_percent-biop': '% Cys',
    'D_percent-biop': '% Asp',
    'E_percent-biop': '% Glu',
    'F_percent-biop': '% Phe',
    'G_percent-biop': '% Gly',
    'H_percent-biop': '% His',
    'I_percent-biop': '% Ile',
    'K_percent-biop': '% Lys',
    'L_percent-biop': '% Leu',
    'M_percent-biop': '% Met',
Ejemplo n.º 26
0
    return shannon_entropy_list

#START ACTUAL PROGRAM

############################################
#find match
############################################
#read in .pdb sequence
# Parse the structure file; the header and trailer are fetched from the
# parser as well.
parser = PDBParser()
structure = parser.get_structure('', '1OTH.pdb')
header = parser.get_header()
trailer = parser.get_trailer()
# Build the one-letter sequence of chain 'A' of the first model, keeping
# only residues whose id[0] (the hetero flag) is blank.
pdbSequence = ''
for residue in structure[0]['A'].get_residues():
    if (residue.get_id()[0]==' '):
        residueName = three_to_one(residue.get_resname())
        pdbSequence += residueName
#check each fasta sequence for match to pdb sequence
bestMatch = ''
bestScore = 0
# NOTE(review): this handle is not closed anywhere in the visible code.
handle = open("uniprot-ornithine+transcarbamylase-2.fasta", "rU")
for record in SeqIO.parse(handle, "fasta") :
    # foundMatch is reset on every record, so the test below always passes
    # in the code visible here.
    foundMatch = False
    if foundMatch == False:
        # Write a two-entry FASTA file: the PDB-derived sequence first,
        # then the current record's sequence under a placeholder header.
        tempFile = open("temp.fasta", "w")
        # deleteContent is defined elsewhere; presumably it empties the
        # file -- TODO confirm.
        deleteContent(tempFile)
        tempFile.write(">sp|000000|FAKE HEADER OS=Fakus Faky GN=FAK PE=0 SV=0\n")
        tempFile.write(pdbSequence + "\n")
        tempFile.write(">sp|%s|FAKE HEADER OS=Fakus Faky GN=FAK PE=0 SV=0\n"%(record.name))
        for x in record.seq:
            tempFile.write(x)
def main():
    """Clean each PDB file in a directory and write its wild-type FASTA.

    For every ``*.pdb`` file matched under the ``pdb_directory`` command-line
    argument (files whose path contains ``'clean'`` are skipped):

    1. ``cleanATOM`` is run on the file and the resulting ``.clean.pdb``
       file is parsed.
    2. Residues of every chain are renumbered consecutively from 1.
    3. Only the chain with the most residues is kept; the others are
       detached and the structure is saved back to the ``.clean.pdb`` file.
    4. The kept chain's sequence (``'X'`` for non-standard residues) is
       written as a single ``>WT`` record to the matching path under
       ``/wt_fastas/``.

    Structures with more than one model, or with no chains, are reported
    and skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('pdb_directory', action="store", type=str)
    inputs = parser.parse_args()
    # The pattern is a plain concatenation, so pdb_directory is expected to
    # end with a path separator.
    for pdb_file in glob.glob(inputs.pdb_directory + '*.pdb'):
        clean_pdb_file = pdb_file.replace('.pdb', '.clean.pdb')
        print('#######################')
        print('#######################{}'.format(pdb_file))
        if 'clean' in pdb_file:
            print('Will overwrite an existing clean pdb so am skipping')
            continue

        fasta_outfile_loc = pdb_file.replace('/PDBs/', '/wt_fastas/').replace(
            '.pdb', '.fasta')

        #Load and clean up pdb file
        cleanATOM(pdb_file)

        pdb_io = PDB.PDBIO()
        pdb_parser = PDB.PDBParser()
        structure = pdb_parser.get_structure(" ", clean_pdb_file)

        if len(structure) != 1:
            print(
                'THERE APPEARS TO BE MORE THAN ONE MODEL IN THIS STRUCTURE BEHAVIOR OF PRORAM IS UNKNOWN ({}). EXITING'
                .format(clean_pdb_file))
            continue

        # Renumber residues 1..N per chain and remember each chain's size.
        # The stored value is N + 1; only the relative order is used.
        chain_counts = {}
        for model in structure:
            for chain in model:
                new_number = 1
                for residue in chain.get_residues():
                    if residue.id[1] != new_number:
                        res_id = list(residue.id)
                        res_id[1] = new_number
                        residue.id = tuple(res_id)
                    new_number += 1
                chain_counts[chain.id] = new_number

        if not chain_counts:
            # Nothing to keep and no sequence to write for this file.
            print('No chains found in {}; skipping'.format(clean_pdb_file))
            continue

        # Ascending stable sort: the last entry is the longest chain (the
        # later one on ties) and is the one that is kept.
        chains = sorted(chain_counts.items(), key=lambda x: x[1])
        chains_to_delete = [chain_id for chain_id, _ in chains[:-1]]
        for chain_id in chains_to_delete:
            structure[0].detach_child(chain_id)
        pdb_io.set_structure(structure)
        pdb_io.save(clean_pdb_file)

        # Exactly one chain is left at this point; build its sequence.
        for model in structure:
            for chain in model:
                print('kept ID {} and deleted {}'.format(
                    chain.id, chains_to_delete))
                wt_seq = ''.join(
                    three_to_one(residue.get_resname())
                    if is_aa(residue.get_resname(), standard=True) else 'X'
                    for residue in chain)

        with open(fasta_outfile_loc, 'w') as outfile:
            outfile.write('>{}\n{}\n'.format('WT', wt_seq))
Ejemplo n.º 28
0
    def __init__(self, model, in_file, dssp="dssp", acc_array="Sander", file_type='PDB'):
        """Create a DSSP object.

        Parameters
        ----------
        model : Model
            The first model of the structure
        in_file : string
            Either a PDB file or a DSSP file.
        dssp : string
            The dssp executable (ie. the argument to os.system)
        acc_array : string
            Accessible surface area (ASA) from either Miller et al. (1987),
            Sander & Rost (1994), or Wilke: Tien et al. 2013, as string
            Sander/Wilke/Miller. Defaults to Sander.
        file_type: string
            File type switch, either PDB or DSSP with PDB as default.

        Raises
        ------
        KeyError
            If a residue listed by DSSP cannot be found in ``model``.
        PDBException
            If the amino acid reported by DSSP does not match the residue
            in the structure.

        """
        self.residue_max_acc = residue_max_acc[acc_array]

        # create DSSP dictionary
        file_type = file_type.upper()
        assert(file_type in ['PDB', 'DSSP'])
        # If the input file is a PDB file run DSSP and parse output:
        if file_type == 'PDB':
            # Newer versions of DSSP program call the binary 'mkdssp', so
            # calling 'dssp' will not work in some operating systems
            # (Debian distribution of DSSP includes a symlink for 'dssp' argument)
            try:
                dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
            except OSError:  # TODO: Use FileNotFoundError once drop Python 2
                if dssp == 'dssp':
                    dssp = 'mkdssp'
                elif dssp == 'mkdssp':
                    dssp = 'dssp'
                else:
                    raise
                # Retry with the alternative executable name only after the
                # first attempt failed, so DSSP is not run twice on success.
                dssp_dict, dssp_keys = dssp_dict_from_pdb_file(in_file, dssp)
        # If the input file is a DSSP file just parse it directly:
        elif file_type == 'DSSP':
            dssp_dict, dssp_keys = make_dssp_dict(in_file)

        dssp_map = {}
        dssp_list = []

        def resid2code(res_id):
            """Serialize a residue's resseq and icode for easy comparison."""
            return '%s%s' % (res_id[1], res_id[2])

        # Now create a dictionary that maps Residue objects to
        # secondary structure and accessibility, and a list of
        # (residue, (secondary structure, accessibility)) tuples
        for key in dssp_keys:
            chain_id, res_id = key
            chain = model[chain_id]
            try:
                res = chain[res_id]
            except KeyError:
                # In DSSP, HET field is not considered in residue identifier.
                # Thus HETATM records may cause unnecessary exceptions.
                # (See 3jui chain A res 593.)
                # Try the lookup again with all HETATM other than water
                res_seq_icode = resid2code(res_id)
                for r in chain:
                    if r.id[0] not in (' ', 'W'):
                        # Compare resseq + icode
                        if resid2code(r.id) == res_seq_icode:
                            # Found a matching residue
                            res = r
                            break
                else:
                    raise KeyError(res_id)

            # For disordered residues of point mutations, Biopython uses the
            # last one as default, But DSSP takes the first one (alternative
            # location is blank, A or 1). See 1h9h chain E resi 22.
            # Here we select the res in which all atoms have altloc blank, A or
            # 1. If no such residues are found, simply use the first one appears
            # (as DSSP does).
            if res.is_disordered() == 2:
                for rk in res.disordered_get_id_list():
                    # All atoms in the disordered residue should have the same
                    # altloc, so it suffices to check the altloc of the first
                    # atom.
                    altloc = res.child_dict[rk].get_list()[0].get_altloc()
                    if altloc in tuple('A1 '):
                        res.disordered_select(rk)
                        break
                else:
                    # Simply select the first one
                    res.disordered_select(res.disordered_get_id_list()[0])

            # Sometimes point mutations are put into HETATM and ATOM with altloc
            # 'A' and 'B'.
            # See 3piu chain A residue 273:
            #   <Residue LLP het=H_LLP resseq=273 icode= >
            #   <Residue LYS het=  resseq=273 icode= >
            # DSSP uses the HETATM LLP as it has altloc 'A'
            # We check the altloc code here.
            elif res.is_disordered() == 1:
                # Check altloc of all atoms in the DisorderedResidue. If it
                # contains blank, A or 1, then use it.  Otherwise, look for HET
                # residues of the same seq+icode.  If not such HET residues are
                # found, just accept the current one.
                altlocs = set(a.get_altloc() for a in res.get_unpacked_list())
                if altlocs.isdisjoint('A1 '):
                    # Try again with all HETATM other than water
                    res_seq_icode = resid2code(res_id)
                    for r in chain:
                        if r.id[0] not in (' ', 'W'):
                            if resid2code(r.id) == res_seq_icode and \
                               r.get_list()[0].get_altloc() in tuple('A1 '):
                                res = r
                                break

            (aa, ss, acc, phi, psi, dssp_index,
                NH_O_1_relidx, NH_O_1_energy,
                O_NH_1_relidx, O_NH_1_energy,
                NH_O_2_relidx, NH_O_2_energy,
                O_NH_2_relidx, O_NH_2_energy) = dssp_dict[key]

            res.xtra["SS_DSSP"] = ss
            res.xtra["EXP_DSSP_ASA"] = acc
            res.xtra["PHI_DSSP"] = phi
            res.xtra["PSI_DSSP"] = psi
            res.xtra["DSSP_INDEX"] = dssp_index
            res.xtra["NH_O_1_RELIDX_DSSP"] = NH_O_1_relidx
            res.xtra["NH_O_1_ENERGY_DSSP"] = NH_O_1_energy
            res.xtra["O_NH_1_RELIDX_DSSP"] = O_NH_1_relidx
            res.xtra["O_NH_1_ENERGY_DSSP"] = O_NH_1_energy
            res.xtra["NH_O_2_RELIDX_DSSP"] = NH_O_2_relidx
            res.xtra["NH_O_2_ENERGY_DSSP"] = NH_O_2_energy
            res.xtra["O_NH_2_RELIDX_DSSP"] = O_NH_2_relidx
            res.xtra["O_NH_2_ENERGY_DSSP"] = O_NH_2_energy

            # Relative accessibility
            resname = res.get_resname()
            try:
                rel_acc = acc / self.residue_max_acc[resname]
            except KeyError:
                # Invalid value for resname
                rel_acc = 'NA'
            else:
                # Cap the ratio at 1.0
                if rel_acc > 1.0:
                    rel_acc = 1.0
            res.xtra["EXP_DSSP_RASA"] = rel_acc
            # Verify if AA in DSSP == AA in Structure
            # Something went wrong if this is not true!
            # NB: DSSP uses X often
            try:
                resname = three_to_one(resname)
            except KeyError:
                resname = 'X'
            if resname == "C":
                # DSSP renames C in C-bridges to a,b,c,d,...
                # - we rename it back to 'C'
                if _dssp_cys.match(aa):
                    aa = 'C'
            # Take care of HETATM again
            if (resname != aa) and (res.id[0] == ' ' or aa != 'X'):
                raise PDBException("Structure/DSSP mismatch at %s" % res)

            dssp_vals = (dssp_index, aa, ss, rel_acc, phi, psi,
                         NH_O_1_relidx, NH_O_1_energy,
                         O_NH_1_relidx, O_NH_1_energy,
                         NH_O_2_relidx, NH_O_2_energy,
                         O_NH_2_relidx, O_NH_2_energy)

            dssp_map[key] = dssp_vals
            dssp_list.append(dssp_vals)

        AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys,
                dssp_list)
    for chain in model:
        for residue in chain:
            try:
                resName = residue.get_resname()
                """
				n = residue['N']
				o = residue['O']
				NOdist = n-o
				if not NOdists.has_key(resName):
					NOdists[resName] = [NOdist ]
				else:
					NOdists[resName].append(NOdist)
				"""
                ca = residue['CA']
                AA = three_to_one(resName)
                cg = residue[SelectCG(AA)]
                AGdist = ca - cg
                if not CaCgdists.has_key(resName):
                    CaCgdists[resName] = [AGdist]
                else:
                    CaCgdists[resName].append(AGdist)

            except:
                print 'WARNING: missing CA or CG atoms '
"""
finalNOdists = dict()
for res, dists in NOdists.iteritems():
	finalNOdists[res] = np.mean(dists)
	AA = three_to_one(res)
	finalNOdists[AA] = finalNOdists[res]