def load_chains(raw, pdb_id, pdb_type, known):
    """Collect unit ids and sequences for every chain with known residues.

    ``raw`` is parsed with ``PDBParser`` under the structure id ``pdb_id``.
    A residue is kept when its stripped ``resname`` is a key of ``known``.
    Its unit id is the ``_``-joined, stripped string forms of the structure
    id, ``pdb_type``, model id, chain id, residue number, residue name and
    insertion code.

    Returns a dict holding:

    * ``'ordering'``: every unit id in traversal order (all models);
    * one entry per chain id: ``{'residues': [unit ids], 'sequence': str}``
      where the sequence is the concatenation of ``known[name]``.

    Chains without any known residue get no entry.  A chain id seen again in
    a later model replaces (or removes) the entry written by earlier models.
    """
    structure = PDBParser().get_structure(pdb_id, raw)
    result = {'ordering': []}
    for model in structure:
        for chain in model:
            chain_id = chain.get_id()
            units = []
            letters = []
            for residue in chain:
                short_name = residue.resname.strip()
                if short_name not in known:
                    continue
                full_id = residue.get_id()
                fields = (structure.get_id(), pdb_type, model.get_id(),
                          chain_id, full_id[1], residue.resname, full_id[2])
                unit_id = '_'.join(str(field).strip() for field in fields)
                units.append(unit_id)
                letters.append(known[short_name])
                result['ordering'].append(unit_id)

            if units:
                result[chain_id] = {'residues': units,
                                    'sequence': ''.join(letters)}
            else:
                # Mirrors the original: an empty chain leaves no entry, even
                # if an earlier model had stored one under the same id.
                result.pop(chain_id, None)
    return result
def get_aa_residues(pdb, chain):
    """Return the non-hetero residues of one chain of a PDB file.

    pdb: Protein Data Bank file, passed to ``PDBParser.get_structure``.
    chain: Id of the chain to read from the first model (index 0).

    returns: List of Biopython PDB Residue objects whose hetero flag (first
    element of the residue id) is a blank ``" "``; residues with any other
    flag are left out.
    """
    parser = PDBParser()
    structure = parser.get_structure("prot", pdb)
    selected_chain = structure[0][chain]

    # Single pass filter; the previous collect-then-remove approach was
    # quadratic in the number of residues.
    return [res for res in selected_chain.get_residues()
            if res.get_id()[0] == " "]
Exemple #3
0
def main(file, atom, CAd=15, CBd=12, mind=6):
    """Analyze the pdb using distance between atom and minimum distances.

    Parses ``file``, builds a distance matrix for ``atom`` over the filtered
    residues, derives a contact map from it using the thresholds
    ``CAd``/``CBd``/``mind`` (stored under the keys "CA", "CB" and "min"),
    and plots both through the ``plots`` module.

    Returns the tuple ``(dist_matrix, cont_matrix)``.
    """

    logging.info("Analyzing %s using %s", file, atom)

    dist = {"CA": CAd, "CB": CBd, "min": mind}
    # Derive the plot name from the ``file`` argument itself rather than
    # from a global ``args`` object that only exists when run as a script.
    name_f = os.path.splitext(os.path.basename(file))[0]
    parser = PDBParser(PERMISSIVE=1)

    logging.captureWarnings(True)
    try:
        structure = parser.get_structure("test", file)

        residues = filter_residues(structure)
        dist_matrix = calc_dist_matrix(residues, atom)
        title_dist = 'Distances of the file {}'.format(name_f)
        name_heatmap = plots.plot_heatmap(dist_matrix, name_f, title_dist,
                                          atom)
        logging.info("Heatmap %s created", name_heatmap)
        cont_matrix = contact_map(dist_matrix, atom, dist)
        title_bin = 'Distance contacts of the file {}'.format(name_f)
        name_bin = plots.plot_matrix_binary(cont_matrix, name_f, title_bin,
                                            atom)
        logging.info("Contact map %s created", name_bin)
    finally:
        # Always restore normal warning handling, even if a step fails.
        logging.captureWarnings(False)

    return (dist_matrix, cont_matrix)
def get_normalized_pairs(n):
	'''Count residue-name pairs whose CB atoms lie within a radius n.

	Every ``./pdbfiles/*.ent`` file is parsed; files with more than two CB
	atoms contribute all CB-CB pairs found by a neighbour search of radius
	``n``. The pair counts are accumulated over all processed files.

	The resulting dictionary (keys: ``(resname, resname)`` tuples, values:
	number of occurrences) is written to ``normalized_pairs8.py`` as
	``Normalized_pairs_<n>=<dict>`` and also returned.

	According to the original author the data set consists of 1.110
	sequences with known structure and <40% homology, chosen to avoid family
	redundancy. Not necessary for the package.'''
	p = PDBParser(PERMISSIVE=1)
	pair_counts = Counter()
	file_list = []

	###### Parsing through PDB files #######
	for filename in glob.glob('./pdbfiles/*.ent'):
		s = p.get_structure('X', filename)
		atom_list = [atom for atom in s.get_atoms() if atom.name == 'CB']

		if len(atom_list) > 2:
			# Count the pairs of this file right away; previously only the
			# neighbours of the last processed file survived the loop.
			ns = Bio.PDB.NeighborSearch(atom_list)
			pair_counts.update(
				(x.get_parent().get_resname(), y.get_parent().get_resname())
				for x, y in ns.search_all(n))
			file_list.append(filename)
			sys.stderr.write(filename+' processed.\n') #check-point
		else:
			sys.stderr.write(filename+' could not be processed.\n') #check-point

	counter = dict(pair_counts)
	sys.stderr.write(str(len(file_list))+' files processed.\n')			#check-point
	sys.stderr.write('Dictionary length: '+str(len(counter))+'.\n') #check-point
	with open('normalized_pairs8.py', 'w') as outfile:
		outfile.write('\nNormalized_pairs_'+str(n)+'='+str(counter))
	return counter
Exemple #5
0
def Init():
    """Read ``task.input`` and build the scattering vectors and coordinates.

    ``task.input`` holds ``key=value`` lines; lines starting with ``/`` and
    empty lines are skipped.  ``angle`` lines must be comma separated floats
    (they are parsed for validation, the visible code does not use them
    further); every other key is stored as a stripped string.

    Keys read: ``protein_file`` (``.cif`` or ``.pdb``), ``CENTER``,
    ``scr_size``, ``pix_size``, ``distance`` and ``lambda``.

    Returns:
        (s, allarr): ``s`` is a ``(scr_size, scr_size, 3)`` array computed
        from the pixel positions, ``distance`` and ``1/lambda``; ``allarr``
        is the ``(n_atoms, 3)`` coordinate array, shifted to zero mean when
        ``CENTER`` is ``ON``.

    Raises:
        ValueError: if a line has no ``=`` or the protein file is neither
            ``cif`` nor ``pdb``.
        KeyError: if a required key is missing from ``task.input``.
    """
    para = {}
    jobs = []
    with open("task.input", "r") as ptask:
        for line in ptask:
            if line[0] == '/' or line[0] == '\n':
                continue
            # Split on the first '=' only so values may contain '='.
            a, b = line.split("=", 1)
            if a == 'angle':
                jobs.append([float(x) for x in b.strip().split(',')])
            else:
                para[a] = b.strip()

    filename = para['protein_file']
    # Use the text after the last dot so paths such as './x.pdb' work.
    file_type = filename.strip().rsplit('.', 1)[-1]
    if file_type == 'cif':
        mt = MMCIF2Dict(filename)
        xlist = [float(x) for x in mt['_atom_site.Cartn_x']]
        ylist = [float(x) for x in mt['_atom_site.Cartn_y']]
        zlist = [float(x) for x in mt['_atom_site.Cartn_z']]
        allarr = numpy.vstack((xlist, ylist, zlist)).T
    elif file_type == 'pdb':
        parser = PDBParser()
        structure = parser.get_structure("test", filename)
        allarr = numpy.array([atom.get_coord()
                              for atom in structure.get_atoms()])
    else:
        raise ValueError("unsupported protein file type: %r" % file_type)

    if para['CENTER'] == 'ON':
        allarr -= allarr.mean(axis=0)

    scr_size = int(para['scr_size'])
    pix_size = float(para['pix_size'])
    distance = float(para['distance'])
    wavenum = 1.0/float(para['lambda'])
    ssc = scr_size/2.0-0.5

    # Pixel offsets from the screen centre; axis 0 carries x, axis 1 y.
    offsets = (numpy.arange(scr_size)-ssc)*pix_size
    x = offsets[:, None]
    y = offsets[None, :]
    z = distance
    sr = numpy.sqrt(x*x+y*y+z*z)

    s = numpy.zeros((scr_size, scr_size, 3))
    s[:, :, 0] = x*wavenum/sr
    s[:, :, 1] = y*wavenum/sr
    s[:, :, 2] = z*wavenum/sr-wavenum

    return s, allarr
Exemple #6
0
def getPdbSequance(pdb_file, chain_id):
	"""Return the one-letter sequence and residue numbers of a chain.

	The first model of ``pdb_file`` is searched for ``chain_id``. A residue
	is accepted when its hetero flag is blank or one of ``H_MSE``, ``H_M3L``,
	``H_CAS`` and it has N, CA and C atoms plus CB (CB is not required for
	GLY). Any other residue whose flag is not ``W`` is reported on stdout.

	Returns ``(pdb_seq, pdb_indexes)`` where ``pdb_seq`` is the result of
	``three2one`` on the accepted residue names and ``pdb_indexes`` the
	matching residue numbers. Exits the interpreter when the chain is
	missing.
	"""
	pdb_indexes = []
	pdb_sequance = []

	p = PDBParser(PERMISSIVE=1)
	s = p.get_structure("",  pdb_file)
	pdb_id = pdb_file[0:-4]

	if not s[0].has_id(chain_id):
		# Single-argument print calls behave the same on Python 2 and 3.
		print("PDB "+pdb_id+" doesn't have chain with id "+chain_id)
		print("")
		exit()

	chain = s[0][chain_id]

	for res in chain:
		is_regular_res = res.has_id('N') and res.has_id('CA') and res.has_id('C') and (res.get_resname()=='GLY' or res.has_id('CB'))
		res_id = res.get_id()[0]
		if res_id in (' ', 'H_MSE', 'H_M3L', 'H_CAS') and is_regular_res:
			pdb_sequance.append(res.get_resname())
			pdb_indexes.append(res.get_id()[1])
		elif res_id != 'W':
			print("Unknown residue in "+pdb_id+" with res_id "+res_id)

	pdb_seq = three2one(pdb_sequance)

	return pdb_seq, pdb_indexes
Exemple #7
0
def deleteChain():
	"""Delete complete chains from a pdb and save the new structure.

	Reads the module-level ``pdb_name``, asks how many chains to delete and
	then for each chain id, and removes that chain from every model. The
	result is saved either as ``<sequence>_bound.pdb`` (lower-cased sequence
	of the remaining peptides) when the user answers ``y``, or as
	``<name>_without<last chain id>.pdb`` otherwise.
	"""
	parser = PDBParser()
	nameStruct=pdb_name.partition('.')[0]
	structure = parser.get_structure(nameStruct, pdb_name)

	# Parse the count explicitly; Python 2 ``input`` would evaluate the text.
	nb_chain=int(raw_input('How many chain do you want to delete : '))
	rm_chain=''  # stays empty when no chain is requested
	for i in range(nb_chain):
		rm_chain=raw_input('What chain you want to delete : ')
		for model in structure:
			# Look the chain up by id instead of detaching while iterating
			# over the model's children.
			if model.has_id(rm_chain):
				model.detach_child(rm_chain)

	pept = raw_input('Do you want to get a pdb with the sequence in its name : ')
	w = PDBIO()
	w.set_structure(structure)
	if(pept == 'y'):
		ppb=PPBuilder()
		seq=''.join([str(pp.get_sequence()) for pp in ppb.build_peptides(structure)])
		w.save(seq.lower()+'_bound.pdb')
	else:
		w.save(nameStruct+'_without'+rm_chain+'.pdb')
Exemple #8
0
    def __pdb_ordering__(self, raw, pdb_id, pdb_type):
        """Map the unit id of every known residue to its running index.

        The structure parsed from ``raw`` is walked model by model, chain by
        chain.  A residue counts when its stripped ``resname`` is in
        ``self.known``.  The result has the form
        ``{unit_id: {'index': index, 'pdb': pdb_id}}`` where ``index`` starts
        at 0 and grows by one for every known residue encountered.
        """
        structure = PDBParser(QUIET=True).get_structure(pdb_id, raw)
        ordering = {}
        position = 0
        for model in structure:
            # The model id is shifted by one before it enters the unit id.
            model_number = model.get_id() + 1
            for chain in model:
                chain_id = chain.get_id()
                for residue in chain:
                    unit_name = residue.resname.strip()
                    if unit_name not in self.known:
                        continue
                    full_id = residue.get_id()
                    fields = (structure.get_id(), pdb_type, model_number,
                              chain_id, full_id[1], unit_name, full_id[2])
                    unit_id = '_'.join(str(field).strip() for field in fields)
                    ordering[unit_id] = {'index': position, 'pdb': pdb_id}
                    position += 1

        return ordering
def old_residue_ids(raw, filename):
    """List one id record per residue of the structure parsed from ``raw``.

    The PDB id is the base name of ``filename`` without its extension.  The
    type is ``'AU'`` for a ``.pdb`` extension, otherwise ``'BA'`` followed by
    the last character of ``filename``.

    Returns a list of dicts with the string fields ``pdb``, ``type``,
    ``model``, ``chain``, ``number``, ``unit`` and ``insertion``.
    """
    stem, extension = os.path.splitext(filename)
    pdb_id = os.path.basename(stem)
    structure = PDBParser().get_structure(pdb_id, raw)

    pdb_type = 'AU' if extension == '.pdb' else 'BA' + filename[-1]

    return [
        {
            'pdb': pdb_id,
            'type': pdb_type,
            # BioPython model ids start at 0 here; the record uses 1-based
            # numbering.
            'model': str(model.get_id() + 1),
            'chain': chain.get_id(),
            'number': str(residue.get_id()[1]),
            'unit': residue.resname.strip(),
            'insertion': residue.get_id()[2].rstrip(),
        }
        for model in structure
        for chain in model
        for residue in chain
    ]
def get_info(filename):
	'''
	Return header. Function adapted from Biopython Package.\n
	get_info(filename)\n
	Filename needs to be a PDB file format (*.ent or *.pdb)

	The file is parsed first because the parser only has a header to
	report once a structure has been read.
	'''
	p = PDBParser(QUIET=True)
	p.get_structure('X', filename)
	return p.get_header()
Exemple #11
0
def score(PDBfile):
    """
    Calculates the m-score for a given PDB file

    arguments:

    PDBfile - the PDB file to score

    hidden arguments:

    aas.scr, pro.scr, gly.scr - the scoring tables
    need to be present in working directory

    returns:

    the summed PRO/GLY/other-residue score of the first chain of the first
    model, divided by the chain length and multiplied by 1000; 0 when the
    residue indices are inconsistent or the computed length is 0.
    """
    from pro_angle import find_residue
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    (aas, gly, pro) = load_scores() ##define global tables
    pars = PDBParser(PERMISSIVE = 1)
    # Drop the '.pdb' suffix only; str.rstrip('.pdb') strips any trailing
    # run of the characters '.', 'p', 'd', 'b' and mangles ids like '1bdp'.
    structure_id = PDBfile[:-4] if PDBfile.endswith('.pdb') else PDBfile
    struct = pars.get_structure(structure_id, PDBfile)
    model = struct.child_list[0]
    chain = model.child_list[0]
    pro_list = find_residue(chain, 'PRO')
    gly_list = find_residue(chain, 'GLY')
    # A real list is needed because entries are removed below.
    aas_list = list(range(chain.child_list[1].id[1],
                          chain.child_list[len(chain)-1].id[1]))
    #need to remove pro/gly indices in first/last position
    last_index = len(chain)-1
    for index_list in (pro_list, gly_list):
        for terminal in (1, last_index):
            if terminal in index_list:
                index_list.remove(terminal)
    try:
        for index in pro_list:
            aas_list.remove(index) #remove pros from aas_list
        for index in gly_list:
            aas_list.remove(index) #remove glys from aas_list
    except ValueError:
        print('incosistency in PDB file - will return score = 0')
        return 0

    proscore = score_help(chain, pro_list, pro)
    glyscore = score_help(chain, gly_list, gly)
    aasscore = score_help(chain, aas_list, aas)
    total = proscore+glyscore+aasscore
    size = length(chain)
    try:
        return (total/size)*1000 #normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
def get_structure(pdb_id):
    '''Returns a PDB structure.

    The file for ``pdb_id`` is obtained through ``io_utils.get_file`` (source
    URL on rcsb.org, target under the user's home directory) and parsed
    quietly with ``PDBParser``.
    '''
    source_url = 'http://www.rcsb.org/pdb/files/' + pdb_id + '.pdb'
    target_filename = os.path.join(os.path.expanduser('~'), _DIR, _PDB_DIR,
                                   pdb_id + '.pdb')

    # Hand the path straight to the parser; opening the file here only to
    # read back its name kept an extra handle open during parsing.
    pdb_path = io_utils.get_file(source_url, target_filename)
    return PDBParser(QUIET=True).get_structure(pdb_id, pdb_path)
Exemple #13
0
def main():
    """Parse the PDB file named on the command line and run ``calculate_ss``.

    Exits with a usage message when no file argument is given.  Only the
    first model of the structure is passed on.
    """
    try:
        pdb_name = sys.argv[1]
    except IndexError:
        sys.exit("Usage: %s input_pdb_file" % sys.argv[0])

    structure = PDBParser(PERMISSIVE=1).get_structure("temp", pdb_name)
    calculate_ss(structure[0])
 def parse(self, *pdb_filenames):
     """
     REQUIRED. Load the given protein PDB files into ``self.proteins``.

     Any number of file names is accepted; the list is reset on every call
     and filled in argument order.  (The original note states that only two
     of them are used for the superimposition -- that happens outside this
     method.)
     """
     self.proteins = []
     pdb_parser = PDBParser(QUIET=True)
     for path in pdb_filenames:
         # The structure id is derived from the file name.
         structure_id = self.__get_pdb_id_from_filename(path)
         structure = pdb_parser.get_structure(structure_id, path)
         self.proteins.append(structure)
Exemple #15
0
def removeDoubleAtoms():
	"""Remove all double atoms defined in a pdb and save the new structure.

	Works on the module-level ``pdb_name``; the result is written to
	``<name>_noDouble.pdb`` where ``<name>`` is the part of ``pdb_name``
	before the first dot.
	"""
	struct_name = pdb_name.partition('.')[0]
	pdb_parser = PDBParser()
	structure = pdb_parser.get_structure(struct_name, pdb_name)
	# Header and trailer are fetched but not used further in this function.
	header = pdb_parser.get_header()
	trailer = pdb_parser.get_trailer()

	structure.remove_disordered_atoms()

	writer = PDBIO()
	writer.set_structure(structure)
	writer.save(struct_name + '_noDouble.pdb')
Exemple #16
0
 def Draw(self, parent, filename):
     """Draw coloured line segments for the first model on ``parent.ax2``.

     ``filename`` is parsed with a permissive ``PDBParser``; the list of
     models is kept in ``self.pdbMat``.  For every residue with more than
     three atoms, the first four atoms are used to build three segment
     lists which are plotted in red, blue and green respectively.
     """
     p = PDBParser(PERMISSIVE=1)
     # structure_id = Rec[1]
     structure = p.get_structure("WHYY", filename)
     self.pdbMat = structure.get_list()
     # Segment lists: each entry is a list of coordinates along one axis.
     # r*: atom 0 -> atom 1, b*: atom 1 -> atom 2, g*: atom 2 -> atom 3.
     rx = []
     ry = []
     rz = []
     bx = []
     by = []
     bz = []
     gx = []
     gy = []
     gz = []
     for chain in self.pdbMat[0].get_list():
         for resnum, residue in enumerate(chain.get_list()):
             atom = residue.get_list()
             if len(atom) > 3:
                 if resnum > 1:
                     # Extends an earlier blue segment with ``npos``.  At
                     # this point ``npos`` still holds the value assigned
                     # for a previously processed residue (it is reassigned
                     # below).
                     # NOTE(review): the index ``resnum - 2`` assumes no
                     # residue was skipped and restarts per chain while the
                     # b* lists keep growing -- confirm this is intended.
                     bx[resnum - 2].append(npos[0])
                     by[resnum - 2].append(npos[1])
                     bz[resnum - 2].append(npos[2])
                 # The names suggest atoms 0..3 are N, CA, C and O --
                 # TODO confirm the atom order of the input files.
                 npos = atom[0].get_coord()
                 capos = atom[1].get_coord()
                 cpos = atom[2].get_coord()
                 opos = atom[3].get_coord()
                 rx.append([npos[0], capos[0]])
                 ry.append([npos[1], capos[1]])
                 rz.append([npos[2], capos[2]])
                 bx.append([capos[0], cpos[0]])
                 by.append([capos[1], cpos[1]])
                 bz.append([capos[2], cpos[2]])
                 gx.append([cpos[0], opos[0]])
                 gy.append([cpos[1], opos[1]])
                 gz.append([cpos[2], opos[2]])
     # One plot call per segment: red first, then blue, then green.
     for n, line in enumerate(rx):
         x = np.array(line)
         y = np.array(ry[n])
         z = np.array(rz[n])
         parent.ax2.plot(x, y, z, "r-", linewidth=5)
     for n, line in enumerate(bx):
         x = np.array(line)
         y = np.array(by[n])
         z = np.array(bz[n])
         parent.ax2.plot(x, y, z, "b-", linewidth=5)
     for n, line in enumerate(gx):
         x = np.array(line)
         y = np.array(gy[n])
         z = np.array(gz[n])
         parent.ax2.plot(x, y, z, "g-", linewidth=5)
Exemple #17
0
def Pdb2Gro(pdb_file, gro_file, ch_name):
	"""Write the atoms of one chain of a PDB file to ``gro_file``.

	``pdb_file`` may be given with or without the ``.pdb`` extension; an
	empty ``ch_name`` selects chain ``A``. Residues of the first model are
	kept when their hetero flag is blank or one of ``H_MSE``, ``H_M3L``,
	``H_CAS`` and they have N, CA and C atoms. Every atom of a kept residue
	becomes an ``Atom`` record numbered consecutively from 1.

	The output starts with a title line and a 12 character wide atom count,
	followed by one entry per atom written through ``Atom.write_``. When
	the chain does not exist the file contains a count of 0.
	"""
	from Bio.PDB.PDBParser import PDBParser

	p = PDBParser(PERMISSIVE=1)

	pdb_id = pdb_file
	if pdb_file[-4:].lower()!=".pdb":
		pdb_file = pdb_file + ".pdb"
	if pdb_id[-4:].lower()==".pdb":
		pdb_id = pdb_id[:-4]

	s = p.get_structure(pdb_id, pdb_file)

	if ch_name=='':
		ch_name = 'A'

	# Defined up front so a missing chain yields an empty file instead of a
	# NameError further down.
	atoms = []
	iatom = 0
	for chain in s[0].get_list():
		if chain.get_id()!=ch_name:
			continue
		for res in chain:
			is_regular_res = res.has_id('N') and res.has_id('CA') and res.has_id('C')
			res_id = res.get_id()[0]
			if res_id in (' ', 'H_MSE', 'H_M3L', 'H_CAS') and is_regular_res:
				res_name = res.get_resname()
				residue_no = res.get_id()[1]
				for atom in res:
					iatom = iatom + 1
					atoms.append( Atom(iatom, atom.get_name(), residue_no, res_name, atom.get_coord()) )

	with open(gro_file, 'w') as out:
		out.write(" Structure-Based gro file\n")
		out.write( ("            "+str(len(atoms)))[-12:] )
		out.write("\n")
		for record in atoms:
			record.write_(out)
def get_ca(pdbfile):
	"""Return the CA coordinates of every residue in the first model.

	Each residue must have a blank hetero flag (or ``H_MSE``, ``H_M3L``,
	``H_CAS``) and contain both a CA and an O atom. The first residue that
	does not qualify is reported on stdout and the interpreter exits with a
	non-zero status, as the message announces.
	"""
	p=PDBParser(PERMISSIVE=1)
	ca_atoms = []
	s = p.get_structure(pdbfile,pdbfile)
	for chain in s[0].get_list():
		for res in chain:
			is_regular_res = res.has_id('CA') and res.has_id('O')
			res_id = res.get_id()[0]
			if res_id in (' ', 'H_MSE', 'H_M3L', 'H_CAS') and is_regular_res:
				ca_atoms.append(res['CA'].get_coord())
			else:
				# Single-argument print calls behave the same on Python 2
				# and 3 and produce the same text as before.
				print("Pdb file contains irregular residue names or missing CA / O atoms! Fix it and run again! Exit with error.")
				print("res_id : %s" % (res_id,))
				sys.exit(1)
	return ca_atoms
def score (query_pdb_path,
           against_pdb_path,
           query_fp_path = None,
           against_fp_path = None,
           query_epitope = None,
           against_epitope = None,
           spin_image_height_step = 5,
           spin_image_radius_step = 2,
           sphere_radius_step = 2,
           cutoff = 20.0,
           spin_image_radius_range = (0, 20),
           spin_image_height_range =  (-30, 10),
           sphere_radius_range = (0, 20),
           callback = write_score_to_file, cbargs=()):
    """Compute the three similarity scores between two PDB complexes.

    Both PDB files are parsed and wrapped in ``Complex`` objects together
    with their epitope lists (an empty list when omitted).  When either
    fingerprint path is missing, fingerprints are computed from the
    complexes using the three ``*_step`` arguments; otherwise both
    fingerprint strings are read from the given files.

    ``callback``, unless ``None``, is called with the score tuple followed
    by the items of ``cbargs``.

    The ``*_range`` arguments are accepted but not used by the visible code.

    Returns the tuple ``(score1, score2, score3)`` produced by
    ``similarity_between`` with the given ``cutoff``.
    """
    # Fresh lists per call: mutable defaults would be shared between calls.
    if query_epitope is None:
        query_epitope = []
    if against_epitope is None:
        against_epitope = []

    p = PDBParser(PERMISSIVE=1)

    query_struct = p.get_structure(os.path.basename (query_pdb_path), query_pdb_path)
    against_struct = p.get_structure(os.path.basename (against_pdb_path), against_pdb_path)

    query_complex = Complex (query_struct, query_epitope)
    against_complex = Complex (against_struct, against_epitope)

    if query_fp_path is None or  against_fp_path is None:#if fp is not given
        query_complex.get_fp(spin_image_radius_step = spin_image_radius_step, spin_image_height_step = spin_image_height_step, sphere_radius_step = sphere_radius_step)
        against_complex.get_fp(spin_image_radius_step = spin_image_radius_step, spin_image_height_step = spin_image_height_step, sphere_radius_step = sphere_radius_step)

        query_fp_string = query_complex.fp2str ()
        against_fp_string = against_complex.fp2str ()
    else:
        #if fp is given, read them
        with open (query_fp_path, 'r') as f1, open(against_fp_path, 'r') as f2:
            query_fp_string = f1.read ()
            against_fp_string = f2.read ()

    query = FPWithComplex (query_complex, query_fp_string)
    against = FPWithComplex (against_complex, against_fp_string)

    score1, score2, score3 = similarity_between (query, against, cutoff = cutoff)

    if callback is not None:
        callback ((score1, score2, score3), *cbargs)
    return score1, score2, score3
Exemple #20
0
	def __init__(self, filename):
		
		self.spheredata = ''
		
		E2C = {}
		E2R = {}
		exec elements # Read the color mappings at the bottom of this file
		
		# Read the file
		atoms = []
		parser = PDBParser()
		structure = parser.get_structure('test',filename)
		for model in structure.get_list():
		  for chain in model.get_list():
		    for residue in chain.get_list():
		      for atom in residue.get_list():
						atoms += [atom]
		
		# Look up colors and radius
		spheres = []
		for atom in atoms:
			s = Sphere()
			s.x, s.y, s.z = atom.get_coord()
			element = atom.get_name().strip(string.digits)
			s.radius = E2R[element] if E2R.has_key(element) else 1.5
			color = E2C[element] if E2C.has_key(element) else 0xFF1493
			s.r = (color & 0xff) / 255.0
			s.g = ((color & 0xff00) >> 8) / 255.0
			s.b = ((color & 0xff0000) >> 16) / 255.0
			spheres += [s]
			
			self.spheredata += struct.pack('fff f ffff', s.x,s.y,s.z, s.radius, s.r,s.g,s.b,1.0)
			
			
		self.spheres = spheres

		# Figure out the total radius
		xs, ys, zs = [s.x for s in spheres], [s.y for s in spheres], [s.z for s in spheres]
		dx = max(xs) - min(xs)
		dy = max(ys) - min(ys)
		dz = max(zs) - min(zs)
		self.radius = np.sqrt(dx*dx + dy*dy + dz*dz) / 2 + 1.5
		self.x = (max(xs) + min(xs)) / 2
		self.y = (max(ys) + min(ys)) / 2
		self.z = (max(zs) + min(zs)) / 2
Exemple #21
0
def removeHetero():
	"""Remove all heteroatoms from a pdb and save the new structure.

	Works on the module-level ``pdb_name``. Every residue whose hetero flag
	(first element of its id) is not blank is detached; chains left without
	residues are detached too. The result is written to
	``<name>_noHetero.pdb``.
	"""
	parser = PDBParser()
	nameStruct=pdb_name.partition('.')[0]
	structure = parser.get_structure(nameStruct, pdb_name)
	for model in structure:
		# Iterate over snapshots: detaching children while iterating the
		# live container skips the element that follows each removal.
		for chain in list(model):
			for residue in list(chain):
				if residue.id[0] != ' ':
					chain.detach_child(residue.id)
			if len(chain) == 0:
				model.detach_child(chain.id)

	w = PDBIO()
	w.set_structure(structure)
	w.save(nameStruct+'_noHetero.pdb')
Exemple #22
0
def renameChain():
	"""Rename one chain of the pdb and save the new structure.

	Works on the module-level ``pdb_name``. The user is asked for the chain
	to rename and its new name; every chain with the old id, in every model,
	receives the new id. The result is written to ``<name>_rename.pdb``.
	"""
	struct_name = pdb_name.partition('.')[0]
	pdb_parser = PDBParser()
	structure = pdb_parser.get_structure(struct_name, pdb_name)
	# Header and trailer are fetched but not used further in this function.
	header = pdb_parser.get_header()
	trailer = pdb_parser.get_trailer()

	old_id = raw_input('What is the chain you want to rename : ')
	new_id = raw_input('What is the new name of this chain : ')

	for model in structure:
		for chain in model:
			if chain.id != old_id:
				continue
			chain.id = new_id

	writer = PDBIO()
	writer.set_structure(structure)
	writer.save(struct_name + '_rename.pdb')
Exemple #23
0
def deleteResidue():
	"""Delete a residue from a pdb and save the new structure.

	Works on the module-level ``pdb_name``. The user enters a residue
	number; every residue with that number, in every chain of every model,
	is detached. The result is written to ``<name>_noResidue.pdb``.

	Raises ValueError when the entered text is not an integer.
	"""
	parser = PDBParser()
	nameStruct=pdb_name.partition('.')[0]
	structure = parser.get_structure(nameStruct, pdb_name)

	# Residue numbers are ints; comparing them with the raw text never
	# matched, so nothing was ever deleted.
	rm_residue=int(raw_input('What residue you want to delete : '))
	for model in structure:
		for chain in model:
			# Snapshot the residues so detaching does not skip neighbours.
			for residue in list(chain):
				if residue.id[1]==rm_residue:
					chain.detach_child(residue.id)

	w = PDBIO()
	w.set_structure(structure)
	w.save(nameStruct+'_noResidue.pdb')
Exemple #24
0
 def Draw(self, parent, filename):
     """Scatter the CA, N and O atoms of the first model on ``parent.ax2``.

     ``filename`` is parsed with a permissive ``PDBParser``; the list of
     models is kept in ``self.pdbMat``.  Atoms whose id starts with ``H`` or
     ``W`` are ignored.  CA atoms are drawn in blue (size 385), N atoms in
     red (size 350) and O atoms in green (size 330); the original comments
     describe each size as the element's radius times 5.
     """
     structure = PDBParser(PERMISSIVE=1).get_structure('WHYY', filename)
     self.pdbMat = structure.get_list()

     # Atom name -> (x values, y values, z values)
     tracks = {
         'CA': ([], [], []),
         'N': ([], [], []),
         'O': ([], [], []),
     }
     for chain in self.pdbMat[0].get_list():
         for residue in chain.get_list():
             for atom in residue.get_list():
                 if atom.get_id()[0][0] in ["H","W"]:
                     continue
                 pos = atom.get_coord()
                 axes = tracks.get(atom.get_name())
                 if axes is None:
                     continue
                 axes[0].append(pos[0])
                 axes[1].append(pos[1])
                 axes[2].append(pos[2])

     # Same drawing order as before: CA, then N, then O.
     for atom_name, size, colour in (('CA', 385, 'b'),
                                     ('N', 350, 'r'),
                                     ('O', 330, 'g')):
         xs, ys, zs = tracks[atom_name]
         parent.ax2.scatter(np.array(xs), np.array(ys), np.array(zs),
                            zdir='z', marker='o', s=size, c=colour)
Exemple #25
0
 def Draw(self, parent, filename):
     """Plot the CA trace of the first model of ``filename`` on ``parent.ax2``.

     ``filename`` is parsed with a permissive ``PDBParser``; the list of
     models is kept in ``self.pdbMat``.  The coordinates of every atom named
     ``CA`` are collected in traversal order and drawn with a single
     ``plot`` call.
     """
     structure = PDBParser(PERMISSIVE=1).get_structure('WHYY', filename)
     self.pdbMat = structure.get_list()
     ca_coords = [
         atom.get_coord()
         for chain in self.pdbMat[0].get_list()
         for residue in chain.get_list()
         for atom in residue.get_list()
         if atom.get_name() == 'CA'
     ]
     xs = np.array([coord[0] for coord in ca_coords])
     ys = np.array([coord[1] for coord in ca_coords])
     zs = np.array([coord[2] for coord in ca_coords])
     parent.ax2.plot(xs, ys, zs)
Exemple #26
0
def getSequence():
	"""Print the upper-case sequence of one chain of the pdb.

	Works on the module-level ``pdb_name``. The user enters a chain id; all
	other chains are detached from every model of the in-memory structure
	and the sequences of the remaining peptides are concatenated and
	printed.
	"""
	parser = PDBParser()
	nameStruct=pdb_name.partition('.')[0]
	structure = parser.get_structure(nameStruct, pdb_name)

	what_chain=raw_input('For what chain do you want the sequence : ')

	for model in structure:
		# Snapshot the chains: detaching while iterating the live model
		# skips the chain that follows each removal.
		for chain in list(model):
			if chain.id != what_chain:
				model.detach_child(chain.id)

	ppb=PPBuilder()
	seq=''.join([str(pp.get_sequence()) for pp in ppb.build_peptides(structure)])
	print(seq.upper())
Exemple #27
0
 def Init(self, parent, bigPanel, colorList):
     """Store the panel and colours, build the widgets and reset the state.

     ``bigPanel`` and its size (``GetSize()``) are kept on the instance, as
     is ``colorList``.  ``FrameInit``, ``CoverInit`` and ``TabButtonInit``
     are then called in that order, the counters are zeroed and a permissive
     ``PDBParser`` is created.  ``parent`` is not used in this method.
     """
     self.bigPanel = bigPanel
     self.bPSize = self.bigPanel.GetSize()
     self.cL = colorList
     self.tabButtons = []
     # The attributes above are set before these three calls run.
     self.FrameInit()
     self.CoverInit()
     self.TabButtonInit()
     self.timesCalled = 0
     self.ssMeth = 0
     # Parser instance kept on the object under ``self.p``.
     self.p = PDBParser(PERMISSIVE=1)
Exemple #28
0
def GetExec():
    """Index the ``.pdb`` files of the working directory.

    The timestamp stored in ``lastChecked.txt`` is read and then replaced by
    the current time.  For every ``.pdb`` entry a ``<name>.pickle`` record
    ``[name, number of models, file name]`` is used: it is rebuilt by parsing
    the file when the pickle is missing or the file's modification time (via
    ``fmt.filemtime``) is newer than the stored timestamp, otherwise it is
    loaded with ``mP.opickle``.  An ``IOError`` on one entry is printed and
    the scan continues with the next entry.

    NOTE(review): ``newList`` and ``listdata`` are built but the visible code
    does not return or store them -- confirm against the original source.
    """
    cwd = os.getcwd()
    Recs = os.listdir(cwd)
    newList = []
    listdata = dict()
    j = 0
    p = PDBParser(PERMISSIVE=1)
    with open('lastChecked.txt', 'r') as ftime:
        pT = float(ftime.readline())
    with open('lastChecked.txt', 'w') as stamp:
        stamp.write(str(time.time()))

    for rec in Recs:
        try:
            (name, ext) = os.path.splitext(rec)
            if ext != ".pdb":
                continue
            pickle_path = name + ".pickle"
            newList.append([rec, cwd])
            if not os.path.isfile(pickle_path) or float(fmt.filemtime(rec)) > pT:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    pdbRec = p.get_structure(name, rec)
                models = pdbRec.get_list()
                listdata[j] = str(name), len(models), cwd+'/'+str(name) + str(ext)
                rHoward = [str(name), len(models), str(name) + str(ext)]
                mP.spickle(pickle_path, rHoward)
            else:
                rHoward = mP.opickle(pickle_path)
                listdata[j] = str(rHoward[0]), rHoward[1], rHoward[2]

            j += 1
        except IOError as e:
            print(e)
Exemple #29
0
def assembleChain():
	"""Allow to assemble 2 chains together and save the new structure.

	Works on the module-level ``pdb_name``. The user enters two chain ids;
	in every model the parent chain of each residue of the second chain gets
	the first chain's id. The result is written to ``<name>_assemble.pdb``.

	NOTE(review): only the id of the second chain is changed; its residues
	are not moved into the first chain object.
	"""
	struct_name = pdb_name.partition('.')[0]
	pdb_parser = PDBParser()
	structure = pdb_parser.get_structure(struct_name, pdb_name)
	# Header and trailer are fetched but not used further in this function.
	header = pdb_parser.get_header()
	trailer = pdb_parser.get_trailer()

	first_id = raw_input('What is the 1st chain you want to assemble : ')
	second_id = raw_input('What is the 2nd chain you want to assemble : ')

	for model in structure:
		for chain in model:
			if chain.id == first_id:
				# The first chain itself is left untouched.
				continue
			if chain.id == second_id:
				for residue in chain:
					residue.get_parent().id = first_id

	writer = PDBIO()
	writer.set_structure(structure)
	writer.save(struct_name + '_assemble.pdb')
Exemple #30
0
def get_interaction_pairs_from_input(options):
    """
    Build the interaction dictionary for the pdb files found under
    ``options.infile``.

    Files with exactly two chains are used as they are; any other file is
    expanded through ``get_all_interaction_pairs``.  Each resulting structure
    is filed under the sorted pair of ids of the sequences similar to its
    first two chains.

    :param options: object providing ``infile`` (location of the pdb files)
        and ``verbose``; it is also handed on to the helper functions.
    :return: list ``[interaction_dict, id_dict, similar_sequences,
        seq_dict]``.
    """
    pdb_paths = get_pdb_from_directory(options.infile)
    pdb_parser = PDBParser(PERMISSIVE=1)

    # Save the pdb files in separate structures
    structure_list = []
    for pdb_path in pdb_paths:
        structure = pdb_parser.get_structure(get_structure_name(pdb_path),
                                             pdb_path)
        if len(list(structure.get_chains())) == 2:
            structure_list.append(structure)
        else:
            structure_list += get_all_interaction_pairs(options, pdb_path,
                                                        False)[0]

    id_dict = get_id_dict(structure_list)

    # Every chain of every structure takes part in the sequence comparison.
    chain_list = [chain
                  for structure in structure_list
                  for chain in structure.get_chains()]

    seq_dict = get_seq_dict(chain_list)
    similar_sequences = get_similar_sequences(chain_list, seq_dict)

    # Add all the interactions to a dictionary
    interaction_dict = {}
    for structure in structure_list:
        chains = list(structure.get_chains())
        first = id_dict[similar_sequences[chains[0]]]
        second = id_dict[similar_sequences[chains[1]]]
        nr_interaction = tuple(sorted([first, second]))
        interaction_dict.setdefault(nr_interaction, []).append(chains)

    clean_interaction_dict(interaction_dict, similar_sequences, options)

    if options.verbose:
        print('\n')
        counter = 0
        for pair, interactions in interaction_dict.items():
            print(pair)
            for interaction in interactions:
                print("\t%s" % interaction)
                counter += 1
        print(counter)

    # TODO: Remove unused chains from similar sequences

    return [interaction_dict, id_dict, similar_sequences, seq_dict]
Exemple #31
0
def main():
    """Detect polar contacts in a PDB structure and report their energies.

    Command-line driven workflow:

    1. Parse the options (``--backonly``, ``--nowats``, ``--diel``, ``--vdw``,
       ``--rlib`` and the positional ``pdb_path``) and echo them.
    2. Load the VdW parameter set and the residue library.
    3. Parse the structure (first model only) and collect the polar atoms.
    4. Search atom pairs closer than ``HBLNK``, discarding pairs in the same
       residue, pairs closer than ``COVLNK``, pairs on residues whose numbers
       differ by one and, optionally, pairs involving water.
    5. Print the contacts, then the electrostatic / VdW / total interaction
       energy of every distinct residue pair, and the five lowest totals.
    6. Show a bar chart and a main-chain/side-chain scatter plot.
    7. Print an electrostatic term for pairs whose residues are both in the
       hard-coded ``surface_res`` list.

    Exits with status 2 when no PDB path is given or the file cannot be read.
    """
    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')

    parser.add_argument('--backonly',
                        action='store_true',
                        dest='backonly',
                        help='Restrict to backbone')

    parser.add_argument('--nowats',
                        action='store_true',
                        dest='nowats',
                        help='Exclude water molecules')

    parser.add_argument('--diel',
                        type=float,
                        action='store',
                        dest='diel',
                        default=1.0,
                        help='Relative dielectric constant')

    parser.add_argument('--vdw',
                        action='store',
                        dest='vdwprm',
                        help='VDW Paramters file')

    parser.add_argument('--rlib',
                        action='store',
                        dest='reslib',
                        help='AminoAcid library')

    parser.add_argument('pdb_path')

    args = parser.parse_args()

    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)

    backonly = args.backonly
    nowats = args.nowats
    pdb_path = args.pdb_path
    vdwprm = args.vdwprm
    reslib = args.reslib
    diel = args.diel

    # Load VDW parameters
    vdwParams = VdwParamset(vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))

    # Load AA Library
    aaLib = ResiduesDataLib(reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    pdb_parser = PDBParser(PERMISSIVE=1)

    try:
        st = pdb_parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)

    # Checking for models
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")

    # Using Model 0 any way
    st = st[0]

    # Making a list of polar atoms
    if backonly:
        selected_atoms = backbone_polars
    else:
        selected_atoms = all_polars
    polats = [at for at in st.get_atoms() if at.id in selected_atoms]

    # Searching for contacts under HBLNK on different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        res1 = at1.get_parent()
        res2 = at2.get_parent()
        if res1 == res2:
            continue
        # Discard covalents and neighbours
        if (at1 - at2) < COVLNK:
            continue
        if abs(res2.id[1] - res1.id[1]) == 1:
            continue
        # Remove waters
        if nowats and (res1.get_resname() in waternames
                       or res2.get_resname() in waternames):
            continue
        # Store each pair with the lower serial number first.
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])

    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))

    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        print('{:14} {:14} {:6.3f} '.format(
            r1.get_resname() + ' ' + str(r1.id[1]) + hb[0].id,
            r2.get_resname() + ' ' + str(r2.id[1]) + hb[1].id, hb[0] - hb[1]))

    # Section header: printed once, after the whole contact table.
    print()
    print("Residue interactions")

    # Making list of residue pairs to avoid repeated pairs
    respairs = []
    for hb in hblist:
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        if [r1, r2] not in respairs:
            respairs.append([r1, r2])

    # One row per residue pair:
    # [resname1, resnum1, resname2, resnum2, eint, evdw, eint + evdw]
    energies = []

    for rpair in sorted(respairs, key=lambda i: i[0].id[1]):
        res1, res2 = rpair
        resid1 = res1.get_resname()
        resid2 = res2.get_resname()
        eint = 0.
        evdw = 0.

        for at1 in res1.get_atoms():
            atparam1 = aaLib.getParams(resid1, at1.id)
            vdwprm1 = vdwParams.atTypes[atparam1.atType]

            for at2 in res2.get_atoms():
                atparam2 = aaLib.getParams(resid2, at2.id)
                vdwprm2 = vdwParams.atTypes[atparam2.atType]
                dist = at1 - at2
                eint = eint + 332.16 * atparam1.charg * atparam2.charg / diel / dist
                eps = math.sqrt(vdwprm1.eps * vdwprm2.eps)
                sig = math.sqrt(vdwprm1.sig * vdwprm2.sig)
                evdw = evdw + 4 * eps * ((sig / dist)**12 - (sig / dist)**6)

        # Report and record the pair once, after all atom pairs are summed,
        # so the ranking below compares complete energies only.
        print(resid1, res1.id[1], resid2, res2.id[1], eint, evdw,
              eint + evdw)
        energies.append([
            resid1, res1.id[1], resid2, res2.id[1], eint, evdw,
            eint + evdw
        ])

    # Most stable contacts = lowest total energy (column 6).
    print("Five most stable contacts")
    stable = sorted(energies, key=lambda i: i[6])[:5]
    for element in stable:
        print(element)

    # NOTE(review): the plotted values and tick labels below are hard-coded
    # rather than taken from ``stable`` -- presumably copied from one
    # particular run; confirm before using with another structure.
    n_groups = 5
    eint_bars = (-96.879048334060371, -89.262401988309293,
                 -63.650369322307412, -51.488465980661772,
                 -50.345308360049728)
    vdw_bars = (-1.0577685827505385, -1.5931226867662258, 1.5038605656892994,
                -2.9601475910966375, -2.0108017155800604)
    etot_bars = (-97.936816916810912, -90.855524675075515,
                 -62.146508756618111, -54.448613571758408,
                 -52.356110075629786)
    fig, ax = plt.subplots()
    index = np.arange(n_groups)
    bar_width = 0.15
    opacity = 0.5
    plt.bar(index,
            eint_bars,
            bar_width,
            alpha=opacity,
            color='b',
            label='electrostatic energies')
    plt.bar(index + bar_width,
            vdw_bars,
            bar_width,
            alpha=opacity,
            color='g',
            label='van der waals energies')
    # Third series gets its own slot instead of overlapping the second one.
    plt.bar(index + 2 * bar_width,
            etot_bars,
            bar_width,
            alpha=opacity,
            color='r',
            label='total energies')
    plt.title('Energies for pair of residues')
    plt.xlabel('Contacts')
    plt.ylabel('Energy')
    plt.xticks(index + bar_width,
               ('LYS-ASP', 'LYS-GLU', 'GLU-LYS', 'GLU-ARG', 'ASP-ARG'))
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Classify every contact by where each atom sits (main chain if its name
    # is in backbone_polars, side chain otherwise) and collect the residue
    # numbers of each class for the scatter plot.
    both_main_x = []
    both_main_y = []
    both_side_x = []
    both_side_y = []
    main_side_x = []
    main_side_y = []
    side_main_x = []
    side_main_y = []
    scatter_groups = {
        ('main', 'main'): (1, both_main_x, both_main_y),
        ('side', 'side'): (2, both_side_x, both_side_y),
        ('main', 'side'): (3, main_side_x, main_side_y),
        ('side', 'main'): (4, side_main_x, side_main_y),
    }
    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        where0 = 'main' if hb[0].id in backbone_polars else 'side'
        where1 = 'main' if hb[1].id in backbone_polars else 'side'
        label = where0 + ':' + where1
        value, xs, ys = scatter_groups[(where0, where1)]
        xs.append(hb[0].get_parent().id[1])
        ys.append(hb[1].get_parent().id[1])
        print('{:14}{:14}{:14}{:14}{:6.3f}'.format(label, value, hb[0].id,
                                                   hb[1].id, hb[0] - hb[1]))

    plt.figure(figsize=(10, 8))
    plt.scatter(both_main_x, both_main_y, c='red', label='both_main')
    plt.scatter(both_side_x, both_side_y, c='green', label='both_side')
    plt.scatter(main_side_x, main_side_y, c='blue', label='main_side')
    plt.scatter(side_main_x, side_main_y, c='yellow', label='side_main')
    plt.title('Interaction')
    plt.xlabel('Residue1 number')
    plt.ylabel('Residue2 number')
    plt.legend(loc='upper right')
    plt.show()

    # The surface residues are the ones with an Area<5 (http://cib.cf.ocha.ac.jp/bitool/ASA/display.php?id=1513152459.2996)
    surface_res = [['ILE', 3], ['VAL', 5], ['ILE', 23], ['VAL', 26],
                   ['ILE', 30], ['GLN', 41], ['LEU', 43], ['LEU', 56],
                   ['ILE', 61], ['LEU', 67], ['LEU', 69]]
    surface_keys = {(resname, resnum) for resname, resnum in surface_res}

    for rpair in sorted(respairs, key=lambda i: i[0].id[1]):
        res1, res2 = rpair
        resname1 = res1.get_resname()
        resname2 = res2.get_resname()
        # Only pairs whose residues are BOTH listed in surface_res contribute.
        if (resname1, res1.id[1]) not in surface_keys:
            continue
        if (resname2, res2.id[1]) not in surface_keys:
            continue

        eint = 0.
        for atom1 in res1.get_atoms():
            # Parameters are looked up for the atoms of THIS pair, not for
            # loop variables left over from the earlier sections.
            atparam1 = aaLib.getParams(resname1, atom1.id)

            for atom2 in res2.get_atoms():
                atparam2 = aaLib.getParams(resname2, atom2.id)
                eint = eint + 80 * atparam1.charg * atparam2.charg / diel / (
                    atom1 - atom2)

        if eint != 0:
            # No VdW term is computed in this section, so only the
            # electrostatic term is reported.
            print(resname1, res1.id[1], resname2, res2.id[1], eint)
Exemple #32
0
    def __contains__(self, res):
        """True if the given residue is in any of the mapped fragments.

        @type res: L{Residue}
        """
        return (res in self.fd)

    def __getitem__(self, res):
        """
        @type res: L{Residue}

        @return: fragment classification
        @rtype: L{Fragment}
        """
        return self.fd[res]


if __name__ == "__main__":

    import sys

    # Map the fragments of the first model of the PDB file given as argv[1].
    first_model = PDBParser().get_structure("X", sys.argv[1])[0]
    mapper = FragmentMapper(first_model, 10, 5, "levitt_data")

    # Print every residue, followed by its fragment when one is mapped.
    for residue in Selection.unfold_entities(first_model, "R"):
        print("%s:" % residue)
        if residue not in mapper:
            continue
        print(mapper[residue])
Exemple #33
0
                logger.error(str(err))
                raise SystemExit
        else:
            logger.warning(
                "PDB structure already exists ({0}), no need to download it again"
                .format(input_pdb_file))
    else:
        pdb_code = filename
        input_pdb_file = args.pdb_file_name

    if not os.path.exists(input_pdb_file):
        logger.error("PDB structure file {0} not found".format(input_pdb_file))
        raise SystemExit

    # Check if chain belongs to this PDB
    pdb_parser = PDBParser(PERMISSIVE=True, QUIET=True)
    structure = pdb_parser.get_structure(filename, input_pdb_file)
    chain_ids = [chain.id for chain in structure.get_chains()]
    chain_id = args.chain_id.upper()
    if len(chain_id) > 1:
        logger.error("Wrong chain id {0}".format(chain_id))
        raise SystemExit
    if chain_id not in chain_ids:
        logger.error("Chain {0} provided not in available chains: {1}".format(
            chain_id, str(chain_ids)))
        raise SystemExit

    # Save only the given chain and discard residues with alternative positions
    io = PDBIO()
    current_pdb_file = "{0}{1}_{2}.pdb".format(output_dir, pdb_code, chain_id)
    for chain in structure.get_chains():
Exemple #34
0
                        return True
        return False

    def _test_dist(self, c, n):
        """Return 1 if distance between atoms<radius (PRIVATE)."""
        if (c - n) < self.radius:
            return 1
        else:
            return 0


if __name__ == "__main__":
    import sys
    from Bio.PDB.PDBParser import PDBParser

    p = PDBParser(PERMISSIVE=True)

    s = p.get_structure("scr", sys.argv[1])

    ppb = PPBuilder()

    print("C-N")
    for pp in ppb.build_peptides(s):
        print(pp.get_sequence())
    for pp in ppb.build_peptides(s[0]):
        print(pp.get_sequence())
    for pp in ppb.build_peptides(s[0]["A"]):
        print(pp.get_sequence())

    for pp in ppb.build_peptides(s):
        for phi, psi in pp.get_phi_psi_list():
# [email protected]

from Bio.PDB import PDBIO
from Bio.PDB.PDBParser import PDBParser
from optparse import OptionParser

# Command line: a single option naming the PDB file to read.
parser = OptionParser()
parser.add_option("-f", "--pdbfile",
                  default=None,
                  type="string",
                  help="pdb structure file for additional 3-coord cartesian per residue")
options, args = parser.parse_args()

# The name ``parser`` is reused for the structure parser from here on.
parser = PDBParser()

structure = parser.get_structure("mystruct", options.pdbfile)
model = structure[0]

# Start one accumulator at 0.0 for every residue number found in chain "A".
average_bfactors = dict.fromkeys(
    (residue.get_id()[1] for residue in model["A"]), 0.0)

# Add each chain's CA B-factor, divided by the number of chains in the model,
# to the accumulator of the matching residue number.
for chain in model.get_list():
    for residue in chain.get_list():
        if not residue.has_id("CA"):
            continue
        ca = residue["CA"]
        share = float(ca.get_bfactor()) / float(len(model.get_list()))
        average_bfactors[residue.get_id()[1]] += share
Exemple #36
0
from Bio.PDB.PDBIO import PDBIO
from Bio.PDB.PDBIO import Select
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.mmtf import MMTFParser
from Bio.PDB.PDBExceptions import PDBConstructionWarning
import os.path as op
import logging
import warnings
import ssbio.utils
from ssbio.biopython.bp_mmcifparser import MMCIFParserFix

# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Parser instances created once at import time, one per structure file format
# (mmCIF, PDB, MMTF).
cifp = MMCIFParserFix(QUIET=True)
pdbp = PDBParser(PERMISSIVE=True, QUIET=True)
mmtfp = MMTFParser()


def as_protein(structure, filter_residues=True):
    """Build a ``Protein`` from ``structure`` via ``Protein.from_structure``.

    Parameters:
        - structure: the structure object handed to ``Protein.from_structure``
        - filter_residues boolean; removes non-aa residues through
          Bio.PDB.Polypeptide is_aa function [Default: True]

    Returns a new structure object.
    """
    # The Protein class is imported at call time, inside the function.
    from ssbio.biopython.Bio.Struct.Protein import Protein

    protein = Protein.from_structure(structure, filter_residues)
    return protein


class StructureIO(PDBIO):
Exemple #37
0
# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'

# %%
from Bio.PDB.PDBParser import PDBParser
# Parse the backup ligand file and keep its first model and chain "A".
parser = PDBParser()
structure = parser.get_structure("test", "1osv_ligand_backup.pdb")
model = structure[0]
chain = model["A"]

# %%
# List every chain of the model (this rebinds ``chain`` from the cell above).
for chain in model:
    print(chain)

# %%
from Bio.PDB import PDBParser, PDBIO

# Writer object plus a second parse of the same file, bound to ``pdb``.
io = PDBIO()
pdb = PDBParser().get_structure("test", "1osv_ligand_backup.pdb")

# %%
import os

# Fresh writer, and the file-name suffix / directory used by the loop below.
io = PDBIO()
end = "_ligand.pdb"
directory = "./ligands/"

for filename in os.listdir(directory):
    if filename.endswith(end):  # IF the file ends with the generic ending,
        fileid = filename  # duplicate the fileid for a shortened title
        fileid = fileid.replace(end, '')  # remove the end
    b2 = len(sequence_b_list2)
    b3 = len(sequence_b_list3)
    b4 = len(sequence_b_list4)
    num1 = b1 + b2 + b3 + b4
    ratio1b = float('%.6f' % (b1 / num1))
    ratio2b = float('%.6f' % (b2 / num1))
    ratio3b = float('%.6f' % (b3 / num1))
    ratio4b = float('%.6f' % (b4 / num1))
    string_b = str(ratio1b) + ',' + str(ratio2b) + ',' + str(
        ratio3b) + ',' + str(ratio4b) + ',' + '0'
    return string_b


for file in PDBList:
    try:
        parser = PDBParser(PERMISSIVE=1)
        structure_id = os.path.splitext(file)[0]
        filename = file
        structure1 = parser.get_structure(structure_id, filename)

        model = structure1[0]
        chain_A = model["H"]
        chain_B = model["L"]
        surface_list_a, surface_list_b = surface_list(file)

        string_aa = feature_extraction_sequence2(surface_list_a)
        string_bb = feature_extraction_sequence_b2(surface_list_b)

        with open("result_surface_sequence2.txt", "a") as f:
            f.write(string_aa + "\n")
            f.write(string_bb + "\n")
Exemple #39
0
def read_pdb(parametersobject):
    """Recentre the initial dimer PDB and record chain metadata.

    Reads ``Initial_dimer_pdb``, ``Path_to_awsem`` and ``Python2_command``
    from ``parametersobject.parameterdic``, then:

    * parses the PDB (model 0) and requires exactly two chains;
    * shifts all atoms by the centre returned by ``COM`` for the chain with
      more residues and writes ``<name>_recentred.pdb``;
    * creates the working directories in the current directory;
    * runs the AWSEM helper scripts through ``os.system`` to round-trip the
      structure via a LAMMPS data file and a ``.lammpstrj`` file;
    * removes the intermediate files and stores the chain information in
      ``parametersobject.deriveddic`` before calling ``save_derived()``.

    ``sys.stdout`` is pointed at the null device while the work runs and is
    always restored afterwards. Diagnostics about the input structure are
    written to ``sys.stderr`` so that they stay visible.

    Exits with status 1 when the structure does not hold exactly two chains.

    Raises:
        ValueError: if the generated data file contains no line whose second
            column is ``'2'``, so the last atom id of the first chain cannot
            be determined.
    """
    pdbname = parametersobject.parameterdic['Initial_dimer_pdb']
    Path_to_awsem = parametersobject.parameterdic['Path_to_awsem']
    Python2_command = parametersobject.parameterdic['Python2_command']
    name = pdbname[:-4]
    recentred = name + "_recentred"
    first_chain_max_id = None

    actualstdout = sys.stdout
    devnull = open(os.devnull, 'w')
    sys.stdout = devnull
    try:
        structure = PDBParser(PERMISSIVE=1).get_structure('init', pdbname)
        if len(structure) > 1:
            sys.stderr.write(
                "More than one model found in PDB. Using model 0 only.\n")
        if len(structure[0]) > 2:
            sys.stderr.write("More than two chains found in PDB. Exiting.\n")
            sys.exit(1)
        elif len(structure[0]) < 2:
            sys.stderr.write("Less than two chains found in PDB. Exiting.\n")
            sys.exit(1)

        chain = structure[0].get_list()
        chainnames = [c.id for c in chain]
        if len(chain[0]) < len(chain[1]):
            bigger = 1
            bigid = chain[1].id
            smallid = chain[0].id
            first_chain_is_bigger = False
        else:
            bigger = 0
            bigid = chain[0].id
            smallid = chain[1].id
            first_chain_is_bigger = True
        # now chain[bigger] is the bigger chain
        # next steps are
        # recentre
        # get .data file.
        # convert to lammpstrj
        # convert back to pdb
        # yeah, it's a bit ridiculous but I don't know how the weirdly mangled pdb is created by awsem and I need it to be exactly the same as the simulation.
        # remove useless files
        # write information
        centre = COM(chain[bigger])
        for atom in structure[0].get_atoms():
            atom.set_coord(atom.coord - centre._ar)
        w = PDBIO()
        w.set_structure(structure)
        w.save(recentred + '.pdb')

        cwd = os.getcwd()

        directorynames = [
            'md_input', 'md_output', 'analysis', 'results_main',
            'results_individual', 'pdb_trajectories'
        ]

        for d in directorynames:
            directory = os.path.normpath(cwd + '/' + d)
            try:
                os.makedirs(directory)
            except OSError:
                # An already existing directory is fine; anything else is not.
                if not os.path.isdir(directory):
                    raise

        # NOTE(review): these commands are assembled as shell strings from
        # configuration values; the values must be trusted.
        os.system(Python2_command + " " + Path_to_awsem +
                  "/create_project_tools/PDBToCoordinates.py " + name +
                  "_recentred " + name + "_recentred" + ".coord")
        os.system(Python2_command + " " + Path_to_awsem +
                  "/create_project_tools/CoordinatesToWorkLammpsDataFile.py " +
                  name + "_recentred" + ".coord " + name + "_recentred" +
                  ".data -b")
        os.system(Python2_command + " " + Path_to_awsem +
                  "/frag_mem_tools/Pdb2Gro.py " + name + "_recentred " +
                  " md_input/chain1.gro " + chain[0].id)
        os.system(Python2_command + " " + Path_to_awsem +
                  "/frag_mem_tools/Pdb2Gro.py " + name + "_recentred " +
                  " md_input/chain2.gro " + chain[1].id)

        # Convert the atom section of the data file into a one-frame
        # lammpstrj file with coordinates scaled from [-200, 200] to [0, 1].
        with open(recentred + ".data", "r") as f_data, \
                open(recentred + ".lammpstrj", "w+") as f_lammps:
            f_lammps.write("ITEM: TIMESTEP\n0\nITEM: BOX BOUNDS ff ff ff\n")
            for _ in range(3):
                f_lammps.write(
                    "-2.0000000000000000e+02 2.0000000000000000e+02\n")
            f_lammps.write("ITEM: ATOMS id type xs ys zs\n")
            firstchain = True
            for linecount, line in enumerate(f_data):
                # The first 28 lines are skipped; the first blank line after
                # them ends the section being read.
                if linecount < 28:
                    continue
                linesplit = line.strip().split()
                if len(linesplit) < 1:
                    break
                x = (float(linesplit[5]) + 200) / 400
                y = (float(linesplit[6]) + 200) / 400
                z = (float(linesplit[7]) + 200) / 400
                f_lammps.write(linesplit[0] + ' ' + linesplit[3])
                f_lammps.write(' %.9f %.9f %.9f\n' % (x, y, z))
                # The first line whose second column is '2' marks the start
                # of the second chain; the id just before it ends the first.
                if firstchain and linesplit[1] == '2':
                    firstchain = False
                    first_chain_max_id = int(linesplit[0]) - 1

        # Build the script path on its own so a missing trailing separator in
        # Path_to_awsem cannot glue the directory names together, and so that
        # normpath never touches the command-line argument.
        script = os.path.normpath(
            os.path.join(
                Path_to_awsem, "results_analysis_tools",
                "BuildAllAtomsFromLammps_seq_multichain.py"))
        location = script + " " + recentred + ".lammpstrj"
        os.system(Python2_command + " " + location + " refpdb " + name +
                  "_recentred" + ".seq")
    finally:
        # Always hand the real stdout back, even on sys.exit or an error.
        sys.stdout = actualstdout
        devnull.close()

    os.remove("refpdb.psf")
    os.remove(recentred + ".lammpstrj")
    os.remove(recentred + ".data")
    os.remove(recentred + ".coord")

    if first_chain_max_id is None:
        raise ValueError(
            "Could not find the start of the second chain in " + recentred +
            ".data")

    d = parametersobject.deriveddic
    d['first_chain'] = chainnames[0]
    d['second_chain'] = chainnames[1]
    d['first_chain_length'] = len(chain[0])
    d['second_chain_length'] = len(chain[1])
    d['bigger_chain'] = bigid
    d['smaller_chain'] = smallid
    d['first_chain_max_id'] = first_chain_max_id
    d['first_chain_is_bigger'] = first_chain_is_bigger
    parametersobject.save_derived()
    pylab.xlim(0, c_info_base[-1])
    pylab.ylim(0, c_info_uniq[-1])
    rcParams['figure.figsize'] = 5, 10
    pylab.savefig(out_id + '.png', dpi=300)
    pylab.close()


def get_file(filein):
    """Return the whole content of the text file ``filein`` as one string.

    The file is opened in text mode and closed again before returning.
    """
    with open(filein, "r") as f_pairs:
        return f_pairs.read()


parser = PDBParser(PERMISSIVE=1, QUIET=1)

if __name__ == "__main__":
    parser2 = argparse.ArgumentParser()
    parser2.add_argument('-i',
                         '--input',
                         dest="infile",
                         action="store",
                         nargs="?",
                         default='.',
                         help="Input FASTA file",
                         required=True)
    parser2.add_argument(
        '-o',
        '--output',
        dest="outfile",
Exemple #41
0
if len(sys.argv)<=3:
    print "\nExtractGoModelCGCoeffs.py Input_file PDB_id snapshot\n"
    print "-s\tSplit into files for each chain"
    exit()

filename = sys.argv[1]
pdb_id = sys.argv[2]

if pdb_id[-4:].lower()==".pdb":
        pdb_file = pdb_id
else:
        pdb_file = pdb_id + ".pdb"

frame = int(sys.argv[3])

p = PDBParser(PERMISSIVE=1)

s = p.get_structure(pdb_id, pdb_file)

chains = s[0].get_list()
chain = chains[0]
for res in chain:
	is_regular_res = res.has_id('CA') and res.has_id('O')
	res_id = res.get_id()[0]
        if (res_id==' ' or res_id=='H_MSE' or res_id=='H_M3L') and is_regular_res:
		ca_atoms_pdb.append(res['CA'].get_coord())

for i in range( 0, len(ca_atoms_pdb) ):
	sigmaN.append([])
	for j in range( i+4, len(ca_atoms_pdb) ):
		if abs(j-i)<3: 
Exemple #42
0
                    fp.write("TER\n")
            if model_flag and model_residues_written:
                fp.write("ENDMDL\n")
            if write_end:
                fp.write('END\n')
        if close_file:
            fp.close()


if __name__ == "__main__":
    # Self-test: argv[1] is written to out1.pdb; argv[1] and argv[2] are then
    # written one after the other into out2.pdb.

    from Bio.PDB.PDBParser import PDBParser

    import sys

    p = PDBParser(PERMISSIVE=1)

    s = p.get_structure("test", sys.argv[1])

    io = PDBIO()
    io.set_structure(s)
    io.save("out1.pdb")

    # The output handle is managed by ``with`` so it is flushed and closed
    # even if parsing or saving fails.
    with open("out2.pdb", "w") as fp:
        s1 = p.get_structure("test1", sys.argv[1])
        s2 = p.get_structure("test2", sys.argv[2])
        io = PDBIO(1)
        io.set_structure(s1)
        io.save(fp)
        io.set_structure(s2)
        io.save(fp, write_end=1)
class Rebuild(unittest.TestCase):
    """Read PDB and mmCIF structures, convert to/from internal coordinates."""

    # Parsers and structures are class attributes: the files are parsed once,
    # when the class body is executed, and shared by all test methods.
    PDB_parser = PDBParser(PERMISSIVE=True, QUIET=True)
    CIF_parser = MMCIFParser(QUIET=True)
    pdb_1LCD = PDB_parser.get_structure("1LCD", "PDB/1LCD.pdb")
    pdb_2XHE = PDB_parser.get_structure("2XHE", "PDB/2XHE.pdb")
    cif_3JQH = CIF_parser.get_structure("3JQH", "PDB/3JQH.cif")
    cif_4CUP = CIF_parser.get_structure("4CUP", "PDB/4CUP.cif")

    def test_rebuild_multichain_missing(self):
        """Convert multichain missing atom protein to internal coordinates and back."""
        # 2XHE has regions of missing chain, last residue has only N
        r = structure_rebuild_test(self.pdb_2XHE, False)
        # r is indexed by string keys; the expected counts are pinned below.
        self.assertEqual(r["residues"], 787)
        self.assertEqual(r["rCount"], 835)
        self.assertEqual(r["rMatchCount"], 835)
        self.assertEqual(r["aCount"], 6267)
        self.assertEqual(r["disAtmCount"], 0)
        self.assertEqual(r["aCoordMatchCount"], 6267)
        self.assertEqual(len(r["chains"]), 2)
        self.assertTrue(r["pass"])

    def test_rebuild_disordered_atoms_residues(self):
        """Convert disordered protein to internal coordinates and back."""
        # 3jqh has both disordered residues
        # and disordered atoms in ordered residues
        # Warnings raised during the rebuild are recorded in ``w`` (which is
        # not inspected afterwards) instead of being emitted.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always", PDBConstructionWarning)
            r = structure_rebuild_test(self.cif_3JQH, False)
        # print(r)
        self.assertEqual(r["residues"], 26)
        self.assertEqual(r["rCount"], 47)
        self.assertEqual(r["rMatchCount"], 47)
        self.assertEqual(r["aCount"], 217)
        self.assertEqual(r["disAtmCount"], 50)
        self.assertEqual(r["aCoordMatchCount"], 217)
        self.assertEqual(len(r["chains"]), 1)
        self.assertTrue(r["pass"])

    def test_model_change_internal_coords(self):
        """Get model internal coords, modify psi and chi1 values and check."""
        # Pick the model whose serial_num is 2; the loop variable keeps its
        # value after the break (if none matches, mdl is the last model).
        for mdl in self.pdb_1LCD:
            if mdl.serial_num == 2:
                break
        mdl.atom_to_internal_coordinates()
        # other tests show can build with arbitrary internal coords
        # build here so changes below trigger more complicated
        # xAtoms_needs_update mask arrays
        mdl.internal_to_atom_coordinates()
        # New angle values written to each residue, keyed by str(residue),
        # plus the number of residues changed for each angle kind.
        nvt = {}
        nvc1 = {}
        nvpsi = {}
        tcount = 0
        c1count = 0
        psicount = 0
        for r in mdl.get_residues():
            ric = r.internal_coord
            if ric:
                # hedra change
                tau = ric.get_angle("tau")
                if ric.rprev != [] and tau is not None:
                    tcount += 1
                    nv = tau + 0.5
                    ric.set_angle("tau", nv)
                    nvt[str(r)] = nv
                # sidechain dihedron change
                chi1 = ric.get_angle("chi1")
                if chi1 is not None:
                    c1count += 1
                    nv = chi1 + 90
                    # wrap back below 180 after the shift
                    if nv > 180.0:
                        nv -= 360.0
                    ric.set_angle("chi1", nv)
                    nvc1[str(r)] = nv
                # backbone dihedron change
                psi = ric.get_angle("psi")
                if psi is not None:
                    psicount += 1
                    nv = psi - 90
                    # wrap back above -180 after the shift
                    if nv < -180.0:
                        nv += 360.0
                    ric.set_angle("psi", nv)
                    nvpsi[str(r)] = nv
        mdl.internal_to_atom_coordinates()
        # Round trip: write the modified structure as PDB text into memory,
        # parse it again and recompute the internal coordinates.
        sf = StringIO()
        write_PDB(self.pdb_1LCD, sf)
        sf.seek(0)
        new_1LCD = self.PDB_parser.get_structure("1LCD", sf)
        for mdl in new_1LCD:
            if mdl.serial_num == 2:
                break
        mdl.atom_to_internal_coordinates()
        ttcount = 0
        c1tcount = 0
        psitcount = 0
        # Every angle read back must match the value set above to one
        # decimal place.
        for r in mdl.get_residues():
            ric = r.internal_coord
            if ric:
                tau = ric.get_angle("tau")
                if ric.rprev != [] and tau is not None:
                    ttcount += 1
                    self.assertAlmostEqual(tau, nvt[str(r)], places=1)
                chi1 = ric.get_angle("chi1")
                if chi1 is not None:
                    c1tcount += 1
                    self.assertAlmostEqual(chi1, nvc1[str(r)], places=1)
                psi = ric.get_angle("psi")
                if psi is not None:
                    psitcount += 1
                    self.assertAlmostEqual(psi, nvpsi[str(r)], places=1)
        # The same number of angles must be found before and after the round
        # trip, and each kind must have occurred at least once.
        self.assertEqual(tcount, ttcount)
        self.assertEqual(c1count, c1tcount)
        self.assertEqual(psicount, psitcount)
        self.assertTrue(ttcount > 0)
        self.assertTrue(c1count > 0)
        self.assertTrue(psicount > 0)

    def test_write_SCAD(self):
        """Check SCAD output plus MaxPeptideBond and Gly CB.

        SCAD tests: scaling, transform mtx, extra bond created (allBonds)
        """
        sf = StringIO()
        write_SCAD(
            self.cif_4CUP, sf, 10.0, pdbid="4cup", backboneOnly=True, includeCode=False
        )
        sf.seek(0)
        # Set when the marker line is seen, so the line after it is checked.
        next_one = False
        with as_handle(sf, mode="r") as handle:
            for aline in handle.readlines():
                if "// (1856_S_CB, 1856_S_CA, 1856_S_C)" in aline:
                    m = re.search(r"\[\s+(\d+\.\d+)\,", aline)
                    if m:
                        # test correctly scaled atom bond length
                        self.assertAlmostEqual(float(m.group(1)), 15.30582, places=3)
                    else:
                        self.fail("scaled atom bond length not found")
                elif '[ 1, "1857M",' in aline:
                    next_one = True
                elif next_one:
                    next_one = False
                    # test last residue transform looks roughly correct
                    # some differences due to sorting issues on different python
                    # versions
                    target = [-12.413, -3.303, 35.771, 1.0]
                    ms = re.findall(  # last column of each row
                        r"\s+(-?\d+\.\d+)\s+\]", aline
                    )
                    if ms:
                        for i in range(0, 3):
                            self.assertAlmostEqual(float(ms[i]), target[i], places=0)
                    else:
                        self.fail("transform not found")
        # NOTE(review): the same StringIO is rewound and written again without
        # being truncated, so text from the first write could remain after the
        # second output if that one is shorter -- confirm this is acceptable.
        sf.seek(0)
        # NOTE(review): this class-level flag is not reset anywhere in this
        # test class -- confirm it cannot affect tests that run afterwards.
        IC_Residue.gly_Cbeta = True
        write_SCAD(
            self.pdb_2XHE[0]["A"],
            sf,
            10.0,
            pdbid="2xhe",
            maxPeptideBond=100.0,
            includeCode=False,
        )
        sf.seek(0)
        # One flag per feature that has to show up somewhere in the output.
        allBondsPass = False
        maxPeptideBondPass = False
        glyCbetaFound = False
        with as_handle(sf, mode="r") as handle:
            for aline in handle.readlines():
                # test extra bond created in TRP (allBonds is True)
                if '"Cres", 0, 0, 1, 0, StdBond, "W", 24, "CD2CE3CZ3"' in aline:
                    allBondsPass = True
                # test 509_K-561_E long bond created
                if "509_K" in aline and "561_E" in aline:
                    maxPeptideBondPass = True
                if "(21_G_CB, 21_G_CA, 21_G_C)" in aline:
                    glyCbetaFound = True
                    target = [15.33630, 110.17513, 15.13861]
                    ms = re.findall(r"\s+(-?\d+\.\d+)", aline)
                    if ms:
                        for i in range(0, 3):
                            self.assertAlmostEqual(float(ms[i]), target[i], places=0)
                    else:
                        self.fail("Cbeta internal coords not found")

        self.assertTrue(allBondsPass)
        self.assertTrue(glyCbetaFound)
        self.assertTrue(maxPeptideBondPass)
Exemple #44
0
args = parse_cmd.parse_args()

# Echo the run settings; each value is printed under its own label.
print("PDB.filename:", args.pdb_file.name)
print("Residue Lib.:", args.reslib_file)
print("VdW Params.:", args.vdwprm_file)
print("Distance:", args.cutoff_dist)

# Loading Libraries
# loading residue library from data/aaLib.lib
residue_library = ResiduesDataLib(args.reslib_file)

# loading VdW parameters
ff_params = VdwParamset(args.vdwprm_file)

parser = PDBParser(PERMISSIVE=1)
print('Parsing', args.pdb_file)
# load structure from the name of the PDB file handle
st = parser.get_structure('STR', args.pdb_file.name)

# assign data types, and charges from libraries
# We will use the xtra attribute in Bio.PDB.Atom to hold the new data
# Possible errors on N-term and C-Term atoms
# Possible errors on HIS alternative forms

en.add_atom_parameters(st, residue_library, ff_params)

# Calculating surfaces
# The specific PATH to naccess script (in soft) is needed
# ASA goes to .xtra field directly
Exemple #45
0
import optparse
import string
import numpy as np
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBIO import PDBIO
# Command-line interface: the only option defined here is --id, the id that
# is attached to the parsed PDB structure.
option_parser = optparse.OptionParser()
option_parser.add_option('--id',
                         type='str',
                         help='id to attach to pdb structure')

# Parsing happens at module level; positional arguments end up in ``args``.
options, args = option_parser.parse_args()

#TODO PDBParser has additional keyword arguments like PERMISSIVE.
# We may decide to add options to the option parser for such
# keywords, but for now let's not over-engineer.
pdb_parser = PDBParser()


def _calculate_center_of_mass(structure):
    total_mass = 0
    mx_total = 0
    my_total = 0
    mz_total = 0
    for atom in structure.get_atoms():
        coords = atom.coord.tolist()
        mass = atom.mass
        total_mass += mass
        mx_total += coords[0] * mass
        my_total += coords[1] * mass
        mz_total += coords[2] * mass
    return [
Exemple #46
0
def get_interaction_pairs(options):
    """
    Write one PDB file per unique pairwise chain interaction of a structure.

    The structure at ``options.infile`` is parsed, its neighbouring chains are
    grouped by pair of chain types, the groups are pruned by
    ``clean_interaction_dict`` and every remaining chain pair is saved as
    ``<structure_id>/<structure_id>_<chain1><chain2>.pdb``.  Plain files that
    already exist in the output folder are deleted first.

    :param options: object providing ``infile`` (path of the input PDB file)
        and ``verbose`` (print the interaction table when true); it is also
        handed to ``get_neighbor_chains``.
    :return: the structure id, which is also the name of the output folder.
    """
    pdb_filename = options.infile
    # Loading the pdb file into a structure object
    parser = PDBParser(PERMISSIVE=1)

    structure_id = get_structure_name(pdb_filename)
    structure = parser.get_structure(structure_id, pdb_filename)

    neighbor_chains = get_neighbor_chains(structure, options)

    seq_dict = get_seq_dict(structure.get_chains())

    similar_sequences = get_similar_sequences(list(structure.get_chains()), seq_dict)

    # Organize similar_sequences and neighbor_chains in a dictionary keyed by
    # a sorted pair of chain-type ids (per the original author's note, one id
    # stands for all chains with more than 95% similarity); each value lists
    # all pairwise interactions between those two chain types.
    interaction_dict = {}
    for chain1 in neighbor_chains:
        for chain2 in neighbor_chains[chain1]:
            nr_interaction = tuple(sorted([similar_sequences[chain1].get_id(), similar_sequences[chain2].get_id()]))
            interaction_dict.setdefault(nr_interaction, []).append([chain1, chain2])

    clean_interaction_dict(interaction_dict, similar_sequences)

    if options.verbose:
        counter = 0
        print('\n')
        for pair in interaction_dict:
            print(pair)
            # Loop variable renamed so it no longer shadows the builtin int.
            for interaction in interaction_dict[pair]:
                print("\t%s" % interaction)
                counter += 1
        print(counter)

    if not os.path.exists(structure_id):
        os.makedirs(structure_id)
    else:
        # Start from a clean folder: drop plain files left by a previous run.
        for the_file in os.listdir(structure_id):
            file_path = os.path.join(structure_id, the_file)
            if os.path.isfile(file_path):
                os.unlink(file_path)

    io = PDBIO()
    io.set_structure(structure)

    for pair in interaction_dict:
        for interaction in interaction_dict[pair]:
            io.save('%s/%s_%s%s.pdb' % (structure_id, structure_id, interaction[0].get_id(), interaction[1].get_id()),
                    ChainSelect(interaction[0], interaction[1]))

    return structure_id
def _add_flanking_seq_fragments(ddg_data_dict: Dict, dataset: str,
                                pdb_filename: str):
    """Fill the flank and residue-type columns for the rows of one PDB entry.

    The data frame ``ddg_data_dict[dataset]`` is modified in place.  For each
    row whose ``pdbid`` equals the id derived from ``pdb_filename`` and whose
    wild-type letter matches the sequence read from the PDB file, the columns
    ``left_flank``, ``right_flank``, ``wt_restype`` and ``mt_restype`` are
    set; mismatching rows are reported with a "WRONG" print.

    Args:
        ddg_data_dict: mapping from dataset name to a data frame that has the
            columns ``pdbid``, ``variant`` and ``chainid``.
        dataset: key of the data frame to update.
        pdb_filename: path of the PDB file; the first four characters of its
            last '/'-separated component, upper-cased, are the PDB id.

    Raises:
        KeyError: if a matching row names a chain absent from the first model.
        ValueError: if a matching row's residue number is not among the
            collected residues of that chain.
    """
    ddg_data = ddg_data_dict[dataset]

    # Create the output columns on first use, in their original order.
    for column in ("left_flank", "wt_restype", "mt_restype", "right_flank"):
        if column not in ddg_data.columns:
            ddg_data[column] = np.nan

    pdbid = pdb_filename.split(r"/")[-1][0:4].upper()

    from Bio.PDB.PDBParser import PDBParser

    # Built once instead of once per residue.
    accepted_resnames = frozenset(index_to_three(i) for i in range(20))

    p = PDBParser()
    model_first = p.get_structure(pdbid, pdb_filename)[0]
    chain_id_to_pdb_seq = {}
    chain_id_to_pdb_residue_numbers = {}
    for chain in model_first:
        pdb_seq = []
        pdb_residue_numbers = []
        for residue in chain.get_residues():
            resname = residue.resname.strip()
            if resname in accepted_resnames:
                pdb_residue_numbers.append(residue.id[1])
                pdb_seq.append(three_to_one(resname))
        chain_id_to_pdb_seq[chain.id] = "".join(pdb_seq)
        chain_id_to_pdb_residue_numbers[chain.id] = pdb_residue_numbers

    for idx, row in ddg_data.iterrows():
        if row["pdbid"] != pdbid:
            continue

        variant = row["variant"]
        # variant is <wild-type letter><residue number><mutant letter>.
        residue_number = int(variant[1:-1])
        chain_id = row["chainid"]

        pdb_sequence = chain_id_to_pdb_seq[chain_id]
        # Position of the residue inside the collected sequence.
        resid = chain_id_to_pdb_residue_numbers[chain_id].index(
            residue_number)

        if variant[0] == pdb_sequence[resid]:
            ddg_data.loc[idx, "left_flank"] = _trim_left_flank(
                pdb_sequence[:resid])
            ddg_data.loc[idx, "right_flank"] = _trim_right_flank(
                pdb_sequence[resid + 1:])
            ddg_data.loc[idx, "wt_restype"] = variant[0]
            ddg_data.loc[idx, "mt_restype"] = variant[-1]
        else:
            print("WRONG", row[["pdbid", "variant"]])
Exemple #48
0
def GenerateMutations(DataFrame, PDB, PATH):
    """
    Purpose:

    Write the mutated pdb protein files for one entry of the
    skempi_v2 database (https://life.bsc.es/pid/skempi2/) and run
    FoldX on the wild type and on every mutant.

    Both single mutations and multiple comma separated mutations
    are taken in to account.

    If there are multiple mutation indices for the same protein,
    then this will generate multiple pdb files.

    Parameters
    ----------
    DataFrame: pandas table
        The pandas table from read_csv; the columns '#Pdb', 'NAME' and
        'MutCleanSplit' are read.
    PDB: str
        File name of the wild-type pdb file, e.g. '1CSE.pdb'
    PATH: str
        Directory that is listed for '.pdb' files
    """
    try:
        from Bio.PDB.PDBIO import PDBIO
        from Bio.PDB.PDBParser import PDBParser
        from Bio.Data.IUPACData import protein_letters
        from Bio.SeqUtils.ProtParam import ProteinAnalysis
        from Bio.PDB.Polypeptide import PPBuilder
        from Bio.PDB.Polypeptide import standard_aa_names  # Standard amino acid names - https://biopython.org/DIST/docs/api/Bio.PDB.Polypeptide-module.html
        from Bio.PDB.Polypeptide import aa1  #  aa1 = 'ACDEFGHIKLMNPQRSTVWY'
        from Bio.PDB.Polypeptide import aa3  #  aa3 = ['ALA', 'CYS', 'ASP', 'GLU', 'PHE', 'GLY', 'HIS', 'ILE',... ]
        import tqdm as tqdm  # tqdm - useful for estimating computing times for long for loops

    except ImportError:
        print("ERROR: Need to check Biopython imports!")

    # Before running anything, call foldx on the WT to get the optimized structure to mutate
    title = PDB.split('.')
    name = title[0]
    callfoldx(PDB)  # Call FoldX on the WT

    # Path to where the WT PDBs are stored
    # NOTE(review): these two lists are filled but not read again in the
    # visible part of the function.
    WTArray = []
    nameArray = []

    for entry_name in os.listdir(PATH):
        if entry_name.endswith(".pdb"):
            WTArray.append(os.path.join(PATH, entry_name))
            nameArray.append(entry_name)

    # Subprocessing block for WT.  The commands run synchronously and without
    # a shell: the un-awaited Popen calls used before could still be running
    # when the moved files were read further down.
    subprocess.run(["mkdir", name])  # Make directory
    subprocess.run(["mv", "OP_{}.fxout".format(name),
                    "{}/.".format(name)])  # Move optimized fxout file to directory
    # Rename Optimized_PDB.pdb to the name of the original file so the rest
    # of the function can keep using PDB.
    subprocess.run(["mv", "Optimized_{}.pdb".format(name), PDB])

    MutationSpecies = []  # List to store the names of the mutated species
    # Dictionary from one-letter to three-letter amino acid codes
    AminoAcidListDict = dict(zip(aa1, aa3))
    parser = PDBParser(PERMISSIVE=1)  # Standard PDB parser
    PDBList = {"{}.pdb".format(pdb.split('_')[0]) for pdb in DataFrame['#Pdb']}

    if PDB not in PDBList:
        raise Exception(
            "The PDB is not in the SKEMPI list"
        )  # Not in the PDB list we expect - i.e. from the SKEMPI list

    # Search for PDB mutations that contain the PDB string - e.g. the 1CSE mutations will have the format 1CSE_E_I
    # where it indicates the mutations were made in the 1CSE E and I chains

    MutationList = DataFrame.loc[(DataFrame['NAME'] == PDB.split('.')[0]
                                  )]  # This should get the PDB mutations
    MutationList = MutationList.reset_index()
    print(MutationList)

    # Write one mutated pdb file per row of the mutation table
    for index, entry in MutationList.iterrows():
        structure = parser.get_structure(str(title[0]),
                                         PDB)  # reset structure each time
        model = structure[0]  # Switch back to the unchanged one
        for mut in entry['MutCleanSplit']:
            # Raw string: '\d' in a plain literal is an invalid escape.
            # Splits e.g. 'LI45G' into 'L', 'I', '45', 'G'.
            initAA, chain, loc, mutAA = re.findall(r'(\d+|.)', mut)
            wild_type_name = AminoAcidListDict[str(initAA)]
            mutant_name = AminoAcidListDict[str(mutAA)]
            residue = model[chain][int(loc)]
            # Check we are reading the right residue and index, i.e. that the
            # model is the unmutated pdb
            assert residue.resname == wild_type_name
            # The index and chain arguments used to be swapped in this message.
            print("Mutating {} on index {} of chain {} to {}".format(
                wild_type_name, loc, chain, mutant_name))
            # Only the residue name is replaced
            residue.resname = mutant_name
            # This will check that the mutation was successful
            assert model[chain][int(loc)].resname == mutant_name
        mutanttotalstring = '_'.join(entry['MutCleanSplit'])
        # Name of protein, the mutation list, and the index in the pandas table
        mutatedname = "{}_{}_{}.pdb".format(entry['#Pdb'], mutanttotalstring,
                                            index)
        MutationSpecies.append(mutatedname)
        # NOTE(review): PDBIO's first positional parameter is use_model_flag,
        # so passing the structure here switches that flag on - confirm this
        # is intended before changing it.
        io = PDBIO(structure)
        io.set_structure(model)
        io.save(mutatedname)
        print("Produced new mutation PDB file {}".format(
            mutatedname))  # Printing out sign to say the pdb was produced

    # Call foldx on the mutated species

    print(" -----------------------------------------------")
    print("The following mutant species are to be optimized")
    print(" -----------------------------------------------")

    for mutant in MutationSpecies:
        print("PDB file: {}".format(mutant))
    ANS = []

    for species in MutationSpecies:
        callfoldx(species)  # Call FoldX on each mutated species
        stem = species.split(".")[0]
        destination = "{}/.".format(name)
        # Moves finish before ReadEnergy looks into the directory.
        subprocess.run(["mv", species, destination])
        subprocess.run(["mv", "OP_{}.fxout".format(stem), destination])
        subprocess.run(["mv", "Optimized_{}.pdb".format(stem), destination])
        ANS.extend(ReadEnergy("{}/".format(name)))
        print(ANS)

    print("Finished Optimization")
    print("Running Ialign..")
Exemple #49
0
# Echo the selection before validating it.
print()
print("PDB file:", args.pdb_file.name)
print("Selected Residue 1 Chain: {}, Residue number: {}".format(
    chain_id1, res_num1))
print("Selected Residue 2 Chain: {}, Residue number: {}".format(
    chain_id2, res_num2))

# Check whether the input is complete. sys.exit with a string prints it on
# stderr and exits.
if not chain_id1 or not res_num1:
    sys.exit("ERROR: unknown either chain id or residue 1 number")

if not chain_id2 or not res_num2:
    sys.exit("ERROR: unknown either chain id or residue 2 number")

parser = PDBParser(PERMISSIVE=1)

print()
print('Parsing', args.pdb_file.name)

# Load the structure from the PDB file handle
st = parser.get_structure('STR', args.pdb_file)

# Checking that both residues exist in the first model and are different
if chain_id1 not in st[0] or res_num1 not in st[0][chain_id1]:
    sys.exit("ERROR: non existing chain or residue")
if chain_id2 not in st[0] or res_num2 not in st[0][chain_id2]:
    sys.exit("ERROR: non existing chain or residue")
if (chain_id1 == chain_id2) and (res_num1 == res_num2):
    sys.exit("ERROR: identical residues")
print()
def get_pdb(pdb_code):
    """Parse a PDB file and return the resulting structure.

    pdb_code is used both as the structure id and as the file argument of
    PDBParser.get_structure, so it has to be something the parser can open.
    """
    # The bare ``structure.header`` expression that followed the parse was a
    # no-op and has been removed.
    return PDBParser().get_structure(pdb_code, pdb_code)
Exemple #51
0
def make_structure_for_pdbfile(file, structure_id):
    """Parse *file* with a permissive PDBParser and return the structure.

    structure_id is the id given to the returned structure.
    """
    return PDBParser(PERMISSIVE=1).get_structure(structure_id, file)
# VARIOUS CLASSIFIERS FOR THE PDB FILES

import gzip
import sys
from Bio.PDB.PDBParser import PDBParser

# Strict parser (PERMISSIVE=0) with warnings silenced (QUIET=True).
parser = PDBParser(PERMISSIVE=0, QUIET=True)

pathPDB = "/bmm/data/rcsb/data/structures/all/pdb"

# LOADING THE FILE WHICH WAS USED TO DEFINE THE MUTANTS

f = open("pdb_seqres.txt", "r")
ft = f.readlines()
f.close()

# Scan every second line while the record type is "protein".
# R is the second whitespace-separated field of the line with its first four
# characters dropped (presumably a "mol:" prefix - TODO confirm against the
# file format).
k = 0
gt = ft[k].split()
R = gt[1][4:len(gt[1])]
while R == "protein":
    gt = ft[k].split()
    R = gt[1][4:len(gt[1])]
    k = k + 2

# NOTE(review): k is advanced after each read, so when the loop has run,
# proindex is two lines past the first line whose R is not "protein".
# Confirm this offset is intended. The loop also raises IndexError if every
# scanned line is a protein record.
proindex = k

# OUTPUT

# Both handles are left open here; nothing in this part of the script closes
# them.
g = open("PDB_classifier.txt", "w")
h = open("struct_not_found.txt", "w")
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.cm as cm
import matplotlib.colors as col
from scipy.optimize import curve_fit
import pandas as pd
from math import factorial
import random
import time
import Bio
import pickle

from Bio.PDB.PDBParser import PDBParser
parser = PDBParser(PERMISSIVE=1)

def atom_id(atom):
    n_atomtypes = 4
    id_mat = np.zeros([1,n_atomtypes])[0]
    # Atom type 1: Carbon
    if atom.get_name()[0]=='C':
        id_mat[0] = 1
        
    # Atom type 2: Nitrogen
    if atom.get_name()[0]=='N':
        id_mat[1] = 1
        
    # Atom type 3: Oxygen
    if atom.get_name()[0]=='O':
        id_mat[2] = 1
        "--output_path",
        default='/data/databases/pdb/processed/domain_analisis')

    args = parser.parse_args()

    domains = defaultdict(lambda: [])
    for seq in bpio.parse(args.data_path + "/processed/domains.fasta",
                          "fasta"):
        domains["_".join(seq.id.split("_")[0:2])].append(seq.id.split("_"))

    for (code, pdb_path) in tqdm(PDBs(pdb_dir=args.data_path)):

        pdb_model = PDB(code=code)
        pdb_model.save()

        p = PDBParser(PERMISSIVE=True, QUIET=True)
        try:
            for chain in p.get_structure(code, pdb_path).get_chains():
                chains_dir = args.output_path + "/chains/" + code[1:3] + "/"
                mkdir(chains_dir)
                cs = ChainSplitter(chains_dir)
                process_chain(pdb_path, code, chain.id, pdb_model)

                for (_, _, res_start, res_end, dn, dn_start,
                     dn_end) in domains[code + "_" + chain.id]:
                    # 1r9d_A_2_787_PF02901.14_8_648
                    try:
                        domains_dir = args.output_path + "/domains/" + code[
                            1:3] + "/"
                        dn_start = int(dn_start)
                        dn_end = int(dn_end)
Exemple #55
0
#Note: If the top500H folder isn't in the same directory, the randint function's
# end value, len(list), will be 0, and so this code will not run because of an
# 'empty range for randrange() (0,0, 0)' error.


#Borrowed from teacher's solution (distance-histograms assignment) and modified a bit.
if __name__=="__main__":
    # Iterate through and parse all files in the top500H folder.
    import glob #Filename pattern matching

    # One parser serves every file; QUIET=True silences warnings.
    p = PDBParser(QUIET=True)

    # Create a list of protein structures
    structure_list = []
    for fname in glob.glob("top500H/*"):
        print(f"Parsing {fname}... ")
        try:
            # Extract the structure
            s = p.get_structure("", fname)
        except Exception:
            # Skip unparsable files and report them.  Catching Exception
            # instead of using a bare except lets Ctrl-C and SystemExit
            # stop the run.
            print(f"- ERROR in {fname}, therefor it has been skipped.")
        else:
            structure_list.append(s)



def protein_aalist(s, aa):
    '''Goes through one protein and createst a list of amino acids from it '''
    list_of_aa = []
    for res in s[0].get_residues():
        if is_aa(res): #Tests object identity
Exemple #56
0
def ParsePDB(pdbpth, mutant_tag, accept_atom=('CA', ), center='CA'):
    """
    Collect one accepted atom per residue of the first model of a PDB file,
    together with its distance to the CA atom of the mutated residue.

    :param pdbpth: path of the PDB file. When its last '/'-separated
        component is 'model1.pdb', the position is translated through the
        per-PDB mapping csv and chain 'A' is used for the mutated residue.
    :param mutant_tag: '.'-separated string with the six fields
        key, PDB, WILD_TYPE, CHAIN, POSITION, MUTANT
    :param accept_atom: atom names to keep; only the first matching atom of
        each residue is recorded.
    :param center: name of the centre atom; only 'CA' is supported.
    :return: (df_pdb, center_coord) - the atom table and the coordinates of
        the centre atom.
    :raises ValueError: if center is not 'CA'.
    :raises AssertionError: if the residue at the mutated position does not
        have the expected residue name.
    """
    import warnings
    from Bio import BiopythonWarning
    from Bio.PDB.PDBParser import PDBParser
    warnings.simplefilter('ignore', BiopythonWarning)
    column_names = ('chain', 'res', 'het', 'posid', 'inode', 'full_name',
                    'atom_name', 'dist', 'x', 'y', 'z', 'occupancy',
                    'b_factor')
    df_pdb = pd.DataFrame({column: [] for column in column_names})
    key, pdbid, wtaa, mtchain, pos, mtaa = mutant_tag.split('.')
    print('The pdbid is:', pdbid, 'pth: %s' % pdbpth)
    # Evaluated once; the same test was repeated three times before.
    is_model1 = pdbpth.split('/')[-1] == 'model1.pdb'
    # --------------------------------------------------------------------------------------------------------------
    # consider mapping
    if is_model1:
        map_pos_pth = '/public/home/sry/mCNN/dataset/TR/map_pos/%s_mapping.csv' % pdbid
        df_map = pd.read_csv(map_pos_pth)
        df_map[['POSITION_OLD']] = df_map[['POSITION_OLD']].astype(str)
        df_map[['POSITION_NEW']] = df_map[['POSITION_NEW']].astype(str)

        pos = df_map.loc[
            (df_map.CHAIN == mtchain) & (df_map.POSITION_OLD == pos),
            'POSITION_NEW'].values[0]  #CHAIN,POSITION_OLD,POSITION_NEW
    # --------------------------------------------------------------------------------------------------------------

    # A trailing non-digit character is taken as the insertion code.
    if pos.isdigit():
        INODE = ' '
        POSID = int(pos)
    else:
        INODE = pos[-1]
        POSID = int(pos[:-1])
    MT_pos = (' ', POSID, INODE)

    parser = PDBParser(PERMISSIVE=1)
    structure = parser.get_structure(pdbid, pdbpth)
    model = structure[0]
    if is_model1:
        # The file may hold either the wild type or the mutant.
        try:
            assert model['A'][MT_pos].get_resname() == aa_123dict[
                wtaa]  #TR_wild
        except Exception:
            assert model['A'][MT_pos].get_resname() == aa_123dict[
                mtaa]  #TR_mut
    else:
        assert model[mtchain][MT_pos].get_resname() == aa_123dict[wtaa]

    if center != 'CA':
        # Previously this fell through and failed later with an unbound
        # center_coord.
        raise ValueError("unsupported center %r: only 'CA' is supported" %
                         (center, ))
    center_chain = 'A' if is_model1 else mtchain
    center_coord = model[center_chain][MT_pos]['CA'].get_coord()

    for chain in model:
        chain_name = chain.get_id()
        res_id_lst = [res.get_id() for res in chain]

        print('The res_number in chain %s is: %d' %
              (chain_name, len(res_id_lst)))

        res_list = [chain[res_id] for res_id in res_id_lst]
        for res in res_list:
            res_name = res.get_resname()
            het, pos_id, inode = res.get_id()
            for atom in res:
                full_name, coord, occupancy, b_factor = atom.get_name(
                ), atom.get_coord(), atom.get_occupancy(), atom.get_bfactor()
                if full_name not in accept_atom:
                    continue
                name = full_name.strip()[0]
                dist = np.linalg.norm(center_coord - coord)
                x, y, z = coord
                # Kept exactly as before: the mixed-type array fixes the
                # dtypes of the resulting columns.
                temp_array = np.array([
                    chain_name, res_name, het, pos_id, inode, full_name, name,
                    dist, x, y, z, occupancy, b_factor
                ]).reshape(1, -1)
                temp_df = pd.DataFrame(temp_array)
                temp_df.columns = df_pdb.columns
                df_pdb = pd.concat([df_pdb, temp_df],
                                   axis=0,
                                   ignore_index=True)
                # Only the first accepted atom of a residue is recorded.
                break
    df_pdb[['dist']] = df_pdb[['dist']].astype(float)
    print('The atom_number (only CA) is:', len(df_pdb))
    return df_pdb, center_coord
Exemple #57
0
    "AE", "BE", "CE", "DE", "EE", "FE", "AF", "BF", "CF", "DF", "EF", "FF"
]
# Append the tab-separated header row: "id" followed by every alphabet entry.
with open(outfile, "a") as f:
    f.write("id" + '\t')
    for i in alphabet:
        f.write(i + '\t')
    f.write('\n')

# One PDB id per line of the input file.
with open(infile) as f1:
    pdblist = f1.read().splitlines()

for pdbid in pdblist:
    # The PDB file is looked up as "<id>.pdb" relative to the working directory.
    pdbFile = pdbid + ".pdb"

    ## First, open and parse the protein file
    p = PDBParser(PERMISSIVE=1)
    structure = p.get_structure(pdbFile, pdbFile)
    print(pdbid)
    for model in structure:
        for chain in model:
            seq = list()
            chainID = chain.get_id()

            # One letter per residue; anything that is not a standard amino
            # acid is written as "X".
            for residue in chain:
                if is_aa(residue.get_resname(), standard=True):
                    seq.append(three_to_one(residue.get_resname()))
                else:
                    seq.append("X")

            chainseq = str("".join(seq))
            chainlength = len(chainseq)
def main():
    """Detect polar contacts in a PDB structure and print the exercise reports.

    Reads the command line (--backonly, --nowats, --diel, --vdw, --rlib and
    the positional pdb_path), loads the VdW parameter set and the residue
    library, parses the first model of the structure and then:

    * lists polar atom pairs found within HBLNK of each other, skipping atoms
      of the same residue, pairs closer than COVLNK and residues whose
      numbers differ by one;
    * "Exercise A": prints the five residue pairs with the lowest summed
      electrostatic + VdW energy;
    * "Exercise B.1": prints the contacts grouped as main/side chain
      combinations and shows a matplotlib plot;
    * "Exercise B 2": prints an electrostatic term for pairs in which both
      residues are in the hard-coded surface list.

    Exits with status 2 if pdb_path is empty or the PDB file cannot be loaded.
    """

    parser = argparse.ArgumentParser(prog='polarContacts',
                                     description='Polar contacts detector')

    parser.add_argument('--backonly',
                        action='store_true',
                        dest='backonly',
                        help='Restrict to backbone')

    parser.add_argument('--nowats',
                        action='store_true',
                        dest='nowats',
                        help='Exclude water molecules')

    parser.add_argument('--diel',
                        type=float,
                        action='store',
                        dest='diel',
                        default=1.0,
                        help='Relative dielectric constant')

    parser.add_argument('--vdw',
                        action='store',
                        dest='vdwprm',
                        help='VDW Paramters file')

    parser.add_argument('--rlib',
                        action='store',
                        dest='reslib',
                        help='AminoAcid library')

    parser.add_argument('pdb_path')

    args = parser.parse_args()

    # Echo every parsed setting.
    print("Settings")
    print("--------")
    for k, v in vars(args).items():
        print('{:10}:'.format(k), v)

    backonly = args.backonly
    nowats = args.nowats
    pdb_path = args.pdb_path
    vdwprm = args.vdwprm
    reslib = args.reslib
    diel = args.diel

    # Load VDW parameters
    vdwParams = VdwParamset(vdwprm)
    print("{} atom types loaded".format(vdwParams.ntypes))

    # Load AA Library
    aaLib = ResiduesDataLib(reslib)
    print("{} amino acid atoms loaded".format(aaLib.nres))

    if not pdb_path:
        parser.print_help()
        sys.exit(2)

    # NOTE(review): from here on ``parser`` is the PDB parser, no longer the
    # argparse parser created above.
    parser = PDBParser(PERMISSIVE=1)

    try:
        st = parser.get_structure('st', pdb_path)
    except OSError:
        print("#ERROR: loading PDB")
        sys.exit(2)

    # Checking for models
    if len(st) > 1:
        print("#WARNING: Several Models found, using only first")

    # Using Model 0 anyway
    st = st[0]

    # Making a list of polar atoms
    polats = []
    if backonly:
        selected_atoms = backbone_polars
    else:
        selected_atoms = all_polars

    for at in st.get_atoms():
        if at.id in selected_atoms:
            polats.append(at)
    # Searching for contacts under HBLNK on different residues
    nbsearch = NeighborSearch(polats)
    hblist = []
    for at1, at2 in nbsearch.search_all(HBLNK):
        if at1.get_parent() == at2.get_parent():
            continue
        # Discard covalents and neighbours
        if (at1 - at2) < COVLNK:
            continue
        # NOTE(review): only residue numbers are compared here, the chain is
        # not checked.
        if abs(at2.get_parent().id[1] - at1.get_parent().id[1]) == 1:
            continue
        # remove waters
        if nowats:
            if at1.get_parent().get_resname() in waternames \
             or at2.get_parent().get_resname() in waternames:
                continue

#     atom1 = Atom(at1,1,aaLib,vdwParams)
#     atom2 = Atom(at2,1,aaLib,vdwParams)
        # Store each pair with the lower serial number first.
        if at1.get_serial_number() < at2.get_serial_number():
            hblist.append([at1, at2])
        else:
            hblist.append([at2, at1])

    print()

    print()
    print("Polar contacts")
    print('{:13} {:13} {:6} '.format('Atom1', 'Atom2', 'Dist (A)'))

    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        print('{:14} {:14} {:6.3f} '.format(
            r1.get_resname() + ' ' + str(r1.id[1]) + hb[0].id,
            r2.get_resname() + ' ' + str(r2.id[1]) + hb[1].id, hb[0] - hb[1]))
    print()
    print("Residue interactions")

    # Making list of residue pairs to avoid repeated pairs
    respairs = []
    for hb in hblist:
        r1 = hb[0].get_parent()
        r2 = hb[1].get_parent()
        if [r1, r2] not in respairs:
            respairs.append([r1, r2])

    print('Exercise A')

    # For every residue pair, sum an electrostatic term (eint) and a 12-6
    # VdW term (evdw) over all atom pairs.
    l = []
    for rpair in sorted(respairs, key=lambda i: i[0].id[1]):
        eint = 0.
        evdw = 0.
        for at1 in rpair[0].get_atoms():
            resid1 = rpair[0].get_resname()
            atid1 = at1.id
            atparam1 = aaLib.getParams(resid1, atid1)
            vdwprm1 = vdwParams.atTypes[atparam1.atType]
            for at2 in rpair[1].get_atoms():
                resid2 = rpair[1].get_resname()
                atid2 = at2.id
                atparam2 = aaLib.getParams(resid2, atid2)
                vdwprm2 = vdwParams.atTypes[atparam2.atType]
                # 332.16 is presumably the Coulomb conversion factor for
                # kcal/mol with charges in e and distances in A - TODO confirm.
                eint = eint + 332.16 * atparam1.charg * atparam2.charg / diel / (
                    at1 - at2)
                # Geometric means of the two atoms' eps and sig values.
                eps = math.sqrt(vdwprm1.eps * vdwprm2.eps)
                sig = math.sqrt(vdwprm1.sig * vdwprm2.sig)
                evdw = evdw + 4 * eps * ((sig / (at1 - at2))**12 -
                                         (sig / (at1 - at2))**6)
            #print (resid1,rpair[0].id[1],resid2,rpair[1].id[1],eint,evdw, eint+evdw)
        # NOTE(review): resid1/resid2 come from the loops above and would be
        # stale or undefined for a residue without atoms.
        l.append([
            resid1, rpair[0].id[1], resid2, rpair[1].id[1], eint, evdw,
            eint + evdw
        ])
    # Print the five entries with the lowest total energy.
    for index, element in enumerate(sorted(l, key=lambda i: i[6])):
        if index < 5:
            print(element)

    #Exercise B 1
    print('Exercise B.1')

    # Contacts split by whether each atom is a backbone polar atom ('main')
    # or not ('side'); to_main / to_side collect the distinct residue labels
    # seen on each side.
    mainmain = []
    mainside = []
    sidemain = []
    sideside = []
    to_main = []
    to_side = []
    for hb in sorted(hblist, key=lambda i: i[0].get_serial_number()):
        resid1 = hb[0].get_parent()
        resid2 = hb[1].get_parent()
        if hb[0].id in backbone_polars:
            a = 'main'
        else:
            a = 'side'
        if hb[1].id in backbone_polars:
            b = 'main'
        else:
            b = 'side'
        label = a + '-' + b
        if label == 'main-main':
            mainmain.append([
                resid1.get_resname(), resid1.id[1],
                resid2.get_resname(), resid2.id[1], label, hb[0].id, hb[1].id,
                hb[0] - hb[1]
            ])
            if (str(resid1.get_resname()) + '  ' +
                    str(resid1.id[1])) not in to_main:
                to_main.append(
                    str(resid1.get_resname()) + '  ' + str(resid1.id[1]))
            if (str(resid2.get_resname()) + '  ' +
                    str(resid2.id[1])) not in to_main:
                to_main.append(
                    str(resid2.get_resname()) + '  ' + str(resid2.id[1]))
        elif label == 'main-side':
            mainside.append([
                resid1.get_resname(), resid1.id[1],
                resid2.get_resname(), resid2.id[1], label, hb[0].id, hb[1].id,
                hb[0] - hb[1]
            ])
            # NOTE(review): here residue 2 (the 'side' atom) goes to to_main
            # and residue 1 to to_side - confirm the intended direction.
            if (str(resid2.get_resname()) + '  ' +
                    str(resid2.id[1])) not in to_main:
                to_main.append(
                    str(resid2.get_resname()) + '  ' + str(resid2.id[1]))
            if (str(resid1.get_resname()) + '  ' +
                    str(resid1.id[1])) not in to_side:
                to_side.append(
                    str(resid1.get_resname()) + '  ' + str(resid1.id[1]))
        elif label == 'side-main':
            sidemain.append([
                resid1.get_resname(), resid1.id[1],
                resid2.get_resname(), resid2.id[1], label, hb[0].id, hb[1].id,
                hb[0] - hb[1]
            ])
            if (str(resid2.get_resname()) + '  ' +
                    str(resid2.id[1])) not in to_side:
                to_side.append(
                    str(resid2.get_resname()) + '  ' + str(resid2.id[1]))
            if (str(resid1.get_resname()) + '  ' +
                    str(resid1.id[1])) not in to_main:
                to_main.append(
                    str(resid1.get_resname()) + '  ' + str(resid1.id[1]))
        else:
            sideside.append([
                resid1.get_resname(), resid1.id[1],
                resid2.get_resname(), resid2.id[1], label, hb[0].id, hb[1].id,
                hb[0] - hb[1]
            ])
            if (str(resid1.get_resname()) + '  ' +
                    str(resid1.id[1])) not in to_side:
                to_side.append(
                    str(resid1.get_resname()) + '  ' + str(resid1.id[1]))
            if (str(resid2.get_resname()) + '  ' +
                    str(resid2.id[1])) not in to_side:
                to_side.append(
                    str(resid2.get_resname()) + '  ' + str(resid2.id[1]))
    for i in mainmain:
        print(i)
    for i in mainside:
        print(i)
    for i in sidemain:
        print(i)
    for i in sideside:
        print(i)
    # Build the plot data: one x position per residue label, with the y value
    # 'to_main' or 'to_side'.
    nmain = []
    nummain = []
    nside = []
    numside = []
    for i in range(len(to_main)):
        nmain.append('to_main')
        nummain.append(i)
    for i in range(len(to_side)):
        nside.append('to_side')
        numside.append(i + len(to_main))
    x = np.array(nummain + numside)
    y = np.array(nmain + nside)
    res = to_main + to_side
    plt.xticks(x, res)
    plt.plot(x, y, 'ro')
    plt.show()
    # The plot indicates, for each residue, whether it interacts with one or
    # more elements in the main chain or in the side chain.

    #End of exercise B 1

    print()
    print('Exercise B', 2)

    ## From http://cib.cf.ocha.ac.jp/bitool/ASA/ I have obtained that the residues in the surface are:
    surface = [['ILE', 3], ['VAL', 5], ['ILE', 23], ['VAL', 26], ['ILE', 30],
               ['GLN', 41], ['LEU', 43], ['LEU', 56], ['ILE', 61], ['LEU', 67],
               ['LEU', 69]]
    l = []
    for rpair in sorted(respairs, key=lambda i: i[0].id[1]):
        eint = 0.
        for at1 in rpair[0].get_atoms():
            resid1 = rpair[0].get_resname()
            resid1id = rpair[0].id[1]
            atid1 = at1.id
            atparam1 = aaLib.getParams(resid1, atid1)
            for at2 in rpair[1].get_atoms():
                resid2 = rpair[1].get_resname()
                resid2id = rpair[1].id[1]
                atid2 = at2.id
                atparam2 = aaLib.getParams(resid2, atid2)
                # The term is added only when both residues of the pair are
                # listed in ``surface``.
                for i in surface:
                    for j in surface:
                        if resid1 == i[0] and resid1id == i[1] and resid2 == j[
                                0] and resid2id == j[1]:
                            eint = eint + 80 * atparam1.charg * atparam2.charg / diel / (
                                at1 - at2)
        # Pairs whose accumulated term is exactly zero are not reported.
        if eint != 0:
            l.append([resid1, resid1id, resid2, resid2id, eint])
    for i in l:
        print(i)
Exemple #59
0
     args.input
 except AttributeError:
     from modeller import environ
     import modeller_caller as mc
     env = environ()  # Some variables needed for the modeller
     modeler = mc.modeller_caller(env)
     # Convert the fasta alignment in pir format
     if not args.fasta and not args.pir:
         raise argparser.error("Required a fasta or a pir alignment")
     elif args.fasta:
         modeler.convert_ali(args.fasta, args.pir)
     modeler.modelize(args.pir, args.seq, args.models)
 else:
     # Retrieve the PDB structure, filter and get sequence
     logging.captureWarnings(True)
     parser = PDBParser(PERMISSIVE=1)
     if os.path.isfile(args.input):
         pdbpath = args.input
     else:
         pdbl = PDBList()
         try:
             pdbpath = plots.pdb_download(args.input, os.getcwd())
         except:
             raise FileExistsError("Make sure your query format is correct")
     structure = parser.get_structure("cozmic_pdb_query", pdbpath)
     residues = cm.filter_residues(structure)
     s = ""
     for residue in residues:
         s += SCOPData.protein_letters_3to1.get(residue.get_resname(), 'X')
     seq = Seq(s, generic_protein)
     #         sys.stderr.write("Protein sequence:%s\n" % seq)
Exemple #60
-1
def parse_atoms_infile(filename):
	"""
	Parse a PDB file and return its atoms named 'CB'.

	filename: a file in PDB format (*.ent or *.pdb).

	returns: list of the atoms whose name is 'CB', in the order in which
	the parsed structure yields them.
	"""
	structure = PDBParser(QUIET=True).get_structure("X", filename)
	cb_atoms = []
	for atom in structure.get_atoms():
		if atom.name == 'CB':
			cb_atoms.append(atom)
	return cb_atoms