def test_basic_with_epitope(self):
    """Score the two sample complexes when an epitope is given for each."""
    sample_paths = (DIRNAME + '/data/sample1.pdb',
                    DIRNAME + '/data/sample2.pdb')
    parser = PDBParser(PERMISSIVE=1)
    # Parse both files first, then wrap them, in the same order as before.
    query_struct, against_struct = [
        parser.get_structure(os.path.basename(path), path)
        for path in sample_paths
    ]

    query_complex = Complex(
        query_struct, epitope=[211, 213, 214, 224, 225, 226, 227, 228, 229])
    against_complex = Complex(
        against_struct, epitope=[216, 217, 218, 219, 220, 221])
    complexes = (query_complex, against_complex)

    for cplx in complexes:
        cplx.get_fp()
    fp_strings = [cplx.fp2str() for cplx in complexes]
    query, against = [
        FPWithComplex(cplx, fp) for cplx, fp in zip(complexes, fp_strings)
    ]

    score1, score2, score3 = similarity_between(query, against)

    self.assertEqual(
        {"score1": score1, "score2": score2, "score3": score3},
        {'score1': 34.705754203703862, 'score3': 0, 'score2': 6})
def test_with_epitope_another_cutoff(self):
    """Score the two sample complexes with the similarity cutoff set to 5."""
    sample_paths = (DIRNAME + '/data/sample1.pdb',
                    DIRNAME + '/data/sample2.pdb')
    parser = PDBParser(PERMISSIVE=1)
    structures = [
        parser.get_structure(os.path.basename(path), path)
        for path in sample_paths
    ]

    # No epitope is passed to Complex in this test.
    complexes = [Complex(structure) for structure in structures]
    for cplx in complexes:
        cplx.get_fp()
    fp_strings = [cplx.fp2str() for cplx in complexes]
    query, against = [
        FPWithComplex(cplx, fp) for cplx, fp in zip(complexes, fp_strings)
    ]

    score1, score2, score3 = similarity_between(query, against, cutoff=5)

    self.assertEqual(
        {"score1": score1, "score2": score2, "score3": score3},
        {"score1": 119.75339423551459, "score3": -8, "score2": 20})
def test_basic_with_another_spinimage(self):
    """Score the two sample complexes with non-default spin-image steps."""
    sample_paths = (DIRNAME + '/data/sample1.pdb',
                    DIRNAME + '/data/sample2.pdb')
    parser = PDBParser(PERMISSIVE=1)
    structures = [
        parser.get_structure(os.path.basename(path), path)
        for path in sample_paths
    ]

    complexes = [Complex(structure) for structure in structures]
    # Same fingerprint settings for both complexes.
    fp_options = dict(spin_image_radius_step=2,
                      spin_image_height_step=2,
                      sphere_radius_step=2)
    for cplx in complexes:
        cplx.get_fp(**fp_options)
    fp_strings = [cplx.fp2str() for cplx in complexes]
    query, against = [
        FPWithComplex(cplx, fp) for cplx, fp in zip(complexes, fp_strings)
    ]

    score1, score2, score3 = similarity_between(query, against)

    self.assertEqual(
        {"score1": score1, "score2": score2, "score3": score3},
        {'score1': 129.68169758476202, 'score3': 5, 'score2': 20})
def test_basic(self):
    """Score the two sample complexes with every option left at its default."""
    path1 = DIRNAME + '/data/sample1.pdb'
    path2 = DIRNAME + '/data/sample2.pdb'

    p = PDBParser(PERMISSIVE=1)
    query_struct = p.get_structure(os.path.basename(path1), path1)
    against_struct = p.get_structure(os.path.basename(path2), path2)

    query_complex = Complex(query_struct)
    against_complex = Complex(against_struct)

    query_complex.get_fp()
    against_complex.get_fp()

    query_fp_string = query_complex.fp2str()
    against_fp_string = against_complex.fp2str()

    query = FPWithComplex(query_complex, query_fp_string)
    against = FPWithComplex(against_complex, against_fp_string)

    score1, score2, score3 = similarity_between(query, against)

    # The original labelled the second returned value "score3" and the third
    # "score2" on both sides of the comparison. The values checked are
    # unchanged; only the labels now match the variables they describe.
    expected = {"score1": 118.00269647021572, "score2": 20, "score3": -8}
    actual = {"score1": score1, "score2": score2, "score3": score3}
    self.assertEqual(actual, expected)
def get_structure(self, *args):
    """Parse a structure from a local path, an http(s) URL or an open handle.

    Call as ``get_structure(id, fileName)`` or ``get_structure(fileName)``;
    in the one-argument form the structure id is ``str(fileName)``.

    The input is first parsed with ``PDBParser.get_structure``. If that
    raises, the exception is printed and ``MMCIFParser.get_structure`` is
    tried on the same input. When ``self.removeHeteroDuplicated`` is truthy
    the result is passed through ``self.filterOutDuplicated``.

    Raises:
        ValueError: if called with zero or more than two arguments.
        Exception: if an http(s) download does not succeed.
    """
    if len(args) == 2:
        pdbId, fileName = args
    elif len(args) == 1:
        fileName = args[0]
        pdbId = str(fileName)
    else:
        raise ValueError(
            "Error, input should be (id, fileName) or (fileName))")

    # Only strings can be URLs. Without the isinstance guard, passing an
    # open handle made re.match raise TypeError before any parsing started.
    if isinstance(fileName, str) and re.match("http(s?)://", fileName):
        r = requests.get(fileName)
        if r.ok:
            fileName = StringIO(r.text)
        else:
            raise Exception("Error downloading pdb")

    try:
        if not isinstance(fileName, str) or not fileName.endswith(".gz"):
            structure = PDBParser.get_structure(self, pdbId, fileName)
        else:
            # Text mode: the PDB parser compares lines against str records,
            # so a binary gzip stream does not parse on Python 3.
            with gzip.open(fileName, "rt") as f:
                structure = PDBParser.get_structure(self, pdbId, f)
    except Exception as e:
        # Deliberate fallback: report the PDB failure and retry as mmCIF.
        print(e)
        structure = MMCIFParser.get_structure(self, pdbId, fileName)

    if self.removeHeteroDuplicated:
        structure = self.filterOutDuplicated(structure)
    return structure
def test_bad_charge(self):
    """Test if missing or malformed charge case is handled correctly."""
    # Test entries: one with a malformed charge field, one with none.
    malformed = "ATOM 1 N PRO 1 000001 02.000 3.0000 -0.W000 1.0000 N\n"
    missing = "ATOM 1 N PRO 1 000001 02.000 3.0000 1.0000 N\n"

    # In permissive mode both entries parse and the atom has no charge.
    permissive_parser = PDBParser(PERMISSIVE=True, is_pqr=True)
    for entry in (malformed, missing):
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always", PDBConstructionWarning)
            structure = permissive_parser.get_structure(
                "test", StringIO(entry))
        first_atom = next(structure.get_atoms())
        self.assertEqual(first_atom.get_charge(), None)

    # In strict mode the malformed entry must raise.
    strict_parser = PDBParser(PERMISSIVE=False, is_pqr=True)
    with self.assertRaises(PDBConstructionException):
        strict_parser.get_structure("example", StringIO(malformed))
def score(query_pdb_path, against_pdb_path, query_fp_path=None,
          against_fp_path=None, query_epitope=[], against_epitope=[],
          spin_image_height_step=5, spin_image_radius_step=2,
          sphere_radius_step=2, cutoff=20.0, spin_image_radius_range=(0, 20),
          spin_image_height_range=(-30, 10), sphere_radius_range=(0, 20),
          callback=write_score_to_file, cbargs=[]):
    """Compute the three similarity scores between two PDB complexes.

    Fingerprints are read from ``query_fp_path`` and ``against_fp_path`` only
    when both are given; otherwise they are computed from the structures
    with the step arguments. The three ``*_range`` arguments are accepted
    but are not used in this function.

    If ``callback`` is not None it is called as
    ``callback((score1, score2, score3), *cbargs)``.

    Returns:
        The tuple ``(score1, score2, score3)``.
    """
    parser = PDBParser(PERMISSIVE=1)
    query_struct = parser.get_structure(
        os.path.basename(query_pdb_path), query_pdb_path)
    against_struct = parser.get_structure(
        os.path.basename(against_pdb_path), against_pdb_path)

    query_complex = Complex(query_struct, query_epitope)
    against_complex = Complex(against_struct, against_epitope)

    if query_fp_path is not None and against_fp_path is not None:
        # Both fingerprints were supplied: read them from disk.
        with open(query_fp_path, 'r') as query_file, \
                open(against_fp_path, 'r') as against_file:
            query_fp_string = query_file.read()
            against_fp_string = against_file.read()
    else:
        # At least one fingerprint is missing: compute both.
        fp_options = dict(spin_image_radius_step=spin_image_radius_step,
                          spin_image_height_step=spin_image_height_step,
                          sphere_radius_step=sphere_radius_step)
        query_complex.get_fp(**fp_options)
        against_complex.get_fp(**fp_options)
        query_fp_string = query_complex.fp2str()
        against_fp_string = against_complex.fp2str()

    query = FPWithComplex(query_complex, query_fp_string)
    against = FPWithComplex(against_complex, against_fp_string)

    score1, score2, score3 = similarity_between(query, against, cutoff=cutoff)
    result = (score1, score2, score3)

    if callback is not None:
        callback(result, *cbargs)

    return result
def save_to_csv(input_path, output_path):
    """Parse every ``*.pdb`` under ``input_path`` and write two CSV files.

    The CSVs come from ``data_processing(input_path)``: element 0 is written
    to ``coordinate_<id>.csv`` and element 1 to ``missing_seq_<id>.csv``,
    where ``<id>`` is the base name of the last PDB file returned by the
    glob. Both paths are joined by plain concatenation, so callers supply
    any trailing separator themselves.
    """
    parser = PDBParser(PERMISSIVE=True)
    for pdb_path in glob(input_path + '*.pdb'):
        structure_id = os.path.splitext(os.path.basename(pdb_path))[0]
        # The parsed structure is not kept; parsing still raises on bad input.
        parser.get_structure(structure_id, pdb_path)

    data_total = data_processing(input_path)
    outputs = (('coordinate_', data_total[0]), ('missing_seq_', data_total[1]))
    for prefix, frame in outputs:
        frame.to_csv(output_path + prefix + structure_id + '.csv', index=False)
def getPdbSequance(pdb_file, chain_id):
    """Return the one-letter sequence and residue numbers of one chain.

    Only residues of the first model are read. A residue is kept when its
    hetero flag is blank, ``H_MSE``, ``H_M3L`` or ``H_CAS`` and it has N, CA
    and C atoms plus CB (CB is not required for GLY). Any other residue
    whose flag is not ``W`` is reported on stdout. The process exits if the
    chain is absent.

    Returns:
        ``(pdb_seq, pdb_indexes)``: the ``three2one`` conversion of the kept
        residue names, and the matching residue numbers.
    """
    structure = PDBParser(PERMISSIVE=1, QUIET=True).get_structure("", pdb_file)
    pdb_id = pdb_file[0:-4]
    first_model = structure[0]

    if not first_model.has_id(chain_id):
        print("PDB " + pdb_id + " doesn't have chain with id " + chain_id)
        print()
        exit()

    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')
    pdb_sequance = []
    pdb_indexes = []
    for res in first_model[chain_id]:
        has_backbone = (res.has_id('N') and res.has_id('CA')
                        and res.has_id('C'))
        is_regular_res = has_backbone and (
            res.get_resname() == 'GLY' or res.has_id('CB'))
        res_id = res.get_id()[0]
        if res_id in accepted_flags and is_regular_res:
            pdb_sequance.append(res.get_resname())
            pdb_indexes.append(res.get_id()[1])
        elif res_id != 'W':
            print("Unknown residue in " + pdb_id + " with res_id " + res_id)

    return three2one(pdb_sequance), pdb_indexes
def get_CA_coordinates(filename, my_set):
    """Map surface residues of a PDB file to their CA coordinates.

    A residue of the first model is kept when its hetero flag is blank and
    the string ``<residue number><chain id>`` is in ``my_set``.

    Returns:
        Dict keyed by ``<residue name><residue number>`` whose values are
        the CA coordinates as tuples.
    """
    model = PDBParser(PERMISSIVE=1).get_structure("code.pdb", filename)[0]
    CA_coordinates = {}
    sys.stderr.write("Calculating CA coordinates of residues...\n")

    for chain in model:
        for residue in chain:
            surface_key = (str(residue.get_full_id()[3][1])
                           + residue.get_full_id()[2])
            if residue.get_id()[0] != " " or surface_key not in my_set:
                continue
            label = str(residue.get_resname()) + str(residue.get_id()[1])
            for atom in residue:
                if atom.get_name() == "CA":
                    CA_coordinates[label] = tuple(atom.get_coord())

    return CA_coordinates
def getrange(pdbfile):
    """Return the residue-number range shared by all chains of the first model.

    For each chain the range runs from its first residue to the last residue
    of the leading run of non-HET residues (blank hetero flag).

    Returns:
        ``(pf, pt, names)``: ``pf`` is the largest first residue number
        (never below the initial 0), ``pt`` is the smallest last residue
        number (never above the initial 1000000), and ``names`` is the chain
        ids joined by spaces.

    Raises:
        AssertionError: if a chain starts with a HET residue.
    """
    p = PDBParser(PERMISSIVE=1)
    s = p.get_structure("", pdbfile)
    chnames = []
    pf, pt = 0, 1000000
    for c in s[0]:  # iterate over chains
        residues = list(c)
        chnames.append(c.get_id())
        # first residue is not an HET atom
        assert residues[0].get_full_id()[3][0] == ' '

        # Advance to the last residue of the leading non-HET run. The old
        # loop stepped back by one unconditionally, which dropped the final
        # residue whenever the chain had no trailing HET residues.
        last = 0
        while (last + 1 < len(residues)
               and residues[last + 1].get_full_id()[3][0] == ' '):
            last += 1

        f, t = residues[0].get_id()[1], residues[last].get_id()[1]
        if f > pf:
            pf = f
        if t < pt:
            pt = t
    return pf, pt, ' '.join(chnames)
def get_best_res(target_dist, label=None):
    """Find the residue under ``top500H`` whose distance best matches a target.

    Every file in the ``top500H`` tree is parsed. For each residue (or only
    those whose one-letter code equals ``label``) the distance between its
    first and last atoms is scored against ``target_dist`` with ``getScore``.

    Returns:
        The residue with the lowest score, or None if nothing was scored.
    """
    minimal_score = 1000000000000000000
    best_part = None
    ava = 0
    # One parser serves every file.
    parser = PDBParser(PERMISSIVE=1, QUIET=True)
    for subdir, dirs, files in os.walk("top500H"):
        for fname in files:
            ava += 1
            try:
                filepath = subdir + os.sep + fname
                structure = parser.get_structure('file', filepath)
                residues = [
                    residue for model in structure for chain in model
                    for residue in chain
                ]
                atoms = [atom for residue in residues for atom in residue]
                print("Processing ", fname, ava / 5, "%")
                for i, res in enumerate(residues):
                    if label is None or label == three_to_one(
                            res.get_resname()):
                        first, last = get_first_last_atoms(res, i)
                        dist = get_distance(atoms, first, last)
                        # Score once, then reuse it for the comparison and
                        # the update.
                        candidate = getScore(dist, target_dist)
                        if candidate < minimal_score:
                            minimal_score = candidate
                            best_part = res
            except Exception:
                # Deliberate best-effort scan: files that cannot be parsed
                # or scored are skipped.
                continue
    return best_part
def get_surface_residues(filename, my_acc_array, my_threshold):
    """Return the residues DSSP reports at or above an accessibility threshold.

    DSSP (``mkdssp``) is run on the first model of ``filename`` with
    ``acc_array=my_acc_array``. An entry is kept when its fourth field is
    not a string (it is sometimes ``NA``) and is ``>= my_threshold``.

    Returns:
        Set of strings of the form ``<residue number><chain id>``.
    """
    p = PDBParser(PERMISSIVE=1)
    s = p.get_structure("code.pdb", filename)
    model = s[0]
    d = DSSP(model, filename, dssp='mkdssp', acc_array=my_acc_array)
    sys.stderr.write("\nHandled %i residues\n" % len(d))

    # Build the key list once. It was rebuilt twice per kept residue, and
    # the error handler indexed d.keys() directly, which fails on Python 3.
    keys = list(d.keys())
    residue_number = set()
    for element in sorted(d):
        if isinstance(element[3], str):  # sometimes element[3] is NA
            continue
        if element[3] >= my_threshold:
            try:
                key = keys[element[0] - 1]
                residue_number.add(str(key[1][1]) + key[0])
            except IndexError:
                sys.stderr.write("Element " + str(keys[0]) +
                                 " index out of range\n")
    return residue_number
def main(file, atom, CAd=15, CBd=12, mind=6):
    """Analyze the pdb using distance between atom and minimum distances.

    Builds the distance matrix for ``atom`` over the filtered residues of
    ``file``, plots it as a heatmap, derives the contact map from the
    ``CAd``/``CBd``/``mind`` cutoffs and plots that too.

    Returns:
        ``(dist_matrix, cont_matrix)``.
    """
    logging.info("Analyzing %s using %s", file, atom)
    dist = {"CA": CAd, "CB": CBd, "min": mind}
    # Name the plots after the file actually analysed. This used to read the
    # module-level ``args.file`` instead of the ``file`` parameter.
    base = os.path.basename(file)
    name_f = os.path.splitext(base)[0]

    parser = PDBParser(PERMISSIVE=1)
    logging.captureWarnings(True)
    try:
        structure = parser.get_structure("test", file)
        residues = filter_residues(structure)

        dist_matrix = calc_dist_matrix(residues, atom)
        title_dist = 'Distances of the file {}'.format(name_f)
        name_heatmap = plots.plot_heatmap(dist_matrix, name_f, title_dist,
                                          atom)
        logging.info("Heatmap %s created", name_heatmap)

        cont_matrix = contact_map(dist_matrix, atom, dist)
        title_bin = 'Distance contacts of the file {}'.format(name_f)
        name_bin = plots.plot_matrix_binary(cont_matrix, name_f, title_bin,
                                            atom)
        logging.info("Contact map %s created", name_bin)
    finally:
        # Turn warning capture off again even when a step above fails.
        logging.captureWarnings(False)
    return (dist_matrix, cont_matrix)
def load_model(model_fname, model_fmt):
    """Load a transformation model from a file of the given format.

    Supported formats are ``'pdb'``, ``'json'`` and ``'hdf5'``.

    Returns:
        The loaded model, or None (after logging an error) when the PDB
        file is not a transformation or the format is unknown.
    """
    if model_fmt == 'pdb':
        struct = PDBParser(PERMISSIVE=True).get_structure(
            'PROMPTPY', model_fname)
        if not pdb.is_transformation(struct):
            logger.error('specified PDB file is not a transformation')
            return None
        coords = pdb.get_atom_coordinates(struct)
        masses = pdb.get_atom_masses(pdb.extract_model(struct, 0), True)
        return model.from_conf_coords(coords, masses)

    if model_fmt == 'json':
        with open(model_fname) as f:
            return model.Transformation.from_dict(json.load(f))

    if model_fmt == 'hdf5':
        return model.read_hdf5(model_fname)

    logger.error('unknown model format %s', model_fmt)
    return None
def distance_alpha_c(centers, draw, unit_dir):
    """Compute the distance from each unit's center of mass to its alpha carbons.

    :param centers: mapping from unit file name to that unit's center of mass
    :param draw: boolean, if True the distances are drawn through
        ``utils.draw_distance_center_mass_alpha``
    :param unit_dir: directory holding one structure file per unit
    :return: dictionary ``{unit: {<atom id><index>: distance}}``
    """
    unit_dir_list = os.listdir(unit_dir)
    unit_dir_list.sort(key=utils.natural_keys)
    distances = {}
    # One parser serves every unit.
    pdb_parser = PDBParser()
    for unit in unit_dir_list:
        # os.path.join gives the same path as the old concatenation when
        # unit_dir ends with a separator, and a correct one when it does not.
        structure = pdb_parser.get_structure(unit,
                                             os.path.join(unit_dir, unit))
        alpha_c = alpha_carbon(structure)
        center = centers[unit]
        distances[unit] = {
            ca.get_id() + str(i): distance(ca.get_coord(), center)
            for i, ca in enumerate(alpha_c)
        }
        # drawing distances
        if draw:
            utils.draw_distance_center_mass_alpha(unit, center, alpha_c)
    return distances
def compute_localQ_init(MAX_OFFSET=4, DISTANCE_CUTOFF=9.5):
    """Build the native contact table for ``~/opt/pulling/2xov.pdb``.

    One coordinate is collected per residue of the first model: CA for GLY,
    otherwise CB when the hetero flag is blank, ``H_MSE``, ``H_M3L`` or
    ``H_CAS`` and the residue has CA and O atoms. Any other residue prints
    an error and exits the process.

    Returns:
        The result of ``compute_native_contacts`` on those coordinates.
    """
    from pathlib import Path

    pdb_path = os.path.join(str(Path.home()), "opt/pulling/2xov.pdb")
    structure = PDBParser(PERMISSIVE=1).get_structure('2xov', pdb_path)

    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')
    native_coords = []
    for chain in structure[0].get_list():
        for res in chain:
            is_regular_res = res.has_id('CA') and res.has_id('O')
            res_id = res.get_id()[0]
            if res.get_resname() == 'GLY':
                atom_name = 'CA'
            elif res_id in accepted_flags and is_regular_res:
                atom_name = 'CB'
            else:
                print('ERROR: irregular residue at %s!' % res)
                exit()
            native_coords.append(res[atom_name].get_coord())

    return compute_native_contacts(native_coords, MAX_OFFSET, DISTANCE_CUTOFF)
def deleteChain():
    """Interactively delete chains from ``pdb_name`` and save the result.

    The user is asked how many chains to delete and then for each chain id.
    The structure is saved as ``<sequence>_bound.pdb`` when the user asks
    for the sequence in the file name, otherwise as
    ``<name>_without<last deleted chain>.pdb``.

    NOTE(review): still uses ``raw_input``, so this is Python 2 code.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    # int() leaves Python 2's evaluated integer unchanged and also accepts
    # the string that Python 3's input() returns.
    nb_chain = int(input('How many chain do you want to delete : '))
    rm_chain = ''  # stays empty when zero chains are requested
    for i in range(nb_chain):
        rm_chain = raw_input('What chain you want to delete : ')
        for model in structure:
            # Detach by id rather than while iterating the model's chains;
            # removing a child mid-iteration skips the next chain.
            if model.has_id(rm_chain):
                model.detach_child(rm_chain)

    pept = raw_input(
        'Do you want to get a pdb with the sequence in its name : ')
    if pept == 'y':
        ppb = PPBuilder()
        seq = ''.join(
            str(pp.get_sequence()) for pp in ppb.build_peptides(structure))
        out_name = seq.lower() + '_bound.pdb'
    else:
        out_name = nameStruct + '_without' + rm_chain + '.pdb'

    w = PDBIO()
    w.set_structure(structure)
    w.save(out_name)
def load_chains(raw, pdb_id, pdb_type, known):
    """Collect unit ids and sequences of known residues, grouped by chain.

    Args:
        raw: path or handle passed to ``PDBParser.get_structure``.
        pdb_id: structure id.
        pdb_type: label placed second in every unit id.
        known: mapping from stripped residue name to sequence symbol.

    Returns:
        Dict with an ``'ordering'`` list of every unit id in traversal
        order, plus one ``{'residues': [...], 'sequence': str}`` entry per
        chain that has at least one known residue.
    """
    structure = PDBParser().get_structure(pdb_id, raw)
    data = {'ordering': []}

    for model in structure:
        for chain in model:
            chain_id = chain.get_id()
            unit_ids = []
            symbols = []
            data[chain_id] = {'residues': unit_ids, 'sequence': symbols}

            for residue in chain:
                name = residue.resname.strip()
                if name not in known:
                    continue
                res_id = residue.get_id()
                parts = (structure.get_id(), pdb_type, model.get_id(),
                         chain_id, res_id[1], residue.resname, res_id[2])
                unit_id = '_'.join(str(part).strip() for part in parts)
                unit_ids.append(unit_id)
                symbols.append(known[name])
                data['ordering'].append(unit_id)

            if unit_ids:
                data[chain_id]['sequence'] = ''.join(symbols)
            else:
                # Chains without any known residue are not reported.
                del data[chain_id]

    return data
def get_aa_residues(pdb, chain):
    """Return the residues of one chain whose hetero flag is blank.

    Args:
        pdb: Protein Data Bank file.
        chain: id of the chain to read from the first model.

    Returns:
        List of Biopython residue objects, in chain order, with every
        residue whose ``get_id()[0]`` is not ``" "`` left out.
    """
    structure = PDBParser().get_structure("prot", pdb)
    selected_chain = structure[0][chain]
    return [
        res for res in selected_chain.get_residues()
        if res.get_id()[0] == " "
    ]
def residue_depth(pdbName, ReaderAtomsInput, filename, UseInterfaceAtoms=False):
    """Compute the distance from each atom to the surface of the first model.

    With ``UseInterfaceAtoms`` the atom list is first passed through
    ``pdbReader_to_BioPyth`` together with ``ReaderAtomsInput``. The
    ``[atom, distance]`` pairs are converted with ``BioPyth_to_pdbReader``
    before being returned.
    """
    model = PDBParser(PERMISSIVE=1).get_structure(pdbName, filename)[0]

    BioAtoms = [
        atom for chain in model for residue in chain for atom in residue
    ]
    if UseInterfaceAtoms:
        BioAtoms = pdbReader_to_BioPyth(ReaderAtomsInput, BioAtoms)

    surface = get_surface(model)
    depth_pairs = [
        [atom, min_dist(atom.get_coord(), surface)] for atom in BioAtoms
    ]
    return BioPyth_to_pdbReader(depth_pairs, ReaderAtomsInput)
def free_cys_tyr(pdb_utils):
    """Record free CYS and TYR residues on experimental structures.

    Structures that already have a ``free_cys`` or ``free_tyr`` residue set
    are skipped. A CYS (atom ``SG``) or TYR (atom ``OH``) is free when no
    atom of another residue in the first model lies within 3 of that atom.
    Free residues are stored as ``<chain id>_<residue number>`` in new
    ``free_cys`` / ``free_tyr`` residue sets, and the document is saved when
    at least one was found.
    """
    parser = PDBParser(PERMISSIVE=1, QUIET=1)
    _log.debug("procesing free cys/tyr")
    total = ExperimentalStructure.objects(
        residue_sets__name__ne="free_tyr").count()
    pending = ExperimentalStructure.objects(
        residue_sets__name__ne="free_tyr").no_cache().timeout(False)
    codes = {"CYS": "SG", "TYR": "OH"}

    for strdoc in tqdm(pending, total=total):
        if strdoc.residue_set("free_cys") or strdoc.residue_set("free_tyr"):
            continue

        # Try to fetch the file once if it is missing; skip if still absent.
        if not os.path.exists(pdb_utils.pdb_path(strdoc.name)):
            pdb_utils.update_pdb(strdoc.name)
        if not os.path.exists(pdb_utils.pdb_path(strdoc.name)):
            continue

        try:
            bp_pdb = list(
                parser.get_structure(strdoc.name,
                                     pdb_utils.pdb_path(strdoc.name)))[0]
        except (PDBConstructionException, TypeError):
            continue

        # Build the model-wide atom set once. It used to be rebuilt for
        # every CYS/TYR residue.
        all_atoms = set(bp_pdb.get_atoms())
        free = {"CYS": [], "TYR": []}
        for x in bp_pdb.get_residues():
            if x.resname not in codes:
                continue
            atom_name = codes[x.resname]
            if atom_name not in x:
                continue
            neighbor_atoms = all_atoms - set(x)
            if not any((x[atom_name] - atom) <= 3 for atom in neighbor_atoms):
                free[x.resname].append(x.parent.id + "_" + str(x.id[1]))

        if free["CYS"]:
            strdoc.residue_sets.append(
                ResidueSet(name="free_cys", residues=free["CYS"]))
        if free["TYR"]:
            strdoc.residue_sets.append(
                ResidueSet(name="free_tyr", residues=free["TYR"]))
        if free["CYS"] or free["TYR"]:
            strdoc.save()
def getPDBSequence(pdb_name, pdb_path, chain):
    """Return residue numbers and one-letter codes of a chain's standard residues.

    Only the first model is read, and only residues for which
    ``is_aa(..., standard=True)`` holds are kept.

    Returns:
        ``(residue_position, residue_name)``: two parallel lists.

    Raises:
        Exception: for any failure while reading the chain; the original
            error is printed and logged first.
    """
    logging.info("getPDBSequence pdb " + pdb_name + " cadena " + chain)
    from Bio.PDB.PDBParser import PDBParser
    from Bio.PDB.Polypeptide import three_to_one
    from Bio.PDB.Polypeptide import is_aa

    residue_position = []
    residue_name = list()
    try:
        parser = PDBParser(PERMISSIVE=1)
        structure = parser.get_structure(pdb_name, pdb_path)
        model = structure[0]
        # Keep ``chain`` as the id string. It used to be rebound to the
        # Chain object, so building the error messages below raised
        # TypeError and hid the real failure.
        chain_obj = model[chain]
        for residue in chain_obj:
            if is_aa(residue.get_resname(), standard=True):
                residue_name.append(three_to_one(residue.get_resname()))
                residue_position.append(residue.get_full_id()[3][1])
            # Non-standard residues are skipped.
    except Exception as inst:
        print(inst)
        logging.error(
            "Error no controlado intentando leer la sequencia del pdb " +
            pdb_name + " cadena " + chain + " path " + pdb_path)
        raise Exception("PDB Invalido pdb " + pdb_name + " cadena " + chain +
                        " path " + pdb_path)
    return residue_position, residue_name
def parse_pdb_local(code):
    """Parse the gzipped entry for ``code`` from the local PDB mirror.

    The file is read from
    ``<LOCAL_PDB_DIR>/<code[1:3]>/pdb<code>.ent.gz`` with ``code`` in lower
    case.

    Returns:
        The parsed Biopython structure, with ``code`` as its id.
    """
    code = code.lower()
    path = '%s/%s/pdb%s.ent.gz' % (LOCAL_PDB_DIR, code[1:3], code)
    p = PDBParser()
    # Text mode gives the parser str lines. The with block closes the
    # handle, which used to be left open.
    with gzip.open(path, 'rt') as f:
        structure = p.get_structure(code, f)
    return structure
def update_entry_data(self, code, pdb_path):
    """Process residues and atoms of every chain in the first model.

    The ``PDB`` record with the given ``code`` is looked up first; each
    chain is then handed to ``_process_chain_residues`` and
    ``_process_chain_atoms`` under a ``tqdm`` progress bar.
    """
    pdb_model = PDB.objects.get(code=code)
    structure = PDBParser(PERMISSIVE=True, QUIET=True).get_structure(
        code, pdb_path)
    first_model_chains = list(structure[0].get_chains())

    for chain in tqdm(first_model_chains):
        self._process_chain_residues(pdb_model, chain)
        self._process_chain_atoms(pdb_model, chain)
def load_chains(raw, pdb_id, pdb_type, known):
    """Group the known residues of a structure by chain.

    Every residue whose stripped name is a key of ``known`` gets a unit id
    made of the structure id, ``pdb_type``, model id, chain id, residue
    number, residue name and insertion code, joined by ``_``.

    Returns:
        Dict with ``'ordering'`` (all unit ids in traversal order) and, for
        each chain with at least one known residue, a dict holding its
        ``'residues'`` list and its ``'sequence'`` string.
    """
    parser = PDBParser()
    structure = parser.get_structure(pdb_id, raw)
    structure_id = structure.get_id()
    data = {'ordering': []}
    ordering = data['ordering']

    for model in structure:
        model_id = model.get_id()
        for chain in model:
            chain_id = chain.get_id()
            entry = data[chain_id] = {'residues': [], 'sequence': []}

            for residue in chain:
                name = residue.resname.strip()
                if name in known:
                    res_id = residue.get_id()
                    fields = [structure_id, pdb_type, model_id, chain_id,
                              res_id[1], residue.resname, res_id[2]]
                    unit_id = '_'.join(
                        [str(field).strip() for field in fields])
                    entry['residues'].append(unit_id)
                    entry['sequence'].append(known[name])
                    ordering.append(unit_id)

            if not entry['residues']:
                # Nothing known in this chain: leave it out of the result.
                del data[chain_id]
            else:
                entry['sequence'] = ''.join(entry['sequence'])

    return data
def getPdbSequance(pdb_file, chain_id):
    """Return the one-letter sequence and residue numbers of one chain.

    Only the first model is read. A residue is kept when its hetero flag is
    blank, ``H_MSE``, ``H_M3L`` or ``H_CAS`` and it has N, CA and C atoms
    plus CB (CB is not required for GLY). Any other residue whose flag is
    not ``W`` is reported on stdout. The process exits if the chain is
    absent.

    Returns:
        ``(pdb_seq, pdb_indexes)``: the ``three2one`` conversion of the kept
        residue names, and the matching residue numbers.
    """
    pdb_indexes = []
    pdb_sequance = []

    p = PDBParser(PERMISSIVE=1)
    s = p.get_structure("", pdb_file)
    pdb_id = pdb_file[0:-4]

    # print() with one string argument behaves the same on Python 2 and 3;
    # the previous print statements were Python-2-only syntax.
    if not s[0].has_id(chain_id):
        print("PDB " + pdb_id + " doesn't have chain with id " + chain_id)
        print("")
        exit()

    chain = s[0][chain_id]

    for res in chain:
        is_regular_res = (res.has_id('N') and res.has_id('CA')
                          and res.has_id('C')
                          and (res.get_resname() == 'GLY'
                               or res.has_id('CB')))
        res_id = res.get_id()[0]
        if (res_id == ' ' or res_id == 'H_MSE' or res_id == 'H_M3L'
                or res_id == 'H_CAS') and is_regular_res:
            pdb_sequance.append(res.get_resname())
            pdb_indexes.append(res.get_id()[1])
        elif res_id != 'W':
            print("Unknown residue in " + pdb_id + " with res_id " + res_id)

    pdb_seq = three2one(pdb_sequance)

    return pdb_seq, pdb_indexes
def get_biopython_structure(path, model_id=None):
    """Parse a ``.pdb`` or gzipped PDB file into a Biopython structure.

    Args:
        path: file path; surrounding whitespace is stripped.
        model_id: structure id; defaults to the file's base name.

    Returns:
        The parsed structure. The process exits with a message when the
        extension is neither ``pdb`` nor ``gz``.
    """
    path = path.strip()
    parser = PDBParser()
    if not model_id:
        model_id = os.path.basename(path)

    extension = os.path.basename(path).split('.')[-1]
    if extension == "pdb":
        return parser.get_structure(model_id, path)
    if extension == 'gz':
        # Text mode gives the parser str lines. The with block closes the
        # handle even when parsing raises.
        with gzip.open(path, 'rt') as handle:
            return parser.get_structure(model_id, handle)
    sys.exit("Unknown extension to read PDB: " + path)
def read_pdbs(directory, verbose=False):
    """Read every ``.pdb`` file of a directory as a two-chain model.

    ``directory`` must exist and end with ``/``. The first model of each
    file is kept. The process exits with status 1 when the directory is
    invalid, a file cannot be parsed, no model was read, or a model does
    not have exactly two chains.

    Returns:
        List of Biopython model objects.
    """
    if verbose:
        print("Reading pdb input files from %s" % directory)
    parser = PDBParser(PERMISSIVE=1, QUIET=True)

    if not (os.path.isdir(directory) and directory.endswith("/")):
        sys.stderr.write(
            "Directory %s doesn't exists, please select a valid directory." %
            directory)
        sys.exit(1)

    try:
        # Generates pdb objects for files that end with .pdb
        pdbmodels = [
            parser.get_structure("Model_pair", directory + f)[0]
            for f in listdir(directory) if f.endswith(".pdb")
        ]
    except Exception:
        # Not a bare ``except:``, so Ctrl-C and SystemExit are no longer
        # reported as a file-format problem.
        sys.stderr.write(
            "PDB files couldn't be opened. Please, revise that their format is correct."
        )
        sys.exit(1)

    if not pdbmodels:  # If no pdb instance is generated
        sys.stderr.write(
            "No pdb files where read. Please make sure the given directory contains pdb files. "
        )
        sys.exit(1)

    for model in pdbmodels:
        if len(model.child_list) != 2:
            sys.stderr.write(
                "A pdb input file doesn't contains two chains. Please, all input pdbs must only contain "
                "two chains.")
            sys.exit(1)

    if verbose:
        print("Pdb objects stored")
    return pdbmodels
def get_template_stech_dict(template, seq_dict, verbose=False):
    """Generates a stoichiometry dictionary for a given pdb template.

    Each chain of the template's first model is wrapped in ``CustomChain``.
    When its sequence is a key of ``seq_dict`` the chain takes the mapped id
    and that id's counter is incremented.

    Returns:
        Dict of the form ``{"A": 2, "B": 3, ...}``: chain id to number of
        repetitions.
    """
    template_model = PDBParser(PERMISSIVE=1, QUIET=True).get_structure(
        "template", template)[0]

    template_stech_dict = {}
    for raw_chain in template_model:
        custom = CustomChain(raw_chain)
        # Removes previous parent to evade biopython errors of id repetitions
        custom.parent = None
        sequence = custom.get_sequence()
        if sequence in seq_dict:
            custom.id = seq_dict[sequence]
            template_stech_dict[custom.id] = (
                template_stech_dict.get(custom.id, 0) + 1)

    if verbose:
        stechometry_string = ",".join(
            key + ":" + str(template_stech_dict[key])
            for key in sorted(template_stech_dict.keys()))
        print("Template's Stoichiometry is: " + stechometry_string)

    return template_stech_dict
def Init():
    """Read ``task.input`` and build the detector grid and atom coordinates.

    ``task.input`` holds ``key=value`` lines; lines starting with ``/`` and
    empty lines are skipped. ``angle`` lines are parsed as comma-separated
    floats, and every other key is stored as a stripped string. The keys
    read here are ``protein_file``, ``CENTER``, ``scr_size``, ``pix_size``,
    ``distance`` and ``lambda``.

    Returns:
        ``(s, allarr)``: ``s`` has shape ``(scr_size, scr_size, 3)`` with
        one vector per detector pixel; ``allarr`` holds the atom
        coordinates, shifted to zero mean when ``CENTER`` is ``ON``.

    Raises:
        ValueError: if the protein file is neither ``.cif`` nor ``.pdb``.
    """
    para = {}
    jobs = []
    with open("task.input", "r") as ptask:
        for line in ptask:
            if line[0] == '/' or line[0] == '\n':
                continue
            # Split on the first "=" only, so values may contain "=".
            a, b = line.split("=", 1)
            if a == 'angle':
                jobs.append([float(x) for x in b.strip().split(',')])
            else:
                para[a] = b.strip()

    filename = para['protein_file']
    # Take the extension after the last dot, so names or paths with extra
    # dots still resolve to their real type.
    file_type = filename.strip().rsplit('.', 1)[-1]
    if file_type == 'cif':
        mt = MMCIF2Dict(filename)
        xlist = [float(x) for x in mt['_atom_site.Cartn_x']]
        ylist = [float(x) for x in mt['_atom_site.Cartn_y']]
        zlist = [float(x) for x in mt['_atom_site.Cartn_z']]
        allarr = numpy.vstack((xlist, ylist, zlist)).T
    elif file_type == 'pdb':
        parser = PDBParser()
        structure = parser.get_structure("test", filename)
        allarr = numpy.array(
            [atom.get_coord() for atom in structure.get_atoms()])
    else:
        # This case used to surface later as an undefined-variable error.
        raise ValueError("unsupported protein file type: %s" % file_type)

    if para['CENTER'] == 'ON':
        allarr -= allarr.mean(axis=0)

    scr_size = int(para['scr_size'])
    pix_size = float(para['pix_size'])
    distance = float(para['distance'])
    wavenum = 1.0 / float(para['lambda'])
    ssc = scr_size / 2.0 - 0.5
    s = numpy.zeros((scr_size, scr_size, 3))
    for i in range(scr_size):
        for j in range(scr_size):
            x = (i - ssc) * pix_size
            y = (j - ssc) * pix_size
            z = distance
            sr = numpy.sqrt(x * x + y * y + z * z)
            s[i, j, :] = numpy.array(
                [x * wavenum / sr, y * wavenum / sr,
                 z * wavenum / sr - wavenum])
    return s, allarr
def get_pdb_sequence(input_pdb_file, chain_id, mapping_output=False,
                     with_gaps=False):
    """Get the sequence of one chain, keyed by residue number.

    Residues of the first model are kept when they have a CA atom, pass
    ``is_aa`` and have a blank insertion code. Residues that
    ``three_to_one`` does not know are ignored.

    Args:
        input_pdb_file: path of the PDB file, also used as structure id.
        chain_id: id of the chain to read.
        mapping_output: return the ``{residue number: letter}`` dict
            instead of the joined string.
        with_gaps: fill missing residue numbers between the first and last
            kept residue with ``'-'``.

    Returns:
        The mapping dict, or the letters joined in residue-number order.
    """
    mapping = {}
    pdb_parser = PDBParser(PERMISSIVE=True, QUIET=True)
    structure = pdb_parser.get_structure(input_pdb_file, input_pdb_file)
    chain = structure[0][chain_id]

    for res in chain:
        # Remove alternative location residues
        if "CA" in res.child_dict and is_aa(res) and res.id[2] == ' ':
            try:
                mapping[res.id[1]] = three_to_one(res.get_resname())
            except KeyError:
                # Ignore non standard residues such as HIC, MSE, etc.
                pass

    # With nothing kept there is no first or last residue to span; indexing
    # the empty number list used to raise IndexError here.
    if with_gaps and mapping:
        for number in range(min(mapping), max(mapping) + 1):
            mapping.setdefault(number, '-')

    if mapping_output:
        return mapping
    return ''.join([mapping[k] for k in sorted(mapping.keys())])
def collect_1(self, checkboard = []):
    """Split chains A and B of each pending file into separate PDB files.

    Files of ``self.files`` are handled only while ``checkboard`` is
    non-empty, and only when listed in it. For each one an output directory
    named after the file is created under ``self.outpath``, chains ``A`` and
    ``B`` of the first model are saved under the two names encoded in the
    file name (separated by ``___``), ``self.then_do`` is called and the
    file is removed from ``checkboard``.
    """
    def getChains(s):
        names = s.split('___')[:2]
        assert(len(names) == 2)
        return names

    parser = PDBParser()
    io = PDBIO()

    for f in self.files:
        if not checkboard:
            break
        if f not in checkboard:
            continue

        try:
            os.mkdir(os.path.join(self.outpath, f))
        except OSError as e:
            # An already existing directory is fine; anything else is not.
            if e.errno != errno.EEXIST:
                raise

        structure = parser.get_structure(f, os.path.join(self.inpath, f))
        chain_A, chain_B = getChains(f)

        for chain_id, out_name in (('A', chain_A), ('B', chain_B)):
            io.set_structure(structure[0][chain_id])
            io.save(os.path.join(self.outpath, f, out_name + '.pdb'))

        # hook that lets other applications reuse this module
        self.then_do(f)
        # remove the finished file from checkboard
        checkboard.remove(f)
def scwrl(self, altseq):
    """Repacks sidechains using SCWRL4 and returns a copy.

    The current structure and ``altseq`` are written to temporary files
    under ``temp/``, ``scwrl`` is run on them, and its output is parsed into
    a new ``PDBMapStructure``. The three temporary files are removed
    afterwards.

    NOTE(review): ``altseq`` is written to a file opened in binary mode, so
    it presumably has to be bytes on Python 3 — confirm against callers.
    """
    io = PDBIO()
    seqfname = "temp/%d.txt" % multidigit_rand(10)
    structfile = "temp/%d.pdb" % multidigit_rand(10)
    scwrlfile = structfile + ".scwrl"

    with open(seqfname, 'wb') as seqfile:
        seqfile.write(altseq)

    io.set_structure(self.structure)
    io.save(structfile)

    cmd = ["scwrl", "-0", "-i", structfile, '-s', seqfname, '-o', scwrlfile]
    # print() with one argument behaves the same on Python 2 and 3; the
    # previous print statement was Python-2-only syntax.
    print("\n%s" % ' '.join(cmd))
    sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE).communicate()

    p = PDBParser()
    # Opening the output fails early when scwrl produced nothing; the
    # parser then reads the same path itself.
    with open(scwrlfile, 'rb'):
        filterwarnings('ignore', category=PDBConstructionWarning)
        s = p.get_structure(self.id, scwrlfile)
        resetwarnings()

    s = PDBMapStructure(s, pdb2pose={}, refseq=self.refseq)

    os.remove(structfile)
    os.remove(scwrlfile)
    os.remove(seqfname)
    return s
def main(args):
    """Run ``combine`` on every ``.pdb`` file of a folder and write CSV output.

    With ``args.join == 'no'`` one ``<ID>.csv`` is written per structure;
    with ``args.join == 'yes'`` all results are concatenated into
    ``AALI_contacts.csv``. Structures for which ``combine`` returns None are
    reported and skipped. ``args.out_folder`` is created if missing.
    """
    if not os.path.exists(args.out_folder):
        os.mkdir(args.out_folder)

    proteins = [
        entry for entry in os.listdir(args.in_folder)
        if entry.endswith('.pdb')
    ]

    if args.join == 'yes':
        result_joined = pd.DataFrame()

    for protein in proteins:
        ID = protein.replace('.pdb', '')
        structure = PDBParser().get_structure(
            ID, args.in_folder + '/' + protein)
        result = combine(structure)

        if result is None:
            print('No ligands and/or water present in ', ID)
            continue

        if args.join == 'no':
            result.to_csv(args.out_folder + '/' + ID + '.csv')
        elif args.join == 'yes':
            result_joined = pd.concat([result_joined, result])

    if args.join == 'yes':
        result_joined.to_csv(args.out_folder + '/' + 'AALI_contacts.csv')
def __pdb_ordering__(self, raw, pdb_id, pdb_type):
    """Number every known unit of a structure in traversal order.

    Returns a dict of the form ``{unit_id: {'index': index, 'pdb': pdb_id}}``
    for all residues whose stripped name is in ``self.known``. The model
    number used in the unit id is the Biopython model id plus one.
    """
    structure = PDBParser(QUIET=True).get_structure(pdb_id, raw)
    structure_id = structure.get_id()

    data = {}
    index = 0
    for model in structure:
        model_id = model.get_id() + 1
        for chain in model:
            chain_id = chain.get_id()
            for residue in chain:
                name = residue.resname.strip()
                if name not in self.known:
                    continue
                res_id = residue.get_id()
                fields = (structure_id, pdb_type, model_id, chain_id,
                          res_id[1], name, res_id[2])
                unit_id = '_'.join(str(field).strip() for field in fields)
                data[unit_id] = {'index': index, 'pdb': pdb_id}
                index += 1
    return data
def old_residue_ids(raw, filename):
    """List the id components of every residue in a structure.

    The structure id is the base name of ``filename`` without extension.
    The type is ``'AU'`` for a ``.pdb`` extension, otherwise ``'BA'``
    followed by the last character of ``filename``.

    Returns:
        List of dicts with the keys ``pdb``, ``type``, ``model``, ``chain``,
        ``number``, ``unit`` and ``insertion``.
    """
    parser = PDBParser()
    path, ext = os.path.splitext(filename)
    pdb_id = os.path.basename(path)
    structure = parser.get_structure(pdb_id, raw)

    pdb_type = 'AU' if ext == '.pdb' else 'BA' + filename[-1]

    data = []
    for model in structure:
        # BioPython seems to start number models at 0, but it should start
        # at 1.
        model_id = str(model.get_id() + 1)
        for chain in model:
            chain_id = chain.get_id()
            data.extend(
                {
                    'pdb': pdb_id,
                    'type': pdb_type,
                    'model': model_id,
                    'chain': chain_id,
                    'number': str(residue.get_id()[1]),
                    'unit': residue.resname.strip(),
                    'insertion': residue.get_id()[2].rstrip()
                }
                for residue in chain
            )
    return data
def prepare_virtual_sites(pdb_file, use_cis_proline=False):
    """Recompute N, C and H positions from CA and O, then overwrite the file.

    For each residue, N and H of the current residue and C of the previous
    residue are set to a fixed linear combination of the previous CA, the
    current CA and the previous O. A separate coefficient set is used for
    residues named ``IPR`` when ``use_cis_proline`` is true. The modified
    structure is saved back to ``pdb_file``.
    """
    # Weights applied to (previous CA, current CA, previous O).
    cis_weights = {
        'N': (-0.2094, 0.6908, 0.5190),
        'C': (0.2196, 0.2300, 0.5507),
        'H': (-0.9871, 0.9326, 1.0604),
    }
    default_weights = {
        'N': (0.48318, 0.70328, -0.18643),
        'C': (0.44365, 0.23520, 0.32115),
        'H': (0.84100, 0.89296, -0.73389),
    }

    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('X', pdb_file)

    for model in structure:
        for chain in model:
            prev_atoms = {}
            cur_atoms = {}
            for residue in chain:
                prev_atoms = cur_atoms
                cur_atoms = {atom.get_name(): atom for atom in residue}

                is_cis = use_cis_proline and residue.get_resname() == "IPR"
                weights = cis_weights if is_cis else default_weights

                # N and H belong to this residue, C to the previous one.
                targets = (('N', cur_atoms), ('C', prev_atoms),
                           ('H', cur_atoms))
                for atom_name, owner in targets:
                    if atom_name not in owner:
                        continue
                    w_prev_ca, w_cur_ca, w_prev_o = weights[atom_name]
                    owner[atom_name].set_coord(
                        w_prev_ca * prev_atoms['CA'].get_coord()
                        + w_cur_ca * cur_atoms['CA'].get_coord()
                        + w_prev_o * prev_atoms['O'].get_coord())

    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_file)
def get_normalized_pairs(n):
    '''Count residue-name pairs whose CB atoms lie within radius n.

    Every ``./pdbfiles/*.ent`` file is parsed, its CB atoms are searched for
    all pairs closer than ``n``, and the residue names of each pair are
    tallied. Counts are accumulated over all processed files and written to
    ``normalized_pairs8.py`` as ``Normalized_pairs_<n>=<dict>``. Progress
    messages go to stderr.

    The original author describes the reference set as 1.110 sequences of
    known structure with <40% homology, chosen to avoid family redundancy,
    and notes that this helper is not necessary for the package.

    Files with two or fewer CB atoms are reported as not processed and
    contribute nothing.

    :param n: Search radius passed to ``NeighborSearch.search_all``.
    '''
    parser = PDBParser(PERMISSIVE=1)
    counter = Counter()
    file_list = []

    for filename in glob.glob('./pdbfiles/*.ent'):
        structure = parser.get_structure('X', filename)
        cb_atoms = [atom for atom in structure.get_atoms()
                    if atom.name == 'CB']
        if len(cb_atoms) > 2:
            # All atom pairs within radius n for this file only.
            neighbors = Bio.PDB.NeighborSearch(cb_atoms).search_all(n)
            # Accumulate per file; building the pair list once after the
            # loop would only ever see one file's neighbours.
            counter.update(
                (first.get_parent().get_resname(),
                 second.get_parent().get_resname())
                for first, second in neighbors)
            file_list.append(filename)
            sys.stderr.write(filename+' processed.\n')
        else:
            sys.stderr.write(filename+' could not be processed.\n')

    sys.stderr.write(str(len(file_list))+' files processed.\n')
    sys.stderr.write('Dictionary length: '+str(len(counter))+'.\n')
    # NOTE(review): the output name is fixed and does not follow n -- confirm
    # that this is intended.
    with open('normalized_pairs8.py', 'w') as outfile:
        outfile.write('\nNormalized_pairs_'+str(n)+'='+str(dict(counter)))
def score(PDBfile):
    """Calculate the m-score for a given PDB file.

    Only the first chain of the first model is scored. Proline, glycine and
    the remaining residue indices are scored separately with
    ``score_help`` and the sum is divided by ``length(chain)`` and
    multiplied by 1000.

    arguments:
        PDBfile - the PDB file to score

    hidden arguments:
        aas.scr, pro.scr, gly.scr - the scoring tables need to be present in
        the working directory

    returns:
        the normalized score, or 0 when the proline/glycine indices are not
        all found among the residue numbers, or when the computed length is
        zero
    """
    from pro_angle import find_residue
    from Bio.PDB.PDBParser import PDBParser
    from pro_length import length

    (aas, gly, pro) = load_scores()  # the three scoring tables

    # str.rstrip('.pdb') strips any trailing '.', 'p', 'd', 'b' characters
    # (e.g. '1abd.pdb' -> '1a'); drop the suffix explicitly instead.
    if PDBfile.endswith('.pdb'):
        structure_id = PDBfile[:-len('.pdb')]
    else:
        structure_id = PDBfile
    struct = PDBParser(PERMISSIVE=1).get_structure(structure_id, PDBfile)
    chain = struct.child_list[0].child_list[0]
    last = len(chain) - 1

    pro_list = find_residue(chain, 'PRO')
    gly_list = find_residue(chain, 'GLY')
    # list(...) so that .remove() is available on Python 3 as well.
    aas_list = list(range(chain.child_list[1].id[1],
                          chain.child_list[last].id[1]))

    # need to remove pro/gly indices in first/last position
    for indices in (pro_list, gly_list):
        for terminal in (1, last):
            if terminal in indices:
                indices.remove(terminal)

    try:
        for index in pro_list:
            aas_list.remove(index)  # remove pros from aas_list
        for index in gly_list:
            aas_list.remove(index)  # remove glys from aas_list
    except ValueError:
        print('incosistency in PDB file - will return score = 0')
        return 0

    total = (score_help(chain, pro_list, pro)
             + score_help(chain, gly_list, gly)
             + score_help(chain, aas_list, aas))
    size = length(chain)
    try:
        return (total / size) * 1000  # normalize score
    except ZeroDivisionError:
        print("calculated protein length 0 -> returning score 0")
        return 0
def score(query_pdb_path, against_pdb_path,
          query_fp_path=None, against_fp_path=None,
          query_epitope=[], against_epitope=[],
          spin_image_height_step=5, spin_image_radius_step=2,
          sphere_radius_step=2, cutoff=20.0,
          spin_image_radius_range=(0, 20),
          spin_image_height_range=(-30, 10),
          sphere_radius_range=(0, 20),
          callback=write_score_to_file, cbargs=[]):
    """Compute the three similarity scores between two PDB complexes.

    Both PDB files are parsed and wrapped in ``Complex`` objects with their
    epitopes. When both fingerprint paths are given, the fingerprint strings
    are read from those files; if either is missing, both fingerprints are
    computed with the given step sizes. The scores come from
    ``similarity_between(query, against, cutoff=cutoff)``.

    The three ``*_range`` parameters are accepted but not used by this
    function. The list defaults (``query_epitope``, ``against_epitope``,
    ``cbargs``) are single objects shared between calls.

    :param callback: Called as ``callback((score1, score2, score3), *cbargs)``
        unless it is ``None``.
    :returns: The tuple ``(score1, score2, score3)``.
    """
    parser = PDBParser(PERMISSIVE=1)
    query_struct = parser.get_structure(
        os.path.basename(query_pdb_path), query_pdb_path)
    against_struct = parser.get_structure(
        os.path.basename(against_pdb_path), against_pdb_path)

    query_complex = Complex(query_struct, query_epitope)
    against_complex = Complex(against_struct, against_epitope)

    if query_fp_path is not None and against_fp_path is not None:
        # Fingerprints were supplied: read them from disk.
        with open(query_fp_path, 'r') as query_file, \
                open(against_fp_path, 'r') as against_file:
            query_fp_string = query_file.read()
            against_fp_string = against_file.read()
    else:
        # At least one fingerprint is missing: compute both.
        steps = dict(spin_image_radius_step=spin_image_radius_step,
                     spin_image_height_step=spin_image_height_step,
                     sphere_radius_step=sphere_radius_step)
        query_complex.get_fp(**steps)
        against_complex.get_fp(**steps)
        query_fp_string = query_complex.fp2str()
        against_fp_string = against_complex.fp2str()

    query = FPWithComplex(query_complex, query_fp_string)
    against = FPWithComplex(against_complex, against_fp_string)

    score1, score2, score3 = similarity_between(query, against,
                                                cutoff=cutoff)
    scores = (score1, score2, score3)
    if callback is not None:
        callback(scores, *cbargs)
    return scores
def get_structure(pdb_id):
    '''Return the parsed structure for the given PDB id.

    The source URL and a target path under the user's home directory are
    handed to ``io_utils.get_file``; the path it returns is opened and then
    parsed by name. (Presumably ``get_file`` downloads the file when it is
    not already stored locally -- confirm against ``io_utils``.)
    '''
    pdb_basename = pdb_id + '.pdb'
    source_url = 'http://www.rcsb.org/pdb/files/' + pdb_basename
    target_filename = os.path.join(os.path.expanduser('~'), _DIR, _PDB_DIR,
                                   pdb_basename)
    local_path = io_utils.get_file(source_url, target_filename)
    with open(local_path) as pdb_file:
        return PDBParser(QUIET=True).get_structure(pdb_id, pdb_file.name)
def main():
    """Parse the PDB file named on the command line and run calculate_ss.

    Exits with a usage message when no file argument is given. Only the
    first model of the structure is passed to ``calculate_ss``.
    """
    arguments = sys.argv
    if len(arguments) < 2:
        sys.exit("Usage: %s input_pdb_file" % arguments[0])

    structure = PDBParser(PERMISSIVE=1).get_structure("temp", arguments[1])
    calculate_ss(structure[0])
def parse(self, *pdb_filenames):
    """REQUIRED. Load the given protein PDB files into ``self.proteins``.

    Any number of files may be given, but only two will be used for the
    superimposition. ``self.proteins`` is reset on every call, and each
    file is parsed under the id derived from its file name.
    """
    self.proteins = []  # forget structures from any earlier call
    parser = PDBParser(QUIET=True)
    for path in pdb_filenames:
        # The file name doubles as the PDB id.
        structure = parser.get_structure(
            self.__get_pdb_id_from_filename(path), path)
        self.proteins.append(structure)
def removeDoubleAtoms():
    """Remove all double atoms from a PDB and save the result.

    Reads the module-level ``pdb_name`` and writes the cleaned structure to
    ``<name>_noDouble.pdb``, where ``<name>`` is the text of ``pdb_name``
    before its first ``'.'``.
    """
    name_struct = pdb_name.partition('.')[0]
    structure = PDBParser().get_structure(name_struct, pdb_name)
    # NOTE(review): confirm that the structure class in use provides
    # remove_disordered_atoms().
    structure.remove_disordered_atoms()

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(name_struct + '_noDouble.pdb')
def get_pdb_structure(pdb_file, pdb_id=None, quiet=True):
    """Parse a (possibly gzipped) PDB file and normalise its chain ids.

    Set ``quiet`` to False to output parser warnings like incomplete chains.

    In the first model, every chain whose id is ``' '`` or ``'Z'`` is given
    the first uppercase ASCII letter not used by another chain, and the
    model's ``child_dict`` is rebuilt from the resulting ids.

    :param pdb_file: Path to the PDB file; names ending in ``.gz`` are
        opened with ``gzip`` in text mode.
    :param pdb_id: Structure id; derived with ``get_pdb_id`` when ``None``.
    :returns: The parsed structure.
    """
    if pdb_id is None:
        pdb_id = get_pdb_id(pdb_file)

    parser = PDBParser(get_header=True, QUIET=quiet)
    if not pdb_file.endswith('.gz'):
        structure = parser.get_structure(pdb_id, pdb_file)
    else:
        with gzip.open(pdb_file, 'rt') as handle:
            structure = parser.get_structure(pdb_id, handle)

    first_model = structure[0]
    used_ids = set(chain.id for chain in first_model.child_list)
    for chain in first_model.child_list:
        if chain.id not in (' ', 'Z'):
            continue
        # Free the current id first so it can be chosen again if it is the
        # only letter left.
        used_ids.remove(chain.id)
        chain.id = next(letter for letter in string.ascii_uppercase
                        if letter not in used_ids)
        used_ids.add(chain.id)

    first_model.child_dict = dict(
        (chain.id, chain) for chain in first_model.child_list)
    return structure
def Draw(self, parent, filename):
    """Plot three families of line segments for the first model on parent.ax2.

    Residues with more than three atoms contribute one segment per family,
    built from their first four atoms: atoms 0-1 in red, atoms 1-2 in blue
    and atoms 2-3 in green. (The code treats the four atoms as N, CA, C and
    O -- confirm against the input files.) The parsed models are stored in
    ``self.pdbMat``.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure("WHYY", filename)
    self.pdbMat = structure.get_list()

    # Each family keeps one list per axis (x, y, z); every entry is the list
    # of coordinates of one segment along that axis.
    red = ([], [], [])
    blue = ([], [], [])
    green = ([], [], [])

    for chain in self.pdbMat[0].get_list():
        for resnum, residue in enumerate(chain.get_list()):
            atoms = residue.get_list()
            if len(atoms) <= 3:
                continue
            if resnum > 1:
                # Extends an earlier blue segment with the first-atom
                # position kept from the previously handled residue.
                for axis in range(3):
                    blue[axis][resnum - 2].append(npos[axis])
            npos = atoms[0].get_coord()
            capos = atoms[1].get_coord()
            cpos = atoms[2].get_coord()
            opos = atoms[3].get_coord()
            for axis in range(3):
                red[axis].append([npos[axis], capos[axis]])
                blue[axis].append([capos[axis], cpos[axis]])
                green[axis].append([cpos[axis], opos[axis]])

    for family, line_style in ((red, "r-"), (blue, "b-"), (green, "g-")):
        for seg_x, seg_y, seg_z in zip(*family):
            x = np.array(seg_x)
            y = np.array(seg_y)
            z = np.array(seg_z)
            parent.ax2.plot(x, y, z, line_style, linewidth=5)
def Pdb2Gro(pdb_file, gro_file, ch_name):
    """Write the atoms of one chain of a PDB file to a gro-style file.

    ``pdb_file`` may be given with or without a ``.pdb`` suffix; the suffix
    is added for reading and removed for the structure id. An empty
    ``ch_name`` selects chain ``'A'``. Only the first model is used.

    A residue is exported when its hetero flag is blank, ``H_MSE``,
    ``H_M3L`` or ``H_CAS`` and it has ``N``, ``CA`` and ``C`` atoms. Atoms
    are numbered consecutively from 1 across the exported residues.

    If no chain matches ``ch_name``, nothing is written.

    :param pdb_file: Input PDB file name, with or without ``.pdb``.
    :param gro_file: Output path.
    :param ch_name: Chain id to export, or ``''`` for chain ``'A'``.
    """
    from Bio.PDB.PDBParser import PDBParser

    pdb_id = pdb_file
    if pdb_file[-4:].lower() != ".pdb":
        pdb_file = pdb_file + ".pdb"
    if pdb_id[-4:].lower() == ".pdb":
        pdb_id = pdb_id[:-4]

    structure = PDBParser(PERMISSIVE=1).get_structure(pdb_id, pdb_file)
    if ch_name == '':
        ch_name = 'A'

    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')
    for chain in structure[0].get_list():
        if chain.get_id() != ch_name:
            continue

        atoms = []
        serial = 0
        for res in chain:
            is_regular_res = (res.has_id('N') and res.has_id('CA')
                              and res.has_id('C'))
            if res.get_id()[0] in accepted_flags and is_regular_res:
                res_name = res.get_resname()
                residue_no = res.get_id()[1]
                for atom in res:
                    serial = serial + 1
                    atoms.append(Atom(serial, atom.get_name(), residue_no,
                                      res_name, atom.get_coord()))

        # The with-block closes the file even if writing an atom fails.
        with open(gro_file, 'w') as out:
            out.write(" Structure-Based gro file\n")
            # NOTE(review): with a one-character pad the [-12:] slice never
            # pads to 12 columns; confirm the intended literal.
            out.write((" "+str(len(atoms)))[-12:])
            out.write("\n")
            for record in atoms:
                record.write_(out)
def get_ca(pdbfile):
    """Return the CA coordinates of every residue in the first model.

    Every residue must have a hetero flag that is blank, ``H_MSE``,
    ``H_M3L`` or ``H_CAS`` and must contain both ``CA`` and ``O`` atoms.
    The first residue that fails this check aborts the program: a message
    and the offending flag are printed and the process exits with status 1.

    :param pdbfile: Path of the PDB file (also used as structure id).
    :returns: A list with one ``CA`` coordinate per residue, in chain order.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure(pdbfile, pdbfile)
    accepted_flags = (' ', 'H_MSE', 'H_M3L', 'H_CAS')

    ca_atoms = []
    for chain in structure[0].get_list():
        for res in chain:
            is_regular_res = res.has_id('CA') and res.has_id('O')
            res_id = res.get_id()[0]
            if res_id in accepted_flags and is_regular_res:
                ca_atoms.append(res['CA'].get_coord())
            else:
                print("Pdb file contains irregular residue names or missing CA / O atoms! Fix it and run again! Exit with error.")
                print("res_id : %s" % (res_id,))
                # A bare sys.exit() reports success (status 0) to the
                # caller even though this is the error path.
                sys.exit(1)
    return ca_atoms
def __init__(self, filename):
    """Load every atom of a PDB file as a coloured sphere.

    After construction the instance holds:

    * ``spheres`` -- one ``Sphere`` per atom with ``x``/``y``/``z``,
      ``radius`` and ``r``/``g``/``b`` set;
    * ``spheredata`` -- the same data packed as eight floats per sphere
      (x, y, z, radius, r, g, b, 1.0);
    * ``x``/``y``/``z`` -- the centre of the axis-aligned bounding box of
      the sphere centres;
    * ``radius`` -- half the diagonal of that bounding box plus 1.5.

    ``max``/``min`` are applied to the coordinate lists, so a file without
    atoms raises ``ValueError``.
    """
    self.spheredata = ''
    # Lookup tables: key -> colour (E2C) and key -> radius (E2R).
    E2C = {}
    E2R = {}
    # Presumably the executed code fills E2C and E2R -- confirm against the
    # definition of `elements`.
    exec elements # Read the color mappings at the bottom of this file

    # Read the file: flatten all models/chains/residues into one atom list.
    atoms = []
    parser = PDBParser()
    structure = parser.get_structure('test',filename)
    for model in structure.get_list():
        for chain in model.get_list():
            for residue in chain.get_list():
                for atom in residue.get_list():
                    atoms += [atom]

    # Look up colors and radius
    spheres = []
    for atom in atoms:
        s = Sphere()
        s.x, s.y, s.z = atom.get_coord()
        # The lookup key is the atom name with leading/trailing digits
        # stripped.
        element = atom.get_name().strip(string.digits)
        # Unknown keys fall back to radius 1.5 and colour 0xFF1493.
        s.radius = E2R[element] if E2R.has_key(element) else 1.5
        color = E2C[element] if E2C.has_key(element) else 0xFF1493
        # The colour integer is unpacked low byte first: red, then green,
        # then blue, each scaled to 0..1.
        s.r = (color & 0xff) / 255.0
        s.g = ((color & 0xff00) >> 8) / 255.0
        s.b = ((color & 0xff0000) >> 16) / 255.0
        spheres += [s]
        self.spheredata += struct.pack('fff f ffff', s.x,s.y,s.z, s.radius, s.r,s.g,s.b,1.0)
    self.spheres = spheres

    # Figure out the total radius
    xs, ys, zs = [s.x for s in spheres], [s.y for s in spheres], [s.z for s in spheres]
    dx = max(xs) - min(xs)
    dy = max(ys) - min(ys)
    dz = max(zs) - min(zs)
    # Half the bounding-box diagonal plus 1.5 (the fallback sphere radius).
    self.radius = np.sqrt(dx*dx + dy*dy + dz*dz) / 2 + 1.5
    # Centre of the bounding box.
    self.x = (max(xs) + min(xs)) / 2
    self.y = (max(ys) + min(ys)) / 2
    self.z = (max(zs) + min(zs)) / 2
def renameChain():
    """Rename one chain of a PDB and save the result.

    Reads the module-level ``pdb_name``, asks interactively for the chain
    to rename and for its new name, applies the change in every model and
    writes the structure to ``<name>_rename.pdb``, where ``<name>`` is the
    text of ``pdb_name`` before its first ``'.'``.
    """
    name_struct = pdb_name.partition('.')[0]
    structure = PDBParser().get_structure(name_struct, pdb_name)

    old_id = raw_input('What is the chain you want to rename : ')
    new_id = raw_input('What is the new name of this chain : ')
    for model in structure:
        for chain in model:
            if chain.id == old_id:
                chain.id = new_id

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(name_struct + '_rename.pdb')
def removeHetero():
    """Remove all heteroatoms from a PDB and save the result.

    Reads the module-level ``pdb_name``. Every residue whose hetero flag
    (first element of the residue id) is not blank is detached; chains left
    without residues are detached as well. The structure is written to
    ``<name>_noHetero.pdb``, where ``<name>`` is the text of ``pdb_name``
    before its first ``'.'``.
    """
    name_struct = pdb_name.partition('.')[0]
    structure = PDBParser().get_structure(name_struct, pdb_name)

    for model in structure:
        # Iterate over snapshots: detaching a child while walking the live
        # child list makes the loop skip the element after each removal.
        for chain in list(model):
            for residue in list(chain):
                if residue.id[0] != ' ':
                    chain.detach_child(residue.id)
            if len(chain) == 0:
                model.detach_child(chain.id)

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(name_struct + '_noHetero.pdb')
def deleteResidue():
    """Delete a residue from a PDB and save the result.

    Reads the module-level ``pdb_name`` and asks interactively for the
    residue number. Every residue whose number (second element of the
    residue id) equals that value is detached, in every chain of every
    model. The structure is written to ``<name>_noResidue.pdb``, where
    ``<name>`` is the text of ``pdb_name`` before its first ``'.'``.

    Input that is not an integer matches no residue, so the structure is
    saved unchanged.
    """
    name_struct = pdb_name.partition('.')[0]
    structure = PDBParser().get_structure(name_struct, pdb_name)

    answer = raw_input('What residue you want to delete : ')
    # The prompt returns text while residue.id[1] is compared as a number;
    # convert once so the comparison below can actually succeed.
    try:
        target = int(answer)
    except ValueError:
        target = None

    for model in structure:
        for chain in model:
            # Snapshot the residues: detaching while iterating the live
            # child list would skip the residue after each removal.
            for residue in list(chain):
                print(residue.id)
                if residue.id[1] == target:
                    print('HELLO')
                    chain.detach_child(residue.id)

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(name_struct + '_noResidue.pdb')
def Draw(self, parent, filename):
    """Scatter-plot the CA, N and O atoms of the first model on parent.ax2.

    Atoms named ``CA`` are drawn in blue (size 385), ``N`` in red (size 350)
    and ``O`` in green (size 330). The parsed models are stored in
    ``self.pdbMat``.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure('WHYY', filename)
    self.pdbMat = structure.get_list()

    # Per atom name: one coordinate list per axis (x, y, z).
    collected = {
        'CA': ([], [], []),
        'N': ([], [], []),
        'O': ([], [], []),
    }
    for chain in self.pdbMat[0].get_list():
        for residue in chain.get_list():
            for atom in residue.get_list():
                # NOTE(review): this looks at the first character of the
                # atom id, not at the residue's hetero flag -- confirm which
                # one was meant.
                if atom.get_id()[0][0] in ["H", "W"]:
                    continue
                pos = atom.get_coord()
                axes = collected.get(atom.get_name())
                if axes is None:
                    continue
                for axis in range(3):
                    axes[axis].append(pos[axis])

    # Marker sizes follow the original notes: 385, 350 and 330 are the
    # radii of carbon, nitrogen and oxygen times 5.
    for atom_name, size, colour in (('CA', 385, 'b'),
                                    ('N', 350, 'r'),
                                    ('O', 330, 'g')):
        xs, ys, zs = collected[atom_name]
        x = np.array(xs)
        y = np.array(ys)
        z = np.array(zs)
        parent.ax2.scatter(x, y, z, zdir='z', marker='o', s=size, c=colour)
def getSequence():
    """Print the upper-cased sequence of one chain of a PDB.

    Reads the module-level ``pdb_name`` and asks interactively for the
    chain id. All other chains are detached from every model, the
    remaining structure is split into peptides with ``PPBuilder`` and the
    concatenated peptide sequences are printed in upper case.
    """
    name_struct = pdb_name.partition('.')[0]
    structure = PDBParser().get_structure(name_struct, pdb_name)

    what_chain = raw_input('For what chain do you want the sequence : ')
    for model in structure:
        # Snapshot the chains: detaching while iterating the live child
        # list skips the chain after each removal, leaving unwanted chains
        # in the model.
        for chain in list(model):
            if chain.id != what_chain:
                model.detach_child(chain.id)

    seq = ''
    for pp in PPBuilder().build_peptides(structure):
        seq = seq + pp.get_sequence()
    seq = seq.upper()
    print(seq)
def Draw(self, parent, filename):
    """Plot the CA trace of the first model as one line on parent.ax2.

    The coordinates of every atom named ``CA`` are collected in chain and
    residue order and drawn with a single ``plot`` call. The parsed models
    are stored in ``self.pdbMat``.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure('WHYY', filename)
    self.pdbMat = structure.get_list()

    ca_positions = []
    for chain in self.pdbMat[0].get_list():
        for residue in chain.get_list():
            for atom in residue.get_list():
                if atom.get_name() == 'CA':
                    ca_positions.append(atom.get_coord())

    x = np.array([pos[0] for pos in ca_positions])
    y = np.array([pos[1] for pos in ca_positions])
    z = np.array([pos[2] for pos in ca_positions])
    parent.ax2.plot(x, y, z)
def GetExec():
    """Refresh the per-file ``.pickle`` summaries of the PDB files in the cwd.

    The timestamp stored in ``lastChecked.txt`` is read and then replaced
    with the current time. For every ``*.pdb`` entry of the current
    directory a summary ``[name, number of models, file name]`` is needed:
    it is recomputed and pickled when no ``<name>.pickle`` exists or when
    ``fmt.filemtime`` reports a time later than the stored timestamp, and
    loaded from the pickle otherwise.

    An ``IOError`` raised while handling one entry is printed and the entry
    is skipped. A missing or unreadable ``lastChecked.txt`` is not handled.

    NOTE(review): ``listdata`` and ``newList`` are filled but neither is
    returned nor stored -- confirm whether a return statement is missing.
    """
    Recs = os.listdir(os.getcwd())
    newList = []
    listdata = dict()
    j = 0
    p = PDBParser(PERMISSIVE=1)

    with open('lastChecked.txt', 'r') as ftime:
        pT = float(ftime.readline())
    with open('lastChecked.txt', 'w') as ftime:
        ftime.write(str(time.time()))

    for rec in Recs:
        try:
            (name, ext) = os.path.splitext(rec)
            if ext == ".pdb":
                pickle_name = name + ".pickle"
                newList.append([rec, os.getcwd()])
                if (not os.path.isfile(pickle_name)
                        or float(fmt.filemtime(rec)) > pT):
                    # Summary missing or stale: parse the file again.
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore")
                        pdbRec = p.get_structure(name, rec)
                        models = pdbRec.get_list()
                        listdata[j] = str(name), len(models), os.getcwd()+'/'+str(name) + str(ext)
                        rHoward = [str(name), len(models), str(name) + str(ext)]
                        mP.spickle(pickle_name, rHoward)
                else:
                    rHoward = mP.opickle(pickle_name)
                    listdata[j] = str(rHoward[0]), rHoward[1], rHoward[2]
                j += 1
        except IOError as e:
            print(e)
def assembleChain():
    """Give a second chain the id of a first chain and save the result.

    Reads the module-level ``pdb_name`` and asks interactively for two
    chain ids. In every model, a chain carrying the second id that contains
    at least one residue is relabelled with the first id; residues are not
    moved between chain objects. The structure is written to
    ``<name>_assemble.pdb``, where ``<name>`` is the text of ``pdb_name``
    before its first ``'.'``.
    """
    name_struct = pdb_name.partition('.')[0]
    structure = PDBParser().get_structure(name_struct, pdb_name)

    what_chain = raw_input('What is the 1st chain you want to assemble : ')
    what_chain2 = raw_input('What is the 2nd chain you want to assemble : ')
    for model in structure:
        for chain in model:
            # An empty chain keeps its id, matching the earlier per-residue
            # relabelling, which never ran for a chain without residues.
            # NOTE(review): recent Biopython releases may refuse an id that
            # a sibling chain already uses -- confirm with the installed
            # version.
            if (chain.id != what_chain and chain.id == what_chain2
                    and len(chain) > 0):
                chain.id = what_chain

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(name_struct + '_assemble.pdb')
def parsePDBInformation(self, file):
    """Collect the residues of every class-1 helix declared in a PDB file.

    The ``HELIX`` records of ``self.dir + "/" + file`` are scanned first;
    only records whose class field (the single character at index 39) is 1
    are kept. The file is then parsed with ``PDBParser`` and the residues
    of each kept helix are extracted: with ``getResiduesFromList`` over all
    residues when the record has no chain id, and with
    ``getResiduesFromChain`` for every chain whose id equals the record's
    chain id.

    :param file: File name relative to ``self.dir``.
    :returns: A list with one extraction result per matched helix.
    :raises ValueError: When a ``HELIX`` record holds a non-numeric start,
        end or class field.
    """
    path = self.dir + "/" + file

    helices = []
    # The with-block closes the file even when a record fails to parse.
    with open(path) as handle:
        for line in handle:
            if not line.startswith("HELIX"):
                continue
            start = int(line[21:25].replace(" ", ""))
            end = int(line[33:37].replace(" ", ""))
            helix_class = int(line[39:40])
            chain_id = line[19:20].replace(" ", "")
            if helix_class == 1:
                helices.append((start, end, chain_id))

    # Parse the structure with a PDBParser object
    structure = PDBParser().get_structure("currentFile", path)

    # For every helix tuple, extract the residues
    helixSequences = []
    for start, end, chain_id in helices:
        if chain_id == "":
            residues = structure.get_residues()
            helixSequences.append(
                self.getResiduesFromList(residues, start, end))
        for chain in structure.get_chains():
            if chain.get_id() == chain_id:
                helixSequences.append(
                    self.getResiduesFromChain(chain, start, end))
    return helixSequences
def parse_atoms_infile(filename):
    '''Parse a PDB file and return its CB atoms.

    The file needs to be in PDB format (*.ent or *.pdb). Only atoms whose
    name is exactly 'CB' are returned, in the order the structure yields
    them.
    '''
    structure = PDBParser(QUIET=True).get_structure("X", filename)
    return [atom for atom in structure.get_atoms() if atom.name == 'CB']