def pdb_to_fasta(pdb_filename, fasta_filename, id_filename, separate):
    """Export the valid chains of a stored structure as FASTA file(s).

    The structure is loaded with ``pd.read_pickle``, so ``pdb_filename`` is
    read as a pickle here. Every chain is labelled
    ``<pdb_name>-<chain[-2]>-<chain[-1]>``.

    Args:
        pdb_filename: Path of the structure to load.
        fasta_filename: Output FASTA path. When ``separate`` is truthy it is
            used as a ``str.format`` template that receives the chain label.
        id_filename: Output id path, passed to ``write_fasta`` as ``id_out``.
            Treated as a template in the same way when ``separate`` is
            truthy.
        separate: If falsy, write all chains to one file; otherwise write
            one file per chain.

    Returns:
        A tuple ``(names, filenames, id_filenames)`` of parallel lists. In
        combined mode the only name is ``'all'``; in separate mode the names
        are the chain identifiers.
    """
    pdb_name = db.get_pdb_name(pdb_filename)
    structure = pd.read_pickle(pdb_filename)

    # Label -> residues, plus the reverse lookup from label to chain.
    sequences = {}
    chain_by_label = {}
    for chain, residues in struct.get_chain_to_valid_residues(structure):
        label = '-'.join((pdb_name, chain[-2], chain[-1]))
        sequences[label] = residues
        chain_by_label[label] = chain

    if not separate:
        # Single combined output holding every chain.
        write_fasta(sequences, fasta_filename, id_out=id_filename)
        return (['all'], [fasta_filename], [id_filename])

    names, filenames, id_filenames = [], [], []
    for label, seq in sequences.items():
        chain_fasta = fasta_filename.format(label)
        chain_ids = id_filename.format(label)
        write_fasta({label: seq}, chain_fasta, id_out=chain_ids)
        names.append(chain_by_label[label])
        filenames.append(chain_fasta)
        id_filenames.append(chain_ids)
    return (names, filenames, id_filenames)
def _get_seq_and_atoms(filename):
    """Build per-chain lookups of atom coordinates and residues.

    Args:
        filename: Path of the structure to parse.

    Returns:
        A tuple ``(all_atoms, seqs)`` -- atoms first. ``all_atoms`` maps
        each chain to a numpy array built from the per-residue ``x``/``y``/
        ``z`` values (stored as ``'f4'``); ``seqs`` maps each chain to its
        residues as yielded by ``struct.get_chain_to_valid_residues``.
        Chains without residues are left out of both mappings.
    """
    chain_to_residues = {}
    chain_to_coords = {}
    structure = struct.parse_structure(filename)
    pdb_name = db.get_pdb_name(filename)

    valid_chains = struct.get_chain_to_valid_residues(structure, pdb_name)
    for chain, residues in valid_chains:
        coords = [
            np.array(residue[['x', 'y', 'z']], dtype='f4')
            for residue in residues
        ]
        if len(residues) == 0:
            # Ignore zero-length peptides.
            continue
        chain_to_residues[chain] = residues
        chain_to_coords[chain] = np.array(coords)
    return chain_to_coords, chain_to_residues
def _generate_reference(pdb_filename, s2r_chain, s2r_res, output_filename,
                        style):
    """Transform PDB structure to a reference structure.

    Every valid chain is renamed through ``s2r_chain``. Chains that also
    have an entry in ``s2r_res`` get their residues renumbered; all other
    chains keep their residue numbering and only have the segment ID
    cleared. The result is saved with ``Bio.PDB.PDBIO``.

    Args:
        pdb_filename: Path of the structure to transform.
        s2r_chain: Mapping from chain to the reference chain id.
        s2r_res: Mapping from chain to a sequence of new residue numbers,
            indexed by the residue's position among the chain's valid
            residues.
        output_filename: Path the transformed structure is saved to.
        style: When equal to ``'dockground'``, chains missing from
            ``s2r_chain`` are dropped instead of raising.

    Raises:
        KeyError: If a chain is missing from ``s2r_chain`` and ``style`` is
            not ``'dockground'``.
    """
    biopy_structure = db.parse_biopython_structure(pdb_filename)
    pdb_name = db.get_pdb_name(pdb_filename)

    new_model = Bio.PDB.Model.Model('0')
    new_structure = Bio.PDB.Structure.Structure('')
    for (chain, residues) in \
            struct.get_chain_to_valid_residues(biopy_structure, pdb_name):
        if style == 'dockground' and chain not in s2r_chain:
            # If we are in dockground, we allow ourselves to remove unmapped
            # chains.
            continue
        new_chain = Bio.PDB.Chain.Chain(s2r_chain[chain])

        if chain in s2r_res:
            # If we have an alignment for this chain, renumber its residues.
            res_map = s2r_res[chain]
            for i, residue in enumerate(residues):
                if residue.id[0] != ' ':
                    # Residues with a non-blank hetero flag are left out.
                    continue
                residue.segid = ""
                residue.id = (' ', res_map[i], residue.id[2])
                new_chain.add(residue)
        else:
            # Else, just remove segment ID. The residues must still be
            # attached to the new chain, otherwise it is written out empty.
            for residue in residues:
                residue.segid = ""
                new_chain.add(residue)
        new_model.add(new_chain)
    new_structure.add(new_model)

    w = Bio.PDB.PDBIO()
    w.set_structure(new_structure)
    w.save(output_filename)