コード例 #1
0
ファイル: sequence.py プロジェクト: NabinGiri/atom3
def pdb_to_fasta(pdb_filename, fasta_filename, id_filename, separate):
    """Write the chains of a pickled structure out as FASTA file(s).

    The structure is loaded from ``pdb_filename`` with ``pd.read_pickle``.
    Every (chain, residues) pair reported by
    ``struct.get_chain_to_valid_residues`` is labelled
    ``<pdb_name>-<chain[-2]>-<chain[-1]>`` and handed to ``write_fasta``.

    Args:
        pdb_filename: Path of the pickled structure to read.
        fasta_filename: Output FASTA path.  When ``separate`` is truthy it
            is used as a template: ``fasta_filename.format(label)`` is
            called once per chain.
        id_filename: Output path passed to ``write_fasta`` as ``id_out``.
            Treated as a ``str.format`` template in the same way as
            ``fasta_filename`` when ``separate`` is truthy.
        separate: If falsy, all chains go into a single FASTA file;
            otherwise one file is written per chain.

    Returns:
        A tuple ``(names, filenames, id_filenames)`` of parallel lists.
        In combined mode ``names`` is ``['all']``; in separate mode it
        holds the chain key of each file that was written.
    """
    pdb_name = db.get_pdb_name(pdb_filename)
    structure = pd.read_pickle(pdb_filename)

    # Label -> residues, and label -> originating chain key.
    sequences = {}
    chain_by_label = {}
    for chain, residues in struct.get_chain_to_valid_residues(structure):
        label = '-'.join((pdb_name, chain[-2], chain[-1]))
        sequences[label] = residues
        chain_by_label[label] = chain

    if not separate:
        # Single combined output: everything goes into one file.
        write_fasta(sequences, fasta_filename, id_out=id_filename)
        return (['all'], [fasta_filename], [id_filename])

    names = []
    filenames = []
    id_filenames = []
    for label, residues in sequences.items():
        # The output paths are templates filled in with the chain label.
        fasta_path = fasta_filename.format(label)
        id_path = id_filename.format(label)
        write_fasta({label: residues}, fasta_path, id_out=id_path)
        names.append(chain_by_label[label])
        filenames.append(fasta_path)
        id_filenames.append(id_path)
    return (names, filenames, id_filenames)
コード例 #2
0
ファイル: case.py プロジェクト: stephaniewankowicz/atom3
def _get_seq_and_atoms(filename):
    """Build per-chain lookups of atom coordinates and residues.

    The structure is parsed with ``struct.parse_structure`` and split into
    (chain, residues) pairs by ``struct.get_chain_to_valid_residues``.
    Chains with no residues are left out of both results.

    Args:
        filename: Path of the structure file to parse.

    Returns:
        A tuple ``(all_atoms, seqs)``.  ``all_atoms`` maps each chain to a
        numpy array built from the per-residue ``['x', 'y', 'z']`` values
        (each converted to float32); ``seqs`` maps each chain to its
        residues exactly as returned by the helper.
    """
    structure = struct.parse_structure(filename)
    pdb_name = db.get_pdb_name(filename)

    chain_to_coords = {}
    chain_to_residues = {}
    pairs = struct.get_chain_to_valid_residues(structure, pdb_name)
    for chain, residues in pairs:
        # Presumably each residue is a pandas object with x/y/z columns --
        # TODO confirm against struct.get_chain_to_valid_residues.
        coords = [
            np.array(residue[['x', 'y', 'z']], dtype='f4')
            for residue in residues
        ]
        if len(residues) == 0:
            # Ignore zero-length peptides.
            continue
        chain_to_residues[chain] = residues
        chain_to_coords[chain] = np.array(coords)
    return chain_to_coords, chain_to_residues
コード例 #3
0
ファイル: case.py プロジェクト: stephaniewankowicz/atom3
def _generate_reference(pdb_filename, s2r_chain, s2r_res, output_filename,
                        style):
    """Transform PDB structure to a reference structure.

    Each chain of the parsed structure is rebuilt under its reference chain
    ID and collected into a fresh single-model structure, which is then
    written to ``output_filename`` with ``Bio.PDB.PDBIO``.

    Args:
        pdb_filename: Path of the structure to read.
        s2r_chain: Mapping from a chain key of the input structure to the
            chain ID to use in the reference output.
        s2r_res: Mapping from a chain key to an indexable of reference
            residue numbers, indexed by the residue's position within the
            chain.  Chains absent from this mapping keep their residue
            numbering.
        output_filename: Path the reference structure is saved to.
        style: Dataset style.  With ``'dockground'``, chains missing from
            ``s2r_chain`` are silently dropped.

    Raises:
        KeyError: If ``style`` is not ``'dockground'`` and a chain is
            missing from ``s2r_chain``.
    """
    biopy_structure = db.parse_biopython_structure(pdb_filename)
    pdb_name = db.get_pdb_name(pdb_filename)

    new_model = Bio.PDB.Model.Model('0')
    new_structure = Bio.PDB.Structure.Structure('')
    for (chain, residues) in \
            struct.get_chain_to_valid_residues(biopy_structure, pdb_name):
        if style == 'dockground' and chain not in s2r_chain:
            # If we are in dockground, we allow ourselves to remove unmapped
            # chains.
            continue
        new_chain = Bio.PDB.Chain.Chain(s2r_chain[chain])

        if chain in s2r_res:
            # If we have an alignment for this chain, renumber its residues.
            res_mapping = s2r_res[chain]
            for i, residue in enumerate(residues):
                if residue.id[0] != ' ':
                    # Skip residues whose hetero flag is set; the index i
                    # still advances so the mapping stays position-aligned.
                    continue
                residue.segid = ""
                residue.id = (' ', res_mapping[i], residue.id[2])
                new_chain.add(residue)
        else:
            # Else, just remove segment ID.  The residues must still be
            # attached to the new chain; previously they were dropped and an
            # empty chain was written out.
            for residue in residues:
                residue.segid = ""
                new_chain.add(residue)
        new_model.add(new_chain)

    new_structure.add(new_model)
    writer = Bio.PDB.PDBIO()
    writer.set_structure(new_structure)
    writer.save(output_filename)