Example #1
0
def add_com_to_pdb(mhc_com, vtcr_com, sample_structure):
    """
    Attach marker pseudoatoms for the MHC CoM, the TCR CoM and the X/Y/Z axes.

    Three chains are added to ``sample_structure``:

    * chain "X": residue 1 ("MCM") holding one "C" atom at ``mhc_com``;
    * chain "Y": residue 1 ("TCM") holding one "C" atom at ``vtcr_com``;
    * chain "Z": residues 1-3 named "X", "Y" and "Z", each holding one "O"
      atom placed 50 coordinate units along the matching axis.

    The entity passed in is modified in place and also returned.
    """

    def _marker_residue(number, resname, atom_name, coord):
        # One residue carrying a single atom; the atom name doubles as its
        # full name and element, exactly as in the literal constructor calls.
        marker = Residue.Residue((" ", number, " "), resname, " ")
        marker.add(
            Atom.Atom(atom_name, coord, 0, 0.0, " ", atom_name, 1, atom_name))
        return marker

    # Centre-of-mass markers: (chain id, residue name, coordinate).
    com_markers = (("X", "MCM", mhc_com), ("Y", "TCM", vtcr_com))
    for chain_id, resname, coord in com_markers:
        sample_structure.add(Chain.Chain(chain_id))
        sample_structure.child_dict[chain_id].add(
            _marker_residue(1, resname, "C", coord))

    # Axis markers all live in one chain, numbered from 1.
    axis_chain_id = "Z"
    sample_structure.add(Chain.Chain(axis_chain_id))
    axis_markers = (("X", [50, 0, 0]), ("Y", [0, 50, 0]), ("Z", [0, 0, 50]))
    for number, (resname, coord) in enumerate(axis_markers, start=1):
        sample_structure.child_dict[axis_chain_id].add(
            _marker_residue(number, resname, "O", coord))

    return sample_structure
Example #2
0
 def get_het_id(hetname, chain: Chain):
     """Return the id of the first residue in *chain* whose hetero flag is *hetname*.

     The hetero flag is the first element of ``residue.get_id()``. Residues
     whose flag is ``" "`` or ``"W"`` are never considered. ``None`` is
     returned when no residue matches.
     """
     for entry in chain.get_residues():
         full_id = entry.get_id()
         flag = full_id[0]
         if flag == " " or flag == "W":
             # Blank and "W" flags are excluded before comparing.
             continue
         if flag == hetname:
             return full_id
     return None
Example #3
0
 def get_het_ids(chain: Chain):
     """Return the residue names of all hetero residues in *chain*, in order.

     A residue counts as hetero when the first element of its id is neither
     ``" "`` nor ``"W"``. Names are not de-duplicated.
     """
     return [
         entry.get_resname()
         for entry in chain.get_residues()
         if entry.get_id()[0] not in (" ", "W")
     ]
Example #4
0
def get_structure_slice_by_residues(struct: Structure, domain_name: str,
                                    chain_order: int, start: int,
                                    finish: int) -> Structure:
    """
    Build a new structure holding residues ``start``..``finish`` of one chain.

    The source chain is picked by position in ``struct.get_chains()``. Every
    integer in the inclusive range is looked up as ``chain[i]`` and the
    resulting residue is added to a new chain (id=1), which is wrapped in a
    new model (id=1) and a new structure named ``domain_name``.

    :param struct: input structure to slice
    :param domain_name: name given to the new structure
    :param chain_order: position of the chain to take residues from
    :param start: first residue index (inclusive)
    :param finish: last residue index (inclusive)
    :return: the newly created structure
    """
    source_chain = list(struct.get_chains())[chain_order]

    sliced_chain = Chain.Chain(1)
    for residue_index in range(start, finish + 1):
        sliced_chain.add(source_chain[residue_index])

    wrapper_model = Model.Model(1)
    wrapper_model.add(sliced_chain)

    domain = Structure.Structure(domain_name)
    domain.add(wrapper_model)
    return domain
def initialize_res(residue):
    '''Build a structure that holds exactly one amino acid.

    ``residue`` is either a ``Geo`` instance, used as-is, or any other value,
    which is handed to ``geometry()`` to obtain one. The backbone atoms N, CA,
    C and O are created from that geometry and passed to the ``make*`` builder
    selected by ``geo.residue_name``; names without a dedicated builder fall
    back to glycine. The residue ends up in chain 'A' of model 0 of a
    structure with id 'X', which is returned.'''

    geo = residue if isinstance(residue, Geo) else geometry(residue)

    # CA sits at the origin, C lies on the x axis, and N is placed in the
    # xy plane at the N-CA-C angle (converted from degrees to radians).
    bond_angle = geo.N_CA_C_angle * (math.pi / 180.0)
    n_position = [
        geo.CA_N_length * math.cos(bond_angle),
        geo.CA_N_length * math.sin(bond_angle),
        0,
    ]
    N = Atom("N", n_position, 0.0, 1.0, " ", " N", 0, "N")
    CA = Atom("CA", [0., 0., 0.], 0.0, 1.0, " ", " CA", 0, "C")
    C = Atom("C", [geo.CA_C_length, 0, 0], 0.0, 1.0, " ", " C", 0, "C")

    # Carbonyl oxygen (to be moved later).
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    # One-letter residue name -> side-chain builder.
    builders = dict((
        ('G', makeGly), ('A', makeAla), ('S', makeSer), ('C', makeCys),
        ('V', makeVal), ('I', makeIle), ('L', makeLeu), ('T', makeThr),
        ('R', makeArg), ('K', makeLys), ('D', makeAsp), ('E', makeGlu),
        ('N', makeAsn), ('Q', makeGln), ('M', makeMet), ('H', makeHis),
        ('P', makePro), ('F', makePhe), ('Y', makeTyr), ('W', makeTrp),
    ))
    build = builders.get(geo.residue_name, makeGly)
    # 1 is the value handed to the builders as their first (segID) argument.
    res = build(1, N, CA, C, O, geo)

    cha = Chain('A')
    cha.add(res)

    mod = Model(0)
    mod.add(cha)

    struc = Structure('X')
    struc.add(mod)
    return struc
Example #6
0
def compare_interactions(interaction1, interaction2, similar_sequences):
    """
    Reduce two interactions to backbone traces and compare them by superimposition.

    Each chain of both interactions is replaced by a new chain whose id is the
    id of ``similar_sequences[chain]`` and which holds only copies of the 'CA'
    and 'P' atoms, one per new residue, numbered from 0. If two chains of the
    first interaction map to the same id, the interaction is treated as a
    homodimer and the two traces of each interaction are trimmed against each
    other. Traces sharing an id across the two interactions are then trimmed
    pairwise before the final comparison.

    :param interaction1: one of the interactions to compare (iterable of chains).
    :param interaction2: the other interaction to compare.
    :param similar_sequences: mapping from each chain to the representative
        chain of its similarity group.
    :return: the value returned by ``str_comparison_superimpose`` for the two
        lists of trace chains.
    """

    def _backbone_trace(chain, trace_id):
        """Copy the 'CA' and 'P' atoms of *chain* into a new chain named *trace_id*."""
        trace = Chain.Chain(trace_id)
        next_index = 0
        for residue in chain:
            atom_names = [atom.get_id() for atom in residue.get_atoms()]
            # 'CA' is handled before 'P', so a residue carrying both
            # contributes two consecutive trace residues in that order.
            for wanted in ('CA', 'P'):
                if wanted not in atom_names:
                    continue
                atom = residue[wanted]
                slot = ('', next_index, '')
                trace.add(
                    Residue.Residue(slot, residue.get_resname(),
                                    residue.get_segid()))
                # A copy is stored so the original atoms are never modified.
                trace[slot].add(atom.copy())
                next_index += 1
        return trace

    homodimer = False  # True when two chains of interaction1 share a trace id
    chain_list1 = []
    for chain in interaction1:
        # Similar chains are named after the main chain of their type.
        shared_id = similar_sequences[chain].get_id()
        if any(known.get_id() == shared_id for known in chain_list1):
            homodimer = True
        chain_list1.append(_backbone_trace(chain, shared_id))

    chain_list2 = [
        _backbone_trace(chain, similar_sequences[chain].get_id())
        for chain in interaction2
    ]

    if homodimer:
        # Remove residues that differ between the chains of one interaction.
        for traces in (chain_list1, chain_list2):
            trim_to_superimpose(traces[0], traces[1])

    # Remove residues that differ between same-named chains of the two
    # interactions.
    for chain1 in chain_list1:
        for chain2 in chain_list2:
            if chain1.get_id() == chain2.get_id():
                trim_to_superimpose(chain1, chain2)

    return str_comparison_superimpose(chain_list1, chain_list2)
Example #7
0
                if invalid:
                    continue

                num_items = {"O": 0, "T": 0}
                for hit in item:
                    num_items[hit[5]] += 1

                for hit in item:

                    if len(hit[1]) > 1:

                        structure = PDBParser(QUIET=False).get_structure(
                            "dimer", path_structure + "dimer.pdb")
                        new_model = Model(2)
                        new_chain = Chain("C")
                        new_model.add(new_chain)
                        new_chain_mark = Chain("M")
                        add_markers(new_chain_mark)
                        new_model.add(new_chain_mark)
                        structure.add(new_model)

                        # mutations = {}     [sorted IDs] : [cobalt], [residues], num_HIS, clash_info_list, num, type, coords

                        res_co = Residue((" ", runtime_mark_id, " "), "Co3",
                                         " ")
                        res_co.add(hit[0])
                        new_chain.add(res_co)  # Add cobalt in its own residue

                        for res in hit[1]:  # Add rotamers
                            if res not in new_chain:
Example #8
0
def initialize_res(residue):
    '''Return a new structure made of a single amino acid.

    The argument is used directly when it is a ``Geo`` object; anything else
    is converted with ``geometry()``. Backbone atoms are generated from the
    geometry and handed to the ``make*`` function matching
    ``geo.residue_name`` (glycine is used for any unlisted name). The residue
    is placed into chain 'A' of model 0 of structure 'X'.'''

    if isinstance(residue, Geo):
        geo = residue
    else:
        geo = geometry(residue)

    def backbone_atom(name, position, element):
        # The full atom name is the plain name with one leading blank.
        return Atom(name, position, 0.0, 1.0, " ", " " + name, 0, element)

    # N lies in the xy plane at the N-CA-C angle; CA is the origin and C is
    # on the x axis.
    angle_rad = geo.N_CA_C_angle*(math.pi/180.0)
    N = backbone_atom(
        "N",
        [geo.CA_N_length*math.cos(angle_rad),
         geo.CA_N_length*math.sin(angle_rad),
         0],
        "N")
    CA = backbone_atom("CA", [0., 0., 0.], "C")
    C = backbone_atom("C", [geo.CA_C_length, 0, 0], "C")

    # Carbonyl oxygen (to be moved later).
    carbonyl = calculateCoordinates(
        N, CA, C, geo.C_O_length, geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = backbone_atom("O", carbonyl, "O")

    # Residue letter -> builder function.
    makers = {
        'G': makeGly, 'A': makeAla, 'S': makeSer, 'C': makeCys,
        'V': makeVal, 'I': makeIle, 'L': makeLeu, 'T': makeThr,
        'R': makeArg, 'K': makeLys, 'D': makeAsp, 'E': makeGlu,
        'N': makeAsn, 'Q': makeGln, 'M': makeMet, 'H': makeHis,
        'P': makePro, 'F': makePhe, 'Y': makeTyr, 'W': makeTrp,
    }
    segID = 1
    maker = makers[geo.residue_name] if geo.residue_name in makers else makeGly
    res = maker(segID, N, CA, C, O, geo)

    cha = Chain('A')
    cha.add(res)

    mod = Model(0)
    mod.add(cha)

    struc = Structure('X')
    struc.add(mod)
    return struc
def add_dummies_to_pdb(pdb_structure, chain_a_res_n, chain_b_res_n, target_len, max_angle):
    """
    Position two dummy residues around a two-chain complex.

    The structure must contain exactly three chains: two real chains and one
    dummy chain with a single child. A second dummy chain "D" is created as a
    deep copy of the first and added to model 0. The line between the CA atoms
    of the selected residues is extended by ``target_len`` beyond each end;
    an offset obtained from ``sample_perpendicular_vector`` is added to each
    end point, and each dummy chain is translated so that its CA atom lands on
    the resulting position.

    Any validation failure is logged on the root logger and ends the process
    with ``sys.exit(1)``.

    :param pdb_structure: Bio.PDB.Structure.Structure
    :param chain_a_res_n: int, 1-based position of the residue in the first chain
    :param chain_b_res_n: int, 1-based position of the residue in the second chain
    :param target_len: distance from the CA atoms to the dummy base positions
    :param max_angle: forwarded to ``sample_perpendicular_vector``
    :return: None; the structure is modified in place
    """
    logger = logging.getLogger()

    def _abort(message):
        # Log the problem and terminate, as every validation failure does.
        logger.error(message)
        sys.exit(1)

    chains = list(pdb_structure.get_chains())
    if len(chains) != 3:
        _abort("Wrong PDB structures passed, expected exactly 3 chains.")
    chain_a, chain_b, chain_c = chains

    def _ca_position(label, chain, res_n):
        """Coordinates of the CA atom of the *res_n*-th residue (1-based) of *chain*."""
        residues = list(chain.get_residues())
        if res_n <= 0 or len(residues) < res_n:
            _abort("Wrong number of residue from chain {}: {}".format(label, res_n))

        picked = residues[res_n - 1]
        if "CA" not in picked.child_dict:
            _abort("No CA atom in the residue {} in chain {}.".format(res_n, label))
        return picked.child_dict["CA"].get_coord()

    anchor_a = _ca_position("A", chain_a, chain_a_res_n)
    anchor_b = _ca_position("B", chain_b, chain_b_res_n)

    # Vector from B to A, rescaled to length target_len.
    axis = anchor_a - anchor_b
    reach = axis * (target_len / np.linalg.norm(axis))

    def _sample_offset():
        # The sampler may return None; keep drawing until it yields a vector.
        offset = None
        while offset is None:
            offset = sample_perpendicular_vector(reach, max_angle)
        return offset

    # The offset for chain C is drawn first, then the one for chain D.
    c_target = (anchor_a + reach) + _sample_offset()
    d_target = (anchor_b - reach) + _sample_offset()

    chain_d = Chain.Chain("D")
    if len(chain_c.child_list) != 1:
        _abort("Wrong number of children in chain_c: {}".format(len(chain_c.child_list)))
    dummy_c = chain_c.child_list[0]
    chain_d.add(deepcopy(dummy_c))
    dummy_d = chain_d.child_list[0]
    pdb_structure.child_dict[0].add(chain_d)

    # Pure translations: identity rotation plus the shift that moves each
    # dummy CA onto its target.
    chain_c.transform(np.eye(3), c_target - dummy_c.child_dict["CA"].get_coord())
    chain_d.transform(np.eye(3), d_target - dummy_d.child_dict["CA"].get_coord())
Example #10
0
def compare_interactions(interaction1, interaction2):
    """
    Compare two interactions through the superimposition of their CA traces.

    Two structures ('1' and '2'), each with a single model 0, are filled with
    one chain per input chain (same id) that holds copies of the 'CA' atoms
    only, one per new residue, numbered from 0. While the first structure is
    being filled, a chain that ``compare_chains`` matches against the only
    chain stored so far marks the interaction as a homodimer; in that case the
    first two chains of each model are trimmed against each other. Chains with
    equal ids across the two models are then trimmed pairwise.

    :param interaction1: iterable of chains forming the first interaction.
    :param interaction2: iterable of chains forming the second interaction.
    :return: the value returned by ``str_comparison_superimpose`` for the two
        reduced structures.
    """

    def _add_ca_trace(model, chain):
        """Add to *model* a chain named like *chain* holding copies of its 'CA' atoms."""
        trace = Chain.Chain(chain.get_id())
        model.add(trace)
        position = 0
        for residue in chain:
            if not any(atom.get_id() == 'CA' for atom in residue.get_atoms()):
                continue
            alpha_carbon = residue['CA']
            slot = ('', position, '')
            trace.add(
                Residue.Residue(slot, residue.get_resname(),
                                residue.get_segid()))
            trace[slot].add(alpha_carbon.copy())
            position += 1

    structure1 = Structure.Structure('1')
    structure2 = Structure.Structure('2')
    model1 = Model.Model(0)
    model2 = Model.Model(0)
    structure1.add(model1)
    structure2.add(model2)

    homodimer = False
    for chain in interaction1:
        # Only checked while exactly one chain has been stored so far.
        stored = list(model1.get_chains())
        if len(stored) == 1 and compare_chains(chain, stored[0]):
            homodimer = True
        _add_ca_trace(model1, chain)

    for chain in interaction2:
        _add_ca_trace(model2, chain)

    if homodimer:
        for model in (model1, model2):
            members = list(model.get_chains())
            trim_to_superimpose(members[0], members[1])

    for chain1 in model1:
        for chain2 in model2:
            if chain1.get_id() == chain2.get_id():
                trim_to_superimpose(chain1, chain2)

    return str_comparison_superimpose(structure1, structure2)
Example #11
0
def assemble_multiscale_visualization(topology_fn, rmf_fn, pdb_dir,
                                      outprefix=None, chimerax=True,
                                      xl_fn=None):
    """
    Render multiscale versions of rigid bodies from PDB files + flexible
    beads from RMF files, with or without mapped crosslinks.

    The function writes "<outprefix>.pdb" (the rigid bodies aligned onto the
    first frame of the RMF file), optionally "<outprefix>.cxc" (a ChimeraX
    script) and, when crosslinks are given, "<outprefix>_XLs.pb" (a
    pseudobond file). It returns None.

    Args:
    topology_fn (str): Topology file in pipe-separated-value (PSV) format
    as required in integrative modeling using IMP. For details on how
    to write a topology file, see:
    https://integrativemodeling.org/2.13.0/doc/ref/classIMP_1_1pmi_1_1topology_1_1TopologyReader.html

    rmf_fn (str): Name of the RMF file.

    pdb_dir (str): Directory containing all the PDB files for the rigid
    bodies used in modeling.

    outprefix (str, optional): Prefix for output files. Defaults to None,
    in which case "centroid_model" is used.

    chimerax (bool, optional): If true, a Chimerax script will be written
    (extension ".cxc"). Defaults to True. If false, the function returns
    right after the composite PDB file has been saved.

    xl_fn (str, optional): A file containing a XL dataset. Defaults to None.
    If this dataset is supplied, then it will be mapped on to the overall
    structure with satisfied XLs drawn in blue and violated XLs drawn in red.
    A XL dataset should be supplied in a comma-separated-value (CSV) format
    containing at least the following fields

    protein1, residue1, protein2, residue2, sat

    where the last field <sat> is a boolean 1 or 0 depending on whether
    the particular XL is satisfied (in the ensemble sense) as a result of the
    integrative modeling exercise.
    """

    # -------------------------------------------
    # read the RMF file and extract all particles
    # -------------------------------------------
    rmf_fh = RMF.open_rmf_file_read_only(rmf_fn)
    rmf_model = IMP.Model()
    hier = IMP.rmf.create_hierarchies(rmf_fh, rmf_model)[0]
    IMP.rmf.load_frame(rmf_fh, 0)
    particles = IMP.core.get_leaves(hier)
    # (molecule name, particle name) -> coordinates
    rmf_ps = {}
    for p in particles:
        molname = p.get_parent().get_parent().get_parent().get_name().strip()
        name = p.get_name().strip()
        coord = IMP.core.XYZ(p).get_coordinates()
        rmf_ps[(molname, name)] = coord

    # --------------------------------------------------------------
    # map pdb residues to rmf particles for each rigid body pdb file
    # --------------------------------------------------------------
    # read the topology file
    t = TopologyReader(topology_fn, pdb_dir=pdb_dir)
    components = t.get_components()

    map_pdb2rmf = {}
    rigid_body_models = {}
    rigid_body_residues = {}
    chain_ids = {}  # these are matched to the chimerax rmf plugin
    chain_id_count = 0
    for c in components:
        # ignore unstructured residues
        if c.pdb_file == "BEADS":
            continue
        mol = c.molname
        pdb_prefix = os.path.basename(c.pdb_file).split(".pdb")[0]
        chain_id = c.chain
        resrange = c.residue_range
        offset = c.pdb_offset

        # half-open range of PDB residue numbers covered by this component
        r0 = resrange[0] + offset
        r1 = resrange[1] + 1 + offset

        if mol not in chain_ids:
            chain_ids[mol] = string.ascii_uppercase[chain_id_count]
            chain_id_count += 1

        if pdb_prefix not in map_pdb2rmf:
            map_pdb2rmf[pdb_prefix] = {}
            this_rigid_body_model = PDBParser().get_structure("x", c.pdb_file)[0]
            this_rigid_body_residues = {(r.full_id[2], r.id[1]): r
                                        for r in this_rigid_body_model.get_residues()}
            rigid_body_models[pdb_prefix] = this_rigid_body_model
            rigid_body_residues[pdb_prefix] = this_rigid_body_residues

        for r in range(r0, r1):
            key = (chain_id, r)
            val = (mol, r)
            if key in rigid_body_residues[pdb_prefix]:
                map_pdb2rmf[pdb_prefix][key] = val

    # --------------------------------
    # align all pdb files with the rmf
    # --------------------------------
    print("\nAligning all rigid body structures...")
    align = SVDSuperimposer()
    for pdb_prefix, mapper in map_pdb2rmf.items():
        pdb_coords = []
        pdb_atoms = []
        rmf_coords = []

        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            pdb_coords.append(r["CA"].coord)
            pdb_atoms.extend(r.get_atoms())
            rmf_coords.append(rmf_ps[(mol, str(rmf_res))])

        pdb_coords = np.array(pdb_coords)
        rmf_coords = np.array(rmf_coords)
        align.set(rmf_coords, pdb_coords)
        align.run()
        rotmat, vec = align.get_rotran()
        # move every atom of the mapped residues onto the RMF frame
        for a in pdb_atoms:
            a.transform(rotmat, vec)

    # --------------------------
    # assemble the composite pdb
    # --------------------------
    mols = {c.molname for c in components}
    print("\nChain IDs by molecule:")
    for k, v in chain_ids.items():
        print("molecule %s, chain ID %s" % (k, v))

    reslists = {mol: [] for mol in mols}
    for pdb_prefix, mapper in map_pdb2rmf.items():
        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            # renumber the residue to its RMF residue number
            new_id = (r.id[0], rmf_res, r.id[2])
            new_residue = Residue.Residue(id=new_id, resname=r.resname,
                                          segid=r.segid)
            for a in r.get_atoms():
                new_residue.add(a)
            reslists[mol].append(new_residue)

    composite_model = Model.Model(0)
    for mol, chain_id in chain_ids.items():
        this_chain = Chain.Chain(chain_id)
        for r in sorted(reslists[mol], key=lambda r: r.id[1]):
            this_chain.add(r)
        composite_model.add(this_chain)

    # save the composite pdb to file
    io = PDBIO()
    io.set_structure(composite_model)
    if outprefix is None:
        outprefix = "centroid_model"
    io.save(outprefix + ".pdb")

    # -------------------------------------------------------------------
    # chimerax rendering (hide most of the rmf except unstructured beads)
    # -------------------------------------------------------------------
    if not chimerax:
        # Return instead of calling exit(), so that a caller importing this
        # function is not terminated along with it.
        return
    print("\nWriting UCSF Chimerax script...")
    script = [
        "open %s\n" % (outprefix + ".pdb"),
        "open %s\n" % rmf_fn,
        "hide\n",
        "show cartoon\n",
        "color #%d %s\n" % (CHIMERAX_PDB_MODEL_NUM, STRUCT_COLOR),
        "color #%d %s\n" % (CHIMERAX_RMF_MODEL_NUM, UNSTRUCT_COLOR),
        "hide #%d\n" % CHIMERAX_RMF_MODEL_NUM,
    ]

    # (molecule, residue) pairs covered by a rigid body; a set keeps the
    # membership tests below constant-time.
    struct_residues = set()
    for val in map_pdb2rmf.values():
        struct_residues.update(val.values())

    unstruct_atomspec = {}
    for molname, particle_name in rmf_ps:
        # NOTE(review): chain ids are only assigned to molecules with at
        # least one non-"BEADS" component above, so a molecule made only of
        # beads would raise KeyError here - confirm against real topologies.
        rmf_chain_id = chain_ids[molname]
        if "bead" in particle_name:
            # bead names are parsed as "<first>-<last>_..." residue ranges
            r0, r1 = particle_name.split("_")[0].split("-")
            r0 = int(r0)
            r1 = int(r1)
            this_atomspec = "#%d/%s:%d-%d" % \
                            (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r0, r1)
            for r in range(r0, r1 + 1):
                unstruct_atomspec[(molname, r)] = this_atomspec
        else:
            r = int(particle_name)
            if (molname, r) not in struct_residues:
                this_atomspec = "#%d/%s:%d" % \
                                (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r)
                unstruct_atomspec[(molname, r)] = this_atomspec

    # sorted so that the generated script is the same on every run
    script.append("show %s\n" % (" ".join(sorted(set(unstruct_atomspec.values())))))

    # ----------------------------------------------------------
    # if crosslink data is supplied, write out a pseudobond file
    # ----------------------------------------------------------
    if xl_fn is not None:
        # parse XL data
        df = pd.read_csv(os.path.abspath(xl_fn))
        xls = list(zip(df["protein1"], df["residue1"],
                       df["protein2"], df["residue2"], df["sat"]))

        # get lists of struct atomspecs
        atomspec = {}
        for (mol, particle_name) in rmf_ps:
            if "bead" in particle_name:
                continue
            resid = int(particle_name)
            if (mol, resid) in unstruct_atomspec:
                continue
            atomspec[(mol, resid)] = "#%d/%s:%d@CA" % \
                                     (CHIMERAX_PDB_MODEL_NUM, chain_ids[mol], resid)

        # now add in all the unstruct atomspecs
        atomspec.update(unstruct_atomspec)

        # write pseudobond script
        pb_lines = [
            "; radius = %2.2f\n" % XL_RADIUS,
            "; dashes = 0\n",
        ]
        for p1, r1, p2, r2, sat in xls:
            atomspec_1 = atomspec[(p1, r1)]
            atomspec_2 = atomspec[(p2, r2)]
            # both ends resolve to the same spec (e.g. the same bead)
            if atomspec_1 == atomspec_2:
                continue
            color = SAT_XL_COLOR if sat else VIOL_XL_COLOR
            pb_lines.append("%s %s %s\n" % (atomspec_1, atomspec_2, color))
        pb_lines.append("\n")
        pb_fn = outprefix + "_XLs.pb"
        with open(pb_fn, "w") as fh:
            fh.write("".join(pb_lines))
        script.append("open %s\n" % pb_fn)

    script.append("preset 'overall look' publication\n")
    chimerax_out_fn = outprefix + ".cxc"
    with open(chimerax_out_fn, "w") as fh:
        fh.write("".join(script))
Example #12
0
def main():
    """Run stem-alignment diagnostics and print one RMSD line per iteration.

    For each iteration a stem fragment is loaded (from ``--from`` or picked
    at random from the stem statistics), turned into a stem model, and a
    second stem (from ``--to`` or picked at random) is aligned onto that
    model. The line printed for the iteration holds the stem length, the
    superimposed RMSD, the unsuperimposed RMSD and their ratio.

    An optional positional argument overrides the ``-l/--length`` option.
    """
    usage = './align_stems.py [stem_length]\n\nDo diagnostics on the stem model'
    parser = OptionParser(usage=usage)

    parser.add_option('-i',
                      '--iterations',
                      dest='iterations',
                      default=1,
                      help="The number of times to repeat the alignment",
                      type='int')
    parser.add_option('-l',
                      '--length',
                      dest='length',
                      default=2,
                      help="The length of the stem",
                      type='int')
    parser.add_option('-o',
                      '--output-pdb',
                      dest='output_pdb',
                      default=False,
                      help="Output the structures to pdb files",
                      action='store_true')
    parser.add_option(
        '-f',
        '--from',
        dest='from_file',
        default=None,
        help='Specify a file to align from. Invalidates the -l option.',
        type='str')
    parser.add_option(
        '-t',
        '--to',
        dest='to_file',
        default=None,
        help='Specify a file to align to. Invalidates the -l option.',
        type='str')
    parser.add_option(
        '-m',
        '--method',
        dest='method',
        default='e',
        help=
        'Specify which method to use for the helix fitting. e = estimate (original, least accurate method), a = align (better, more accurate method), t = template (best, most accurate method)'
    )
    parser.add_option(
        '-a',
        '--average-twist',
        dest='use_average_method',
        default=False,
        action='store_true',
        help='Use the average of the two twists to align the stems.')

    (options, args) = parser.parse_args()

    # The positional stem length is optional, so no minimum number of
    # arguments is enforced (the former `len(args) < 0` check never fired).
    stem_length = options.length
    if len(args) == 1:
        stem_length = int(args[0])

    # The stem statistics are only needed when at least one stem has to be
    # chosen at random.
    if options.from_file is None or options.to_file is None:
        sss = cbs.get_stem_stats(
            os.path.join(cbc.Configuration.base_dir,
                         'fess/stats/temp.1jj2.stats'))

    rmsds = []

    for i in range(options.iterations):
        if options.from_file is not None:
            filename = options.from_file
            stem_def = stem_def_from_filename(filename)
        else:
            stem_def = random.choice(sss[stem_length])
            filename = '%s_%s.pdb' % (stem_def.pdb_name, "_".join(
                map(str, stem_def.define)))

        pdb_file = os.path.join(cbc.Configuration.stem_fragment_dir, filename)

        # Extract the PDB coordinates of the original chain
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            try:
                chain = list(bpdb.PDBParser().get_structure(
                    'temp', pdb_file).get_chains())[0]
                chain = cbm.extract_stem_from_chain(chain, stem_def)
            except IOError as ie:
                cud.pv('ie')
                # Without a chain nothing below can run; skip this iteration
                # rather than fail on an undefined name or silently reuse the
                # chain of the previous iteration.
                continue

        # Convert the chain into a stem model
        # This is where the method for fitting a helix is applied
        define = stem_def.define
        stem = cbm.StemModel(name=define)
        mids = cgg.get_mids(chain, define, options.method)

        stem.mids = tuple(mid.get_array() for mid in mids)
        stem.twists = cgg.get_twists(chain, define)

        # Create a new chain by aligning the stem from the sampled define
        # to the model created from the original stem
        new_chain = bpdbc.Chain(' ')
        try:
            if options.to_file is not None:
                new_stem_def = stem_def_from_filename(options.to_file)
            else:
                new_stem_def = random.choice(sss[stem_def.bp_length])

            cbm.reconstruct_stem_core(new_stem_def, stem_def.define, new_chain,
                                      dict(), stem, options.use_average_method)
        except IOError as ie:
            # NOTE(review): execution continues with whatever is in
            # new_chain at this point - confirm that this is intended.
            cud.pv('ie')

        if options.output_pdb:
            rtor.output_chain(chain, 'out1.pdb')
            rtor.output_chain(new_chain, 'out3.pdb')

        unsuperimposed_rmsd = cup.pdb_rmsd(chain,
                                           new_chain,
                                           sidechains=False,
                                           superimpose=False)
        superimposed_rmsd = cup.pdb_rmsd(chain,
                                         new_chain,
                                         sidechains=False,
                                         superimpose=True,
                                         apply_sup=True)
        rmsds.append([superimposed_rmsd[1], unsuperimposed_rmsd[1]])

        if options.output_pdb:
            # new_chain was superimposed in place by the call above
            # (apply_sup=True), so out2.pdb differs from out3.pdb.
            rtor.output_chain(new_chain, 'out2.pdb')
            pp = cvp.PymolPrinter()
            pp.add_stem_like_core(stem.mids, stem.twists,
                                  stem_def.bp_length + 1, '')
            pp.stem_atoms(stem.mids, stem.twists, stem_def.bp_length + 1)
            pp.dump_pymol_file('ss')

        # Formatted with %s so that the output is the same space-separated
        # line under both Python 2 and Python 3.
        print("%s %s %s %s" % (stem_length, superimposed_rmsd[1],
                               unsuperimposed_rmsd[1],
                               unsuperimposed_rmsd[1] / superimposed_rmsd[1]))