Exemple #1
0
 def __init__(self, hit: Hit):
     """
     Load the query and subject structures of a hit and reset the alignment state.

     :param hit: Hit. ``hit.query`` and ``hit.sbjct`` are handed to get_SCOP_domain(),
         whose result is used as the path of the structure file to load.
     """

     def load_first_model(path, label):
         # The file is only needed while it is parsed, so it is removed even
         # when loading fails instead of being left behind on disk.
         logger.info(f'Loading {path} as a chimera object')
         try:
             structure = Chimera(path, validateElements=False)
         finally:
             os.remove(path)
         if structure.numFrames > 1:
             structure.dropFrames(keep=0)
             logger.info(f"{label} protein contains more than one model. Keeping only the first one")
         return structure

     # Each file is fetched right before it is loaded, so a failure while
     # loading the query cannot leave an unused subject file behind.
     self.qPDB = load_first_model(get_SCOP_domain(hit.query), "Query")
     self.sPDB = load_first_model(get_SCOP_domain(hit.sbjct), "Subject")

     # Alignment / chimera state; empty until it is computed.
     self.qaPDB, self.saPDB = {}, {}
     self.qpairs, self.spairs = [], []
     self.dst = []
     self.chim_positions = {}
Exemple #2
0
def calc_dssp(chimera: Chimera = None, filename: str = None, simplified: bool = True):
    """
    Compute Dictionary of protein secondary structure (DSSP) secondary structure assignments.
    This function uses the MDtraj compute_dssp implementation as a basis.

    Exactly one of ``chimera`` or ``filename`` must be provided.

    :param chimera: A Chimera object. It is written to a temporary pdb file so that MDtraj can load it.
    :param filename: path to a pdb file
    :param simplified: Use the simplified 3-category assignment scheme. Otherwise the original 8-category scheme is used.
    :return: assignments np.ndarray. The secondary structure assignment for each residue
    :raises ValueError: if both or neither of ``chimera`` and ``filename`` are provided.
    """
    import os
    import tempfile

    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")

    if chimera:
        # A unique temporary file (instead of a fixed path shared by every
        # call) avoids clobbering between concurrent calls; it is removed as
        # soon as the structure has been loaded.
        handle, tmp_path = tempfile.mkstemp(suffix=".pdb")
        os.close(handle)
        try:
            chimera.write(tmp_path)
            structure = md.load(tmp_path)
        finally:
            os.remove(tmp_path)
    else:
        structure = md.load(filename)

    return md.compute_dssp(structure, simplified=simplified)
Exemple #3
0
    def _construct_chimera(self, qmol, smol, qstart, qend, sstart, send, combination):
        """
        Cut one segment out of each parent and join both segments into a new chimera.

        :param qmol: Molecule. The query protein
        :param smol: Molecule. The subject protein in any of its positions
        :param qstart: int. Position to start the cut in the query
        :param qend: int. Position to end the cut in the query
        :param sstart: int. Position to start the cut in the sbjct
        :param send: int. Position to end the cut in the sbjct.
        :param combination: int. 1 appends the subject segment first and the query
            segment second; any other value appends the query segment first.
        :return: tuple (chim1, last_id).
            chim1: The resulting chimera, with all atoms assigned to chain "A"
            last_id: The resid passed to get_new_resIDs() to renumber the second segment
        :raises NotDiverseChimeraError: if either segment has fewer than 10 alpha carbons.
        :raises BackboneClashError: if any pair of backbone atoms from the two
            segments is closer than 1.3 (in coordinate units).
        """

        def segment_selection(first, last):
            # Protein residues of the segment plus the non-protein residues next to it.
            return (f"(protein and same residue as index '{first}' to '{last}') "
                    f"or (not protein and same residue as within 4 of protein "
                    f"and same residue as index '{first}' to '{last}')")

        qmol_copy = qmol.copy()
        smol_copy = smol.copy()
        qmol_copy.filter(segment_selection(qstart, qend))
        # The subject must be cut with its own limits (the query limits were
        # previously reused for the non-protein part of this selection).
        smol_copy.filter(segment_selection(sstart, send))

        # Avoid chimeras that only have a few mutations from
        # one of the parents
        qmol_resid = qmol_copy.get("resid", sel="protein and name CA")
        smol_resid = smol_copy.get("resid", sel="protein and name CA")
        if qmol_resid.size < 10 or smol_resid.size < 10:
            raise NotDiverseChimeraError

        bbq = qmol_copy.get("coords", sel="protein and backbone")
        bbs = smol_copy.get("coords", sel="protein and backbone")

        # Test the distance mask itself: testing the indices returned by
        # np.where() with .any() misses a clash located only at pair (0, 0).
        if (cdist(bbq, bbs) < 1.3).any():
            raise BackboneClashError

        chim1 = Chimera()
        qmol_copy.renumberResidues()
        smol_copy.renumberResidues()

        if combination == 1:
            last_id = smol_resid[-1] + 1
            new_ids = get_new_resIDs(qmol_copy, last_id)
            qmol_copy.set("resid", new_ids)
            chim1.append(smol_copy)
            chim1.append(qmol_copy)
        else:
            last_id = qmol_resid[-1] + 1
            new_ids = get_new_resIDs(smol_copy, last_id)
            smol_copy.set("resid", new_ids)
            chim1.append(qmol_copy)
            chim1.append(smol_copy)
        chim1.set("chain", "A", "all")
        return chim1, last_id
Exemple #4
0
def calc_dist_matrix(chimera: Chimera = None, filename: str = None, selection: str = 'residue', type='contacts',
                     plot=False):
    """
    Returns a matrix of distances (or contacts) between the residues of a given pdb.

    Exactly one of ``chimera`` or ``filename`` must be provided.

    :param chimera: A Chimera object with n residues.
    :param filename: path to a pdb file
    :param selection: How to compute the distance. 'residue' (the closest two
        atoms between two residues) or 'alpha' (distance of the alpha carbons).
    :param type: 'contacts' (contact map: True where the distance is below 8 angstroms)
        or 'distances' (the distance values themselves).
    :param plot: whether to plot the matrix. Default is False
    :return: matrix. np.array. An n by n matrix.
    :raises ValueError: if both or neither of ``chimera`` and ``filename`` are
        provided, or if ``selection`` or ``type`` is not one of the accepted values.
    """
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")
    # Validate the options up front so a typo fails before any file is loaded
    # or any distance is computed.
    if selection not in ('residue', 'alpha'):
        raise ValueError("Specify a selection type: 'residue' or 'alpha'")
    if type not in ('contacts', 'distances'):
        raise ValueError("Please select contact type between 'contacts' or distances")

    if filename:
        chimera = Chimera(filename=filename)

    if selection == 'residue':
        metr = MetricSelfDistance("protein", groupsel="residue", metric="distances", pbc=False)
        mapping = metr.getMapping(chimera)
        a = metr.project(chimera)
        # NOTE(review): this branch passes the first row of the projection while
        # the 'alpha' branch passes the projection as returned - confirm both
        # match what contactVecToMatrix expects.
        matrix, _, _ = contactVecToMatrix(a[0], mapping.atomIndexes)
    else:
        metr = MetricSelfDistance("protein and name CA", metric="distances", pbc=False)
        a = metr.project(chimera)
        mapping = metr.getMapping(chimera)
        matrix, _, _ = contactVecToMatrix(a, mapping.atomIndexes)

    if type == "contacts":
        matrix = matrix < 8

    if plot:
        fig = plt.figure(figsize=(12, 12))
        ax = fig.add_subplot(111)
        # Colormaps are passed by name: matplotlib.cm.get_cmap is deprecated
        # in recent matplotlib releases.
        cmap = 'gist_rainbow' if type == 'distances' else 'binary'
        image = ax.imshow(matrix, cmap=cmap, interpolation='nearest', origin="lower")
        if type == 'distances':
            fig.colorbar(image)
        plt.xlabel("Residue index", fontsize=24)
        plt.ylabel("Residue index", fontsize=24)
        plt.xticks(fontsize=22)
        plt.yticks(fontsize=22)

    return matrix
Exemple #5
0
def calc_sasa(chimera: Chimera = None, filename: str = None,
              probe_radius: float = 0.14, n_sphere_points: int = 960, sasa_type='total'):
    """
    Computes the Solvent Accessible Surface Area of the protein.
    This function uses the MDtraj shrake_rupley implementation as a basis.

    Exactly one of ``chimera`` or ``filename`` must be provided.

    :param chimera: A Chimera object. It is written to a temporary pdb file so that MDtraj can load it.
    :param filename: Path to a pdb file
    :param probe_radius: The radius of the probe, in nm.
    :param n_sphere_points: the number of points representing the surface of each atom. Higher values lead to more accuracy.
    :param sasa_type: Type of calculation to perform. To select from polar, apolar, or total.
    :return: area: the summed per-residue area of the selected residues in the
        first frame, in nm^2 (the unit used in the log message).
    :raises ValueError: if ``sasa_type`` is not valid, or if both or neither of
        ``chimera`` and ``filename`` are provided.
    """
    import os
    import tempfile

    sasa_types = ["polar", "apolar", "total"]
    if sasa_type not in sasa_types:
        raise ValueError(f"Invalid type. Expected one of {sasa_types}")
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")

    if chimera:
        # A unique temporary file (instead of a fixed path shared by every
        # call) avoids clobbering between concurrent calls; it is removed as
        # soon as the structure has been loaded.
        handle, tmp_path = tempfile.mkstemp(suffix=".pdb")
        os.close(handle)
        try:
            chimera.write(tmp_path)
            structure = md.load(tmp_path)
        finally:
            os.remove(tmp_path)
    else:
        structure = md.load(filename)

    residue_groups = {
        'polar': {'SER', 'THR', 'CYS', 'TYR', 'ASN', 'GLN', 'ASP', 'GLU', 'LYS', 'ARG', 'HIS'},
        'apolar': {'GLY', 'ALA', 'VAL', 'LEU', 'ILE', 'MET', 'TRP', 'PHE', 'PRO'},
    }
    wanted = residue_groups.get(sasa_type)  # None means every residue ('total')
    indices = [index for index, residue in enumerate(structure.topology.residues)
               if wanted is None or residue.name in wanted]

    sasa = md.shrake_rupley(structure, probe_radius=probe_radius, n_sphere_points=n_sphere_points, mode="residue")
    area = sasa[0][indices].sum()
    logger.info(f"Area is {area} (nm)^2")
    return area
Exemple #6
0
 def show_vertex(self, vertex: Graph.vertex) -> Chimera:
     """
     Display the protein behind a vertex with its fragment highlighted.

     The whole protein is drawn as a cartoon with color 8 and the fragment,
     delimited by the rounded mean of the vertex start/end properties, is
     drawn on top with color 1.

     :param vertex: A Graph.vertex object. The domain to be shown.
     :return: A Chimera object carrying the representations of the fragment
     """
     vertex_props = self.graph.vp
     domain = vertex_props.domain[vertex]
     # Fragment limits: rounded mean of the values stored for this vertex.
     start, end = (int(round(np.mean(limits[vertex])))
                   for limits in (vertex_props.start, vertex_props.end))

     mol = Chimera(filename=get_SCOP_domain(domain), validateElements=False)
     # Renumber before selecting by resid so the limits apply to the new numbering.
     mol.renumberResidues()

     representations = mol.reps
     representations.add(sel='protein', style='NewCartoon', color=8)
     representations.add(
         sel=f"protein and resid '{start}' to '{end}'",
         style='NewCartoon',
         color=1,
     )
     mol.view(name=domain)
     return mol
Exemple #7
0
def minimize_potential_energy(
        chimera,
        ff: str,
        output: str = "/tmp/build",
        keep_output_files=True,
        cuda=False,
        restraint_backbone: bool = True
) -> Tuple[unit.quantity.Quantity, Chimera]:
    """
    Solvate a chimera, minimize its potential energy and return the result.

    :param chimera: A chimera object where to perform the minimization
    :param ff: The forcefield to use for the minimization. Select between "amber" and "charmm"
    :param output: A folder where to keep the files. It is created when missing.
    :param keep_output_files: If False, the output folder is deleted once the
        minimized structure has been loaded. Default is True.
    :param cuda: Whether to run an additional minimization on the CUDA platform,
        starting from the coordinates of the first minimization
    :param restraint_backbone: Restrain the CA, C and N atoms to their initial
        positions with a harmonic force

    :return: The potential energy after minimization (in kcal/mol) and the
        chimera object that was minimized, in that order.
    :raises ValueError: if ``ff`` is neither "amber" nor "charmm".
    """
    forcefield_files = {
        'amber': ('amber14-all.xml', 'amber14/tip3pfb.xml'),
        'charmm': ('charmm36.xml', 'charmm36/tip3p-pme-b.xml'),
    }
    # Fail early and explicitly: an unknown name used to surface later as an
    # unbound 'forcefield' variable.
    if ff not in forcefield_files:
        raise ValueError(f"Unknown forcefield '{ff}'. Expected one of {sorted(forcefield_files)}")

    os.makedirs(output, exist_ok=True)

    smol = prepare_protein(chimera)
    smol.write(f"{output}/protein.pdb")
    pdb = PDBFile(f"{output}/protein.pdb")
    parm = load_file(f"{output}/protein.pdb")
    modeller = Modeller(pdb.topology, pdb.positions)

    forcefield = ForceField(*forcefield_files[ff])

    modeller.addSolvent(forcefield, padding=1.0 * unit.nanometer)
    system = forcefield.createSystem(modeller.topology,
                                     nonbondedMethod=PME,
                                     nonbondedCutoff=1 * unit.nanometer,
                                     constraints=HBonds)
    if restraint_backbone:
        # Applies an external force on backbone atoms
        # This allows the backbone to stay rigid, while severe clashes can still be resolved
        force = mm.CustomExternalForce("k*((x-x0)^2+(y-y0)^2+(z-z0)^2)")
        force.addGlobalParameter(
            "k", 5.0 * unit.kilocalories_per_mole / unit.angstroms**2)
        force.addPerParticleParameter("x0")
        force.addPerParticleParameter("y0")
        force.addPerParticleParameter("z0")
        # Compare the atom *name*: comparing the atom object itself with the
        # strings is not expected to match, which leaves the backbone unrestrained.
        # NOTE(review): assumes the objects in parm.atoms expose `.name` - confirm.
        for idx, (atom, atom_crd) in enumerate(zip(parm.atoms, parm.positions)):
            if atom.name in ('CA', 'C', 'N'):
                force.addParticle(idx, atom_crd.value_in_unit(unit.nanometers))
        system.addForce(force)

    # temperature, friction, error_tolerance and distance_tolerance are not
    # defined in this function; they are expected to exist at module level.
    integrator = mm.LangevinIntegrator(temperature, friction, error_tolerance)
    simulation = Simulation(modeller.topology, system, integrator)
    simulation.context.setPositions(modeller.positions)

    # Get pre-minimization energy (scoring)
    state = simulation.context.getState(getEnergy=True, getForces=True)
    pre_energy = state.getPotentialEnergy().in_units_of(
        unit.kilocalories_per_mole)
    logger.info(f"Energy before minimization {pre_energy}")

    # Setup CPU minimization
    integrator.setConstraintTolerance(distance_tolerance)
    simulation.minimizeEnergy()
    post_position = simulation.context.getState(
        getPositions=True).getPositions()
    post_state = simulation.context.getState(getEnergy=True, getForces=True)
    if cuda:
        # Refine on the GPU starting from the coordinates obtained above.
        platform = mm.Platform.getPlatformByName('CUDA')
        properties = {'CudaPrecision': 'mixed'}
        gpu_integrator = mm.VariableLangevinIntegrator(temperature, friction,
                                                       error_tolerance)
        gpu_integrator.setConstraintTolerance(distance_tolerance)
        gpu_min = Simulation(modeller.topology, system, gpu_integrator,
                             platform, properties)
        gpu_min.context.setPositions(post_position)
        gpu_min.minimizeEnergy()
        post_position = gpu_min.context.getState(
            getPositions=True).getPositions()
        post_state = gpu_min.context.getState(getEnergy=True, getForces=True)

    post_energy = post_state.getPotentialEnergy().in_units_of(
        unit.kilocalories_per_mole)
    logger.info(f"Energy after minimization {post_energy}")

    # The context manager closes the file before it is read back below.
    with open(f"{output}/structure_minimized.pdb", 'w') as minimized_file:
        PDBFile.writeFile(modeller.topology,
                          post_position,
                          minimized_file,
                          keepIds=True)
    min_mol = Chimera(filename=f"{output}/structure_minimized.pdb")

    if keep_output_files is False:
        shutil.rmtree(output)

    return post_energy, min_mol
Exemple #8
0
def _mol_chimera_wrapper(molecule: Molecule, chimera: Chimera) -> Chimera:
    """
    Convert a Molecule into a Chimera by writing it to a pdb file and loading it back.

    :param molecule: The Molecule to convert.
    :param chimera: Unused; kept so existing callers keep working.
    :return: A new Chimera loaded from the pdb written by ``molecule``.
    """
    import tempfile

    # A unique temporary file (instead of a fixed shared path) avoids clobbering
    # between concurrent calls, and it is removed even when loading fails.
    handle, tmp_path = tempfile.mkstemp(suffix=".pdb")
    os.close(handle)
    try:
        molecule.write(tmp_path)
        return Chimera(filename=tmp_path)
    finally:
        os.remove(tmp_path)
Exemple #9
0
class Builder():
    """
    Aligns the query and subject structures of a hit and builds chimeras from them.

    Examples
    --------
    myhit= fetch_id('10002347')
    a=Builder(myhit)
    aln=a.get_alignment(myhit.query,myhit.no)
    qPDB, sPDB = a.superimpose_structures(aln,partial_alignment=True)
    chimeras=a.build_chimeras(partial_alignment=True)
    """
    def __init__(self, hit: Hit):
        """
        Load the query and subject structures of a hit and reset the alignment state.

        :param hit: Hit. ``hit.query`` and ``hit.sbjct`` are handed to get_SCOP_domain(),
            whose result is used as the path of the structure file to load.
        """
        # Each file is fetched right before it is loaded, so a failure while
        # loading the query cannot leave an unused subject file behind.
        self.qPDB = self._load_first_model(get_SCOP_domain(hit.query), "Query")
        self.sPDB = self._load_first_model(get_SCOP_domain(hit.sbjct), "Subject")
        self.qaPDB, self.saPDB = {}, {}
        self.qpairs, self.spairs = [], []
        self.dst = []
        self.chim_positions = {}

    @staticmethod
    def _load_first_model(path, label):
        """Load ``path`` as a Chimera, delete the file and keep only the first model."""
        logger.info(f'Loading {path} as a chimera object')
        try:
            structure = Chimera(path, validateElements=False)
        finally:
            # The file is only needed while it is parsed: remove it even when loading fails.
            os.remove(path)
        if structure.numFrames > 1:
            structure.dropFrames(keep=0)
            logger.info(f"{label} protein contains more than one model. Keeping only the first one")
        return structure

    @staticmethod
    def _following_ca_index(ca_indices, current):
        """Return the entry that follows ``current`` in ``ca_indices``, or None if it is the last one."""
        ordered = list(ca_indices)
        position = ordered.index(current)
        if position + 1 < len(ordered):
            return ordered[position + 1]
        return None

    def get_alignment(self, query: str, no: str) -> HHpredHitAlignment:
        """ Obtain the HHS alignment 'no' for query 'query'.
        Only the alignment from the fragment region is retrieved.
        This implies that when the fragment is not located in the
        N-terminus hit.q_start and the position in the output won't
        be the same. For example, if q_start = 20, that aminoacid is
        in position 0 in aln.query.

        :param query: str. Domain query
        :param no: int. Specifies the position in the file (alignment with subject)

        :return: HHpredHitAlignment. Alignment between query and subject for the fragment region.
            None is returned (and the error is logged) when the file cannot be parsed.
        """

        hhF = get_FUZZLE_hhs(query)
        try:
            hh = HHOutputParser().parse_file(hhF)
            pair = hh[int(no) - 1]
            return pair.alignment
        except Exception as e:
            # Best effort: the failure is reported and signalled with None.
            logger.error(f"Parsing of {hhF} failed. Error follows: {e}")
            return None

    @staticmethod
    def remove_residue(pdb: Molecule, resid):
        """
        Removes the atoms matched by a selection from the pdb (in place).
        :param pdb: The pdb to remove the residues from
        :param resid: Selection string; the pdb is filtered with ``not <resid>``
        :return: The pdb with the filtered residues
        """

        pdb.filter(f"not {resid}")
        return pdb

    @staticmethod
    def find_nonstandards(pdb: Molecule) -> list:
        """
        Finds non-standard aminoacids
        :param pdb: Molecule or Chimera object where to find non-standard aminoacids.
        :return: list of non-standard residues
        """
        non_standards = [aa for aa in np.unique(pdb.resname) if (aa in aa_keys or aa not in standard_aas)]
        for i in non_standards:
            if i != 'UNK':
                logger.info(f"Found the following non-standar residue: {i}. "
                            f"Preserving in the original PDB")
            else:
                logger.warning("Protein presents unknown residue UNK."
                               " Call remove_residue() to remove it or provide parameters"
                               " if you want to minimize it with AMBER or CHARMM.")
        return non_standards

    @staticmethod
    def mutate_nonstandards(pdb: Molecule) -> Molecule:
        """
        Mutates every non-standard residue to the residue given by special2standard (in place).

        :param pdb: The pdb to mutate the residues to
        :return: The pdb with the mutated residues -if there are any-
        """
        for resname in Builder.find_nonstandards(pdb):
            pdb.mutateResidue(f"resname {resname}", f"{special2standard[resname]}")

        return pdb

    def seq2pdbaln(self, sequence: str, pdb: Molecule) -> List:
        """ Obtains the positions in the PDB (atom index of the alpha carbon) of the aminoacids involved in the fragment.

        :param sequence: str. sequence of the fragment
        :param pdb: Molecule. PDB from where to obtain the residues
        :return: mapping. A List with one tuple per aminoacid of the sequence, of the form
            (sequence aminoacid, pdb index). Aminoacids that could not be matched to the
            PDB sequence only contain the aminoacid.
        :raises NotCorrectPDBError: if the sequence cannot be obtained from the PDB.
        """

        # Mutate non standard residues on a copy so the mutations don't affect the pdb
        copy_pdb = self.mutate_nonstandards(pdb.copy())

        # Mapping of residue to number
        try:
            pdb_sequence = copy_pdb.sequence()['0']
        except Exception as err:
            raise NotCorrectPDBError(f"PDB structure from protein {pdb.viewname} not correct") from err

        # Indices are read from the unmutated pdb to preserve the indexing
        pdb_indices = pdb.get("index", sel="protein and name CA")

        # sequence from the HHS alignment (fragment)
        seq_positions = [(x,) for x in sequence]

        # obtain mapping
        matcher = SequenceMatcher(None, sequence, pdb_sequence, autojunk=False)
        for block in matcher.get_matching_blocks():
            for offset in range(block.size):
                seq_positions[block.a + offset] += (pdb_indices[block.b + offset],)
        return seq_positions

    def _get_pairs(self, aln: HHpredHitAlignment):
        """ Obtain the positions of the pdb which are present in both alignments.
        Thus the number of atoms is the same.

        The result is stored in self.qpairs / self.spairs (one list per gap-free
        chunk of the alignment) and in self.global_qpairs / self.global_spairs
        (all chunks flattened into a single list).

        :param aln: HHpredHitAlignment. The fragment sequence alignment
        """

        qpairs = []
        spairs = []
        preqpairs = []
        prespairs = []

        # Obtain the corresponding PDB residues to the fragment seq. alignment
        query_seq2pdb = self.seq2pdbaln(aln.query, self.qPDB)
        sbjct_seq2pdb = self.seq2pdbaln(aln.subject, self.sPDB)

        # Retrieving correct residue sequence numbers for each aligned position
        # i.e positions that both query and subject contain aminoacids
        # (the first residue in HHpredHitAlignment is 1)
        for index, _ in enumerate(aln.columns, start=1):
            if aln.gap_at(index) is False:
                query_entry = query_seq2pdb[index - 1]
                sbjct_entry = sbjct_seq2pdb[index - 1]
                # Only positions mapped to the PDB on both sides are kept
                if len(query_entry) == 2 and len(sbjct_entry) == 2:
                    preqpairs.append(query_entry[1])
                    prespairs.append(sbjct_entry[1])
            elif preqpairs:
                # then this corresponds to a partial-alignment chunk and we can save it
                qpairs.append(preqpairs)
                spairs.append(prespairs)
                preqpairs = []
                prespairs = []

        if preqpairs:
            qpairs.append(preqpairs)
            spairs.append(prespairs)
        self.qpairs = qpairs
        self.spairs = spairs

        self.global_qpairs = [[item for chunk in qpairs for item in chunk]]
        self.global_spairs = [[item for chunk in spairs for item in chunk]]

    def superimpose_structures(self, aln: HHpredHitAlignment, partial_alignment: bool = False):
        """ Moves the two molecules to the origin of coordinates.
            Aligns the full structures according the fragment and obtains RMSDs and distances.

        :param aln: HHpredHitAlignment. The fragment sequence alignment
        :param partial_alignment: If True, one superimposition is performed per gap-free
            chunk of the alignment. Otherwise all aligned positions are superimposed at once.
        :return: (self.qaPDB, self.saPDB). Dictionaries with the centered query (key 0)
            and with the superimposed subject of each alignment (keyed by alignment number).
        """

        self._get_pairs(aln)

        # Start from a clean state so that calling this twice re-aligns
        self.qaPDB = {}
        self.saPDB = {}
        self.dst = []

        if partial_alignment is False:
            qpairs = self.global_qpairs
            spairs = self.global_spairs
        else:
            qpairs = self.qpairs
            spairs = self.spairs

        # Print info if the alignment was intended partial but there's only one chunk
        if len(qpairs) == 1 and partial_alignment is True:
            logger.info("The sequence alignment only contains one chunk. Performing global alignment")

        # We only need one query, center to the origin of coordinates.
        # It is the subject that aligns to this template.
        qmol = self.qPDB.copy()
        smol = self.sPDB.copy()
        qmol.center()
        smol.center()
        self.qaPDB[0] = qmol.copy()

        for index, (qpair_chunk, spair_chunk) in enumerate(zip(qpairs, spairs)):
            logger.info(f"Performing alignment {index+1} with TMalign")
            saPDB, distance = self._superimpose_chunk(qpair_chunk, spair_chunk, qmol, smol)
            self.dst.append(distance)
            self.saPDB[index] = saPDB.copy()

        self.global_dst = [[item for chunk in self.dst for item in chunk]]
        return self.qaPDB, self.saPDB

    def _superimpose_chunk(self, qpairs: list, spairs: list, qmol: Molecule, smol: Molecule) -> Tuple[
        Molecule, np.ndarray]:
        """
        Superimposes the part of the total sequence alignment defined
        by the indexes qpairs/spairs
        :param qpairs: list of indexes to align from the pdb query
        :param spairs: list of indexes to align from the pdb subject
        :param qmol: the query pdb
        :param smol: the subject pdb
        :return: smol: the subject molecule aligned.
                distances: a list of the distances between the alpha Carbons after alignment.
        :raises ChildProcessError: if TMalign fails to align the two chunks.
        """
        import tempfile

        # Copying because we are going to cut the pdbs into the chunks
        copyq = qmol.copy()
        copys = smol.copy()

        copyq.filter('protein and backbone and same residue as index %s' % ' '.join(map(str, qpairs)))
        copys.filter('protein and backbone and same residue as index %s' % ' '.join(map(str, spairs)))

        # A private temporary directory (instead of fixed names in /tmp) avoids
        # clobbering between concurrent runs and is removed even if TMalign fails.
        with tempfile.TemporaryDirectory() as workdir:
            query_file = os.path.join(workdir, 'copyq.pdb')
            subject_file = os.path.join(workdir, 'copys.pdb')
            copyq.write(query_file)
            copys.write(subject_file)

            # Matrix for VMD
            try:
                # We align subject onto the query
                tm_matrix = get_tmalign_output(subject_file, query_file, "matrix")
            except Exception as e:
                raise ChildProcessError(f"TMalign cannot align the PDBs. Error follows: {e}") from e
            vectran, matrot = tm2vmd(tm_matrix)

        # align the whole subject domain and fragment.
        # Copying so that the original smol does not lose the origin of coordinates
        s1mol = smol.copy()
        s1mol.rotateBy(matrot)
        s1mol.moveBy(vectran)
        copys.rotateBy(matrot)
        copys.moveBy(vectran)

        # Compute RMSD for the fragments
        rmsd = MetricRmsd(copyq, 'protein and name CA', pbc=False)
        data = rmsd.project(copys)
        logger.info(f"The RMSD between the fragments is {data} over {len(spairs)} alpha carbons")

        # Compute distances between the two selections:
        # the diagonal holds the distance of each aligned pair
        bbq = copyq.get("coords", sel="protein and name CA")
        bbs = copys.get("coords", sel="protein and name CA")
        distances = np.diagonal(cdist(bbq, bbs))
        return s1mol, distances

    def _construct_chimera(self, qmol, smol, qstart, qend, sstart, send, combination):
        """
        Cut one segment out of each parent and join both segments into a new chimera.

        :param qmol: Molecule. The query protein
        :param smol: Molecule. The subject protein in any of its positions
        :param qstart: int. Position to start the cut in the query
        :param qend: int. Position to end the cut in the query
        :param sstart: int. Position to start the cut in the sbjct
        :param send: int. Position to end the cut in the sbjct.
        :param combination: int. 1 appends the subject segment first and the query
            segment second; any other value appends the query segment first.
        :return: tuple (chim1, last_id).
            chim1: The resulting chimera, with all atoms assigned to chain "A"
            last_id: The resid passed to get_new_resIDs() to renumber the second segment
        :raises NotDiverseChimeraError: if either segment has fewer than 10 alpha carbons.
        :raises BackboneClashError: if any pair of backbone atoms from the two
            segments is closer than 1.3 (in coordinate units).
        """

        def segment_selection(first, last):
            # Protein residues of the segment plus the non-protein residues next to it.
            return (f"(protein and same residue as index '{first}' to '{last}') "
                    f"or (not protein and same residue as within 4 of protein "
                    f"and same residue as index '{first}' to '{last}')")

        qmol_copy = qmol.copy()
        smol_copy = smol.copy()
        qmol_copy.filter(segment_selection(qstart, qend))
        # The subject must be cut with its own limits (the query limits were
        # previously reused for the non-protein part of this selection).
        smol_copy.filter(segment_selection(sstart, send))

        # Avoid chimeras that only have a few mutations from
        # one of the parents
        qmol_resid = qmol_copy.get("resid", sel="protein and name CA")
        smol_resid = smol_copy.get("resid", sel="protein and name CA")
        if qmol_resid.size < 10 or smol_resid.size < 10:
            raise NotDiverseChimeraError

        bbq = qmol_copy.get("coords", sel="protein and backbone")
        bbs = smol_copy.get("coords", sel="protein and backbone")

        # Test the distance mask itself: testing the indices returned by
        # np.where() with .any() misses a clash located only at pair (0, 0).
        if (cdist(bbq, bbs) < 1.3).any():
            raise BackboneClashError

        chim1 = Chimera()
        qmol_copy.renumberResidues()
        smol_copy.renumberResidues()

        if combination == 1:
            last_id = smol_resid[-1] + 1
            new_ids = get_new_resIDs(qmol_copy, last_id)
            qmol_copy.set("resid", new_ids)
            chim1.append(smol_copy)
            chim1.append(qmol_copy)
        else:
            last_id = qmol_resid[-1] + 1
            new_ids = get_new_resIDs(smol_copy, last_id)
            smol_copy.set("resid", new_ids)
            chim1.append(qmol_copy)
            chim1.append(smol_copy)
        chim1.set("chain", "A", "all")
        return chim1, last_id

    def build_chimeras(self, partial_alignment: bool = False, cutoff_distance: float = 1) -> Dict[str, Chimera]:
        """ Build all possible chimeras between the two proteins that fulfill
        these two criteria:
        1) That the distance between the fusion points is below the cutoff distance
        2) That the resulting chimera does not present any backbone clashes

        The outcome of every fusion point is recorded in self.chim_positions.

        :param partial_alignment: If True, the per-chunk alignments are used.
            Otherwise the global one is used.
        :param cutoff_distance: Maximum distance between the aligned alpha carbons
            for a position to be used as fusion point.
        :return: A dictionary with all the possible chimeras. It is empty when the
            structures have not been aligned yet.
        """
        # self.dst starts as an empty list (never None), so test for emptiness
        if not self.dst:
            logger.error("You need to align the structures before building the chimeras")
            return {}

        chimeras = {}
        outcomes = ['Query N-terminal', 'Subject N-terminal', 'Not enough mutations Query N-terminal',
                    'Not enough mutations Subject N-terminal', 'Backbone clash']
        self.chim_positions = {outcome: [] for outcome in outcomes}

        q_indices = self.qPDB.get("index", sel="protein and name CA")
        qstart = min(q_indices)
        qend = max(q_indices)

        s_indices = self.sPDB.get("index", sel="protein and name CA")
        sstart = min(s_indices)
        send = max(s_indices)

        if partial_alignment is False:
            qpairs = self.global_qpairs
            spairs = self.global_spairs
            dst = self.global_dst
        else:
            qpairs = self.qpairs
            spairs = self.spairs
            dst = self.dst

        # resids of all the aligned query positions; used to name the chimeras
        global_query = self.global_qpairs[0]
        residues = self.qPDB.get("resid", sel="index %s" % ' '.join(map(str, global_query)))

        # _construct_chimera works on copies, so the aligned query can be shared
        qMOL = self.qaPDB[0]

        # Get the positions in the fragment closer than the cutoff
        for aln_index, chunk in enumerate(dst):
            if aln_index not in self.saPDB:
                logger.error(f"Alignment {aln_index+1} was not produced. Skipping to next alignment.")
                continue
            sMOL = self.saPDB[aln_index]
            chunk_qpairs = qpairs[aln_index]
            chunk_spairs = spairs[aln_index]
            fusion_points = [index for index, distance in enumerate(chunk) if distance < cutoff_distance]
            # Build query-subject chimera
            for index in fusion_points:
                xo_query = chunk_qpairs[index]
                xo_subject = chunk_spairs[index]
                xo_resid = residues[global_query.index(xo_query)]
                if index + 1 < len(chunk_qpairs):
                    xo_query_1 = chunk_qpairs[index + 1]
                    xo_subject_1 = chunk_spairs[index + 1]
                else:
                    # Position corresponds to C-terminus limit of the fragment:
                    # continue with the alpha carbon that follows in each protein.
                    # The value must be an atom index like the rest of the limits
                    # (not a position in the list); None means nothing follows.
                    xo_query_1 = self._following_ca_index(q_indices, xo_query)
                    xo_subject_1 = self._following_ca_index(s_indices, xo_subject)

                # Combination query-subject
                try:
                    if xo_subject_1 is None:
                        # No subject residue left to contribute
                        raise NotDiverseChimeraError
                    chimera1, xo = self._construct_chimera(qMOL, sMOL, qstart, xo_query,
                                                           xo_subject_1, send, 0)

                    self.chim_positions['Query N-terminal'].append(xo_query)
                    chimeras[f"comb1_{xo_resid}"] = chimera1
                    chimeras[f"comb1_{xo_resid}"].add_crossover(xo)
                except NotDiverseChimeraError:
                    self.chim_positions['Not enough mutations Query N-terminal'].append(xo_query)
                except BackboneClashError:
                    self.chim_positions['Backbone clash'].append(xo_query)

                # Combination subject-query
                try:
                    if xo_query_1 is None:
                        # No query residue left to contribute
                        raise NotDiverseChimeraError
                    chimera2, xo = self._construct_chimera(qMOL, sMOL, xo_query_1, qend,
                                                           sstart, xo_subject, 1)

                    self.chim_positions['Subject N-terminal'].append(xo_query)
                    chimeras[f"comb2_{xo_resid}"] = chimera2
                    chimeras[f"comb2_{xo_resid}"].add_crossover(xo)
                except NotDiverseChimeraError:
                    self.chim_positions['Not enough mutations Subject N-terminal'].append(xo_query)
                except BackboneClashError:
                    self.chim_positions['Backbone clash'].append(xo_query)

        if not chimeras:
            logger.warning("No combination of query and subject produced a chimera that matched the criteria")

        return chimeras

    def plot_curves(self, query: str):
        """ Plots the distance between the alpha carbons in the structure for the fragment region,
        marking the outcome recorded by build_chimeras() for each fusion point.

        :param query: str. Name of the query domain, used in the label of the x axis.

        :return: None. The figure is displayed with plt.show(). Nothing is plotted
            (and an error is logged) if build_chimeras() has not been called yet.
        """
        # self.chim_positions starts as an empty dict (never None), so test for emptiness
        if not self.chim_positions:
            logger.error("You need to build the chimeras before plotting. Call build_chimeras()")
            return

        dst = self.global_dst[0]

        residues = self.qPDB.get("resid", sel="index %s" % ' '.join(map(str, self.global_qpairs[0])))

        resids = {}
        distances = {}

        for key, value in self.chim_positions.items():
            if value:
                resids[key] = self.qPDB.get("resid", sel="index %s" % ' '.join(map(str, value)))
            else:
                resids[key] = np.zeros(0)
        for key, value in resids.items():
            # .size (rather than .any()) so a group whose only resid is 0 is kept
            if value.size:
                distances[key] = [distance for resi, distance in zip(residues, dst) if resi in value]
            else:
                distances[key] = []

        color = [('#FCB711', "X"), ('#CC004C', "x"), ('gray', 'o'), ('gray', "o"), ('black', ".")]
        colors = dict(zip(resids.keys(), color))

        fig, ax = plt.subplots(figsize=(12, 9))
        ax.plot(residues, dst, '-', color='black', label='distance q-s')
        ax.set_xlabel(f"Residue in the fragment relative to domain {query}", fontsize=24)
        ax.set_ylabel(r'Distance ($\AA$)', fontsize=24)
        for key, value in sorted(resids.items()):
            if value.size:
                ax.plot(value, distances[key], colors[key][1], markersize=18, color=colors[key][0], label=key)

        ax.tick_params(labelsize=20)
        ax.legend(loc=9, bbox_to_anchor=(0.5, 1.35), fontsize=18)
        plt.show()