Example #1
0
    def fetch_all_by_trgm_sim(self, smiles, *expr, **kwargs):
        """
        Returns all fragments that are similar to the given SMILES string
        using trigram similarity (similar to LINGO).

        Parameters
        ----------
        smiles : str
            Query SMILES string that is compared against Fragment.ism.
        *expr : BinaryExpressions, optional
            SQLAlchemy BinaryExpressions that will be used to filter the query.
        threshold : float, default=0.6
            Similarity threshold that is passed to set_limit() before searching.
        limit : int, optional
            Maximum number of hits that will be returned. No LIMIT is applied
            if it is missing or evaluates to False.

        Returns
        -------
        resultset : list
            List of rows (Fragment, similarity, sim_thresh) containing the
            fragment, the calculated trigram similarity and the value reported
            by show_limit().

        Queried Entities
        ----------------
        Fragment

        Examples
        --------
        >>> FragmentAdaptor().fetch_all_by_trgm_sim('Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C')  # doctest: +SKIP

        Requires
        --------
        .. important:: `pg_trgm  <http://www.postgresql.org/docs/current/static/pgtrgm.html>`_ PostgreSQL extension.
        """
        session = Session()

        # set the similarity threshold for the index before querying
        cutoff = kwargs.get('threshold', 0.6)
        set_limit = text("SELECT set_limit(:threshold)")
        set_limit = set_limit.execution_options(autocommit=True)
        session.execute(set_limit.params(threshold=cutoff))

        # additional columns that are returned next to every fragment
        extra_columns = (
            func.similarity(Fragment.ism, smiles).label('similarity'),
            func.show_limit().label('sim_thresh'),
        )

        hits = self.query.add_columns(*extra_columns)
        hits = hits.filter(and_(Fragment.like(smiles), *expr))

        # KNN-GIST: order the hits with the <-> operator
        hits = hits.order_by(Fragment.ism.op('<->')(smiles))

        max_hits = kwargs.get('limit')
        if max_hits:
            hits = hits.limit(max_hits)

        return hits.all()
Example #2
0
    def pdb_atom_names(self):
        """
        Returns the PDB atom names of this entity, grouped by hit.

        Selects all rows of chem_comp_fragment_atoms whose
        chem_comp_fragment_id equals the one of this entity and aggregates
        their pdb_name values into one array per hit.

        Returns
        -------
        rows : list
            List of rows in the form (hit, array of pdb_name values).
        """
        session = Session()
        atoms = chem_comp_fragment_atoms.c

        belongs_to_self = (
            atoms.chem_comp_fragment_id == self.chem_comp_fragment_id)

        grouped = (session.query(atoms.hit, func.array_agg(atoms.pdb_name))
                   .filter(belongs_to_self)
                   .group_by(atoms.hit))

        return grouped.all()
Example #3
0
    def abstracts(self):
        """
        Returns the abstract(s) of the journal articles that are associated with
        this PDB entry.

        The citations table is matched against the PubMed cross references
        (XRef) of this structure; the XRef value is cast to an integer to be
        comparable with citations.pubmed_id.

        Returns
        -------
        rows : list
            All matching rows of the citations table.
        """
        session = Session()

        conditions = [
            citations.c.pubmed_id == cast(XRef.xref, Integer),
            XRef.source == 'PubMed',
            XRef.entity_type == 'Structure',
            XRef.entity_id == self.structure_id,
        ]
        statement = select([citations]).where(and_(*conditions))

        rows = session.execute(statement).fetchall()

        return rows
Example #4
0
    def disordered_regions(self, *expr):
        """
        Returns a list of disordered regions inside this Chain (if any).

        Parameters
        ----------
        *expr : BinaryExpressions, optional
            SQLAlchemy BinaryExpressions that are ANDed to the chain criteria.

        Returns
        -------
        rows : list
            Rows of the disordered_regions table whose pdb and pdb_chain_id
            match this chain.
        """
        session = Session()

        criteria = and_(
            disordered_regions.c.pdb == self.Biomolecule.Structure.pdb,
            disordered_regions.c.pdb_chain_id == self.pdb_chain_asu_id,
            *expr)
        statement = select([disordered_regions]).where(criteria)

        return session.execute(statement).fetchall()
Example #5
0
    def pdbstring(self, **kwargs):
        """
        Returns the binding site environment of the ligand as PDB string.

        :param biomolecule_id: The biomolecule_id of the assembly that this
                               binding site is part of - required to pick the
                               right atom partition table. The biomolecule_id
                               of the parent ligand will be used if missing.
        :returns: The scalar result of credo.binding_site_pdbstring() for the
                  chosen biomolecule_id and the ligand_id of this entity.
        """
        # Only resolve the parent ligand when the caller did not supply an id:
        # dict.get() evaluates its default eagerly, which touched self.Ligand
        # (and possibly loaded it) on every call.
        if 'biomolecule_id' in kwargs:
            biomolecule_id = kwargs['biomolecule_id']
        else:
            biomolecule_id = self.Ligand.biomolecule_id

        fn = func.credo.binding_site_pdbstring(biomolecule_id, self.ligand_id)

        with closing(Session()) as session:
            return session.query(fn).scalar()
Example #6
0
    def fetch_all_by_sim_oe(self, smiles, *expr, **kwargs):
        """
        Returns a query for all Chemical Components that match the given SMILES
        string with at least the given similarity threshold using OpenEye
        fingerprints.

        Parameters
        ----------
        smiles : str
            The query molecule in SMILES format.
        *expr : BinaryExpressions, optional
            SQLAlchemy BinaryExpressions that will be used to filter the query.
        threshold : float, optional
            The similarity threshold that will be set for the chosen metric.
            If it is omitted (None) no limit statement is issued; a threshold
            of 0 is set explicitly.
        metric : {'tanimoto','dice','manhattan','cosine','euclidean'}
            Similarity metric, default is 'tanimoto'.
        fp : {'circular','maccs166','path','tree'}
            OpenEye fingerprint type to be used, default is 'circular'.
        limit : int, optional
            Accepted for backwards compatibility but not applied by this
            method; call limit() on the returned query instead.

        Queried Entities
        ----------------
        ChemComp, ChemCompOEFP

        Returns
        -------
        query : Query
            Unevaluated query yielding rows in the form (ChemComp, similarity),
            ordered with the KNN-GIST operator of the chosen metric.

        Raises
        ------
        ValueError
            If the fingerprint type or the similarity metric is unknown.

        Requires
        --------
        .. important:: OpenEye cartridge.
        """
        threshold = kwargs.get('threshold')
        metric = kwargs.get('metric', 'tanimoto')
        fp = kwargs.get('fp', 'circular')

        # KNN-GIST ordering operator of every supported metric
        knn_operators = {
            'tanimoto': 'OPERATOR(openeye.<%%>)',  # escape %
            'dice': 'OPERATOR(openeye.<#>)',
            'manhattan': 'OPERATOR(openeye.<~>)',
            'cosine': 'OPERATOR(openeye.<@>)',
            'euclidean': 'OPERATOR(openeye.<->)',
        }

        # validate the request before any statement is sent to the database
        if fp not in ('circular', 'maccs166', 'path', 'tree'):
            raise ValueError(
                "cannot create fingerprint: type {0} does not exist.".format(
                    fp))

        if metric not in knn_operators:
            raise ValueError(
                "{} is not a valid similarity metric.".format(metric))

        # set the similarity threshold for the selected metric; compare against
        # None so that an explicit threshold of 0 is not silently dropped
        if threshold is not None:
            statement = text(
                "SELECT openeye.set_oefp_similarity_limit(:threshold, :metric)"
            )
            Session().execute(
                statement.params(threshold=threshold, metric=metric))

        # fingerprint of the query molecule and the matching stored fingerprint,
        # e.g. openeye.make_circular_fp() and ChemCompOEFP.circular_fp
        queryfp = getattr(func.openeye, 'make_{0}_fp'.format(fp))(smiles)
        target = getattr(ChemCompOEFP, '{0}_fp'.format(fp))

        # compile similarity metric and the corresponding GIST index / KNN-GIST,
        # e.g. openeye.tanimoto() and openeye.tanimoto_is_above_limit()
        similarity = getattr(func.openeye, metric)(queryfp, target)
        index = getattr(func.openeye, '{0}_is_above_limit'.format(metric))(
            target, queryfp)
        orderby = target.op(knn_operators[metric])(queryfp)

        query = ChemComp.query.add_column(similarity)
        query = query.join('OEFP').filter(and_(index, *expr))
        query = query.order_by(orderby)

        return query
Example #7
0
def do(controller):
    """
    Generates the FuzCav fingerprints of ligand binding sites and inserts them
    into the binding_site_fuzcav table.

    For every non-clashing ligand with at least 7 heavy atoms, the C-alpha and
    side chain representative atoms of its standard binding site residues are
    fetched and turned into two fingerprints (C-alpha based and representative
    based). Binding sites with fewer than 14 such atoms are skipped.

    Parameters
    ----------
    controller : object
        Command line controller. Only its parsed arguments (controller.pargs)
        are read: `incremental` restricts the run to ligands with an id above
        the largest one already stored, `progressbar` shows a progress bar and
        `dry_run` suppresses the inserts.
    """
    # timer to clock functions and parts of the program
    timer = Timer()
    timer.start("app")

    # get the command line arguments and options
    args = controller.pargs

    insert = binding_site_fuzcav.insert()
    tracker = fuzcav.get_tracker()

    # get the fuzcav side chain representative table from the credoscript metadata
    metadata.reflect(schema='bio', only=('fuzcav_rep_sc_atoms', ))
    fuzcav_rep_sc_atoms = Table('bio.fuzcav_rep_sc_atoms',
                                metadata,
                                autoload=True)

    timer.start()

    session = Session()

    try:
        # get all ligands that have at least 7 heavy atoms and no clashes
        # (== False is intentional: it is compiled into SQL by SQLAlchemy)
        query = session.query(Ligand.ligand_id, Ligand.biomolecule_id)
        query = query.filter(
            and_(Ligand.num_hvy_atoms >= 7, Ligand.is_clashing == False))

        if args.incremental:

            # subquery to get the current max ligand_id from the
            # binding_site_fuzcav table; max() is NULL for an empty table and
            # "ligand_id > NULL" would select nothing, hence the COALESCE
            sq = session.query(
                func.coalesce(func.max(binding_site_fuzcav.c.ligand_id),
                              0).label('ligand_id')).subquery('sq')

            # only include new ligands
            query = query.filter(Ligand.ligand_id > sq.c.ligand_id)

        ligand_ids = query.order_by(Ligand.ligand_id).all()

        # debug how much time it took to get the ligand identifiers
        app.log.debug(
            "all new ligand identifiers retrieved in {0:.2f} seconds.".format(
                timer.elapsed()))

        # base query for the FuzCav atoms of a binding site: the CA atom and
        # the side chain representative atom of every standard residue
        atom_query = BindingSiteResidue.query.join('Peptide', 'Atoms')
        atom_query = atom_query.outerjoin(
            fuzcav_rep_sc_atoms,
            and_(fuzcav_rep_sc_atoms.c.res_name == Peptide.res_name,
                 fuzcav_rep_sc_atoms.c.atom_name == Atom.atom_name))
        atom_query = atom_query.filter(
            and_(
                Peptide.is_non_std == False,
                or_(Atom.atom_name == 'CA',
                    fuzcav_rep_sc_atoms.c.atom_name != None)))
        atom_query = atom_query.with_entities(Peptide.res_name, Atom)

        if args.progressbar:
            bar = ProgressBar(widgets=[
                'Binding Sites: ',
                SimpleProgress(), ' ',
                Percentage(),
                Bar()
            ],
                              maxval=len(ligand_ids)).start()

        # iterate through ligands
        for counter, row in enumerate(ligand_ids, 1):
            if args.progressbar:
                bar.update(counter)

            ligand_id, biomolecule_id = row.ligand_id, row.biomolecule_id

            timer.start()

            # get all the fuzcav atoms (either CA or representative)
            # important to use the proper atom partition!
            atoms = atom_query.filter(
                and_(BindingSiteResidue.ligand_id == ligand_id,
                     Atom.biomolecule_id == biomolecule_id)).all()

            # debug how much time it took to get the FuzCav atoms
            app.log.debug(
                "all FuzCav atoms retrieved in {0:.2f} seconds.".format(
                    timer.elapsed()))

            # ignore binding sites with too few FuzCav atoms
            if len(atoms) < 14:
                app.log.debug("Ligand {} has only {} FuzCav atoms and will be "
                              "ignored.".format(ligand_id, len(atoms)))
                continue

            # get the calpha atom and its features for each residue
            calphas = ((np.array(atom.coords,
                                 dtype=float), (fuzcav.FEATURES[res_name]))
                       for res_name, atom in atoms if atom.atom_name == 'CA')

            # get the representative atom and its features for each residue
            representatives = (
                (np.array(atom.coords,
                          dtype=float), (fuzcav.FEATURES[res_name]))
                for res_name, atom in atoms
                if atom.atom_name == fuzcav.REPRESENTATIVES[res_name])

            timer.start()

            calphafp = fuzcav.make_fp(calphas, tracker)
            repfp = fuzcav.make_fp(representatives, tracker)

            # debug how much time it took to generate the fingerprints
            app.log.debug("fingerprints generated in {0:.2f} seconds.".format(
                timer.elapsed()))

            # insert the fingerprints into the table
            if not args.dry_run:
                engine.execute(insert,
                               ligand_id=ligand_id,
                               calphafp=calphafp.tolist(),
                               repfp=repfp.tolist())

        # finish the optional progress bar
        if args.progressbar:
            bar.finish()

    finally:
        # release the session even if a query or an insert fails
        session.close()
Example #8
0
class Base(object):
    """
    Declarative base model that is inherited by all CREDO models.
    """
    # automatically reflect the table
    __table_args__ = {'autoload': True}

    # attach a query object to every model that queries itself
    query = Session.query_property(BaseQuery)

    @ClassProperty
    @classmethod
    def __meta__(cls):
        """
        Returns the metadata information of this class as a list of
        (column name, column data type) string tuples, in the order of the
        mapper columns.
        """
        columns = cls.__mapper__.c
        described = []

        # the type has to be stringified column by column to catch the error
        # that might occur if a data type stems from an extension
        for name in columns.keys():
            try:
                type_name = str(columns[name].type)
            except (NotImplementedError, CompileError):
                type_name = "CUSTOM"

            described.append((str(name), type_name))

        return described

    def _repr_list_(self):
        """
        Returns the column values of this entity as list, in the order of the
        mapper columns.
        """
        names = self._sa_class_manager.mapper.c.keys()

        return [getattr(self, name) for name in names]

    def _repr_dict_(self):
        """
        Returns this entity as dictionary that maps every column name to its
        value.
        """
        names = self._sa_class_manager.mapper.c.keys()

        return {name: getattr(self, name) for name in names}

    def _repr_html_(self):
        """
        Returns a HTML table with one row (column name, value) per column of
        the entity. Only used in IPython notebooks.
        """
        rows = []

        for name, value in self._repr_dict_().items():
            rows.append("<tr><th>{}</th><td>{}</td></tr>".format(name, value))

        return "<table>{}</table>".format(''.join(rows))

    @classmethod
    def get_cls(cls):
        """
        Returns the class itself.
        """
        return cls

    @property
    def __data__(self):
        """
        Returns the column values of this entity as list (see _repr_list_()).
        """
        return self._repr_list_()

    @property
    def _pkey(self):
        """
        Returns the value of the primary key as tuple. Also works for
        composite keys.
        """
        key_columns = self.__mapper__.primary_key

        return tuple([getattr(self, column.name) for column in key_columns])

    @property
    def _entity_id(self):
        """
        Returns the value of the first primary key column as scalar. Used in
        the PyMOL API.
        """
        return self._pkey[0]
Example #9
0
    def fetch_all_by_sim(self, smi, *expr, **kwargs):
        """
        Returns all fragments that match the given SMILES string with at
        least the given similarity threshold using chemical fingerprints.

        Parameters
        ----------
        smi : str
            The query rdmol in SMILES format.
        *expr : BinaryExpressions, optional
            SQLAlchemy BinaryExpressions that will be used to filter the query.
        threshold : float, default=0.5
            The similarity threshold that will be used for searching.
        metric : {'tanimoto','dice'}
            Similarity metric, default is 'tanimoto'.
        fp : {'circular','atompair','torsion','maccs','layered','avalon'}
            RDKit fingerprint type to be used for similarity searching,
            default is 'circular'.
        limit : int, optional
            Maximum number of hits that will be returned. No LIMIT is applied
            if it is missing or evaluates to False.

        Queried Entities
        ----------------
        Fragments, FragmentRDFP

        Returns
        -------
        hits : list
            List of rows in the form (Fragment, similarity, sim_thresh),
            ordered by descending similarity.

        Raises
        ------
        RuntimeError
            If the fingerprint type or the similarity metric is not supported.

        Examples
        --------
        >>> #PENDING

        Requires
        --------
        .. important:: `RDKit  <http://www.rdkit.org>`_ PostgreSQL cartridge.
        """

        session = Session()

        threshold = kwargs.get('threshold', 0.5)
        metric = kwargs.get('metric', 'tanimoto')
        fp = kwargs.get('fp', 'circular')

        # fingerprint of the query molecule and the matching stored fingerprint
        if fp == 'circular':
            queryfp = func.rdkit.morganbv_fp(smi, 2).label('queryfp')
            target = FragmentRDFP.circular_fp

        elif fp == 'torsion':
            queryfp = func.rdkit.torsionbv_fp(smi).label('queryfp')
            target = FragmentRDFP.torsion_fp

        elif fp == 'atompair':
            queryfp = func.rdkit.atompairbv_fp(smi).label('queryfp')
            target = FragmentRDFP.atompair_fp

        elif fp == 'maccs':
            queryfp = func.rdkit.maccs_fp(smi).label('queryfp')
            target = FragmentRDFP.maccs_fp

        elif fp == 'layered':
            queryfp = func.rdkit.layered_fp(smi).label('queryfp')
            target = FragmentRDFP.layered_fp

        elif fp == 'avalon':
            queryfp = func.rdkit.avalon_fp(smi).label('queryfp')
            target = FragmentRDFP.avalon_fp

        else:
            msg = "The fingerprint type [{0}] does not exist.".format(fp)
            raise RuntimeError(msg)

        # set the similarity threshold for the index
        if metric == 'tanimoto':
            session.execute(
                text("SET rdkit.tanimoto_threshold=:threshold").
                execution_options(autocommit=True).params(threshold=threshold))
            sim_thresh = func.current_setting(
                'rdkit.tanimoto_threshold').label('sim_thresh')

            similarity = func.rdkit.tanimoto_sml(queryfp,
                                                 target).label('similarity')
            index = func.rdkit.tanimoto_sml_op(queryfp, target)

        elif metric == 'dice':
            session.execute(
                text("SET rdkit.dice_threshold=:threshold").execution_options(
                    autocommit=True).params(threshold=threshold))
            sim_thresh = func.current_setting('rdkit.dice_threshold').label(
                'sim_thresh')

            similarity = func.rdkit.dice_sml(queryfp,
                                             target).label('similarity')
            index = func.rdkit.dice_sml_op(queryfp, target)

        else:
            # without this branch an unknown metric left similarity, sim_thresh
            # and index unbound and failed later with an UnboundLocalError
            msg = "The similarity metric [{0}] is not supported.".format(metric)
            raise RuntimeError(msg)

        query = self.query.add_columns(similarity, sim_thresh)
        query = query.join('RDFP').filter(and_(index, *expr))

        # order by the labelled column expression instead of a raw SQL string
        query = query.order_by(similarity.desc())

        limit = kwargs.get('limit')
        if limit:
            query = query.limit(limit)

        return query.all()