Beispiel #1
0
    def fetch_all_by_trgm_sim(self, smiles, *expr, **kwargs):
        """
        Returns all fragments whose ``Fragment.ism`` is similar to the given
        SMILES string using trigram similarity (similar to LINGO).

        Parameters
        ----------
        smiles : str
            SMILES string that ``Fragment.ism`` is compared against.
        *expr : BinaryExpressions, optional
            SQLAlchemy BinaryExpressions that are AND-ed into the filter.
        threshold : float, default=0.6
            Similarity threshold; handed to ``set_limit()`` before the query
            is issued.
        limit : int, optional
            Maximum number of hits that will be returned. No LIMIT is applied
            if the keyword is missing or falsy.

        Returns
        -------
        resultset : list
            List of rows as produced by ``self.query`` (documented as Fragment)
            with two extra columns appended: the calculated trigram
            ``similarity`` and ``sim_thresh``, the value of ``show_limit()``.

        Queried Entities
        ----------------
        Fragment

        Examples
        --------
        >>> FragmentAdaptor().fetch_all_by_trgm_sim('Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C')
        [(<Fragment(STI)>, 0.883721), (<Fragment(NIL)>, 0.73913),
        (<Fragment(PRC)>, 0.738095), (<Fragment(406)>, 0.666667),
        (<Fragment(J07)>, 0.604167), (<Fragment(AD5)>, 0.6),
        (<Fragment(AAX)>, 0.6), (<Fragment(VX6)>, 0.6)]

        NOTE(review): the sample output above lists (Fragment, similarity)
        pairs only; the query as written also appends ``sim_thresh`` to every
        row -- confirm and refresh the example.

        Requires
        --------
        .. important:: `pg_trgm  <http://www.postgresql.org/docs/current/static/pgtrgm.html>`_ PostgreSQL extension.
        """
        db = Session()

        cutoff = kwargs.get('threshold', 0.6)
        max_hits = kwargs.get('limit')

        # the threshold has to be in place before the filtered query runs
        set_limit = text("SELECT set_limit(:threshold)")
        set_limit = set_limit.execution_options(autocommit=True)
        db.execute(set_limit.params(threshold=cutoff))

        # extra result columns: the score itself and the threshold in effect
        trgm_sim = func.similarity(Fragment.ism, smiles).label('similarity')
        active_cutoff = func.show_limit().label('sim_thresh')

        # KNN-GIST ordering through the <-> operator
        knn_order = Fragment.ism.op('<->')(smiles)

        hits = self.query.add_columns(trgm_sim, active_cutoff)
        hits = hits.filter(and_(Fragment.like(smiles), *expr))
        hits = hits.order_by(knn_order)

        if max_hits:
            hits = hits.limit(max_hits)

        return hits.all()
Beispiel #2
0
    def abstracts(self):
        """
        Returns all rows of the citations table that are cross-referenced to
        this PDB entry.

        A citation is selected when its pubmed_id equals an XRef.xref value
        (cast to Integer) whose XRef source is 'PubMed', whose entity type is
        'Structure' and whose entity_id is the structure_id of this object.
        """
        session = Session()

        # XRef rows that point from this structure to a PubMed record
        pubmed_xref_of_structure = and_(
            citations.c.pubmed_id == cast(XRef.xref, Integer),
            XRef.source == 'PubMed',
            XRef.entity_type == 'Structure',
            XRef.entity_id == self.structure_id)

        statement = select([citations]).where(pubmed_xref_of_structure)
        rows = session.execute(statement).fetchall()

        return rows
Beispiel #3
0
    def disordered_regions(self, *expr):
        """
        Returns every row of the disordered_regions table that belongs to this
        Chain; the list is empty if there is none.

        Rows are matched on ``self.Biomolecule.Structure.pdb`` and on
        ``self.pdb_chain_asu_id``. Additional SQLAlchemy expressions passed
        through *expr are AND-ed into the WHERE clause.
        """
        session = Session()

        cols = disordered_regions.c
        criteria = and_(cols.pdb == self.Biomolecule.Structure.pdb,
                        cols.pdb_chain_id == self.pdb_chain_asu_id,
                        *expr)

        statement = select([disordered_regions]).where(criteria)

        return session.execute(statement).fetchall()
Beispiel #4
0
    def fetch_all_by_sim_oe(self, smiles, *expr, **kwargs):
        """
        Builds a query for all Chemical Components that are similar to the
        given SMILES string using OpenEye chemical fingerprints.

        Parameters
        ----------
        smiles : str
            The query molecule in SMILES format.
        *expr : BinaryExpressions, optional
            SQLAlchemy BinaryExpressions that will be used to filter the query.
        threshold : float, optional
            The similarity threshold for the selected metric. If given (any
            value other than None, including 0), it is passed together with
            ``metric`` to ``openeye.set_oefp_similarity_limit`` before the
            query is built; otherwise the setting is left untouched.
        metric : {'tanimoto','dice','manhattan','cosine','euclidean'}
            Similarity metric, default='tanimoto'.
        fp : {'circular','maccs166','path','tree'}
            OpenEye fingerprint type to be used for similarity searching,
            default='circular'.

        Any other keyword argument (e.g. ``limit``) is ignored; apply
        ``.limit()`` to the returned query instead.

        Queried Entities
        ----------------
        ChemComp, ChemCompOEFP

        Returns
        -------
        query : Query
            The un-executed ``ChemComp.query`` extended by the similarity
            column, joined to 'OEFP', filtered by the metric's
            ``*_is_above_limit`` function and ``*expr`` and ordered by the
            metric's operator. Call ``.all()`` to obtain the hits.

        Raises
        ------
        ValueError
            If ``fp`` or ``metric`` is not one of the supported values. Both
            are checked before any statement is sent to the database.

        Requires
        --------
        .. important:: OpenEye cartridge.
        """
        threshold = kwargs.get('threshold')
        metric = kwargs.get('metric', 'tanimoto')
        fp = kwargs.get('fp', 'circular')

        # fingerprint types; each one maps to openeye.make_<fp>_fp() and to
        # the ChemCompOEFP.<fp>_fp column
        fingerprints = ('circular', 'maccs166', 'path', 'tree')

        # similarity metric -> operator used for the ordering (KNN-GIST)
        knn_operators = {
            'tanimoto': 'OPERATOR(openeye.<%%>)',  # escape %
            'dice': 'OPERATOR(openeye.<#>)',
            'manhattan': 'OPERATOR(openeye.<~>)',
            'cosine': 'OPERATOR(openeye.<@>)',
            'euclidean': 'OPERATOR(openeye.<->)',
        }

        # fail fast: reject bad arguments before the database threshold is
        # changed on behalf of a call that cannot succeed anyway
        if fp not in fingerprints:
            raise ValueError(
                "cannot create fingerprint: type {0} does not exist.".format(
                    fp))

        if metric not in knn_operators:
            raise ValueError(
                "{} is not a valid similarity metric.".format(metric))

        session = Session()

        # set the similarity threshold for the selected metric; compare with
        # None so that an explicit threshold of 0 is not silently dropped
        if threshold is not None:
            statement = text(
                "SELECT openeye.set_oefp_similarity_limit(:threshold, :metric)"
            )
            session.execute(
                statement.params(threshold=threshold, metric=metric))

        # fingerprint of the query molecule and the matching stored column
        query_fp = getattr(func.openeye, 'make_{0}_fp'.format(fp))(smiles)
        target = getattr(ChemCompOEFP, '{0}_fp'.format(fp))

        # compile similarity metric and the corresponding GIST index / KNN-GIST
        similarity = getattr(func.openeye, metric)(query_fp, target)
        index = getattr(func.openeye,
                        '{0}_is_above_limit'.format(metric))(target, query_fp)
        orderby = target.op(knn_operators[metric])(query_fp)

        query = ChemComp.query.add_column(similarity)
        query = query.join('OEFP').filter(and_(index, *expr))
        query = query.order_by(orderby)

        return query
Beispiel #5
0
    def fetch_all_by_sim(self, smi, *expr, **kwargs):
        """
        Returns all fragments that match the given SMILES string with at
        least the given similarity threshold using chemical fingerprints.

        Parameters
        ----------
        smi : str
            The query molecule in SMILES format.
        *expr : BinaryExpressions, optional
            SQLAlchemy BinaryExpressions that will be used to filter the query.
        threshold : float, default=0.5
            The similarity threshold that will be used for searching. It is
            written to ``rdkit.<metric>_threshold`` before the query runs.
        metric : {'tanimoto','dice'}
            Similarity metric, default='tanimoto'.
        fp : {'circular','atompair','torsion','maccs','layered','avalon'}
            RDKit fingerprint type to be used for similarity searching,
            default='circular'.
        limit : int, optional
            Maximum number of hits that will be returned. No LIMIT is applied
            if the keyword is missing or falsy.

        Queried Entities
        ----------------
        Fragments, FragmentRDFP

        Returns
        -------
        hits : list
            List of rows as produced by ``self.query`` (documented as Fragment)
            with two extra columns appended: ``similarity`` and ``sim_thresh``
            (the current value of the threshold setting), ordered by
            descending similarity.

        Raises
        ------
        RuntimeError
            If ``fp`` is not a supported fingerprint type.
        ValueError
            If ``metric`` is not a supported similarity metric.

        Both are checked before any statement is sent to the database.

        Examples
        --------
        >>> #PENDING

        Requires
        --------
        .. important:: `RDKit  <http://www.rdkit.org>`_ PostgreSQL cartridge.
        """
        threshold = kwargs.get('threshold', 0.5)
        metric = kwargs.get('metric', 'tanimoto')
        fp = kwargs.get('fp', 'circular')

        # fingerprint type -> (rdkit function name, extra positional args);
        # the stored counterpart is always the FragmentRDFP.<fp>_fp column
        fingerprints = {
            'circular': ('morganbv_fp', (2,)),
            'torsion': ('torsionbv_fp', ()),
            'atompair': ('atompairbv_fp', ()),
            'maccs': ('maccs_fp', ()),
            'layered': ('layered_fp', ()),
            'avalon': ('avalon_fp', ()),
        }

        # metrics for which a threshold setting, a *_sml and a *_sml_op
        # function are used below
        metrics = ('tanimoto', 'dice')

        if fp not in fingerprints:
            msg = "The fingerprint type [{0}] does not exist.".format(fp)
            raise RuntimeError(msg)

        # an unknown metric used to fall through and fail later with an
        # UnboundLocalError; reject it explicitly instead
        if metric not in metrics:
            raise ValueError(
                "{0} is not a valid similarity metric.".format(metric))

        session = Session()

        fp_function, fp_args = fingerprints[fp]
        query_fp = getattr(func.rdkit, fp_function)(smi, *fp_args)
        query_fp = query_fp.label('queryfp')
        target = getattr(FragmentRDFP, '{0}_fp'.format(fp))

        # set the similarity threshold for the index; the setting name is
        # built from the whitelisted metric only, the value stays a bind
        # parameter
        setting = 'rdkit.{0}_threshold'.format(metric)
        session.execute(
            text("SET {0}=:threshold".format(setting)).execution_options(
                autocommit=True).params(threshold=threshold))
        sim_thresh = func.current_setting(setting).label('sim_thresh')

        similarity = getattr(func.rdkit, '{0}_sml'.format(metric))(
            query_fp, target).label('similarity')
        index = getattr(func.rdkit, '{0}_sml_op'.format(metric))(
            query_fp, target)

        query = self.query.add_columns(similarity, sim_thresh)
        query = query.join('RDFP').filter(and_(index, *expr))

        # order through the expression API rather than a raw SQL string
        query = query.order_by(similarity.desc())

        if kwargs.get('limit'):
            query = query.limit(kwargs['limit'])

        return query.all()