コード例 #1
0
    def __init__(self, xmlQuery):
        """Run a query and remember the identifiers it matched.

        Parameters
        ----------
        xmlQuery
           query description, handed unchanged to ``post_query``
           (presumably an XML string, judging by the name -- confirm
           against ``post_query``)
        """
        hits = post_query(xmlQuery)

        # Entity-level results are recognised by a ':' in the first hit;
        # an empty result is never entity-level.
        self.entityLevel = (":" in hits[0]) if len(hits) > 0 else False

        # Duplicates are dropped; the set round-trip does not keep the
        # order in which the hits were returned.
        unique_hits = set(hits)
        self.structureIds = list(unique_hits)

        self.exclusive = False
コード例 #2
0
    def __init__(self,
                 smiles,
                 match_type=SUBSTRUCTURE_STEREOSPECIFIC,
                 percentSimilarity=0.0):
        '''Constructor to set up a filter that matches any entry with at
        least one chemical component that matches the specified SMILES string
        using the specified query type.

        The search is limited to the first 1000 hits, sorted by descending
        score.

        For details see:
        `Chemical Structure Search <http://www.rcsb.org/pdb/staticHelp.do?p=help/advancedsearch/chemSmiles.html>`_

        Parameters
        ----------
        smiles : str
           SMILES string representing chemical structure. It is JSON-encoded
           before being placed in the query, so backslashes and double
           quotes in the string are safe.
        match_type : str
           One of the supported match types
           [default: SUBSTRUCTURE_STEREOSPECIFIC]
        percentSimilarity : float
           minimum score, expressed as a percentage. A hit is kept when
           ``score * 100 >= percentSimilarity``. The threshold is applied to
           every hit regardless of ``match_type`` [default: 0.0]

        Attributes
        ----------
        result_type
           result type reported by ``post_query``
        structureIds : set
           identifiers of the hits that passed the score threshold
        '''
        # Function-scope import so this method does not depend on the
        # module's import block.
        import json

        max_rows = 1000

        # SMILES uses '\' (and '/') to mark bond stereochemistry, e.g.
        # F/C=C\F. Pasting the raw text between double quotes would produce
        # an invalid JSON escape, so let json.dumps emit the quoted and
        # escaped string literal. For plain strings the resulting query text
        # is identical to simple quoting.
        smiles_literal = json.dumps(smiles)
        match_type_literal = json.dumps(match_type)

        query = ('{'
                 '"query": {'
                 '"type": "terminal",'
                 '"service": "chemical",'
                 '"parameters": {'
                 f'"value": {smiles_literal},'
                 '"type": "descriptor",'
                 '"descriptor_type": "SMILES",'
                 f'"match_type": {match_type_literal}'
                 '}'
                 '},'
                 '"return_type": "entry",'
                 '"request_options": {'
                 '"pager": {'
                 '"start": 0,'
                 f'"rows": {max_rows}'
                 '},'
                 '"scoring_strategy": "combined",'
                 '"sort": ['
                 '{'
                 '"sort_by": "score",'
                 '"direction": "desc"'
                 '}'
                 ']'
                 '}'
                 '}')

        result_type, identifiers, scores = post_query(query)
        self.result_type = result_type

        # Scores are multiplied by 100 to compare them with the percentage
        # threshold.
        self.structureIds = {
            identifier
            for identifier, score in zip(identifiers, scores)
            if score * 100.0 >= percentSimilarity
        }
コード例 #3
0
    def __init__(self, query):
        """Run a query and remember the identifiers it matched.

        Parameters
        ----------
        query
           query description, handed unchanged to ``post_query``

        Attributes
        ----------
        result_type
           result type reported by ``post_query``
        entityLevel : bool
           True when the result type is ``'polymer_entity'``
        structureIds : list
           matched identifiers with duplicates removed (order not preserved)
        exclusive : bool
           always initialised to False
        """
        # post_query yields a 3-tuple; the scores are not needed here.
        kind, hits, _scores = post_query(query)

        self.result_type = kind
        self.entityLevel = (kind == 'polymer_entity')

        unique_hits = set(hits)
        self.structureIds = list(unique_hits)

        self.exclusive = False
コード例 #4
0
def get_dataset(xmlQuery):
    """
    Runs an RCSB PDB Advanced Search web service using an XML query description.
    See https://www.rcsb.org/pdb/staticHelp.do?p=help/advancedSearch.html Advanced Search

    The returned dataset has a single column. Its name depends on the kind of
    id the query returned, which is inferred from the length of the first id:

    # pdbChainId: ids longer than 4 characters (entity ids, e.g., 4HHB:1),
      mapped to chain ids through the entity-to-chain mapping
    # ligandId: ids shorter than 4 characters, e.g., HEM
    # pdbId: ids of exactly 4 characters, e.g., 4HHB

    If the query matches nothing, the kind of id cannot be inferred and an
    empty dataset with a single string column ``pdbId`` is returned.

    :param xmlQuery: RCSB PDB advanced query xml string
    :return: dataset with matching ids
    """

    # run advanced query
    ids = post_query(xmlQuery)

    spark = SparkSession.builder.getOrCreate()

    # An empty result would otherwise fail on ids[0] below, and Spark cannot
    # infer a schema from an empty list, so give the schema explicitly.
    if not ids:
        return spark.createDataFrame([], 'pdbId string')

    # convert list of ids to a list of lists (required for dataframe creation below)
    id_list = [[i] for i in ids]

    # distinguish 3 types of results based on the length of the first id
    # entity id: > 4 (e.g., 4HHB:1)
    # ligand id: < 4 (e.g., HEM)
    # structure id: 4 (e.g., 4HHB)
    id_length = len(ids[0])

    if id_length > 4:
        ds: DataFrame = spark.createDataFrame(id_list, ['pdbEntityId'])
        # if results contain an entity id, e.g., 101M:1, then map entityId to pdbChainId
        ds = ds.withColumn("pdbId", substring_index(ds.pdbEntityId, ':', 1))
        ds = ds.withColumn("entityId", substring_index(ds.pdbEntityId, ':',
                                                       -1))
        mapping = __get_entity_to_chain_id()
        ds = ds.join(mapping, (ds.pdbId == mapping.structureId) &
                     (ds.entityId == mapping.entity_id))
        ds = ds.select(ds.pdbChainId)
    elif id_length < 4:
        ds = spark.createDataFrame(id_list, ['ligandId'])
    else:
        ds = spark.createDataFrame(id_list, ['pdbId'])

    return ds