def __init__(self, xmlQuery):
    """Run the given query and store the matching identifiers.

    :param xmlQuery: query description passed unchanged to ``post_query``
    """
    hits = post_query(xmlQuery)

    # The result set is treated as entity-level when its first identifier
    # contains a ':'; an empty result set is never entity-level.
    if len(hits) > 0:
        self.entityLevel = ":" in hits[0]
    else:
        self.entityLevel = False

    # De-duplicate the identifiers (order is not preserved by the set).
    unique_ids = set(hits)
    self.structureIds = list(unique_ids)
    self.exclusive = False
def __init__(self, smiles, match_type=SUBSTRUCTURE_STEREOSPECIFIC, percentSimilarity=0.0):
    """Set up a filter that matches entries by chemical structure.

    An entry matches when at least one of its chemical components matches
    the specified SMILES string using the specified query type.

    For details see:
    `Chemical Structure Search <http://www.rcsb.org/pdb/staticHelp.do?p=help/advancedsearch/chemSmiles.html>`_

    Parameters
    ----------
    smiles : str
        SMILES string representing chemical structure
    match_type : str
        One of the 5 supported types
    percentSimilarity : float
        Minimum score, expressed as a percentage (``score * 100``), that a
        hit must reach to be kept. The threshold is applied to the scores
        returned for any ``match_type``; the default of 0.0 keeps every hit
        whose score is non-negative. [default: 0.0]

    Notes
    -----
    At most 1000 hits are requested from the search service (single page).
    """
    # Local import so this block does not depend on the file's import header.
    import json

    max_rows = 1000

    # Serialize with json.dumps instead of string interpolation: SMILES may
    # contain characters such as '\' (cis/trans bond notation) or '"' that
    # must be escaped to yield a valid JSON document.
    query = json.dumps({
        "query": {
            "type": "terminal",
            "service": "chemical",
            "parameters": {
                # str() mirrors the f-string formatting used previously, so
                # non-str arguments are still accepted.
                "value": str(smiles),
                "type": "descriptor",
                "descriptor_type": "SMILES",
                "match_type": str(match_type),
            },
        },
        "return_type": "entry",
        "request_options": {
            "pager": {
                "start": 0,
                "rows": max_rows,
            },
            "scoring_strategy": "combined",
            "sort": [
                {
                    "sort_by": "score",
                    "direction": "desc",
                },
            ],
        },
    })

    result_type, identifiers, scores = post_query(query)
    self.result_type = result_type

    # Keep only identifiers whose score (scaled to percent) reaches the threshold.
    self.structureIds = {
        identifier
        for identifier, score in zip(identifiers, scores)
        if score * 100.0 >= percentSimilarity
    }
def __init__(self, query):
    """Run the given query and store the matching identifiers.

    :param query: query description passed unchanged to ``post_query``
    """
    kind, hits, _scores = post_query(query)

    self.exclusive = False
    self.result_type = kind
    # Results are entity-level when the service reports polymer entities.
    self.entityLevel = (kind == 'polymer_entity')
    # De-duplicate the identifiers (order is not preserved by the set).
    self.structureIds = list(set(hits))
def get_dataset(xmlQuery):
    """
    Runs an RCSB PDB Advanced Search web service using an XML query description.
    See https://www.rcsb.org/pdb/staticHelp.do?p=help/advancedSearch.html Advanced Search

    The returned dataset contains a single column that depends on the type of
    identifier returned by the query. The type is decided from the length of
    the FIRST returned id only:

    # pdbId, e.g., 1STP (id of length 4)
    # pdbChainId, e.g., 4HHB.A (id longer than 4, e.g., 4HHB:1; entity ids
      are mapped to chain ids)
    # ligandId, e.g., HEM (id shorter than 4)

    If the query returns no ids, an empty dataset with a single string
    column ``pdbId`` is returned.

    :param xmlQuery: RCSB PDB advanced query xml string
    :return: dataset with matching ids
    """
    # run advanced query
    ids = post_query(xmlQuery)

    spark = SparkSession.builder.getOrCreate()

    # No hits: the id type cannot be determined (and Spark cannot infer a
    # schema from an empty list), so return an empty dataset with an explicit
    # schema instead of failing on ids[0].
    if len(ids) == 0:
        return spark.createDataFrame([], 'pdbId: string')

    # convert list of ids to a list of single-element rows
    # (required for dataframe creation below)
    rows = [[i] for i in ids]

    # distinguish 3 types of results based on length of the first id
    # structureId: 4 (e.g., 4HHB)
    # structureEntityId: > 4 (e.g., 4HHB:1)
    # entityId: < 4 (e.g., HEM)
    first_len = len(ids[0])

    if first_len > 4:
        ds: DataFrame = spark.createDataFrame(rows, ['pdbEntityId'])
        # results contain an entity id, e.g., 101M:1: split it into the part
        # before the first ':' and the part after the last ':' ...
        ds = ds.withColumn("pdbId", substring_index(ds.pdbEntityId, ':', 1))
        ds = ds.withColumn("entityId", substring_index(ds.pdbEntityId, ':', -1))
        # ... then map (pdbId, entityId) to pdbChainId via the mapping table
        mapping = __get_entity_to_chain_id()
        ds = ds.join(mapping, (ds.pdbId == mapping.structureId) & (ds.entityId == mapping.entity_id))
        return ds.select(ds.pdbChainId)

    if first_len < 4:
        return spark.createDataFrame(rows, ['ligandId'])

    return spark.createDataFrame(rows, ['pdbId'])