def parsepdb(iqmain, iqart):  # general pipeline, tentative
    """Run two advanced-keyword PDB searches and return both hit lists.

    Per the original note, the first search is meant to fetch entries that
    contain zinc and the second to fetch zinc x-ray artifacts; the function
    itself simply forwards whatever keywords it is given.

    Args:
        iqmain: Keyword for the main search.
        iqart: Keyword for the artifact search.

    Returns:
        A tuple ``(main_hits, artifact_hits)``; each element is a list of
        the hits yielded by ``pd.do_search`` for the corresponding query.
    """
    # NOTE(review): ``pd`` is presumably the pypdb module imported under an
    # alias elsewhere in the file (not pandas) -- confirm against the imports.
    main_query = pd.make_query(iqmain, querytype='AdvancedKeywordQuery')
    artifact_query = pd.make_query(iqart, querytype='AdvancedKeywordQuery')

    main_hits = list(pd.do_search(main_query))
    artifact_hits = list(pd.do_search(artifact_query))
    return main_hits, artifact_hits
def pdblistfilter(qmainl, iqnmr, iqxray):
    """Split a list of hits by experimental method (NMR vs. x-ray).

    Two ``ExpTypeQuery`` searches are run, one per method, and every element
    of ``qmainl`` is checked against each result set. An element found in
    both result sets is reported in both output lists.

    Args:
        qmainl: Iterable of hits to filter (e.g. the main list returned by
            ``parsepdb``). Elements must be hashable.
        iqnmr: Experiment-type value used for the NMR query.
        iqxray: Experiment-type value used for the x-ray query.

    Returns:
        A tuple ``(qnmrfinal, qxrayfinal)`` of lists holding the elements of
        ``qmainl`` found in the NMR and x-ray search results respectively,
        in their original order (duplicates in ``qmainl`` are kept).
    """
    # NOTE(review): ``pd`` is presumably the pypdb module imported under an
    # alias elsewhere in the file (not pandas) -- confirm against the imports.
    qnmr = pd.make_query(iqnmr, querytype='ExpTypeQuery')
    qxray = pd.make_query(iqxray, querytype='ExpTypeQuery')

    # Build the lookup sets once: membership tests against a set are O(1),
    # whereas the previous list lookups made the filter quadratic.
    nmr_ids = set(pd.do_search(qnmr))
    xray_ids = set(pd.do_search(qxray))

    qnmrfinal = []
    qxrayfinal = []
    # Single pass so that ``qmainl`` may also be a one-shot iterator.
    for element in qmainl:
        if element in nmr_ids:
            qnmrfinal.append(element)
        if element in xray_ids:
            qxrayfinal.append(element)
    return qnmrfinal, qxrayfinal
def searchPDB(searchTerm):
    """Run an advanced-keyword PDB search, print the hits and return them.

    NOTE(review): ``searchTerm`` is currently ignored. The search is pinned
    to the hard-coded test entry below, apparently for debugging; switch the
    query over to ``searchTerm`` once testing is finished.

    Args:
        searchTerm: Intended search keyword (unused at the moment).

    Returns:
        Whatever ``do_search`` returns for the hard-coded test entry.
    """
    # Entries used for test purposes:
    #   '3vtv' -- BUG: this structure uses entityNr instead of nr_entities
    #   '4xkl' -- currently active
    #   '5v4k'
    pdbtest = '4xkl'

    foundStructures = do_search(
        make_query(pdbtest, querytype='AdvancedKeywordQuery')
    )
    print(foundStructures)
    return foundStructures
def find_matches(query, df):
    """
    Search the PDB database for matches to a given query using pypdb,
    cross-reference the results with the provided dataframe, and return
    the subset of the dataframe whose pdbid's matched.

    Assumes that the provided dataframe contains a column called 'pdbid'.
    Search hits are lower-cased before comparison, so the 'pdbid' column
    must hold lower-case IDs for rows to match.

    Parameters:
    ----------
    query :
        Search term passed unchanged to ``pdb.make_query``.
    df : pandas.DataFrame
        Dataframe with a 'pdbid' column.

    Returns:
    -------
    df : pandas.DataFrame
        A subset of the provided dataframe, which only includes
        the pdbid's which matched the query
    """
    # Build the PDB query and run the search.
    pdb_query = pdb.make_query(query)
    # Normalise every hit to lower case for the comparison below.
    matched_ids = [hit.lower() for hit in pdb.do_search(pdb_query)]
    # Boolean mask selecting the rows whose pdbid appears among the hits.
    is_match = df['pdbid'].isin(matched_ids)
    return df[is_match]
shutil.rmtree(check) os.makedirs(check) break else: check=input("Please re-write the output directory again: ") # ex: /home/a/Desktop/pdb_data2 os.makedirs(check) else: os.makedirs(check) # Let me change the directory os.chdir(check) for each in my_query: search_dict = make_query(each) found_pdbs = do_search(search_dict) if len(found_pdbs)>number: found_pdbs = found_pdbs[0:number] for ids in found_pdbs: # I created all the paths for each pdb ID ext="{}.pdb".format(ids) search_path= 'https://files.rcsb.org/view' full_path=os.path.join(search_path,ext) # I created a new folder for each gene name in the given list my_new_folder= os.path.join(check,each) if not os.path.isdir(my_new_folder): os.makedirs(my_new_folder) new_full = os.path.join(my_new_folder,ext)