def findStrucs(query: str) -> pd.DataFrame:
    """Search the RCSB PDB for *query* and return the hits' metadata.

    The query is submitted through ``pypdb.Query`` with
    ``query_type="sequence"``.  Only the first 500 hits are described, to
    limit download size.

    Args:
        query: Search string handed to ``pypdb.Query``.

    Returns:
        A DataFrame built from one ``pypdb.describe_pdb`` result per hit.
        An empty DataFrame is returned (after printing a notice) when the
        search yields nothing or any step of the lookup fails, so that
        downstream functions can detect the "no results" case.
    """
    max_hits = 500  # NOTE: ONLY the first 500 for now, to limit download size!

    try:
        search = pypdb.Query(query, query_type="sequence")
        # NOTE(review): this used to be ``search.search(search)``, i.e. the
        # Query object was passed as search()'s first positional argument.
        # pypdb's documented usage is ``Query(...).search()`` -- confirm
        # against the installed pypdb version.
        hits = search.search()
        # ``hits or []`` makes the "nothing found" case explicit instead of
        # relying on a TypeError from slicing a falsy/None result.
        records = [pypdb.describe_pdb(pdb_id) for pdb_id in (hits or [])[:max_hits]]
        frame = pd.DataFrame(records)
    except Exception:
        # Best-effort lookup: any failure is reported as "no results".
        # ``Exception`` (not a bare ``except``) lets Ctrl-C and SystemExit
        # propagate instead of being silently swallowed.
        frame = pd.DataFrame()

    if frame.empty:
        print("There were no search results found for the query: " + query)
    return frame
# %%
# Ask for the search term.  An empty answer falls back to the default query
# (previously the answer was discarded and the default was always used).
query = input("Supply a query (term, accession number, etc.) :").strip() or "nuclear receptor"

# Tag the time right when the query is entered.  Kept under its own name so
# that rebinding ``now`` to the formatted string below cannot break
# ``now_dir_ts``.
query_time = datetime.datetime.now()


def now_dir_ts():
    """Return the query timestamp as ``year_month_day_hour_minute_second``.

    The fields are joined with underscores and are not zero-padded.
    """
    fields = (
        query_time.year,
        query_time.month,
        query_time.day,
        query_time.hour,
        query_time.minute,
        query_time.second,
    )
    return "_".join(str(field) for field in fields)


now = now_dir_ts()
PDB_dl_dir = "ds_" + now

# Build the search and collect at most the first 500 hits.
search_dict = pypdb.Query(query)
# NOTE(review): this used to be ``search_dict.search(search_dict)``; pypdb's
# documented usage is ``Query(...).search()`` -- confirm against the installed
# pypdb version.  ``or []`` treats a falsy/None result as "no hits" instead of
# crashing on the slice.
found = (search_dict.search() or [])[:500]
if not found:
    print("There were no search results found for the query: " + query)

# One metadata record per hit.
metadata = [pypdb.describe_pdb(pdb_id) for pdb_id in found]

# Save the metadata list as a CSV file
dfm = pd.DataFrame(metadata)
dfm.to_csv("metadata_" + now + ".csv")

# %%
parser = PDBParser()  # create a parser
pdbs = []