def parsepdb(iqmain, iqart):  # general pipeline, tentative
    """Run two advanced keyword searches against the PDB and collect the hits.

    Both arguments are turned into 'AdvancedKeywordQuery' queries. As
    originally described, ``iqmain`` is meant to select zinc-containing
    entries and ``iqart`` zinc x-ray artifacts.

    Returns a pair of lists: (hits for ``iqmain``, hits for ``iqart``).
    """
    # Build both queries first, then run the searches in the same order.
    main_query = pd.make_query(iqmain, querytype='AdvancedKeywordQuery')
    artifact_query = pd.make_query(iqart, querytype='AdvancedKeywordQuery')

    main_hits = list(pd.do_search(main_query))
    artifact_hits = list(pd.do_search(artifact_query))
    return main_hits, artifact_hits
def pdblistfilter(qmainl, iqnmr, iqxray):
    """Split a list of PDB hits by experimental method (NMR vs. x-ray).

    Two 'ExpTypeQuery' searches are run, one for ``iqnmr`` and one for
    ``iqxray``. Every element of ``qmainl`` that also appears in the NMR
    search results is kept in the first returned list, and every element
    that appears in the x-ray search results is kept in the second one.
    An element found in both searches lands in both lists.

    Args:
        qmainl: iterable of hits to classify (iterated exactly once).
        iqnmr: search value for the NMR experiment-type query.
        iqxray: search value for the x-ray experiment-type query.

    Returns:
        (qnmrfinal, qxrayfinal): two lists that preserve the order (and any
        duplicates) of ``qmainl``.
    """
    nmr_query = pd.make_query(iqnmr, querytype='ExpTypeQuery')
    xray_query = pd.make_query(iqxray, querytype='ExpTypeQuery')

    # Build the lookup sets once: membership tests on a set are O(1), whereas
    # testing against the raw result lists made the loop below quadratic.
    # Assumes hits are hashable (PDB ID strings) -- TODO confirm.
    nmr_ids = set(pd.do_search(nmr_query))
    xray_ids = set(pd.do_search(xray_query))

    qnmrfinal = []
    qxrayfinal = []
    # Single pass so that qmainl may be any iterable, as before.
    for element in qmainl:
        if element in nmr_ids:
            qnmrfinal.append(element)
        if element in xray_ids:
            qxrayfinal.append(element)
    return qnmrfinal, qxrayfinal
Ejemplo n.º 3
0
def searchPDB(searchTerm):
    """Search the PDB for ``searchTerm`` using an advanced keyword query.

    Args:
        searchTerm: keyword text handed to ``make_query``.

    Returns:
        The result of ``do_search`` for that query. The result is also
        printed before it is returned.
    """
    # Entries that were used for manual testing: '4xkl', '5v4k'.
    # '3vtv' is a known problem case: that structure uses entityNr instead of
    # nr_entities.
    # The query is built from the caller's search term; previously a
    # hard-coded test ID was searched and ``searchTerm`` was ignored.
    searchStructures = make_query(searchTerm, querytype='AdvancedKeywordQuery')

    foundStructures = do_search(searchStructures)
    print(foundStructures)
    return foundStructures
Ejemplo n.º 4
0
def find_matches(query, df):
    """
    Query the PDB through pypdb and keep only the rows of ``df`` whose
    'pdbid' value is among the search results.

    The search results are lower-cased before the comparison, so the
    'pdbid' column of the provided dataframe is expected to hold
    lower-case IDs.

    Returns:
    -------
    df : pandas.DataFrame
        The subset of the provided dataframe whose 'pdbid' entries
        matched the query
    """
    # Build the query, run the search, then normalise the hits to lower case.
    search_request = pdb.make_query(query)
    raw_hits = pdb.do_search(search_request)
    matching_ids = [hit.lower() for hit in raw_hits]

    # Boolean mask over the rows whose pdbid is one of the hits.
    row_mask = df['pdbid'].isin(matching_ids)
    return df[row_mask]
Ejemplo n.º 5
0
            shutil.rmtree(check)
            os.makedirs(check)
            break
        else:
            check=input("Please re-write the output directory again: ") # ex: /home/a/Desktop/pdb_data2
            os.makedirs(check)

    else:
        os.makedirs(check)

# Make the output directory `check` the current working directory.
os.chdir(check)

# For every entry of `my_query`, search the PDB and prepare a per-entry output
# folder. `my_query`, `number` and `check` are defined earlier in the script,
# outside this section.
for each in my_query:
    search_dict = make_query(each)
    found_pdbs = do_search(search_dict)
    # Keep at most `number` hits per query.
    if len(found_pdbs)>number:
        found_pdbs = found_pdbs[0:number]
    for ids in found_pdbs:
        # Build the file name "<id>.pdb" and the URL it is served from.
        # NOTE(review): os.path.join uses the OS path separator, so on Windows
        # this URL would contain a backslash -- confirm target platform.
        ext="{}.pdb".format(ids)
        search_path= 'https://files.rcsb.org/view'
        full_path=os.path.join(search_path,ext)

        # Create one folder per query entry (if it does not exist yet).
        # NOTE(review): the working directory is already `check`; if `check`
        # is a relative path this join resolves inside it again -- verify.

        my_new_folder= os.path.join(check,each)
        if not os.path.isdir(my_new_folder):
            os.makedirs(my_new_folder)

        # Local path for this entry's file inside the per-query folder.
        new_full = os.path.join(my_new_folder,ext)