def requires(self):
    """Choose the upstream task from the presence of ``soakdb_files``.

    Returns:
        ``FindSoakDBFiles()`` when the ``soakdb_files`` table exists,
        otherwise ``TransferAllFedIDsAndDatafiles()``.
    """
    _connection, cursor = db_functions.connectDB()
    if db_functions.table_exists(cursor, 'soakdb_files'):
        return FindSoakDBFiles()
    return TransferAllFedIDsAndDatafiles()
def requires(self):
    """Return the tasks that must run before hits are transferred.

    If ``proasis_hits`` has no ``ligand_list`` column, the column is added
    and only ``StartLigandSearches()`` is required. Otherwise one
    ``HitTransfer`` task is built per entry of ``self.get_list()`` and
    returned together with the project/file checks.

    Returns:
        A single task, or a tuple of tasks and a list of tasks. If building
        the normal requirement set fails, ``CleanUpHits()`` is returned
        instead.
    """
    if not db_functions.column_exists('proasis_hits', 'ligand_list'):
        try:
            conn, c = db_functions.connectDB()
            c.execute('ALTER TABLE proasis_hits ADD COLUMN ligand_list text;')
            conn.commit()
            return StartLigandSearches()
        except Exception:
            # Best effort: if the column cannot be added, fall through to
            # the normal path. ``Exception`` (rather than a bare except)
            # lets KeyboardInterrupt/SystemExit propagate.
            pass

    try:
        run_list = self.get_list()
        return (
            database_operations.FindProjects(),
            database_operations.CheckFiles(),
            StartLigandSearches(),
            [
                data_in_proasis.HitTransfer(bound_pdb=pdb,
                                            crystal=crystal_name,
                                            protein_name=protein_name,
                                            smiles=smiles_string,
                                            mod_date=modification_string,
                                            ligands=ligand_list)
                for (pdb, crystal_name, protein_name, smiles_string,
                     modification_string, ligand_list) in run_list
            ],
            database_operations.FindProjects(),
        )
    except Exception:
        # Any failure while assembling the requirements falls back to the
        # clean-up task, as before.
        return data_in_proasis.CleanUpHits()
def get_list(self):
    """Collect the ``proasis_hits`` rows that are ready for transfer.

    Rows are skipped when any of the four ``exists_*`` columns stringifies
    to ``'0'``.

    Returns:
        A list of ``(bound_conf, crystal_name, protein, smiles,
        modification_date, ligand_list)`` tuples, every field converted
        with ``str``.
    """
    _connection, cursor = db_functions.connectDB()
    cursor.execute(
        "SELECT bound_conf, crystal_name, protein, smiles, modification_date, ligand_list, exists_2fofc, exists_fofc, exists_pdb, exists_mtz FROM proasis_hits WHERE modification_date not like '' and ligand_list not like 'None' and bound_conf not like ''"
    )

    run_list = []
    for record in cursor.fetchall():
        # Columns 6-9 are the exists_2fofc/fofc/pdb/mtz flags.
        file_flags = [str(record[index]) for index in (6, 7, 8, 9)]
        if '0' in file_flags:
            continue
        run_list.append(tuple(str(record[index]) for index in range(6)))
    return run_list
def find_proasis_repeats(protein):
    """Find structure titles that occur more than once in a project.

    The last whitespace-separated word of each structure's ``TITLE`` is
    used as the key. For every key seen more than once, the matching
    strucids and the ``bound_conf`` values stored for them in
    ``proasis_hits`` are gathered.

    Returns:
        A dict with parallel lists under ``'crystal'``, ``'strucids'`` and
        ``'bound_confs'``.
    """
    strucids = paf.get_strucids_from_project(protein)
    titles = []
    for strucid in strucids:
        struc_json = paf.get_strucid_json(strucid)
        titles.append(struc_json['allStrucs'][0]['TITLE'].split()[-1])

    repeats = {'crystal': [], 'strucids': [], 'bound_confs': []}
    for title, occurrences in dict(Counter(titles)).items():
        if occurrences <= 1:
            continue
        repeats['crystal'].append(title)
        repeats['strucids'].append(
            [sid for sid, name in zip(strucids, titles) if name == title])

    _connection, cursor = dbf.connectDB()
    for group in repeats['strucids']:
        confs = []
        for strucid in group:
            cursor.execute(
                'select bound_conf from proasis_hits where strucid=%s',
                (strucid,))
            confs.extend(str(record[0]) for record in cursor.fetchall())
        repeats['bound_confs'].append(confs)
    return repeats
def run(self):
    """Transfer ``self.data_file`` and set its ``status_code`` to 2."""
    db_functions.transfer_data(self.data_file)

    connection, cursor = db_functions.connectDB()
    update_sql = 'UPDATE soakdb_files SET status_code=2 where filename like %s;'
    cursor.execute(update_sql, (self.data_file, ))
    connection.commit()
def requires(self):
    """Require one ``RemoveADFiles`` task per ``root_dir`` in ``proasis_out``.

    The substring ``'comp_chem'`` is removed from each directory before it
    is handed to the task.
    """
    _connection, cursor = dbf.connectDB()
    cursor.execute('select root_dir from proasis_out')
    directories = [
        str(record[0]).replace('comp_chem', '')
        for record in cursor.fetchall()
    ]
    return [RemoveADFiles(root_dir=direc) for direc in directories]
def update_apo_field():
    """Record the apo PDB name for every ``proasis_out`` row that has one.

    The expected file name is the second-to-last ``/``-separated component
    of ``root_dir`` followed by ``_apo.pdb``; ``apo_name`` is updated only
    when that file exists inside ``root_dir``.
    """
    connection, cursor = dbf.connectDB()
    cursor.execute('SELECT root_dir FROM proasis_out')
    for record in cursor.fetchall():
        root_dir = str(record[0])
        apo_name = str(root_dir.split('/')[-2] + '_apo.pdb')
        if not os.path.isfile(os.path.join(root_dir, apo_name)):
            continue
        cursor.execute(
            'UPDATE proasis_out SET apo_name = %s WHERE root_dir = %s',
            (apo_name, root_dir))
        connection.commit()
def get_to_dock():
    """Return every ``root_dir`` stored in ``proasis_out`` as a string."""
    _connection, cursor = dbf.connectDB()
    cursor.execute('SELECT root_dir FROM proasis_out')
    return [str(record[0]) for record in cursor.fetchall()]
def run(self):
    """Fetch the structure's SDF file and store its name in ``proasis_out``.

    The file is requested via ``paf.get_struc_file`` with the task output
    path as destination; the basename of the returned path is written to
    ``mol_name`` for ``self.strucid``.
    """
    destination = self.output().path
    print(destination)
    sdf_path = paf.get_struc_file(self.strucid, destination, 'sdf')

    connection, cursor = dbf.connectDB()
    cursor.execute(
        'UPDATE proasis_out SET mol_name = %s WHERE strucid = %s',
        (sdf_path.split('/')[-1], self.strucid))
    connection.commit()
def requires(self):
    """Require a ``FindLigands`` task for every hit lacking a ligand list.

    Adds the ``ligand_list`` column to ``proasis_hits`` when it is missing,
    then selects every non-NULL ``bound_conf`` whose ``ligand_list`` is
    NULL.

    Returns:
        A tuple ``(FindProjects(), CheckFiles(), [FindLigands(...), ...])``.
    """
    conn, c = db_functions.connectDB()
    if not db_functions.column_exists('proasis_hits', 'ligand_list'):
        # Reuse the connection opened above; previously a second
        # connection was opened here and the first one was orphaned.
        c.execute('ALTER TABLE proasis_hits ADD COLUMN ligand_list text;')
        conn.commit()

    c.execute(
        "select bound_conf from proasis_hits where ligand_list is NULL and bound_conf is not NULL"
    )
    conf_list = []
    for row in c.fetchall():
        conf = str(row[0])
        conf_list.append(conf)
        print(conf)

    return database_operations.FindProjects(), database_operations.CheckFiles(), [
        data_in_proasis.FindLigands(bound_conf=conf) for conf in conf_list
    ]
def run(self):
    """Fetch the structure's MTZ, move it to the output path and log it.

    The basename of the output path is stored in ``proasis_out.mtz_name``
    for ``self.strucid``.
    """
    docking_path = os.path.join(self.root_dir, self.docking_dir)
    mtz_name = paf.get_struc_mtz(self.strucid, docking_path)
    shutil.move(os.path.join(docking_path, mtz_name), self.output().path)

    connection, cursor = dbf.connectDB()
    cursor.execute(
        'UPDATE proasis_out SET mtz_name = %s WHERE strucid = %s',
        (self.output().path.split('/')[-1], self.strucid))
    connection.commit()
def run(self):
    """Fetch the structure's 2fofc map, move it to the output path and log it.

    The basename of the output path is stored in
    ``proasis_out.twofofc_name`` for ``self.strucid``.
    """
    docking_path = os.path.join(self.root_dir, self.docking_dir)
    map_name = paf.get_struc_map(self.strucid, docking_path, '2fofc')
    shutil.move(os.path.join(docking_path, map_name), self.output().path)

    connection, cursor = dbf.connectDB()
    cursor.execute(
        'UPDATE proasis_out SET twofofc_name = %s WHERE strucid = %s',
        (self.output().path.split('/')[-1], self.strucid))
    connection.commit()
def run(self):
    """Write a copy of the curated PDB with the ligand lines removed.

    The curated PDB name is looked up in ``proasis_out`` for
    ``self.strucid``. Lines containing any of the strings returned by
    ``paf.get_lig_strings`` are dropped; the rest are appended to the task
    output.

    Raises:
        Exception: if more than one ``proasis_out`` row matches; if no
            usable row is found (the matching ``proasis_out`` entry and
            the docking directory are removed first); or if the curated
            PDB cannot be opened.
    """
    # NOTE(review): eval() executes arbitrary code. It is kept because the
    # format of ``self.ligands`` is not visible here; if it is always a
    # Python literal, ast.literal_eval would be the safe replacement.
    self.ligands = eval(self.ligands)
    print((len(self.ligands)))

    conn, c = dbf.connectDB()
    docking_path = os.path.join(self.root_dir, self.docking_dir)

    def reset_and_fail():
        # Drop the DB entry and the files for this crystal, then abort.
        c.execute('DELETE from proasis_out WHERE curated_name=%s',
                  (str(self.crystal + '_' + 'curated.pdb'), ))
        conn.commit()
        shutil.rmtree(docking_path)
        raise Exception(
            'DB problem... resetting the datasource and files for this crystal'
        )

    c.execute('SELECT curated_name from proasis_out WHERE strucid=%s',
              (self.strucid, ))
    rows = c.fetchall()
    print((len(rows)))

    if len(rows) > 1:
        raise Exception('Multiple files where found for this structure: ' +
                        str(rows))
    # No rows, or an empty row: same reset as before, but checked
    # explicitly instead of via a NameError caught by a bare except.
    if not rows or len(rows[0]) == 0:
        reset_and_fail()

    curated_pdb = str(rows[0][0])
    print(curated_pdb)

    ligand_string = paf.get_lig_strings(self.ligands)
    working_dir = os.getcwd()
    # The curated PDB name is resolved relative to the docking directory.
    os.chdir(docking_path)
    try:
        try:
            pdb_file = open(curated_pdb, 'r')
        except (IOError, OSError):
            raise Exception(str(rows))
        with pdb_file:
            kept_lines = [
                line for line in pdb_file
                if not any(lig in line for lig in ligand_string)
            ]
        # Only touch the output when there is something to write, so an
        # empty result still leaves no output file behind.
        if kept_lines:
            with open(self.output().path, 'a') as f:
                f.writelines(kept_lines)
    finally:
        # Always restore the working directory, even on failure.
        os.chdir(working_dir)
def get_file_list(self, status_code):
    """Return ``(filename, id)`` string pairs for files with a given status.

    Args:
        status_code: value matched against ``soakdb_files.status_code``
            (converted with ``str`` before being passed to the query).
    """
    _connection, cursor = db_functions.connectDB()
    cursor.execute(
        'SELECT filename, id FROM soakdb_files WHERE status_code = %s',
        (str(status_code), ))
    return [(str(record[0]), str(record[1])) for record in cursor.fetchall()]
def get_comp_chem_ready():
    """List bound conformations that are uploaded and at outcome 4 or 5.

    Takes every ``bound_conf`` in ``proasis_hits`` with a non-empty
    ``strucid`` and keeps those whose ``refinement.outcome`` matches
    ``(%4%|%5%)``.

    Returns:
        A list of ``bound_conf`` strings; empty when ``proasis_hits`` has
        no uploaded structures.
    """
    conn, c = dbf.connectDB()
    c.execute("SELECT bound_conf FROM proasis_hits WHERE strucid != ''")
    bound_confs = tuple(str(row[0]) for row in c.fetchall())

    # An empty tuple would be rendered as ``IN ()``, which is a SQL syntax
    # error, so there is nothing to look up in that case.
    if not bound_confs:
        return []

    c.execute('SELECT bound_conf FROM refinement WHERE bound_conf IN %s AND outcome SIMILAR TO %s',
              (bound_confs, '(%4%|%5%)'))
    return [str(result[0]) for result in c.fetchall() if len(result) > 0]
def run(self):
    """Replace the stored data for ``self.file_id`` with a fresh transfer.

    Rows for the file are deleted from ``lab``, ``refinement``, ``dimple``
    and ``data_processing`` (committing after each), the data file is
    transferred again, and its ``status_code`` is set to 2.
    """
    connection, cursor = db_functions.connectDB()

    purge_statements = (
        'delete from lab where file_id=%s',
        'delete from refinement where file_id=%s',
        'delete from dimple where file_id=%s',
        'delete from data_processing where file_id=%s',
    )
    for statement in purge_statements:
        cursor.execute(statement, (self.file_id, ))
        connection.commit()

    db_functions.transfer_data(self.data_file)

    cursor.execute(
        'UPDATE soakdb_files SET status_code=2 where filename like %s;',
        (self.data_file, ))
    connection.commit()
def get_list(self):
    """Return the leads that have both a pandda path and a reference PDB.

    Returns:
        A list of ``(pandda_path, protein, reference_pdb)`` string tuples
        from ``proasis_leads``.
    """
    _connection, cursor = db_functions.connectDB()
    cursor.execute(
        "SELECT pandda_path, protein, reference_pdb FROM proasis_leads WHERE pandda_path !='' and pandda_path !='None' and reference_pdb !='' and reference_pdb !='None' "
    )
    return [
        (str(record[0]), str(record[1]), str(record[2]))
        for record in cursor.fetchall()
    ]
def requires(self):
    """Require hit transfers plus one ``EdstatsScores`` task per structure.

    One ``EdstatsScores`` task is built for every ``proasis_hits`` row
    with a non-empty ``strucid``.

    Returns:
        A tuple ``(StartHitTransfers(), [EdstatsScores(...), ...])``.
    """
    conn, c = dbf.connectDB()
    c.execute("select crystal_name, strucid from proasis_hits where strucid !=''")
    run_list = [(str(row[0]), str(row[1])) for row in c.fetchall()]

    # The previous ``return (task, Task(...) for ... in ...)`` was a
    # SyntaxError (unparenthesised tuple inside a generator expression).
    # Return the task plus a list instead.
    return data_in_proasis.StartHitTransfers(), [
        EdstatsScores(crystal=crystal_name, strucid=strucid_no)
        for (crystal_name, strucid_no) in run_list
    ]
def get_strucids(run_list):
    """Look up strucid, crystal and ligand info for bound-state paths.

    Args:
        run_list: iterable of ``bound_conf`` values; each is used as a
            query parameter and is split on ``'/'``, so it is expected to
            be a path-like string containing at least one ``'/'``.

    Returns:
        A dict with lists under ``'strucid'``, ``'crystal'``,
        ``'directory'`` and ``'ligands'``.
    """
    out_dict = {'strucid': [], 'crystal': [], 'directory': [], 'ligands': []}
    conn, c = dbf.connectDB()
    for struc in run_list:
        # Rows whose ligand_list is the literal string 'None' are excluded.
        c.execute("SELECT strucid, crystal_name, ligand_list FROM proasis_hits WHERE bound_conf=%s AND "
                  "ligand_list != 'None'", (struc,))
        rows = c.fetchall()
        # NOTE(review): the source was collapsed onto one line, so it is not
        # certain whether the directory handling below belongs inside this
        # row loop (as laid out here, keeping the four lists the same
        # length) or runs once per ``struc`` - confirm against the original.
        for row in rows:
            out_dict['strucid'].append(str(row[0]))
            out_dict['crystal'].append(str(row[1]))
            out_dict['ligands'].append(str(row[2]))
            # When the parent directory name contains 'Refine', that
            # directory is treated as part of the file name and stripped too.
            if 'Refine' in struc.split('/')[-2]:
                pdb = str(struc.split('/')[-2] + '/' + struc.split('/')[-1])
            else:
                pdb = struc.split('/')[-1]
            # str.replace removes every occurrence of ``pdb`` in the path,
            # not just the trailing one.
            directory = struc.replace(pdb, '')
            out_dict['directory'].append(directory)
    return out_dict
def run(self):
    """Load every listed soakDB file, then populate all known proposals.

    Each line of the task input names a database file passed to
    ``db_functions.pop_soakdb``. Afterwards ``db_functions.pop_proposals``
    is called once per distinct ``proposal`` in ``soakdb_files`` and a
    marker string is written to the task output.
    """
    # connection to the central postgres db
    connection, cursor = db_functions.connectDB()

    # the list produced by the previous step drives what gets written
    with self.input().open('r') as database_list:
        for entry in database_list.readlines():
            out, err, proposal = db_functions.pop_soakdb(
                entry.replace('\n', ''))

    cursor.execute('SELECT proposal FROM soakdb_files')
    distinct_proposals = {str(record[0]) for record in cursor.fetchall()}
    for proposal_number in distinct_proposals:
        db_functions.pop_proposals(proposal_number)
    cursor.close()

    with self.output().open('w') as marker:
        marker.write('TransferFeDIDs DONE')
def export_ligand_edstats(filename):
    """Issue a ``COPY`` of ``ligand_edstats`` to a CSV file with a header.

    Args:
        filename: target file name, joined onto the current working
            directory to form the path given to ``COPY``.
    """
    destination = os.path.join(os.getcwd(), filename)
    _connection, cursor = dbf.connectDB()
    cursor.execute("COPY ligand_edstats TO %s DELIMITER ',' CSV HEADER;",
                   (destination, ))
def run(self):
    """Gather hit and lead data from the database and hand it to
    ``self.add_to_postgres``.

    Crystals are taken from ``refinement`` where ``outcome`` matches
    ``(%3%|%4%|%5%|%6%)``. For each one, ``smiles``/``protein`` are read
    from ``lab`` and ``pandda_path``/``reference_pdb`` from ``dimple``. The
    collected values are turned into two DataFrames and
    ``self.add_to_postgres`` is called per protein for ``proasis_leads``
    and ``proasis_hits``. An empty string is written to the task output.

    NOTE(review): the source was collapsed onto one line; the nesting below
    is reconstructed from the statement order - confirm against the
    original file.
    """
    # all data necessary for uploading hits
    crystal_data_dump_dict = {
        'crystal_name': [],
        'protein': [],
        'smiles': [],
        'bound_conf': [],
        'modification_date': [],
        'strucid': []
    }
    # all data necessary for uploading leads
    project_data_dump_dict = {
        'protein': [],
        'pandda_path': [],
        'reference_pdb': [],
        'strucid': []
    }
    # SIMILAR TO pattern: outcome contains 3, 4, 5 or 6
    outcome_string = '(%3%|%4%|%5%|%6%)'
    conn, c = db_functions.connectDB()
    c.execute(
        '''SELECT crystal_id, bound_conf, pdb_latest FROM refinement WHERE outcome SIMILAR TO %s''',
        (str(outcome_string), ))
    rows = c.fetchall()
    print((str(len(rows)) + ' crystals were found to be in refinement or above'))
    for row in rows:
        c.execute(
            '''SELECT smiles, protein FROM lab WHERE crystal_id = %s''',
            (str(row[0]), ))
        lab_table = c.fetchall()
        # crystal ids shorter than three characters are ignored
        if len(str(row[0])) < 3:
            continue
        if len(lab_table) > 1:
            print(('WARNING: ' + str(row[0]) +
                   ' has multiple entries in the lab table'))
            # print lab_table
        for entry in lab_table:
            # fall back to the crystal id prefix (text before the first '-')
            # when the lab table has no usable protein name
            if len(str(entry[1])) < 2 or 'None' in str(entry[1]):
                protein_name = str(row[0]).split('-')[0]
            else:
                protein_name = str(entry[1])
            crystal_data_dump_dict['protein'].append(protein_name)
            crystal_data_dump_dict['smiles'].append(entry[0])
            crystal_data_dump_dict['crystal_name'].append(row[0])
            crystal_data_dump_dict['bound_conf'].append(row[1])
            # strucid starts out empty
            crystal_data_dump_dict['strucid'].append('')
            # NOTE(review): the bare except hides every failure of
            # get_mod_date (and KeyboardInterrupt); an empty date is stored
            # instead.
            try:
                modification_date = misc_functions.get_mod_date(str(
                    row[1]))
            except:
                modification_date = ''
            crystal_data_dump_dict['modification_date'].append(
                modification_date)
        c.execute(
            '''SELECT pandda_path, reference_pdb FROM dimple WHERE crystal_id = %s''',
            (str(row[0]), ))
        pandda_info = c.fetchall()
        # NOTE(review): protein_name is whatever the lab loop above last
        # assigned; if lab_table was empty it is stale from an earlier
        # crystal, or unbound on the first one - verify.
        for pandda_entry in pandda_info:
            project_data_dump_dict['protein'].append(protein_name)
            project_data_dump_dict['pandda_path'].append(pandda_entry[0])
            project_data_dump_dict['reference_pdb'].append(pandda_entry[1])
            project_data_dump_dict['strucid'].append('')
    project_table = pandas.DataFrame.from_dict(project_data_dump_dict)
    crystal_table = pandas.DataFrame.from_dict(crystal_data_dump_dict)
    # only proteins that produced at least one leads entry are processed
    protein_list = set(list(project_data_dump_dict['protein']))
    print(protein_list)
    for protein in protein_list:
        self.add_to_postgres(project_table, protein, ['reference_pdb'],
                             project_data_dump_dict, 'proasis_leads')
        self.add_to_postgres(crystal_table, protein,
                             ['crystal_name', 'smiles', 'bound_conf'],
                             crystal_data_dump_dict, 'proasis_hits')
    # NOTE(review): the output is opened in 'wb' mode but written with a str;
    # confirm the target's open() accepts this.
    with self.output().open('wb') as f:
        f.write('')
def run(self):
    """Update ``status_code`` in ``soakdb_files`` for each input file.

    For every file name in the task input: if it is already in
    ``soakdb_files`` and its current modification date compares greater
    than the stored one, it is marked changed (1) and the stored date is
    updated; if it is not in the table it is loaded via
    ``db_functions.pop_soakdb``/``pop_proposals`` and marked new (0). If the
    ``lab`` table does not exist, every file is reset to 0. An empty string
    is written to the task output.

    NOTE(review): the source was collapsed onto one line; the nesting below
    is reconstructed from the statement order - confirm against the
    original file.
    """
    conn, c = db_functions.connectDB()
    exists = db_functions.table_exists(c, 'soakdb_files')
    # file names found in soakdb_files so far
    checked = []
    # Status codes:-
    # 0 = new
    # 1 = changed
    # 2 = not changed
    if exists:
        with self.input().open('r') as f:
            files = f.readlines()
        for filename in files:
            filename_clean = filename.rstrip('\n')
            # NOTE(review): ``like`` treats % and _ in the file name as
            # wildcards.
            c.execute(
                'select filename, modification_date, status_code from soakdb_files where filename like %s;',
                (filename_clean, ))
            # fetchall() materialises the rows, so reusing the cursor inside
            # the loop does not disturb the iteration
            for row in c.fetchall():
                if len(row) > 0:
                    data_file = str(row[0])
                    checked.append(data_file)
                    old_mod_date = str(row[1])
                    current_mod_date = misc_functions.get_mod_date(
                        data_file)
                    # NOTE(review): old_mod_date is a str; the comparison is
                    # only meaningful if get_mod_date returns a comparable,
                    # sortable string - confirm.
                    if current_mod_date > old_mod_date:
                        c.execute(
                            'UPDATE soakdb_files SET status_code = 1 where filename like %s;',
                            (filename_clean, ))
                        c.execute(
                            'UPDATE soakdb_files SET modification_date = %s where filename like %s;',
                            (current_mod_date, filename_clean))
                        conn.commit()
            # not in the table yet: load it and mark it as new
            if filename_clean not in checked:
                out, err, proposal = db_functions.pop_soakdb(
                    filename_clean)
                db_functions.pop_proposals(proposal)
                c.execute(
                    'UPDATE soakdb_files SET status_code = 0 where filename like %s;',
                    (filename_clean, ))
                conn.commit()
        # result of this query is currently unused (see disabled loop below)
        c.execute('select filename from soakdb_files;')
        # for row in c.fetchall():
        #     if str(row[0]) not in checked:
        #         data_file = str(row[0])
    # without a lab table nothing has been transferred: reset every file
    exists = db_functions.table_exists(c, 'lab')
    if not exists:
        c.execute('UPDATE soakdb_files SET status_code = 0;')
        conn.commit()
    with self.output().open('w') as f:
        f.write('')
tr:nth-of-type(odd) { background: #eee; } th { background: #3498db; color: white; font-weight: bold; } td, th { padding: 10px; border: 1px solid #ccc; text-align: left; font-size:12px; }''' conn, c = dbf.connectDB() c.execute('select crystal_name from proasis_hits') rows = c.fetchall() crystal_list = [] for row in rows: crystal_list.append(str(row[0])) crystal_list = list(set(crystal_list)) protein_list = [] c.execute('select protein from proasis_hits') rows = c.fetchall() for row in rows: protein_list.append(str(row[0]))
def get_project_counts():
    """Count crystals per pipeline stage for every protein in ``lab``.

    Proteins whose name is empty or contains ``None``, ``test``, ``null``
    or ``QC`` are ignored. For each remaining protein:

    * crystals are those in ``lab`` whose ``mounting_result`` matches
      ``(%Mounted%|%OK%)``;
    * ``hit`` counts ``dimple`` rows for those crystals whose
      ``pandda_hit`` stringifies to ``'True'``;
    * ``refinement`` / ``comp_chem`` / ``depo`` count the distinct hit
      crystals with a ``refinement.outcome`` containing 3 / 4 / 5;
    * ``mounted`` is crystals minus ``hit``, and ``pandda_hit`` is ``hit``
      minus the three later-stage counts.

    Returns:
        A ``pandas.DataFrame`` built from the per-protein counts, with
        columns ``protein``, ``mounted``, ``pandda_hit``, ``refinement``,
        ``comp_chem`` and ``depo``.
    """
    skip_markers = ('None', 'test', 'null', 'QC')

    conn, c = dbf.connectDB()
    c.execute('select protein from lab')
    # The old guard ``if not 'null' or 'None' or 'test' in ...`` was always
    # true, so it filtered nothing; all filtering happens in the loop below.
    protein_list = {str(row[0]) for row in c.fetchall()}

    counts_dict = {
        'protein': [],
        'mounted': [],
        'pandda_hit': [],
        'refinement': [],
        'comp_chem': [],
        'depo': []
    }

    for protein in protein_list:
        if len(protein) < 1 or any(marker in protein
                                   for marker in skip_markers):
            continue
        counts_dict['protein'].append(protein)

        c.execute(
            'select crystal_name from lab where protein = %s and mounting_result similar to %s',
            (str(protein), '(%Mounted%|%OK%)'))
        crystal_list = list({str(row[0]) for row in c.fetchall()})

        # ``hit`` counts matching dimple rows, while ``hits`` keeps the
        # distinct crystal names for the refinement lookup.
        hit = 0
        hits = set()
        for crystal in crystal_list:
            c.execute(
                'select pandda_hit, crystal_name from dimple where crystal_name = %s',
                (crystal, ))
            for row2 in c.fetchall():
                if str(row2[0]) == 'True':
                    hit += 1
                    hits.add(str(row2[1]))

        refinement = set()
        comp_chem = set()
        depo = set()
        for hit_name in hits:
            c.execute(
                'select outcome, crystal_name from refinement where outcome similar to %s and crystal_name = %s',
                ('(%3%|%4%|%5%)', hit_name))
            for row3 in c.fetchall():
                outcome = str(row3[0])
                crystal_name = str(row3[1])
                if '3' in outcome:
                    refinement.add(crystal_name)
                if '4' in outcome:
                    comp_chem.add(crystal_name)
                if '5' in outcome:
                    depo.add(crystal_name)

        counts_dict['refinement'].append(len(refinement))
        counts_dict['comp_chem'].append(len(comp_chem))
        counts_dict['depo'].append(len(depo))
        counts_dict['mounted'].append(len(crystal_list) - hit)
        counts_dict['pandda_hit'].append(hit - len(refinement) -
                                         len(comp_chem) - len(depo))

    dataframe = pd.DataFrame.from_dict(counts_dict)
    return dataframe
def run(self): project_strucids = paf.get_strucids_from_project(self.protein) # get crystal names for protein according to db conn, c = dbf.connectDB() c.execute('select crystal_name from proasis_hits where protein=%s', (self.protein,)) rows = c.fetchall() crystal_list = [] for row in rows: crystal_list.append(str(row[0])) crystal_list = list(set(crystal_list)) db_strucids = [] # status_list = [] # good_list = [] file_checks = {'crystal': [], 'bound_state': [], 'mod_date': [], 'pdb': [], 'mtz': [], '2fofc': [], 'fofc': [], 'ligs': []} # get info for crystals identified for crystal in list(set(crystal_list)): c.execute( "select strucid, bound_conf, modification_date from proasis_hits " "where crystal_name like %s and strucid NOT LIKE ''", (crystal,)) bound_list = [] strucid_list = [] mod_date_list = [] rows = c.fetchall() for row in rows: strucid = str(row[0]) strucid_list.append(strucid) bound_conf = str(row[1]) bound_list.append(bound_conf) modification_date = str(row[2]) mod_date_list.append(modification_date) unique_bound = list(set(bound_list)) unique_modification_date = list(set(mod_date_list)) unique_strucids = list(set(strucid_list)) for ids in unique_strucids: db_strucids.append(ids) c.execute("select strucid from proasis_leads where protein=%s and strucid!=''", (self.protein,)) rows = c.fetchall() for row in rows: db_strucids.append(str(row[0])) # # get info about crystals in proasis_hits (ones identified as in refinement) that haven't made it to # # proasis # # if sum([len(unique_modification_date), len(unique_bound), len(unique_strucids)]) == 0: # # c.execute( # "select bound_conf, modification_date, exists_pdb, exists_mtz, exists_2fofc, exists_fofc, " # "ligand_list from proasis_hits where crystal_name like %s", # (crystal,)) # rows = c.fetchall() # # for row in rows: # file_checks['crystal'].append(crystal) # file_checks['bound_state'].append(str(row[0])) # file_checks['mod_date'].append(str(row[1])) # file_checks['pdb'].append(str(row[2])) # 
file_checks['mtz'].append(str(row[3])) # file_checks['2fofc'].append(str(row[4])) # file_checks['fofc'].append(str(row[5])) # file_checks['ligs'].append(str(row[6])) # clear up mismatching entries in_common = list(set(db_strucids) & set(project_strucids)) for strucid in db_strucids: if strucid not in in_common: print(self.protein + ': ' + strucid + ' found in database but not in proasis') print('removing entry from db...') c.execute("UPDATE proasis_hits set strucid='' where strucid=%s", (strucid,)) conn.commit() c.execute("UPDATE proasis_leads set strucid='' where strucid=%s", (strucid,)) conn.commit() print('\n') for strucid in project_strucids: if strucid not in in_common: print(self.protein + ': ' + strucid + ' found in proasis but not in db') print('removing entry from proasis...') paf.delete_structure(strucid) print('\n') # good_structures = {'crystal': [], 'bound_state': [], 'mod_date': [], 'strucid': []} # if len(set([len(unique_modification_date), len(unique_bound), len(unique_strucids)])) == 1: # status_list.append(0) # good_list.append(crystal) # for i in range(0, len(unique_bound)): # good_structures['crystal'].append(crystal) # good_structures['bound_state'].append(unique_bound[i]) # good_structures['mod_date'].append(unique_modification_date[i]) # good_structures['strucid'].append(unique_strucids[i]) # for i in range(0, len(good_structures['strucid'])): # if good_structures['strucid'][i] not in project_strucids: # print('missing or incorrect strucid in db for ' # + str(good_structures['crystal'][i] + ' (' + str(good_structures['strucid'][i]) + ')')) # for key in file_checks.keys(): # if '0' in file_checks[key]: # error_list.append(str('missing ' + str(key) + ' file!')) # # if 'None' in file_checks[key]: # error_list.append(str('None value found for ' + str(key))) # # elif len(set([len(unique_modification_date), len(unique_bound), len(unique_strucids)])) > 1: # status_list.append(1) # # error_frame = pd.DataFrame.from_dict(file_checks) # cols = 
['crystal', 'bound_state', 'mod_date', 'ligs', 'mtz', 'pdb', '2fofc', 'fofc'] # error_frame = error_frame[cols] # error_frame.sort_values(by=['crystal'], inplace=True) # # good_frame = pd.DataFrame.from_dict(good_structures) # cols = ['crystal', 'bound_state', 'mod_date', 'strucid'] # good_frame = good_frame[cols] # good_frame.sort_values(by=['crystal'], inplace=True) # # # clean up repeats repeats = find_proasis_repeats(self.protein) for i, x in enumerate(repeats['crystal']): bound_list = repeats['bound_confs'][i] strucids = repeats['strucids'][i] if len(bound_list)==len(strucids): if len(list(set(bound_list)))==1: print(str('identical uploaded structures: ' + str(strucids)) + ' (' + x + ')') print('removing repeat structures from proasis, and updating database...') to_delete_strucs=strucids[1:] to_delete_confs=bound_list[1:] for j in range(0, len(to_delete_strucs)): c.execute('DELETE FROM proasis_hits WHERE strucid=%s and bound_conf=%s', (to_delete_strucs[j], to_delete_confs[j])) conn.commit() paf.delete_structure(to_delete_strucs[j]) pd.DataFrame.from_dict(repeats).to_csv('test.csv')