#filesPDBRoot ='C:/Dev/Github/ProteinDataFiles/pdb_data/' #filesADJRoot ='C:/Dev/Github/ProteinDataFiles/pdb_out/Fov2_ADJ/' #adjusted on Fo at 3 degrees thevenaz #loadPath = 'C:/Dev/Github/BbkProject/PhDThesis/5.Chapters/1_Summer/CSV/' #printPath = 'C:/Dev/Github/BbkProject/PhDThesis/5.Chapters/1_Summer/Data/' geos = [ 'TAU', 'TAU+1', 'TAU-1', 'CA:C:O', 'O:C:N+1', 'CA-1:CA:CA+1', 'N:CA:O', 'CA:O:N+1', 'O-1:N:CA', 'O-1:C-1', 'C-1:N', 'N:CA', 'CA:C', 'C:O', 'C:N+1', 'N+1:CA+1', 'CA+1:C+1', 'C+1:O+1', 'PHI', 'PSI', 'OMEGA', 'CA-1:C-1:N:CA', 'CA-1:CA', 'CA:CA+1', 'C-1:C', 'C:C+1', 'N-1:N', 'N:N+1', 'CA-1:N', 'CA-1:O-1', 'O-1:N', 'C-1:CA', 'N:C', 'CA:O', 'CA:N+1', 'O:N+1', 'C:CA+1', 'N+1:C+1', 'O-1:CA', 'N:O', 'O:CA+1', 'N+1:O+1', 'N-1:O-1' ] print('### CREATING cut csv file ###') pdbListIn = help.getPDBList() print("---- Getting bad atom list--------") badAtoms = help.getBadAtomsListFromFile( ) # Get the bad atoms list we will use to reduce the list further print("---- Making unrestricted--------") #dataPdbRes = help.makeCsv('PDB', pdbListIn, geos, [],False) dataPdbRes = pd.read_csv(help.loadPath + "bb_restricted_a.csv") dataPdbRes.to_csv(help.loadPath + "bb_restricted_a.csv", index=False) dataPdbRes = help.applyRestrictions(dataPdbRes, True, True, True, True, False) dataPdbRes.to_csv(help.loadPath + "bb_restricted_b.csv", index=False) dataPdbRes = help.embellishCsv(dataPdbRes) print("---- Save to", help.loadPath + "bb_restricted.csv", '-------')
# Script: export per-residue DSSP secondary-structure assignments for the
# 100-pdb list; the actual file write is deliberately guarded off below.
import pandas as pd
import Ch000_Functions as help
from PsuGeometry import GeoReport as psu

pdblist = help.getPDBList100()
pdblist.sort()
#pdblist = pdblist[:10]  # debug: uncomment to run on a small subset

# Per-residue annotation ("hue") columns requested alongside the geometry.
hueList = ['aa', 'rid', 'bfactor', 'pdbCode', 'bfactorRatio', 'disordered',
           'occupancy', 'dssp']
dsspPrintPath = '../../PdbLists/'
# ed=False: no electron-density work needed here; dssp=True pulls DSSP codes.
georep = psu.GeoReport(pdblist, help.pdbDataPathLx, help.edDataPath,
                       dsspPrintPath, ed=False, dssp=True,
                       includePdbs=False, keepDisordered=True)
# Any single geo works ('N:CA'); we only want the per-residue hue columns.
# (Method name spelling "getGeoemtryCsv" is the library's own.)
datacsv = georep.getGeoemtryCsv(['N:CA'], hueList)
datacsv = datacsv[['pdbCode', 'chain', 'rid', 'aa', 'dssp']]
print(datacsv)
if False:  # don't accidentally run this and replace the saved dssp.csv
    datacsv.to_csv(dsspPrintPath + 'dssp.csv', index=False)
    print(datacsv)
# Script: aggregate per-residue close-contact counts for every pdb in the
# working list into a single Contacts_List.csv.
import os.path

import pandas as pd
import Ch000_Functions as help
from PsuGeometry import GeoPdb as geopdb
from PsuGeometry import CloseContact as geocc
from PsuGeometry import GeoReport as psu

pdbListIn = help.getPDBList()
contactslist = []  # one per-pdb counts frame per successful iteration
for pdb in pdbListIn:
    print(pdb)
    try:
        # Build the path once; existence is checked on the lower-cased path,
        # matching how the per-pdb contact files were written out.
        ccPath = help.loadPath + "CloseContacts/CloseContacts_" + pdb + ".csv"
        if os.path.isfile(ccPath.lower()):
            ccdata = pd.read_csv(ccPath)
            # chain + residue id gives a unique per-residue key within a pdb
            ccdata['ridA'] = ccdata['ridA'].astype(str)
            ccdata['ChRid'] = ccdata['chainA'] + ccdata['ridA']
            # count contact rows per residue
            cc = ccdata[['pdbCode', 'ChRid']].groupby('ChRid').agg('count')
            cc['ChRid'] = cc.index
            cc.columns = ['Contacts', 'ChRid']
            cc['pdbCode'] = pdb
            cc['CID'] = cc['pdbCode'] + cc['ChRid']  # unique across pdbs
            contactslist.append(cc)
    except Exception as e:
        # Was a bare `except:` that silently swallowed everything (even
        # KeyboardInterrupt); now report the cause and keep going.
        print('Error with', pdb, e)
# Raises ValueError if no contact files were found at all - that is a
# genuine configuration error worth surfacing.
ccall = pd.concat(contactslist)
ccall.to_csv(help.loadPath + "Contacts_List.csv", index=False)
print('Merging')
#printPath = 'C:/Dev/Github/BbkProject/PhDThesis/5.Chapters/1_Summer/Data/' geos = [ 'TAU', 'TAU+1', 'TAU-1', 'CA:C:O', 'O:C:N+1', 'CA-1:CA:CA+1', 'N:CA:O', 'CA:O:N+1', 'O-1:N:CA', 'O-1:C-1', 'C-1:N', 'N:CA', 'CA:C', 'C:O', 'C:N+1', 'N+1:CA+1', 'CA+1:C+1', 'C+1:O+1', 'PHI', 'PSI', 'OMEGA', 'CA-1:C-1:N:CA', 'CA-1:CA', 'CA:CA+1', 'C-1:C', 'C:C+1', 'N-1:N', 'N:N+1', 'CA-1:N', 'CA-1:O-1', 'O-1:N', 'C-1:CA', 'N:C', 'CA:O', 'CA:N+1', 'O:N+1', 'C:CA+1', 'N+1:C+1', 'O-1:CA', 'N:O', 'O:CA+1', 'N+1:O+1', 'N-1:O-1' ] title = 'Backbone Report' fileName = 'backbone' print('### CREATING csv files ###') pdbListIn = help.getPDBList() #we want to look at ALL adjusted without the bad list print("---- Making adjusted--------") dataPdbAdj = help.makeCsv('ADJUSTED', pdbListIn, geos, [], False) dataPdbAdj = help.applyRestrictions(dataPdbAdj) dataPdbAdj = help.embellishCsv(dataPdbAdj) # embellish with dssp - the dssp file was created ages ago from the linux laptop pdbdssp = pd.read_csv( 'C:/Dev/Github/BbkProject/PhDThesis/5.Chapters/1_Summer/CSV/CsvGeos_BEST_Set0DSSPALL.csv' ) pdbdata = pd.read_csv('../../PdbLists/Pdbs_100.csv') #embellish with dssp, resolution and software
# Script chunk: build the Laplacian-adjusted ('ADJUSTEDLAP') backbone csv set.
import pandas as pd
import Ch000_Functions as help
import matplotlib

print(matplotlib.__version__)

# Backbone geometry measures in PsuGeometry's atom:atom notation
# (-1/+1 = previous/next residue).
geos = [
    'TAU', 'TAU+1', 'TAU-1', 'CA:C:O', 'O:C:N+1', 'CA-1:CA:CA+1', 'N:CA:O',
    'CA:O:N+1', 'O-1:N:CA',
    'O-1:C-1', 'C-1:N', 'N:CA', 'CA:C', 'C:O', 'C:N+1', 'N+1:CA+1',
    'CA+1:C+1', 'C+1:O+1',
    'PHI', 'PSI', 'OMEGA', 'CA-1:C-1:N:CA',
    'CA-1:CA', 'CA:CA+1', 'C-1:C', 'C:C+1', 'N-1:N', 'N:N+1',
    'CA-1:N', 'CA-1:O-1', 'O-1:N', 'C-1:CA', 'N:C', 'CA:O', 'CA:N+1',
    'O:N+1', 'C:CA+1', 'N+1:C+1',
    'O-1:CA', 'N:O', 'O:CA+1', 'N+1:O+1', 'N-1:O-1'
]

print('### CREATING csv files ###')
pdbListIn = help.getPDBList()

print("---- Getting bad atom list--------")
badAtoms = help.getBadAtomsListFromFile(
)  # Get the bad atoms list we will use to reduce the list further

print("---- Making adjusted--------")
# Full rebuild from the adjusted (Laplacian) data, excluding bad atoms.
dataPdbAdj = help.makeCsv('ADJUSTEDLAP', pdbListIn, geos, badAtoms, False)
# Cached alternative to the rebuild above:
#dataPdbAdj = pd.read_csv(help.loadPath + "bblap_adjusted_a.csv")
dataPdbAdj.to_csv(help.loadPath + "bblap_adjusted_a.csv", index=False)
# NOTE(review): here embellish runs BEFORE applyRestrictions, the reverse of
# the bb_restricted script's order - confirm this is intentional.
dataPdbAdj = help.embellishCsv(dataPdbAdj)
dataPdbAdj = help.applyRestrictions(dataPdbAdj, True, True, True, True, False)
dataPdbAdj.to_csv(help.loadPath + "bblap_adjusted_b.csv", index=False)
print("---- Save to", help.loadPath + "bblap_adjusted.csv", '-------')
In this file we compare individual geos to see if any pdbs are problematic
'''
import pandas as pd
import Ch000_Functions as help
from PsuGeometry import GeoReport as psu

print('### LOADING csv files ###')
# The five backbone datasets produced by the earlier build scripts.
dataPdbUn = pd.read_csv(help.loadPath + "bb_unrestricted.csv")
dataPdbRes = pd.read_csv(help.loadPath + "bb_restricted.csv")
dataPdbCut = pd.read_csv(help.loadPath + "bb_reduced.csv")
dataPdbAdj = pd.read_csv(help.loadPath + "bbden_adjusted.csv")
dataPdbLap = pd.read_csv(help.loadPath + "bblap_adjusted.csv")

# ensure data is correctly restricted - the flag pattern tightens per set
# (unrestricted gets only the first flag; reduced gets all five, etc.)
dataPdbUn = help.applyRestrictions(dataPdbUn, True, False, False, False, False)
dataPdbRes = help.applyRestrictions(dataPdbRes, True, True, True, True, False)
dataPdbCut = help.applyRestrictions(dataPdbCut, True, True, True, True, True)
dataPdbAdj = help.applyRestrictions(dataPdbAdj, True, True, True, False, True)
dataPdbLap = help.applyRestrictions(dataPdbLap, True, True, True, False, True)

tag = ''
# Shall we also cut on the bfactor ratio?
BFactorFactor = True
if BFactorFactor:
    tag = '_bff'  # suffix marks outputs produced with the bfactor cut
    dataPdbRes = dataPdbRes.query('bfactorRatio <= 1.2')
    dataPdbCut = dataPdbCut.query('bfactorRatio <= 1.2')
    dataPdbAdj = dataPdbAdj.query('bfactorRatio <= 1.2')
    # NOTE(review): dataPdbLap is not cut here, unlike the sibling script
    # that cuts all four sets - confirm whether that is deliberate.

dsspList = dataPdbUn["dssp"].unique()
# (continuation of the geos list opened above this chunk)
'C-1:C', 'C:C+1', 'N-1:N', 'N:N+1',
'CA-1:N', 'CA-1:O-1', 'O-1:N', 'C-1:CA', 'N:C', 'CA:O', 'CA:N+1',
'O:N+1', 'C:CA+1', 'N+1:C+1',
'O-1:CA', 'N:O', 'O:CA+1', 'N+1:O+1', 'N-1:O-1'
]

title = 'Finding evidential residues'
fileName = 'evidential'

print('### LOADING csv files ###'
      )  # bit rubbish but we didn't change the object references with dssp
dataPdbCut = pd.read_csv(help.loadPath + "bb_reduced.csv")
dataPdbAdj = pd.read_csv(help.loadPath + "bbden_adjusted.csv")
dataPdbLap = pd.read_csv(help.loadPath + "bblap_adjusted.csv")

# Find restrictions
#Reduced On lap-diff <0.02
ev1DataPdbCut = help.applyRestrictions(dataPdbCut, True, True, True, True, True)
ev1DataPdbAdj = help.applyRestrictions(dataPdbAdj, True, True, True, False, True)
ev1DataPdbLap = help.applyRestrictions(dataPdbLap, True, True, True, False, True)

#Reduced on resolution - keep only sub-0.85 Angstrom structures
ev2DataPdbCut = ev1DataPdbCut.query('RES < 0.85')
ev2DataPdbAdj = ev1DataPdbAdj.query('RES < 0.85')
ev2DataPdbLap = ev1DataPdbLap.query('RES < 0.85')

print('### Creating scatter files ###')
# Report object for the scatter output; this call continues on the
# following lines (outside this chunk).
georep = psu.GeoReport([], "", "", help.printPath,
# (continuation of the geos list opened above this chunk)
'C-1:CA', 'N:C', 'CA:O', 'CA:N+1', 'O:N+1', 'C:CA+1', 'N+1:C+1',
'O-1:CA', 'N:O', 'O:CA+1', 'N+1:O+1', 'N-1:O-1'
]

title = 'Backbone Report'
fileName = 'backbone'

print('### LOADING csv files ###'
      )  # bit rubbish but we didn't change the object references with dssp
# The five backbone datasets produced by the earlier build scripts.
dataPdbUn = pd.read_csv(help.loadPath + "bb_unrestricted.csv")
dataPdbRes = pd.read_csv(help.loadPath + "bb_restricted.csv")
dataPdbCut = pd.read_csv(help.loadPath + "bb_reduced.csv")
dataPdbAdj = pd.read_csv(help.loadPath + "bbden_adjusted.csv")
dataPdbLap = pd.read_csv(help.loadPath + "bblap_adjusted.csv")

# ensure data is correctly restricted - flag pattern tightens per set
dataPdbUn = help.applyRestrictions(dataPdbUn, True, False, False, False, False)
dataPdbRes = help.applyRestrictions(dataPdbRes, True, True, True, True, False)
dataPdbCut = help.applyRestrictions(dataPdbCut, True, True, True, True, True)
dataPdbAdj = help.applyRestrictions(dataPdbAdj, True, True, True, False, True)
dataPdbLap = help.applyRestrictions(dataPdbLap, True, True, True, False, True)

tag = '_lap'
# Shall we also cut on the bfactor ratio?
BFactorFactor = False
if BFactorFactor:
    tag = '_bff'  # suffix marks outputs produced with the bfactor cut
    dataPdbCut = dataPdbCut.query('bfactorRatio <= 1.2')
    dataPdbRes = dataPdbRes.query('bfactorRatio <= 1.2')
    dataPdbAdj = dataPdbAdj.query('bfactorRatio <= 1.2')
    dataPdbLap = dataPdbLap.query('bfactorRatio <= 1.2')
createOrLoad = "CREATE" # CREATE or LOAD if createOrLoad == "LOAD": print('### CREATING csv files ###') pdbdata = pd.read_csv('../../PdbLists/Pdbs_70.csv') pdbListA = pdbdata['PDB'].tolist()[0:] pdbListIn = [] for pdb in pdbListA: import os.path if os.path.isfile((filesADJRoot + 'pdb' + pdb + '.ent').lower()): pdbListIn.append(pdb.lower()) else: print('No file:', (filesADJRoot + 'pdb' + pdb + '.ent').lower()) print(pdbListIn) print("---- Getting bad atom list--------") badAtoms = help.getBadAtomsListFromFile( loadPath, "badatoms.csv" ) # Get the bad atoms list we will use to reduce the list further print("---- Making unrestricted--------") dataPdbUn = help.makeCsv('PDB', pdbListIn, geos, [], True) print("---- Making unrestricted--------") dataPdbRes = help.makeCsv('PDB', pdbListIn, geos, [], False) dataPdbRes = help.applyRestrictions(dataPdbRes) print("---- Making reduced--------") dataPdbCut = help.makeCsv('PDB', pdbListIn, geos, badAtoms, False) dataPdbCut = help.applyRestrictions(dataPdbCut) print("---- Making adjusted--------") dataPdbAdj = help.makeCsv('ADJUSTED', pdbListIn, geos, badAtoms, False) dataPdbAdj = help.applyRestrictions(dataPdbAdj) # embellish with dssp - the dssp file was created ages ago from the linux laptop pdbdssp = pd.read_csv( 'C:/Dev/Github/BbkProject/PhDThesis/5.Chapters/1_Summer/CSV/CsvGeos_BEST_Set0DSSPALL.csv'
#loadPath = 'C:/Dev/Github/BbkProject/PhDThesis/5.Chapters/1_Summer/CSV/' #printPath = 'C:/Dev/Github/BbkProject/PhDThesis/5.Chapters/1_Summer/Data/' title = 'Hydrogen bonding Report' fileName = 'hydrogenbonding' print('### LOADING csv files ###' ) # bit rubbish but we didn;t change the object references with dssp #dataPdbUn = pd.read_csv(help.loadPath + "hb_unrestricted.csv") #dataPdbRes = pd.read_csv(help.loadPath + "hb_restricted.csv") dataPdbCut = pd.read_csv(help.loadPath + "hb_reduced.csv") dataPdbAdj = pd.read_csv(help.loadPath + "hb_adjusted.csv") # ensure data is correctly restricted #dataPdbUn = help.applyRestrictions(dataPdbUn,True,False,False,False) dataPdbCut = help.applyRestrictions(dataPdbCut, True, True, True, True) #dataPdbRes = help.applyRestrictions(dataPdbRes,True,True,True,True) dataPdbAdj = help.applyRestrictions(dataPdbAdj, True, True, True, False) tag = '' #SHale we cut on bfactor factor? BFactorFactor = False if BFactorFactor: tag = '_bff' dataPdbCut = dataPdbCut.query('bfactorRatio <= 1.2') #dataPdbRes = dataPdbRes.query('bfactorRatio <= 1.2') dataPdbAdj = dataPdbAdj.query('bfactorRatio <= 1.2') print('### Creating scatter files ###') ''' geos = ['TAU','TAU+1',
# Load the per-pdb describe() summaries for the three treated datasets
# (the unrestricted/restricted summaries are loaded further up the file).
descdataPdbCut = pd.read_csv(help.loadPath + "DescribeGeos_Cut.csv")
descdataPdbAdj = pd.read_csv(help.loadPath + "DescribeGeos_AdjustedMax.csv")
descdataPdbLap = pd.read_csv(help.loadPath + "DescribeGeos_AdjustedLap.csv")

print('### Creating scatter files ###')

# Each entry is one plot spec for help.trioReports: either a single summary
# column, or three columns plus a trailing flag.
geoTriosA = [
    ['C:O mean'], ['C:O 50%'],
    ['N:CA mean'], ['N:CA 50%'],
    ['CA:C mean'], ['CA:C 50%'],
    ['C:N+1 mean'], ['C:N+1 50%'],
    ['TAU mean'], ['TAU 50%'],
    ['C:O mean', 'C:O count', 'C:O 50%', False],
    ['C:O mean', 'N:CA mean', 'CA:C mean', False],
]

# Dataset label / summary-frame pairs, in presentation order.
namesCsvs = [
    ["Unrestricted", descdataPdbUn],
    ["Restricted", descdataPdbRes],
    ["Reduced", descdataPdbCut],
    ["Density Adjusted", descdataPdbAdj],
    ["Laplacian Adjusted", descdataPdbLap],
]

help.trioReports(namesCsvs, geoTriosA, title, help.printPath, fileName + "")
# Script: build the "bb_unrestricted" backbone-geometry csv set.
import Ch000_Functions as help
import matplotlib

print(matplotlib.__version__)

# Backbone geometry measures in PsuGeometry's atom:atom notation
# (-1/+1 = previous/next residue): angles, dihedrals and distances.
geos = [
    'TAU', 'TAU+1', 'TAU-1', 'CA:C:O', 'O:C:N+1', 'CA-1:CA:CA+1',
    'N:CA:O', 'CA:O:N+1', 'O-1:N:CA',
    'O-1:C-1', 'C-1:N', 'N:CA', 'CA:C', 'C:O', 'C:N+1',
    'N+1:CA+1', 'CA+1:C+1', 'C+1:O+1',
    'PHI', 'PSI', 'OMEGA', 'CA-1:C-1:N:CA',
    'CA-1:CA', 'CA:CA+1', 'C-1:C', 'C:C+1', 'N-1:N', 'N:N+1',
    'CA-1:N', 'CA-1:O-1', 'O-1:N', 'C-1:CA', 'N:C', 'CA:O',
    'CA:N+1', 'O:N+1', 'C:CA+1', 'N+1:C+1',
    'O-1:CA', 'N:O', 'O:CA+1', 'N+1:O+1', 'N-1:O-1',
]


def _save(df, name):
    # Write an intermediate/final csv into the shared pipeline folder.
    df.to_csv(help.loadPath + name, index=False)


print('### CREATING cut csv file ###')
pdbListIn = help.getPDBList()

print("---- Getting bad atom list--------")
# Loaded for parity with the sibling scripts; the unrestricted build below
# intentionally passes an empty exclusion list instead.
badAtoms = help.getBadAtomsListFromFile()

print("---- Making unrestricted--------")
dataPdbUn = help.makeCsv('PDB', pdbListIn, geos, [], True)
# Cached alternative to the rebuild above:
#dataPdbUn = pd.read_csv(help.loadPath + "bb_unrestricted_a.csv")
_save(dataPdbUn, "bb_unrestricted_a.csv")
dataPdbUn = help.applyRestrictions(dataPdbUn, True, False, False, False, False)
_save(dataPdbUn, "bb_unrestricted_b.csv")
dataPdbUn = help.embellishCsv(dataPdbUn)
print("---- Save to", help.loadPath + "bb_unrestricted.csv", '-------')
_save(dataPdbUn, "bb_unrestricted.csv")