cutoff = 0 reloadPDB = False reloadCSV = False extraTag = '3_50' pdbs = help.getList('70', cutoff, pdbSet + '_ADJ') #pdbs = ['1ucs','4zm7'] realCsv, badRealCsv, occRealCsv = help.getMaximaDiffs(pdbSet, pdbs, False) badAtoms = help.getBadList(realCsv, badRealCsv, occRealCsv, 0.05) if not reloadCSV: dataPdbUn = help.getCsv('UNRESTRICTED', pdbs, [], [], reloadPDB, reloadCSV, aa='ALL', includeCis=False, allAtoms=True, bFactorFactor=-1, cutoff=cutoff) dataPdb = help.getCsv('RESTRICTED', pdbs, [], [], reloadPDB, reloadCSV, aa='ALL', includeCis=False, allAtoms=False, bFactorFactor=1.3, cutoff=cutoff) dataPdbCut = help.getCsv('RESTRICTED_CUT', pdbs, [], [],
def createGeosFile(pdbSet, pdbs, badAtoms, cutOff, tag):
    """Write the six geometry CSV files for one PDB set.

    For every entry in the geometry table below, ``help.getCsv`` is called
    and the object it returns is saved with ``to_csv(..., index=False)`` as
    ``CsvGeos_BEST_<label><pdbSet><tag>.csv`` in the ``EvidencedSet/DataA``
    folder underneath ``help.rootPath``.

    Args:
        pdbSet: Name of the data set. ``'UNRESTRICTED'`` and ``'RESTRICTED'``
            are both loaded through the ``'PDB'`` source; any other name is
            handed to ``help.getCsv`` unchanged. ``'UNRESTRICTED'`` also
            switches to ``allAtoms=True`` and ``bFactorFactor=-1`` (the
            defaults are ``False`` and ``1.3``).
        pdbs: Forwarded to ``help.getCsv`` as its second argument.
        badAtoms: Forwarded to ``help.getCsv`` as its fourth argument.
        cutOff: Forwarded to ``help.getCsv`` as ``cutoff``.
        tag: Text appended to each output file name just before ``.csv``.

    Returns:
        None. The results are the CSV files and progress lines on stdout.
    """
    print('----------start create geos csv----------')
    startx = time.time()
    printPath = help.rootPath + '/BbkProject/PhDThesis/0.Papers/3.DefensibleGeometry/EvidencedSet/DataA/'
    print('Running CreateGeosFile on', pdbSet)

    # Only the UNRESTRICTED set lifts the atom and b-factor restrictions.
    unrestricted = pdbSet == 'UNRESTRICTED'
    allAtoms = unrestricted
    bFactorFactor = -1 if unrestricted else 1.3

    # Both named sets are read from the 'PDB' source; other sets use their
    # own name. The choice does not depend on the geometry set, so make it once.
    sourceSet = 'PDB' if pdbSet in ('UNRESTRICTED', 'RESTRICTED') else pdbSet

    # (file label, geometry names, fifth positional flag for help.getCsv).
    # NOTE(review): the fifth/sixth positional flags look like
    # reloadPDB/reloadCSV judging by other call sites - confirm against
    # help.getCsv. Only the first set passes True for the fifth flag.
    # Built inside the function so each call hands out fresh lists.
    geoSets = (
        ('Set1BONDALL_', ['N:CA', 'CA:C', 'C:O', 'C-1:N', 'C:N+1'], True),
        # NOTE(review): 'CA:C:N+1' is listed twice here, exactly as in the
        # original code; kept so the produced columns do not change.
        ('Set2ANGSALL_', ['TAU', 'C-1:N:CA', 'CA:C:N+1', 'CA:C:O', 'O:C:N+1', 'CA:C:N+1'], False),
        ('Set3DIHSALL_', ['PHI', 'PSI', 'OMEGA', 'CA-1:C-1:N:CA'], False),
        ('Set4DISTALL_', ['N:N+1', 'N:C'], False),
        ('Set5HBALL_', ['N:O-2', 'C:O-2', 'N:CA:C:O-2', 'N:CA:N+1:O-2'], False),
        ('Set6HBALL_', ['N:O-3', 'C:O-3', 'N:CA:C:O-3', 'N:CA:N+1:O-3'], False),
    )

    for number, (label, geos, fifthFlag) in enumerate(geoSets, start=1):
        print('Create csv', number, pdbSet)
        csv = help.getCsv(sourceSet, pdbs, geos, badAtoms, fifthFlag, True,
                          aa='ALL', includeCis=False, allAtoms=allAtoms,
                          bFactorFactor=bFactorFactor, cutoff=cutOff)
        csv.to_csv(printPath + 'CsvGeos_BEST_' + label + pdbSet + tag + '.csv', index=False)

    print('----------Finished----------')
    endx = time.time()
    time_diff = endx - startx
    timestring = str(int(time_diff / 60)) + "m " + str(int(time_diff % 60)) + "s"
    print(timestring)
#create.createGeosFile(pdbSet,0) # B. Merge the csv files together for comparative analaysis #merge.mergeCsvs(pdbSet) ''' Engh&Huber Comparisons ''' # C. Compare distributions with E&H values #compare.EHCompare(pdbSet) ''' SCATTERS AND HISTOGRAMS ''' # I. Scatters geos = ['C:N+1', 'N:N+1','TAU', 'PSI', 'PHI', 'C:O','O:C:N+1','CA:C:N+1','CA:C:O'] pdbs = help.getList('TOP20',0) data = help.getCsv(pdbSet, pdbs,geos,True,True,aa='ALL',includeCis=False,allAtoms=False, bFactorFactor=1.3,cutoff=0) print(data) geoTrios = [['PHI', 'PSI', 'TAU'], ['PSI', 'N:N+1', 'TAU'], ['C:O', 'C:N+1', 'PHI'], ['C:O', 'C:N+1', 'PSI'], ['C:O', 'C:N+1', 'TAU'], ['PSI', 'C:N+1', 'TAU'], ['PSI', 'C:N+1', 'PHI'], ['PSI', 'C:O', 'TAU'], ['PSI', 'C:O', 'PHI'], ['PHI', 'C:N+1', 'TAU'], ['PHI', 'C:N+1', 'PSI'], ['PHI', 'C:O', 'TAU'], ['PHI', 'C:O', 'PSI'], ['C:N+1', 'C:O', 'O:C:N+1'],
['N:CA'], ['CA:C'], ['C:O'], ['C:N+1'], ['TAU'], ['CA:C:N+1'], ['CA:C:O'], ['O:C:N+1'], ['C-1:N:CA'], ] dataPdbUn = help.getCsv('PDB', pdbs, geos, [], True, True, aa='ALL', includeCis=False, allAtoms=True, bFactorFactor=-1, cutoff=0) dataPdb = help.getCsv('PDB', pdbs, geos, [], True, True, aa='ALL', includeCis=False, allAtoms=False, bFactorFactor=1.3, cutoff=0) dataPdbCut = help.getCsv('PDB',
scatter.scatterReports(pdbSet,data,geoTrios,pdbSet+'_CYS_tst') ''' # J. Stats compare stats.statsCompare(pdbSet, 'RESTRICTED') #stats.statsCompare(pdbSet, 'UNRESTRICTED') # K. Stats summary # geos = ['CA:CB', 'CB:SG', 'N:CA:CB', 'CB:CA:C', 'CA:CB:SG', 'SG:{SG}'] # summary.statsSummary(pdbSet, data, geos, '_DISULFIDE') geos = [ 'N:CA', 'CA:C', 'C:O', 'C:N+1', 'TAU', 'CA:C:N+1', 'CA:C:O', 'O:C:N+1', 'C-1:N:CA' ] data = help.getCsv(pdbSet, geos, False, True, 'ALL') summary.statsSummary(pdbSet, data, geos, 'EH') for pdbSet in pdbSets: ''' ELECTRON DENSITY ''' # F. Create density slices for the rejected density # if pdbSet not in ['RESTRICTED', 'UNRESTRICTED']: # bad.createBadDensitySlices(pdbSet, 'CA', 'N', 'C') # bad.createBadDensitySlices(pdbSet, 'C', 'CA', 'O') # G. Create density slcies for the good outliers # geoset = [] # geoset.append([['CA', 'N', 'C'],['N:CA', 'CA:C', 'TAU']]) # geoset.append([['C', 'CA', 'O'], ['C:O', 'CA:C:O']])