Ejemplo n.º 1
0
def calculateAvDlossPerDose(PDBmulti, n):
    """Summarise the Dloss metric per residue/nucleotide type at each dose.

    Atoms are grouped by ``atom.basetype``. For every group the per-dose
    mean, standard deviation, n-th percentile and confidence interval (as
    returned by ``mean_confidence_interval``; the original comment describes
    it as a 95% interval) of ``atom.mindensity`` are computed, together with
    the group mean minus the whole-structure mean. Only the first 9 entries
    of each ``mindensity`` series are used.

    The results are printed to stdout and written to four CSV files in the
    current working directory (one or two rows per residue type, rows in
    sorted residue-type order):

    * ``avDlossPerDataset_stdError.csv``        - mean row, then std row
    * ``avDlossPerDataset_confIntError.csv``    - mean row, then conf-int row
    * ``<n>thTile_DlossPerDataset.csv``         - n-th percentile row
    * ``avDlossPerDataset_DlossNormalised.csv`` - mean minus overall mean

    Args:
        PDBmulti: iterable of atom objects exposing ``basetype`` (hashable)
            and a sliceable ``mindensity`` series.
        n: percentile (passed straight to ``np.percentile``).

    Raises:
        ValueError: if ``PDBmulti`` yields no atoms.
    """
    # maximum number of leading dose points analysed per atom
    maxDoses = 9

    # single pass over the atoms: keep every (truncated) series for the
    # overall average and group the same series by residue type
    allSeries = []
    atomsByRes = {}
    for atom in PDBmulti:
        series = atom.mindensity[0:maxDoses]
        allSeries.append(series)
        atomsByRes.setdefault(atom.basetype, []).append(series)

    if not allSeries:
        raise ValueError("PDBmulti contains no atoms; nothing to average")

    # report residue types in sorted order so output is reproducible
    uniq_resis = sorted(atomsByRes)

    # calculate average Dloss over structure for each dose
    avDlossOverall = np.mean(allSeries, 0)
    print("Average Dloss for structure per dose as following:")
    print(",".join([str(val) for val in avDlossOverall]))

    # number of dose points actually available (at most maxDoses); avoids
    # indexing past the end of shorter series
    numDoses = len(avDlossOverall)

    # for each residue type, calculate the per-dose statistics
    avDlossByRes = {}
    stdDlossByRes = {}
    nthTileDlossByRes = {}
    confIntDlossByRes = {}
    avDlossByResNorm = {}
    for res in uniq_resis:
        resSeries = atomsByRes[res]
        avDloss = np.mean(resSeries, 0)

        avDlossByRes[res] = avDloss
        stdDlossByRes[res] = np.std(resSeries, 0)
        nthTileDlossByRes[res] = np.percentile(resSeries, n, 0)
        # residue-type mean relative to the whole-structure mean
        avDlossByResNorm[res] = avDloss - avDlossOverall
        # confidence interval is computed dose by dose over the group
        confIntDlossByRes[res] = [
            mean_confidence_interval([val[i] for val in resSeries])
            for i in range(numDoses)
        ]

    def asStrings(values):
        # stringify each per-dose value for joining
        return [str(element) for element in values]

    def csvRow(label, fields):
        # one CSV line: residue type followed by the per-dose fields
        return "{},{}\n".format(label, ",".join(fields))

    # 'with' guarantees all four files are closed even if a write fails
    with open("avDlossPerDataset_stdError.csv", "w") as csvOutput_stdError, \
            open("avDlossPerDataset_confIntError.csv", "w") as csvOutput_ConfIntError, \
            open("{}thTile_DlossPerDataset.csv".format(n), "w") as csvOutput_nthTile, \
            open("avDlossPerDataset_DlossNormalised.csv", "w") as csvOutput_DlossNormalised:

        for res in uniq_resis:
            avDlossList = asStrings(avDlossByRes[res])
            stdDlossList = asStrings(stdDlossByRes[res])
            nthTileDlossList = asStrings(nthTileDlossByRes[res])
            confIntDlossList = asStrings(confIntDlossByRes[res])
            avDlossByResNormList = asStrings(avDlossByResNorm[res])

            print("\n***\n{}: {}".format(res, "-->".join(avDlossList)))
            print("{}: {}".format(res, "-->".join(stdDlossList)))
            print("{}: {}".format(res, "-->".join(confIntDlossList)))

            csvOutput_stdError.write(csvRow(res, avDlossList))
            csvOutput_stdError.write(csvRow(res, stdDlossList))

            csvOutput_nthTile.write(csvRow(res, nthTileDlossList))

            csvOutput_DlossNormalised.write(csvRow(res, avDlossByResNormList))

            csvOutput_ConfIntError.write(csvRow(res, avDlossList))
            csvOutput_ConfIntError.write(csvRow(res, confIntDlossList))
Ejemplo n.º 2
0
	def densMetricErrorbarGraphs(self,auto,where,metricTypes,confInt):
		"""Plot density metrics against dataset number (or dose) with error bars.

		For the set of equivalent atoms selected by ``self.getEquivalentAtoms``
		the mean of each density metric is plotted at every dataset, one
		subplot per (metric, normalisation) pair on a 2x3 grid, and the figure
		is saved as a PNG inside ``where``.

		Atoms are treated as protein when the first equivalent atom reports
		'unbound protein' or 'bound protein' from ``boundOrUnbound()``; they
		are then split into unbound and bound groups. Otherwise they are split
		into two RNA groups (labelled G1 and G3) according to whether
		``atom.residuenum % 5`` equals ``self.residueNum``.

		Args:
			auto: passed to ``self.getEquivalentAtoms``. If true, the metric
				set is chosen by ``metricTypes``; otherwise the user is
				prompted on the command line to pick set 1 or 2.
			where: directory to save the figure in (created if missing).
			metricTypes: 1, 2, 3 or 4; only used when ``auto`` is true.
			confInt: if true, error bars for protein atoms are the values
				returned by ``mean_confidence_interval`` (95% confidence
				intervals per the original notes), otherwise 1 SD. RNA atoms
				always use 1 SD.

		Raises:
			ValueError: if ``auto`` is true and ``metricTypes`` is not 1-4.
		"""
		# get equivalent atoms of specified type (command line input to specify)
		self.getEquivalentAtoms(auto)

		# determine whether dealing with protein or RNA atoms
		protein = self.equivAtoms[0].boundOrUnbound() in ('unbound protein','bound protein')

		sns.set(style="white", context="talk")
		f = plt.figure(figsize=(16, 8))

		# define x range here (damage set numbers or doses if specified)
		if self.doseList == []:
			x = list(range(2,len(self.equivAtoms[0].meandensity)+2))[0:10]
			x_label = 'Damage set'
		else:
			x = self.doseList
			x_label = "Dose (MGy)"

		# available metric sets: key -> (density metrics, normalisation types)
		metricSets = {
			1: (['loss','net','mean','gain'], ['Standard','Calpha normalised']),
			2: (['loss','net','mean','gain','bfactor','bdamage'], ['Standard']),
			3: (['|loss|','loss','net','mean','gain','bfactor'], ['Standard']),
			4: (['max-simple','median-simple','gain','median','mean','loss'], ['Standard']),
		}

		if auto:
			if metricTypes not in metricSets:
				# previously this fell through and failed later with a NameError
				raise ValueError('metricTypes must be 1, 2, 3 or 4 when auto is True, got {!r}'.format(metricTypes))
			densMets, normTypes = metricSets[metricTypes]
		else:
			print('Which metrics would you like to plot...')
			userInput = raw_input("1 or 2?: ")
			# anything other than "1" selects set 2
			densMets, normTypes = metricSets[1 if userInput == '1' else 2]

		# split the equivalent atoms into the two groups to compare, once,
		# rather than re-filtering for every dataset and every statistic.
		# Each entry is (legend label, line colour, atoms in group).
		if protein:
			groups = [
				('Non-bound', '#99ccff', [a for a in self.equivAtoms if a.boundOrUnbound() == 'unbound protein']),
				('Bound', '#f47835', [a for a in self.equivAtoms if a.boundOrUnbound() == 'bound protein']),
			]
		else:
			groups = [
				('G1', 'r', [a for a in self.equivAtoms if a.residuenum%5 == self.residueNum]),
				('G3', 'g', [a for a in self.equivAtoms if a.residuenum%5 != self.residueNum]),
			]

		# confidence-interval error bars are only produced for protein atoms
		useConfInt = bool(protein and confInt)

		i = 0
		for densMet in densMets:
			for normType in normTypes:
				# no Calpha-normalised variant is plotted for Dmean / Dgain
				if densMet in ('mean','gain') and normType == 'Calpha normalised':
					continue
				i += 1

				ax = plt.subplot(2,3,i)
				ax.set_xlim([0, 29])

				for label, colour, atoms in groups:
					means = []
					errors = []
					for j in range(len(x)):
						vals = [a.densMetric[densMet][normType]['values'][j] for a in atoms]
						means.append(np.mean(vals))
						if useConfInt:
							errors.append(mean_confidence_interval(vals))
						else:
							errors.append(np.std(vals))
					ax.errorbar(x,means,yerr=errors,fmt='-o',capthick=2,color=colour,label=label)

				ax.legend(loc='best')
				ax.set_xlabel(x_label)
				if normType == 'Calpha normalised':
					ax.set_ylabel('Normalised D{} change'.format(densMet))
				else:
					ax.set_ylabel('{} D{} change'.format(normType,densMet))

				# NOTE: a Hotelling T-squared test (self.hotellingTsquareTest)
				# to distinguish bound from unbound TRAP rings used to be run
				# here for protein atoms; it is currently disabled.

		f.subplots_adjust(top=0.90,hspace=0.4,wspace=0.5)

		# the title and file name are labelled with the last equivalent atom
		# (this is the atom the old code picked up implicitly from a leaked
		# list-comprehension variable)
		refAtom = self.equivAtoms[-1]
		f.suptitle('damage metrics vs damage set: {} {} {}'.format(refAtom.basetype,refAtom.residuenum,refAtom.atomtype),fontsize=20)

		# check if directory exists to save graphs to and make if not:
		if not os.path.exists(where):
			os.makedirs(where)

		# save graphs to directory specified by 'where'; the suffix follows
		# the confInt flag (note RNA plots still use SD error bars)
		if confInt:
			suffix = '95confIntErrorbars'
		else:
			suffix = 'SDerrorbars'
		f.savefig('{}/6DamageSubplots_{}_{}_{}_{}.png'.format(where,refAtom.basetype,refAtom.residuenum,refAtom.atomtype,suffix))