# Example no. 1
0
#import ljh_dose_analysis_tool as dose

#plot tool
import pylab as pl
import matplotlib.pyplot as plt

# cell line and time point are substituted into the gctx path below
cellLine = 'MCF7'
timeP = '24H'
gctfile = '/xchip/obelix/pod/brew/pc/ASG001_%s_%s/by_pert_id_pert_dose/ASG001_%s_%s_COMPZ.MODZ_SCORE_LM_n85x978.gctx' % (cellLine,timeP,cellLine,timeP)

#make a gct object (read once; it is reused for the column ids further down)
db = gct.GCT()
db.read(gctfile)

#make sc object for signature strength
sco = sc.SC()
sco.add_sc_from_gctx_meta(gctfile)
ss = sco.s

#make signature for each dose
work_dir = '/xchip/cogs/hogstrom/analysis/scratch'
#work_dir = os.getcwd() #set work_dir var as pwd
fup = '/xchip/cogs/hogstrom/analysis/scratch/tmp_up_list.gmt'
fdn = '/xchip/cogs/hogstrom/analysis/scratch/tmp_dn_list.gmt'
# truncate (or create) both list files; close the handles immediately so no
# open file object is left for the garbage collector to clean up
open(fup,'w').close()
open(fdn,'w').close()
n_edge = 50
# column ids of the gct object loaded above; the second ':'-separated field
# of each id is kept as pertIDs
cids = db.get_cids()
pertIDs = [x.split(':')[1] for x in cids]
# Example no. 2
0
def build_SC(args,work_dir):
	'''
	builds SC plots and the SC summary table for the dose analysis

	args: object whose ``res`` attribute is the gctx path passed to
		sc.SC.add_sc_from_gctx_meta
	work_dir: directory that receives one <pert>_SC.png per non-control
		perturbagen and the tab-delimited SC_summary.txt

	every entry of sco.pid is expected to look like
	<prefix>:<pert_id>:<dose>::<pert_desc>:::<rest>. Signatures are grouped
	by pert_desc; the group named 'DMSO' is treated as control and skipped.

	returns nothing; the results are the files written to work_dir
	'''
	# instantiate a progress object
	prog = progress.DeterminateProgressBar('Dose Analysis')

	# make an SC object from the given gctx file
	sco = sc.SC()
	sco.add_sc_from_gctx_meta(args.res, verbose=False)
	sco.set_thresh_by_specificity(0.8)

	# per-signature fields parsed from sco.pid (same order as sco.pid)
	perts = [x.split(':::')[0].split('::')[1] for x in sco.pid] #perts is pert_desc
	dose = [float(x.split('::')[0].split(':')[2]) for x in sco.pid]
	desc = [x.split('::')[1].split(':::')[0] for x in sco.pid]

	# unique non-control perturbagens; sorted so that plots and table rows
	# are produced in a reproducible order instead of set order
	unique_perts = set(perts)
	unique_perts.discard('DMSO')
	unique_perts = sorted(unique_perts)
	num_perts = len(unique_perts)

	# write sc plots to file
	for i,unique_pert in enumerate(unique_perts):
		prog.update('making SC plots',i,num_perts)
		plot_name = '_'.join([unique_pert.replace(':','_'),'SC.png'])
		sco.plot(include=unique_pert,size=dose,title=unique_pert,pos_con=['None'],out=os.path.join(work_dir,plot_name))

	# write SC summary table
	with open(os.path.join(work_dir,'SC_summary.txt'),'w') as f:
		# NOTE(review): the 'pert_id' column is filled with the pert_desc
		# group name, not the pert_id field of the pid; kept as is because
		# other code may read this table - confirm the intended content
		headers = ['pert_id','pert_desc','base_dose','base_ss',
				   'base_cc','best_dose','best_ss','best_cc',
				   'best_ss_lfc','best_cc_lfc','best_sc_lfc_distance']
		f.write('\t'.join(headers) + '\n')
		for i,unique_pert in enumerate(unique_perts):
			prog.update('making SC summary',i,num_perts)

			# exact match only: a substring test would also pick up the
			# signatures of any perturbagen whose name contains this one
			pert_inds = [j for j,p in enumerate(perts) if p == unique_pert]
			pert_dose = [dose[j] for j in pert_inds]
			pert_desc = desc[pert_inds[0]]
			pert_ss = [sco.s[j] for j in pert_inds]
			# -666 in sco.c is replaced by 0 (presumably a missing-value
			# marker - confirm against the sc module)
			pert_cc = [0 if sco.c[j] == -666 else sco.c[j] for j in pert_inds]

			# the lowest dose is the baseline; argmin returns the first
			# minimum, the same entry list.index(min) would find
			base_ind = int(numpy.argmin(pert_dose))
			base_dose = pert_dose[base_ind]
			base_ss = pert_ss[base_ind]
			base_cc = pert_cc[base_ind]

			# log fold change of ss and of (cc + 1) relative to the baseline,
			# and the euclidean distance of each dose in that 2-d space
			ss_ratio = numpy.log(numpy.array(pert_ss)/base_ss)
			cc_ratio = numpy.log((numpy.array(pert_cc)+1)/(base_cc +1))
			sc_distance = (ss_ratio**2 + cc_ratio**2)**.5

			# "best" dose is the one furthest from the baseline
			best_ind = int(numpy.argmax(sc_distance))
			best_dose = pert_dose[best_ind]
			best_ss = pert_ss[best_ind]
			best_cc = pert_cc[best_ind]
			best_ss_ratio = ss_ratio[best_ind]
			best_cc_ratio = cc_ratio[best_ind]
			best_sc_distance = float(sc_distance[best_ind])

			data = [unique_pert,pert_desc,base_dose,base_ss,
					base_cc,best_dose,best_ss,best_cc,
					best_ss_ratio,best_cc_ratio,best_sc_distance]
			f.write('\t'.join([str(v) for v in data]) + '\n')