Python GenomeWideResult Examples

Programming Language: Python

Namespace/Package Name: SNP

Class/Type: GenomeWideResult

Examples at hotexamples.com: 2

Python GenomeWideResult - 2 examples found. These are the top-rated real-world Python examples of SNP.GenomeWideResult extracted from open source projects. You can rate examples to help us improve the quality of the examples.

Frequently Used Methods

Show Hide

add_one_data_obj(2)

array_id(1)

data_obj_id2index(1)

data_obj_ls(1)

max_value(1)

min_value(1)

name(1)

Example #1

Show file

def fetchIntensityInGWAWithinRBDictGivenArrayIDFromTilingIntensity(tilingIntensityData, array_id, rbDict, gwr_name=None,\
																min_reciprocal_overlap=0.6):
	"""
	2010-3-18
		Gather the probe intensities of one array for every probe whose
		single-position segment key is found in rbDict.
		
		tilingIntensityData is of type SNPData. This function reads its
			col_id2col_index (array id -> column index), row_id_ls
			(one (chromosome, position) pair per row) and data_matrix.
		array_id selects the column of data_matrix to read.
		rbDict is any container answering "key in rbDict" for
			CNVSegmentBinarySearchTreeKey objects.
		gwr_name becomes the name of the returned GenomeWideResult and is
			also stamped on every data object.
		min_reciprocal_overlap is passed through to each segment key.
		
		Returns a GenomeWideResult, or None if array_id has no column in
		tilingIntensityData.
	"""
	sys.stderr.write("Getting intensity data within the chosen segments for array %s ..."%array_id)
	column = tilingIntensityData.col_id2col_index.get(array_id)
	if column is None:
		# the array is not part of this intensity matrix
		sys.stderr.write("Error: No tiling intensity.\n")
		return None
	
	from SNP import GenomeWideResult, DataObject
	
	result = GenomeWideResult(name=gwr_name)
	# custom attribute so callers can tell which array the result belongs to
	result.array_id = array_id
	# every data object carries the identity of the result that owns it
	result_id = id(result)
	
	for row_index, row_id in enumerate(tilingIntensityData.row_id_ls):
		chromosome, position = map(int, row_id)
		segment_key = CNVSegmentBinarySearchTreeKey(chromosome=chromosome, span_ls=[position],\
													min_reciprocal_overlap=min_reciprocal_overlap)
		if segment_key not in rbDict:
			continue
		intensity = tilingIntensityData.data_matrix[row_index][column]
		probe = DataObject(chromosome=chromosome, position=position, value=intensity)
		probe.comment = ''
		probe.genome_wide_result_name = gwr_name
		probe.genome_wide_result_id = result_id
		result.add_one_data_obj(probe)
	sys.stderr.write(" %s probes. Done.\n"%(len(result.data_obj_ls)))
	return result

Example #2

Show file

def getCNVDataFromFileInGWA(input_fname_ls, array_id, max_amp=-0.33, min_amp=-0.33, min_size=50, min_no_of_probes=None, \
						report=False):
	"""
	2009-10-31
		get deletion (below max_amp) or duplication (above min_amp) from files (output by RunGADA.py)
	
	Reads tab-delimited segment files and collects the segments that belong to
	one array into a GenomeWideResult.
	
	Arguments:
		input_fname_ls: filenames handed to fileinput.input(). The first line
			read is taken as the header; any later line containing "array_id"
			is skipped (treated as a repeated header).
		array_id: only rows whose 'array_id' column, parsed as int, equals
			this value are kept.
		max_amp, min_amp: a segment is kept when amplitude<=max_amp or
			amplitude>=min_amp. With the defaults (both -0.33) every non-NaN
			amplitude passes.
		min_size: segments whose padded length is below this are skipped;
			None disables the filter.
		min_no_of_probes: segments with fewer probes ('length' column) are
			skipped; None disables the filter.
		report: if true, progress counters are written to stderr every 10000
			data lines (lines dropped by the two filters above do not reach
			the progress check).
	
	Required columns: array_id, start_probe, end_probe, length, amplitude.
	Optional columns: ecotype_id (falls back to array_id), start_probe_id and
	end_probe_id (fall back to start_probe / end_probe).
	
	Returns the GenomeWideResult. Its name is prefixed with the ecotype id of
	the first matching row, an ecotype_id attribute is set, max_value is raised
	to at least 3 and min_value lowered to at most -1.
	"""
	sys.stderr.write("Getting CNV calls for array %s, min_size %s, min_no_of_probes %s from %s ..."%\
					(array_id, min_size, min_no_of_probes, repr(input_fname_ls)))
	
	gwr_name = "(a-id %s)"%(array_id)
	gwr = GenomeWideResult(name=gwr_name)
	# Give this instance its own containers (original note: "list and dictionary
	# are crazy references"); presumably the class-level defaults are shared
	# between instances -- confirm against GenomeWideResult.
	gwr.data_obj_ls = []
	gwr.data_obj_id2index = {}
	genome_wide_result_id = id(gwr)
	
	counter = 0	# data lines seen, any array
	real_counter = 0	# segments actually added to gwr
	no_of_segments = 0	# rows belonging to array_id, before filtering
	ecotype_id = None
	input_handler = fileinput.input(input_fname_ls)
	try:
		header = input_handler.readline().strip().split('\t')
		col_name2index = getColName2IndexFromHeader(header)
		for line in input_handler:
			if line.find("array_id")!=-1:	# header line of a subsequent file
				continue
			line = line.strip()
			row = line.split('\t')
			cnv_array_id = int(row[col_name2index['array_id']])
			cnv_ecotype_id = int(row[col_name2index.get('ecotype_id', col_name2index['array_id'])])
			counter += 1
			if cnv_array_id==array_id:
				no_of_segments += 1
				if ecotype_id is None:
					ecotype_id = cnv_ecotype_id
				# probes are encoded as "chr_pos"; build real lists so they can be
				# indexed (a bare map() is a one-shot iterator on Python 3)
				start_probe = [int(field) for field in row[col_name2index['start_probe']].split('_')]
				start_probe_id = row[col_name2index.get('start_probe_id', col_name2index['start_probe'])]
				
				stop_probe = [int(field) for field in row[col_name2index['end_probe']].split('_')]
				end_probe_id = row[col_name2index.get('end_probe_id', col_name2index['end_probe'])]
				
				no_of_probes = int(row[col_name2index['length']])
				if min_no_of_probes is not None and no_of_probes<min_no_of_probes:
					continue
				amplitude = float(row[col_name2index['amplitude']])
				segment_chromosome = start_probe[0]
				# pad both ends by 12 -- presumably half a probe length; confirm
				segment_start_pos = start_probe[1]-12
				segment_stop_pos = stop_probe[1]+12
				segment_length = abs(segment_stop_pos-segment_start_pos)
				if min_size is not None and segment_length<min_size:
					continue
				if amplitude<=max_amp or amplitude>=min_amp:
					real_counter += 1
					data_obj = DataObject(chromosome=segment_chromosome, position=segment_start_pos, stop_position=segment_stop_pos, \
										value=amplitude)
					data_obj.comment = 'start probe-id %s, end probe-id %s, no of probes %s'%\
								(start_probe_id, end_probe_id, no_of_probes)
					data_obj.genome_wide_result_id = genome_wide_result_id
					gwr.add_one_data_obj(data_obj)
					
			if report and counter%10000==0:
				sys.stderr.write('%s%s\t%s\t%s'%('\x08'*80, counter, no_of_segments, real_counter))
	finally:
		# fileinput keeps the current file open (and module-level state) until closed
		input_handler.close()
	sys.stderr.write("\n")
	
	# widen the value range so fixed marker levels stay inside it
	if gwr.max_value<3:	# insertion at y=3
		gwr.max_value=3
	if gwr.min_value>-1:	# deletion at y = -1
		gwr.min_value = -1
	gwr.name = '%s '%ecotype_id +  gwr.name
	setattr(gwr, 'ecotype_id', ecotype_id)
	sys.stderr.write(" %s segments. Done.\n"%(len(gwr.data_obj_ls)))
	return gwr