def getTranslationDataStructureForBackgroundLoci(self, db_250k, cnv_method_id=None, min_MAF=0.1):
    """
    2011-4-22
        Build the two lookup structures that map between genomic
        (chr, start, stop) spans and positions on one concatenated
        ("cumulative") axis, for the CNVs of one cnv method.

        1. keep every CNV whose minor allele frequency,
            min(frequency, 1-frequency), is strictly above min_MAF.
        2. chrSpan2cumuStartRBDict: (chr, start, stop) key => 0-based cumulative
            start of that CNV.
        3. cumuSpan2ChrSpanRBDict: (cumu_start, cumu_stop) key (chromosome=0,
            1-based, inclusive) => (chr, start, stop) tuple.

        CNVs are visited ordered by chromosome, then start. db_250k is not
        used inside this method.

        Returns a PassingData carrying both dictionaries.
    """
    sys.stderr.write("Getting translation structures between (chr, start, stop) and (cumu_start, cumu_stop) for cnv method %s ..." % cnv_method_id)
    CNVTable = Stock_250kDB.CNV
    cnv_rows = CNVTable.query.filter_by(cnv_method_id=cnv_method_id)
    cnv_rows = cnv_rows.order_by(CNVTable.chromosome).order_by(CNVTable.start)

    chrSpan2cumuStartRBDict = RBDict()
    cumuSpan2ChrSpanRBDict = RBDict()
    cumu_offset = 0  # 0-based offset of the next CNV on the cumulative axis
    no_of_cnvs = 0
    no_of_cnvs_kept = 0
    for cnv in cnv_rows:
        no_of_cnvs += 1
        if min(cnv.frequency, 1 - cnv.frequency) <= min_MAF:
            continue
        no_of_cnvs_kept += 1

        # 2010-8-17 overlapping keys are regarded as separate instances as long
        # as they are not identical (hence the tiny min_reciprocal_overlap).
        chr_key = CNVSegmentBinarySearchTreeKey(
            chromosome=cnv.chromosome,
            span_ls=[cnv.start, cnv.stop],
            min_reciprocal_overlap=0.00000000000001,
        )
        chrSpan2cumuStartRBDict[chr_key] = cumu_offset

        cnv_length = cnv.stop - cnv.start + 1
        cumu_key = CNVSegmentBinarySearchTreeKey(
            chromosome=0,
            span_ls=[cumu_offset + 1, cumu_offset + cnv_length],
            min_reciprocal_overlap=0.00000000000001,
        )
        if cumu_key in cumuSpan2ChrSpanRBDict:
            sys.stderr.write(
                "Error: %s of chr %s is already in cumuSpan2ChrSpanRBDict.\n" % (cumu_key, cnv.chromosome))
        else:
            cumuSpan2ChrSpanRBDict[cumu_key] = (cnv.chromosome, cnv.start, cnv.stop)
        # the offset advances even when the key was a duplicate
        cumu_offset += cnv_length

    sys.stderr.write("%s out of %s CNVs are included. Done.\n" % (no_of_cnvs_kept, no_of_cnvs))
    return PassingData(cumuSpan2ChrSpanRBDict=cumuSpan2ChrSpanRBDict,
                       chrSpan2cumuStartRBDict=chrSpan2cumuStartRBDict)
def createCNVRBDict(self, db_250k, cnv_method_id=None, max_CNV_SNP_dist=None, array_id2row_index=None, snp_id_ls=None):
    """
    2010-9-30
        This function is to get
            1. CNVs from cnv_method_id,
            2. the nearby SNPs for each CNV.

        It creates a RBDict keyed by CNV segments, each widened by
        max_CNV_SNP_dist on both sides (the left end is clamped at 1).
        Then, for each SNP, it finds the CNV segments which contain it and
        appends the SNP id to the snp_id_ls of every such segment.

        Arguments:
            db_250k: not used inside this method.
            cnv_method_id: selects the rows of Stock_250kDB.CNV.
            max_CNV_SNP_dist: padding added to each CNV; used in arithmetic, so a
                number must be supplied as soon as the query returns any row.
            array_id2row_index: only its length is used (size of each
                deletionDataLs); must support len() once any row exists.
            snp_id_ls: iterable of SNP ids shaped like "chromosome_position[_...]";
                both fields are converted with int(). Defaults to no SNPs.

        Returns the RBDict; each value is a PassingData(snp_id_ls, deletionDataLs).
    """
    # the default is None rather than a shared mutable list
    if snp_id_ls is None:
        snp_id_ls = []

    sys.stderr.write("Creating CNV RBDict ... \n")
    query = Stock_250kDB.CNV.query.filter_by(cnv_method_id=cnv_method_id)
    CNVRBDict = RBDict()
    count = 0
    real_count = 0  # stays 0 in this phase: the code that incremented it is disabled below
    for row in query:
        count += 1
        # 2010-8-17 any overlap short of identity is tolerated.
        segmentKey = CNVSegmentBinarySearchTreeKey(
            chromosome=row.chromosome,
            span_ls=[max(1, row.start - max_CNV_SNP_dist), row.stop + max_CNV_SNP_dist],
            min_reciprocal_overlap=1,
            cnv_id=row.id, cnv_start=row.start, cnv_stop=row.stop)
        if segmentKey not in CNVRBDict:
            CNVRBDict[segmentKey] = PassingData(snp_id_ls=[],
                                                deletionDataLs=[0] * len(array_id2row_index))
        # 2010-9-30 disabled (too much memory): marking deletions per array
        #   for cnv_array_call in row.cnv_array_call_ls:
        #       array_row_index = array_id2row_index.get(cnv_array_call.array_id)
        #       if array_row_index is not None:  # ignore arrays not in SNPs
        #           CNVRBDict[segmentKey].deletionDataLs[array_row_index] = 1
        #           real_count += 1
        if count % 200 == 0:
            sys.stderr.write("%s%s\t%s" % ('\x08' * 80, count, real_count))
            if self.debug:  # debug mode stops after the first progress report
                break
    sys.stderr.write("%s%s\t%s\n %s Done.\n" % ('\x08' * 80, count, real_count, repr(CNVRBDict)))

    sys.stderr.write("Finding nearby SNPs for CNVs ... \n")
    compareIns = CNVCompare(min_reciprocal_overlap=0.0000001)  # any overlap is an overlap
    count = 0
    real_count = 0  # number of (SNP, CNV segment) pairs recorded
    for snp_id in snp_id_ls:
        chromosome, start = snp_id.split('_')[:2]
        chromosome = int(chromosome)
        start = int(start)
        # min_reciprocal_overlap doesn't matter here; the match is decided by compareIns.
        snpSegmentKey = CNVSegmentBinarySearchTreeKey(chromosome=chromosome,
                                                      span_ls=[start],
                                                      min_reciprocal_overlap=0.0000001)
        node_ls = []
        CNVRBDict.findNodes(snpSegmentKey, node_ls=node_ls, compareIns=compareIns)
        for node in node_ls:
            node.value.snp_id_ls.append(snp_id)
            real_count += 1
        count += 1
        if count % 1000 == 0:
            sys.stderr.write("%s%s\t%s" % ('\x08' * 80, count, real_count))
    sys.stderr.write("%s%s\t%s\n Done.\n" % ('\x08' * 80, count, real_count))
    return CNVRBDict
def partitionCNVIntoNonOverlapping(self, db_250k, cnv_method_id=None, min_reciprocal_overlap=0.000000001,
                                   table_name=None, frequency=None, chromosome=None):
    """
    2010-10-11
        bugfix. remove a temporary condition restricting chromosome position.
    2010-9-20
        add argument table_name to accommodate other tables, such as CNVCall
        add frequency. if table_name=CNVCall, frequency is 1/(no_of_total_arrays)
    2010-8-2
        Read the CNVs of one method from table_name and insert them into a RBDict
        one by one. A segment that collides with an existing key is handed to
        self.removeOverlappingInRBTreeRecursive() instead of being inserted.

        Arguments:
            db_250k: database object; db_250k.metadata.bind executes the raw SQL.
            cnv_method_id: value for the "cnv_method_id" where-condition.
            min_reciprocal_overlap: passed on to every segment key.
            table_name: table to read; defaults to the Stock_250kDB.CNV table.
            frequency: if truthy, the frequency attached to every segment.
                Otherwise each row of the CNV table contributes its own
                "frequency" (0.1 if the row lacks one), and rows of any other
                table get 0.1.
            chromosome: if truthy, restrict the query to this chromosome.

        Returns the RBDict (keys are segment keys, values are None for keys
        inserted here).
    """
    sys.stderr.write(
        "Partitioning CNVs from method %s into non-overlapping ones ... \n" % (cnv_method_id))
    rbDict = RBDict()
    cnv_table_name = Stock_250kDB.CNV.table.name
    if table_name is None:
        table_name = cnv_table_name
    is_cnv_table = (table_name == cnv_table_name)

    # NOTE(review): table_name, cnv_method_id and chromosome are interpolated into
    # the SQL text as-is; only pass trusted values.
    where_sql = "where cnv_method_id=%s " % (cnv_method_id)
    if chromosome:  # 2010-9-28
        where_sql += " and chromosome =%s" % (chromosome)
    query = db_250k.metadata.bind.execute("select * from %s %s" % (table_name, where_sql))

    counter = 0
    for row in query:
        counter += 1
        # Resolve the frequency per row into its own local. Assigning back to the
        # "frequency" argument made the first row's value stick for all later rows.
        if frequency:
            row_frequency = frequency
        elif is_cnv_table:
            row_frequency = getattr(row, 'frequency', 0.1)
        else:
            row_frequency = 0.1
        # start and stop of key is created based on span_ls
        segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=row.chromosome,
                                                   span_ls=[row.start, row.stop],
                                                   min_reciprocal_overlap=min_reciprocal_overlap,
                                                   parent_cnv_id_ls=[row.id],
                                                   frequency=row_frequency)
        if segmentKey not in rbDict:
            rbDict[segmentKey] = None
        else:
            self.removeOverlappingInRBTreeRecursive(rbDict, segmentKey)
        if counter % 1000 == 0:
            sys.stderr.write("%s\t%s\t%s" % ('\x08' * 80, counter, len(rbDict)))
            if self.debug:  # break after 1000 if in debug mode
                break
    sys.stderr.write("\t %s original CNVs => %s non-overlapping CNVs.\n" % (counter, len(rbDict)))
    return rbDict
def prepareDataForPermutationRankTest(self, top_loci_in_chr_pos, genomeRBDict, param_data, report=False):
    """
    2011-3-16
        Split the ranks (1-based position within top_loci_in_chr_pos) of the top
        loci into those near a candidate gene and those that are not.

        Arguments:
            top_loci_in_chr_pos: sequence whose items start with (chr, start, stop).
            genomeRBDict: searched through findNodes(); each node value is iterated
                as a collection of gene records having a gene_id.
            param_data: supplies min_big_overlap (threshold for the comparer) and
                candidate_gene_set.
            report: write progress messages to stderr if true.

        Returns a PassingData with candidate_gene_snp_rank_ls,
        non_candidate_gene_snp_rank_ls and captured_candidate_gene_set.
    """
    if report:
        sys.stderr.write("Preparing data out of %s top loci for permutation test ...\n" %
                         (len(top_loci_in_chr_pos)))
    permData = PassingData(candidate_gene_snp_rank_ls=[],
                           non_candidate_gene_snp_rank_ls=[],
                           captured_candidate_gene_set=set())
    compareIns = CNVCompareByBigOverlapRatio(
        min_reciprocal_overlap=param_data.min_big_overlap)

    for rank, locus in enumerate(top_loci_in_chr_pos, 1):
        chromosome, start, stop = locus[:3]
        # min_reciprocal_overlap doesn't matter here. it's decided by compareIns.
        locusKey = CNVSegmentBinarySearchTreeKey(chromosome=str(chromosome),
                                                 span_ls=[start, stop],
                                                 min_reciprocal_overlap=0.0000001)
        matched_nodes = []
        genomeRBDict.findNodes(locusKey, node_ls=matched_nodes, compareIns=compareIns)

        near_candidate = False
        for gene_node in matched_nodes:
            # only the first candidate gene of each node is recorded
            for gene_data in gene_node.value:
                if gene_data.gene_id not in param_data.candidate_gene_set:
                    continue
                permData.captured_candidate_gene_set.add(gene_data.gene_id)
                near_candidate = True
                break

        if near_candidate:
            rank_ls = permData.candidate_gene_snp_rank_ls
        else:
            rank_ls = permData.non_candidate_gene_snp_rank_ls
        rank_ls.append(rank)

    if report:
        sys.stderr.write("%s loci near %s candidates. Done.\n" %
                         (len(permData.candidate_gene_snp_rank_ls),
                          len(permData.captured_candidate_gene_set)))
    return permData
def translateCumuPosIntoChrPos(self, top_loci_in_cumu_pos, cumuSpan2ChrSpanRBDict=None, compareIns=None):
    """
    2011-4-22
        adjust because chr_id2cumu_start is now 0-based.
    2011-4-22
        For CNVs, one (cumu_start, cumu_stop) could span multiple keys in cumuSpan2ChrSpanRBDict
    2011-3-16
        Translate spans on the cumulative axis back into [chr, start, stop].

        Arguments:
            top_loci_in_cumu_pos: iterable of sequences starting with
                (cumu_start, cumu_stop).
            cumuSpan2ChrSpanRBDict: RBDict whose keys are cumulative spans and whose
                values start with (chr, start, stop). Required as soon as there is
                at least one span to translate.
            compareIns: comparer handed to findNodes(). If None, a
                CNVCompareBySmallOverlapRatio(min_reciprocal_overlap=0.0000001)
                is used.

        Returns a list of [chr, start, stop]; one entry per overlapped key, so a
        single input span can yield several entries or none (a message is written
        to stderr in the latter case).

        NOTE(review): another method with this same name is defined further down
        in this file; if both belong to the same class the later definition
        replaces this one - confirm which is intended.
    """
    # honour a caller-supplied comparer instead of silently replacing it
    if compareIns is None:
        compareIns = CNVCompareBySmallOverlapRatio(min_reciprocal_overlap=0.0000001)

    top_loci = []
    for span in top_loci_in_cumu_pos:
        cumu_start, cumu_stop = span[:2]
        segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=0,
                                                   span_ls=[cumu_start, cumu_stop],
                                                   min_reciprocal_overlap=0.00000000000001)
        node_ls = []
        cumuSpan2ChrSpanRBDict.findNodes(segmentKey, node_ls=node_ls, compareIns=compareIns)
        if not node_ls:
            sys.stderr.write(
                "(%s, %s) not found in cumuSpan2ChrSpanRBDict.\n" % (cumu_start, cumu_stop))
        for node in node_ls:
            chromosome, node_chr_start, node_chr_stop = node.value[:3]
            overlapData = get_overlap_ratio(segmentKey.span_ls, [node.key.start, node.key.stop])
            # overlap positions are in cumulative coordinates; shift them by the
            # distance between the key's cumulative start and its genomic start.
            start = overlapData.overlap_start_pos - node.key.span_ls[0] + node_chr_start
            stop = overlapData.overlap_stop_pos - node.key.span_ls[0] + node_chr_start
            if stop > node_chr_stop:  # truncate it. shouldn't happen though
                stop = node_chr_stop
            top_loci.append([chromosome, start, stop])
    return top_loci
def translateChrPosDataObjectIntoCumuPos(self, top_loci, chrSpan2cumuStartRBDict=None):
    """
    2011-4-22
        change chr_id2cumu_start to chrSpan2cumuStartRBDict
    2011-3-21
        top_loci has become a list of DataObject of GWR.
    2011-3-16
        Translate loci given in chromosome coordinates into spans on the
        cumulative axis.

        Arguments:
            top_loci: sized collection of objects with chromosome, position and
                stop_position attributes.
            chrSpan2cumuStartRBDict: RBDict mapping a (chr, start, stop) key to the
                cumulative start used as offset for that key.

        Returns a list of [cumu_start, cumu_stop]; one entry per overlapped key.
        Loci that overlap no key are counted as skipped and reported on stderr.
    """
    sys.stderr.write(
        "Translating %s loci from chr-span coordinates into cumu-span ..." % (len(top_loci)))
    cumu_span_ls = []
    skipped_count = 0
    overlapComparer = CNVCompareBySmallOverlapRatio(min_reciprocal_overlap=0.0000001)
    for locus in top_loci:
        locusKey = CNVSegmentBinarySearchTreeKey(
            chromosome=locus.chromosome,
            span_ls=[locus.position, locus.stop_position],
            min_reciprocal_overlap=0.00000000000001,
        )
        hit_ls = []
        chrSpan2cumuStartRBDict.findNodes(locusKey, node_ls=hit_ls, compareIns=overlapComparer)
        if not hit_ls:
            skipped_count += 1
            continue
        for hit in hit_ls:
            overlap = get_overlap_ratio(locusKey.span_ls, [hit.key.start, hit.key.stop])
            # overlap positions are in normal genome coordinates; hit.value is the
            # cumulative start stored for this key.
            cumu_span_ls.append([
                overlap.overlap_start_pos - hit.key.start + 1 + hit.value,
                overlap.overlap_stop_pos - hit.key.start + 1 + hit.value,
            ])
    sys.stderr.write("%s loci skipped. now %s loci.\n" % (skipped_count, len(cumu_span_ls)))
    return cumu_span_ls
def translateCumuPosIntoChrPos(self, top_loci_in_cumu_pos, cumuSpan2ChrRBDict):
    """
    2011-3-16
        Translate spans on the cumulative axis back into [chr, start, stop] using
        a single-node lookup (findNode) per span.

        Arguments:
            top_loci_in_cumu_pos: iterable of sequences starting with
                (cumu_start, cumu_stop); extra trailing elements are ignored.
            cumuSpan2ChrRBDict: RBDict whose keys are cumulative spans and whose
                node value has the chromosome as first element.

        Returns a list of [chr, start, stop] with chr converted to str. start and
        stop are 1-based offsets from the cumulative start of the matched key.
        Spans without a matching key are reported on stderr and left out.

        NOTE(review): another method with this same name is defined earlier in
        this file with a different signature; if both belong to the same class
        this definition replaces the earlier one - confirm which is intended.
    """
    top_loci = []
    for span in top_loci_in_cumu_pos:
        # take exactly two elements; slicing [:3] broke the two-name unpacking
        # whenever a span carried a third element.
        cumu_start, cumu_stop = span[:2]
        # 2010-8-17 (original note) overlapping keys are regarded as separate
        # instances as long as they are identical.
        segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=0,
                                                   span_ls=[cumu_start, cumu_stop],
                                                   min_reciprocal_overlap=0.00000000000001)
        node = cumuSpan2ChrRBDict.findNode(segmentKey)
        if node is None:
            sys.stderr.write("(%s, %s) not found in cumuSpan2ChrRBDict.\n" % (cumu_start, cumu_stop))
            continue
        chromosome = str(node.value[0])  # chr in chr_id2cumu_start is of type "str"
        key_cumu_start = node.key.span_ls[0]
        start = cumu_start - key_cumu_start + 1
        stop = cumu_stop - key_cumu_start + 1
        top_loci.append([chromosome, start, stop])
    return top_loci
def constructPeakOverlapGraph(self, resultPeakRBDictList=None, genomeRBDict=None, candidate_gene_set=None, outputFname=None):
    """
    2012.3.16
        Build a graph whose nodes are result peaks and whose edges connect peaks
        (from two different result peak RBDicts) that overlap, then write one
        tab-delimited line per (connected component, phenotype) to outputFname.

        Every edge carries the call_method_id of both peaks, so it can be told
        whether it is across two different call methods.
        for each component
            1. get the final span (chr, start, stop)
            2. check if any candidate gene is within or touches upon.
            3. check if any edge is across two different call methods, which means
                they are overlapping peaks via two different call methods.

        Arguments:
            resultPeakRBDictList: list of RBDicts of peaks; node.value[0] of each
                node must provide id, result.call_method_id and
                result.phenotype_method_id. Defaults to an empty list.
            genomeRBDict: RBDict of genes searched with findNodes(); each node
                value is iterated as gene records with gene_id and ncbi_gene_id.
            candidate_gene_set: container tested with "gene_id in ...".
            outputFname: path of the output file (overwritten).

        Output columns: chromosome, start, stop, phenotype_id, fileNamePrefix.
        Returns None.

        NOTE(review): peaks are added as nodes only while their RBDict is the
        first member of a pair, so peaks of the last RBDict that overlap nothing
        (and all peaks when only one RBDict is given) never reach the output -
        confirm this is intended.
    """
    # the default is None rather than a shared mutable list
    if resultPeakRBDictList is None:
        resultPeakRBDictList = []

    sys.stderr.write("Constructing result peak overlap graph ...")
    g = nx.Graph()
    compareIns = CNVCompare(min_reciprocal_overlap=0.0000001)  # any overlap is an overlap
    no_of_results = len(resultPeakRBDictList)
    for i in xrange(no_of_results):
        for j in xrange(i + 1, no_of_results):
            result1_peakRBDict = resultPeakRBDictList[i]
            result2_peakRBDict = resultPeakRBDictList[j]
            for queryNode in result1_peakRBDict:
                queryPeak = queryNode.value[0]
                query_span_ls = [queryNode.key.start, queryNode.key.stop]
                # add this node first, could be singleton
                g.add_node(queryPeak.id, chromosome=queryNode.key.chromosome,
                           span_ls=query_span_ls,
                           call_method_id_ls=[queryPeak.result.call_method_id],
                           phenotype_method_id_ls=[queryPeak.result.phenotype_method_id])
                # min_reciprocal_overlap doesn't matter here. it's decided by compareIns.
                segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=queryNode.key.chromosome,
                                                           span_ls=[queryNode.key.start, queryNode.key.stop],
                                                           min_reciprocal_overlap=0.0000001)
                node_ls = []
                result2_peakRBDict.findNodes(segmentKey, node_ls=node_ls, compareIns=compareIns)
                for node in node_ls:
                    peak = node.value[0]
                    # the edge span is the union of the two overlapping peaks
                    g.add_edge(queryPeak.id, peak.id, chromosome=queryNode.key.chromosome,
                               span_ls=[min(queryNode.key.start, node.key.start),
                                        max(queryNode.key.stop, node.key.stop)],
                               call_method_id_ls=[queryPeak.result.call_method_id,
                                                  peak.result.call_method_id],
                               phenotype_method_id_ls=[queryPeak.result.phenotype_method_id,
                                                       peak.result.phenotype_method_id])
    sys.stderr.write("%s nodes. %s edges. %s connected components.\n" %
                     (g.number_of_nodes(), g.number_of_edges(), nx.number_connected_components(g)))

    sys.stderr.write("Outputting overlap regions ...")
    no_of_output = 0
    # the with-block guarantees the output file is flushed and closed
    with open(outputFname, 'w') as outf:
        writer = csv.writer(outf, delimiter='\t')
        header = ['chromosome', 'start', 'stop', 'phenotype_id', 'fileNamePrefix']
        writer.writerow(header)
        for cc in nx.connected_components(g):
            chromosome = None
            min_start = None
            max_stop = None
            call_method_id_set = set()
            phenotype_method_id_set = set()
            sg = nx.subgraph(g, cc)
            if len(cc) == 1:  # only one node, no edges
                node_id = next(iter(cc))  # works whether cc is a list or a set
                nodeData = sg.node[node_id]
                min_start, max_stop = nodeData['span_ls']
                chromosome = nodeData['chromosome']
                call_method_id_set = set(nodeData['call_method_id_ls'])
                phenotype_method_id_set = set(nodeData['phenotype_method_id_ls'])
            else:
                # data=True, return edge attribute dict in 3-tuple (u,v,data).
                for e in sg.edges_iter(data=True):
                    edge_data = e[2]
                    chromosome = edge_data['chromosome']
                    call_method_id_set.update(edge_data['call_method_id_ls'])
                    phenotype_method_id_set.update(edge_data['phenotype_method_id_ls'])
                    span_ls = edge_data['span_ls']
                    if min_start is None:
                        min_start = span_ls[0]
                    else:
                        min_start = min(min_start, span_ls[0])
                    if max_stop is None:
                        max_stop = span_ls[1]
                    else:
                        max_stop = max(max_stop, span_ls[1])

            # 2012.3.27 don't extend the box before checking for overlap with candidate genes.
            # check whether a candidate gene is within this
            # min_reciprocal_overlap doesn't matter here. it's decided by compareIns.
            segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=str(chromosome),
                                                       span_ls=[min_start, max_stop],
                                                       min_reciprocal_overlap=0.0000001)
            node_ls = []
            genomeRBDict.findNodes(segmentKey, node_ls=node_ls, compareIns=compareIns)
            nearCandidateGene = False
            near_peak_candidate_gene_id_list = []
            for node in node_ls:
                geneSegKey = node.key
                for oneGeneData in node.value:
                    if oneGeneData.gene_id in candidate_gene_set:
                        nearCandidateGene = True
                        # use ncbi gene id instead
                        near_peak_candidate_gene_id_list.append(oneGeneData.ncbi_gene_id)
                        # 2012.3.27 adjust to include the full length of the gene
                        min_start = min(min_start, geneSegKey.span_ls[0])
                        max_stop = max(max_stop, geneSegKey.span_ls[1])
            near_peak_candidate_gene_id_list.sort()
            near_peak_candidate_gene_id_list = map(str, near_peak_candidate_gene_id_list)

            fileNamePrefixLs = []
            if len(call_method_id_set) > 1:
                # peaks from more than one call method overlap in this component
                fileNamePrefixLs.append('olp')
            else:
                call_method_id = call_method_id_set.pop()
                fileNamePrefixLs.append('onlyCall%s' % (call_method_id))
            if nearCandidateGene:
                fileNamePrefixLs.append('cand_%s' % ('_'.join(near_peak_candidate_gene_id_list)))
            else:
                fileNamePrefixLs.append("nonCand")
            fileNamePrefixLs.append("chr%s_%s_%s" % (chromosome, min_start, max_stop))
            peak_id_ls_str = map(str, cc)
            fileNamePrefixLs.append("peak_id_%s" % ('_'.join(peak_id_ls_str)))
            fileNamePrefix = '_'.join(fileNamePrefixLs)

            for phenotype_id in phenotype_method_id_set:
                data_row = [chromosome, min_start, max_stop, phenotype_id,
                            'pheno_%s_%s' % (phenotype_id, fileNamePrefix)]
                writer.writerow(data_row)
                no_of_output += 1
    sys.stderr.write("%s lines outputted.\n" % (no_of_output))
def splitOverlappingOfTwoKeys(self, key1, key2, min_reciprocal_overlap=0.000000001):
    """
    2010-8-2
        Split two overlapping segment keys into up to three non-overlapping keys,
        returned in positional order:

            1. the part covered only by the key that starts first (if any),
            2. the part shared by both keys; its parent_cnv_id_ls is the
                concatenation and its frequency the sum of the two keys',
            3. the part covered only by the key that ends last (if any).

        Keys on different chromosomes, or keys on the same chromosome that do not
        overlap at all, cannot be split; they are returned unchanged as
        [key1, key2] (ordered by start in the latter case). Neither should happen
        if the caller only passes overlapping keys.

        Arguments:
            key1, key2: keys with chromosome, span_ls, start, stop,
                parent_cnv_id_ls and frequency. start/stop are assumed to mirror
                span_ls[0]/span_ls[1].
            min_reciprocal_overlap: passed on to every new key.

        Returns a list of keys.
    """
    if key1.chromosome != key2.chromosome:
        # no overlapping between two. it's a bug if here is reached.
        return [key1, key2]
    # make sure key1 is ahead of key2
    if key1.span_ls[0] > key2.span_ls[0]:
        key1, key2 = key2, key1
    if key1.span_ls[1] < key2.span_ls[0]:
        # same chromosome but disjoint: splitting would create a segment whose
        # start is beyond its stop, so hand both keys back untouched.
        return [key1, key2]

    def makeKey(start, stop, parent_cnv_id_ls, frequency):
        # one piece of the split, on the shared chromosome
        return CNVSegmentBinarySearchTreeKey(chromosome=key1.chromosome,
                                             span_ls=[start, stop],
                                             min_reciprocal_overlap=min_reciprocal_overlap,
                                             parent_cnv_id_ls=parent_cnv_id_ls,
                                             frequency=frequency)

    keysAfterSplit = []

    # 1. leading part that belongs to key1 alone
    if key1.span_ls[0] < key2.span_ls[0]:
        keysAfterSplit.append(
            makeKey(key1.start, key2.start - 1, key1.parent_cnv_id_ls, key1.frequency))

    # 2. shared part: from the later start to the earlier stop
    if key1.span_ls[1] <= key2.span_ls[1]:
        shared_stop = key1.stop
    else:
        shared_stop = key2.stop
    keysAfterSplit.append(
        makeKey(key2.start, shared_stop,
                key1.parent_cnv_id_ls + key2.parent_cnv_id_ls,
                key1.frequency + key2.frequency))

    # 3. trailing part that belongs to whichever key ends last
    if key1.span_ls[1] < key2.span_ls[1]:
        keysAfterSplit.append(
            makeKey(key1.stop + 1, key2.stop, key2.parent_cnv_id_ls, key2.frequency))
    elif key1.span_ls[1] > key2.span_ls[1]:
        keysAfterSplit.append(
            makeKey(key2.stop + 1, key1.stop, key1.parent_cnv_id_ls, key1.frequency))
    return keysAfterSplit