コード例 #1
0
    def getTranslationDataStructureForBackgroundLoci(self,
                                                     db_250k,
                                                     cnv_method_id=None,
                                                     min_MAF=0.1):
        """
        Build two lookup trees that translate CNV loci between per-chromosome
        coordinates and one concatenated ("cumulative") coordinate axis.

        Rows of Stock_250kDB.CNV belonging to cnv_method_id are visited in
        (chromosome, start) order. A row is kept only when its minor allele
        frequency, min(frequency, 1 - frequency), is strictly above min_MAF.
        Kept loci are laid end to end on the cumulative axis.

        db_250k is accepted but not used by this method.

        Returns a PassingData holding
            chrSpan2cumuStartRBDict: (chr, start, stop) key -> 0-based
                cumulative offset at which the locus begins
            cumuSpan2ChrSpanRBDict: key on pseudo-chromosome 0 spanning
                [offset + 1, offset + size] -> (chr, start, stop)

        2011-4-22
        """
        sys.stderr.write(
            "Getting translation structures between (chr, start, stop) and (cumu_start, cumu_stop) for cnv method %s ..."
            % cnv_method_id)
        cnv_table = Stock_250kDB.CNV
        rows = cnv_table.query.filter_by(cnv_method_id=cnv_method_id)
        rows = rows.order_by(cnv_table.chromosome).order_by(cnv_table.start)

        chr_tree = RBDict()
        cumu_tree = RBDict()
        # 2010-8-17 with an overlap threshold this small, overlapping keys
        # count as separate entries unless they are identical.
        tiny_overlap = 0.00000000000001

        offset = 0  # 0-based start of the next locus on the cumulative axis
        no_of_rows = 0
        no_of_kept = 0
        for row in rows:
            no_of_rows += 1
            frequency = row.frequency
            if min(frequency, 1 - frequency) <= min_MAF:
                continue
            no_of_kept += 1

            chr_key = CNVSegmentBinarySearchTreeKey(
                chromosome=row.chromosome,
                span_ls=[row.start, row.stop],
                min_reciprocal_overlap=tiny_overlap)
            chr_tree[chr_key] = offset

            length = row.stop - row.start + 1
            cumu_key = CNVSegmentBinarySearchTreeKey(
                chromosome=0,
                span_ls=[offset + 1, offset + length],
                min_reciprocal_overlap=tiny_overlap)
            if cumu_key in cumu_tree:
                sys.stderr.write(
                    "Error: %s of chr %s is already in cumuSpan2ChrSpanRBDict.\n"
                    % (cumu_key, row.chromosome))
            else:
                cumu_tree[cumu_key] = (row.chromosome, row.start, row.stop)

            offset += length
        sys.stderr.write("%s out of %s CNVs are included. Done.\n" %
                         (no_of_kept, no_of_rows))
        return PassingData(cumuSpan2ChrSpanRBDict=cumu_tree,
                           chrSpan2cumuStartRBDict=chr_tree)
コード例 #2
0
	def createCNVRBDict(self, db_250k, cnv_method_id=None, max_CNV_SNP_dist=None, array_id2row_index=None, snp_id_ls=None):
		"""
		Build an RBDict of padded CNV segments and attach nearby SNPs to each.

		Step 1: every Stock_250kDB.CNV row of cnv_method_id becomes a key whose
			span is [max(1, start - max_CNV_SNP_dist), stop + max_CNV_SNP_dist].
			Its value is a PassingData with an empty snp_id_ls and a
			deletionDataLs of zeros, one entry per array in array_id2row_index.
		Step 2: each SNP id (formatted "chromosome_position...") is looked up
			in the tree and appended to snp_id_ls of every segment found.

		Arguments:
			db_250k: not used by this method.
			max_CNV_SNP_dist: padding added on each side of a CNV.
				None is treated as 0 (no padding).
			array_id2row_index: only its length is used here.
				None is treated as empty.
			snp_id_ls: iterable of SNP id strings. None is treated as empty.

		In debug mode (self.debug) step 1 stops after the first 200 CNVs.
		Returns the RBDict.

		2010-9-30
		"""
		# The original defaults (None, None, []) crashed on arithmetic/len()
		# or shared one list object between calls; normalise them up front.
		if max_CNV_SNP_dist is None:
			max_CNV_SNP_dist = 0
		if array_id2row_index is None:
			no_of_arrays = 0
		else:
			no_of_arrays = len(array_id2row_index)
		if snp_id_ls is None:
			snp_id_ls = []

		sys.stderr.write("Creating CNV RBDict ... \n")
		query = Stock_250kDB.CNV.query.filter_by(cnv_method_id=cnv_method_id)
		CNVRBDict = RBDict()
		count = 0
		# 2010-9-30 filling deletionDataLs from row.cnv_array_call_ls was
		# disabled because it used too much memory, so real_count stays 0 in
		# this loop and deletionDataLs stays all zeros.
		real_count = 0
		for row in query:
			count += 1
			#2010-8-17 any overlap short of identity is tolerated.
			segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=row.chromosome, \
							span_ls=[max(1, row.start - max_CNV_SNP_dist), row.stop + max_CNV_SNP_dist], \
							min_reciprocal_overlap=1, cnv_id=row.id, cnv_start=row.start, cnv_stop=row.stop)
			if segmentKey not in CNVRBDict:
				CNVRBDict[segmentKey] = PassingData(snp_id_ls=[], deletionDataLs=[0]*no_of_arrays)
			if count%200==0:
				sys.stderr.write("%s%s\t%s"%('\x08'*80, count, real_count))
				if self.debug:
					break
		sys.stderr.write("%s%s\t%s\n %s Done.\n"%('\x08'*80, count, real_count, repr(CNVRBDict)))
		
		sys.stderr.write("Finding nearby SNPs for CNVs ... \n")
		compareIns = CNVCompare(min_reciprocal_overlap=0.0000001)	#any overlap is an overlap
		count = 0
		real_count = 0	#number of (SNP, CNV segment) pairs recorded
		for snp_id in snp_id_ls:
			chromosome, start = snp_id.split('_')[:2]
			chromosome = int(chromosome)
			start = int(start)
			snpSegmentKey = CNVSegmentBinarySearchTreeKey(chromosome=chromosome, \
							span_ls=[start], \
							min_reciprocal_overlap=0.0000001, )	#min_reciprocal_overlap doesn't matter here.
			node_ls = []
			CNVRBDict.findNodes(snpSegmentKey, node_ls=node_ls, compareIns=compareIns)
			for node in node_ls:
				node.value.snp_id_ls.append(snp_id)
				real_count += 1
			count += 1
			if count%1000==0:
				sys.stderr.write("%s%s\t%s"%('\x08'*80, count, real_count))
		sys.stderr.write("%s%s\t%s\n Done.\n"%('\x08'*80, count, real_count))
		return CNVRBDict
コード例 #3
0
    def partitionCNVIntoNonOverlapping(self, db_250k, cnv_method_id=None, min_reciprocal_overlap=0.000000001, \
            table_name=None, frequency=None, chromosome=None):
        """
        Load CNV segments of one method and partition them into
        non-overlapping segments stored as keys of an RBDict.

        Each row becomes a CNVSegmentBinarySearchTreeKey. A key that is not
        yet in the tree is inserted with value None; otherwise the clash is
        handed to self.removeOverlappingInRBTreeRecursive.

        Arguments:
            db_250k: database object; db_250k.metadata.bind runs the query.
            cnv_method_id: value for the cnv_method_id filter.
            min_reciprocal_overlap: passed on to every key.
            table_name: table to read; defaults to the Stock_250kDB.CNV table.
            frequency: if truthy, the frequency given to every segment.
                Otherwise each row of the CNV table contributes its own
                'frequency' column (0.1 when the row lacks the attribute),
                and rows of any other table get 0.1.
            chromosome: if truthy, restrict the query to this chromosome.

        In debug mode (self.debug) the loop stops after 1000 rows.
        Returns the RBDict.

        2010-10-11
            bugfix. remove a temporary condition restricting chromosome position.
        2010-9-20
            add argument table_name to accommodate other tables, such as CNVCall
            add frequency. if table_name=CNVCall, frequency is 1/(no_of_total_arrays)
        2010-8-2
        """
        sys.stderr.write(
            "Partitioning CNVs from method %s into non-overlapping ones ... \n"
            % (cnv_method_id))
        rbDict = RBDict()
        cnv_table_name = Stock_250kDB.CNV.table.name
        if table_name is None:
            table_name = cnv_table_name
        # NOTE(review): the SQL text is assembled by string interpolation.
        # Only pass trusted values for cnv_method_id, chromosome and
        # table_name.
        where_sql = "where cnv_method_id=%s " % (cnv_method_id)
        if chromosome:  #2010-9-28
            where_sql += " and chromosome =%s" % (chromosome)
        query = db_250k.metadata.bind.execute("select * from %s %s"%\
                 (table_name, where_sql))
        use_row_frequency = table_name == cnv_table_name
        counter = 0
        for row in query:
            counter += 1
            # Keep the per-row value in its own variable. Assigning it back
            # to the 'frequency' argument made every row after the first
            # reuse the first row's frequency.
            if frequency:
                row_frequency = frequency
            elif use_row_frequency:
                row_frequency = getattr(row, 'frequency', 0.1)
            else:
                row_frequency = 0.1

            #start and stop of key is created based on span_ls
            segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=row.chromosome, span_ls=[row.start, row.stop], \
                  min_reciprocal_overlap=min_reciprocal_overlap, parent_cnv_id_ls=[row.id],\
                  frequency=row_frequency)
            if segmentKey not in rbDict:
                rbDict[segmentKey] = None
            else:
                self.removeOverlappingInRBTreeRecursive(rbDict, segmentKey)
            if counter % 1000 == 0:
                sys.stderr.write("%s\t%s\t%s" %
                                 ('\x08' * 80, counter, len(rbDict)))
                if self.debug:  #break after 1000 if in debug mode
                    break
        sys.stderr.write("\t %s original CNVs => %s non-overlapping CNVs.\n" %
                         (counter, len(rbDict)))
        return rbDict
コード例 #4
0
    def prepareDataForPermutationRankTest(self,
                                          top_loci_in_chr_pos,
                                          genomeRBDict,
                                          param_data,
                                          report=False):
        """
        Split the 1-based ranks of the top loci into those near a candidate
        gene and those that are not.

        top_loci_in_chr_pos is a sequence whose items start with
        (chr, start, stop); a locus's rank is its position in that sequence
        plus one. Each locus is looked up in genomeRBDict with a
        CNVCompareByBigOverlapRatio built from param_data.min_big_overlap.
        A locus is "near a candidate" when any returned node holds a gene
        whose gene_id is in param_data.candidate_gene_set. Within one node
        only the first such gene is recorded.

        Returns a PassingData with candidate_gene_snp_rank_ls,
        non_candidate_gene_snp_rank_ls and captured_candidate_gene_set.

        2011-3-16
        """
        if report:
            sys.stderr.write(
                "Preparing data out of  %s top loci for permutation test ...\n"
                % (len(top_loci_in_chr_pos)))
        permData = PassingData(candidate_gene_snp_rank_ls=[],
                               non_candidate_gene_snp_rank_ls=[],
                               captured_candidate_gene_set=set())
        compareIns = CNVCompareByBigOverlapRatio(
            min_reciprocal_overlap=param_data.min_big_overlap)
        for index, locus in enumerate(top_loci_in_chr_pos):
            chromosome, start, stop = locus[:3]
            # The threshold stored in the key is irrelevant here; whether
            # two spans match is decided by compareIns.
            locus_key = CNVSegmentBinarySearchTreeKey(
                chromosome=str(chromosome),
                span_ls=[start, stop],
                min_reciprocal_overlap=0.0000001)
            hits = []
            genomeRBDict.findNodes(locus_key,
                                   node_ls=hits,
                                   compareIns=compareIns)
            near_candidate = False
            for hit in hits:
                for gene_data in hit.value:
                    if gene_data.gene_id not in param_data.candidate_gene_set:
                        continue
                    permData.captured_candidate_gene_set.add(
                        gene_data.gene_id)
                    near_candidate = True
                    break  # move on to the next node
            if near_candidate:
                rank_ls = permData.candidate_gene_snp_rank_ls
            else:
                rank_ls = permData.non_candidate_gene_snp_rank_ls
            rank_ls.append(index + 1)
        if report:
            sys.stderr.write(
                "%s loci near %s candidates. Done.\n" %
                (len(permData.candidate_gene_snp_rank_ls),
                 len(permData.captured_candidate_gene_set)))
        return permData
コード例 #5
0
    def translateCumuPosIntoChrPos(self,
                                   top_loci_in_cumu_pos,
                                   cumuSpan2ChrSpanRBDict=None,
                                   compareIns=None):
        """
        Translate spans on the cumulative axis back into (chr, start, stop).

        Arguments:
            top_loci_in_cumu_pos: iterable of items starting with
                (cumu_start, cumu_stop).
            cumuSpan2ChrSpanRBDict: RBDict mapping cumulative-span keys on
                pseudo-chromosome 0 to (chr, start, stop). Required despite
                the None default.
            compareIns: comparator handed to findNodes. When None, a
                CNVCompareBySmallOverlapRatio(min_reciprocal_overlap=0.0000001)
                is used. Previously this argument was always overwritten by
                that default, so a caller's comparator was ignored.

        One cumulative span may hit several keys (2011-4-22); each hit
        contributes one [chr, start, stop] entry covering only the
        overlapping part. Spans with no hit are reported on stderr and
        produce no entry.

        Returns the list of [chr, start, stop].

        2011-4-22
            adjust because chr_id2cumu_start is now 0-based.
        2011-3-16
        """
        top_loci = []
        if compareIns is None:
            compareIns = CNVCompareBySmallOverlapRatio(
                min_reciprocal_overlap=0.0000001)
        for span in top_loci_in_cumu_pos:
            cumu_start, cumu_stop = span[:2]
            segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=0, \
                span_ls=[cumu_start, cumu_stop], \
                min_reciprocal_overlap=0.00000000000001,)
            node_ls = []
            cumuSpan2ChrSpanRBDict.findNodes(segmentKey,
                                             node_ls=node_ls,
                                             compareIns=compareIns)
            if len(node_ls) == 0:
                sys.stderr.write(
                    "(%s, %s) not found in cumuSpan2ChrSpanRBDict.\n" %
                    (cumu_start, cumu_stop))
            for node in node_ls:
                chromosome, node_chr_start, node_chr_stop = node.value[:3]
                overlapData = get_overlap_ratio(
                    segmentKey.span_ls, [node.key.start, node.key.stop])
                # Overlap positions are in cumulative coordinates. Shift them
                # by the node's own cumulative start to get chromosome ones.
                node_cumu_start = node.key.span_ls[0]
                start = overlapData.overlap_start_pos - node_cumu_start + node_chr_start
                stop = overlapData.overlap_stop_pos - node_cumu_start + node_chr_start
                if stop > node_chr_stop:  #truncate it. shouldn't happen though
                    stop = node_chr_stop
                top_loci.append([chromosome, start, stop])
        return top_loci
コード例 #6
0
    def translateChrPosDataObjectIntoCumuPos(self,
                                             top_loci,
                                             chrSpan2cumuStartRBDict=None):
        """
        Translate loci from chromosome coordinates onto the cumulative axis.

        Each item of top_loci must expose chromosome, position and
        stop_position. It is looked up in chrSpan2cumuStartRBDict, whose
        values are the cumulative start offsets of the stored spans. Every
        matching node yields one [cumu_start, cumu_stop] pair covering the
        overlapping part. Loci without any match are counted as skipped.

        Returns the list of [cumu_start, cumu_stop] pairs.

        2011-4-22
            change chr_id2cumu_start to chrSpan2cumuStartRBDict
        2011-3-21
            top_loci has become a list of DataObject of GWR.
        2011-3-16
        """
        sys.stderr.write(
            "Translating %s loci from chr-span coordinates into cumu-span ..."
            % (len(top_loci)))
        cumu_span_ls = []
        no_of_misses = 0
        comparator = CNVCompareBySmallOverlapRatio(
            min_reciprocal_overlap=0.0000001)
        for locus in top_loci:
            locus_key = CNVSegmentBinarySearchTreeKey(
                chromosome=locus.chromosome,
                span_ls=[locus.position, locus.stop_position],
                min_reciprocal_overlap=0.00000000000001)
            hits = []
            chrSpan2cumuStartRBDict.findNodes(locus_key,
                                              node_ls=hits,
                                              compareIns=comparator)
            if not hits:
                no_of_misses += 1
                continue
            for hit in hits:
                hit_key = hit.key
                overlap = get_overlap_ratio(locus_key.span_ls,
                                            [hit_key.start, hit_key.stop])
                # Overlap positions are in normal genome coordinates.
                # hit.value is the cumulative offset of the stored span.
                cumu_span_ls.append([
                    overlap.overlap_start_pos - hit_key.start + 1 + hit.value,
                    overlap.overlap_stop_pos - hit_key.start + 1 + hit.value,
                ])
        sys.stderr.write("%s loci skipped. now %s loci.\n" %
                         (no_of_misses, len(cumu_span_ls)))
        return cumu_span_ls
コード例 #7
0
	def translateCumuPosIntoChrPos(self, top_loci_in_cumu_pos, cumuSpan2ChrRBDict):
		"""
		Translate spans on the cumulative axis into [chr, start, stop].

		Arguments:
			top_loci_in_cumu_pos: iterable of items starting with
				(cumu_start, cumu_stop); extra trailing elements are ignored.
			cumuSpan2ChrRBDict: RBDict whose keys are cumulative spans and
				whose values start with the chromosome identifier.

		Each span is resolved with a single findNode lookup. Spans that are
		not found are reported on stderr and skipped. Positions are made
		relative to the start of the matching key, 1-based.

		Returns the list of [chr, start, stop], with chr as a string.

		2011-3-16
		"""
		top_loci = []
		for span in top_loci_in_cumu_pos:
			# Take exactly two elements: span[:3] made the two-name
			# unpacking fail for any span carrying a third element.
			cumu_start, cumu_stop = span[:2]
			#2010-8-17 overlapping keys are regarded as separate instances as long as they are not identical.
			segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=0, \
							span_ls=[cumu_start, cumu_stop], \
							min_reciprocal_overlap=0.00000000000001,)
			node = cumuSpan2ChrRBDict.findNode(segmentKey)
			if node is None:
				sys.stderr.write("(%s, %s) not found in cumuSpan2ChrRBDict.\n"%(cumu_start, cumu_stop))
			else:
				chromosome = str(node.value[0]) 	#chr in chr_id2cumu_start is of type "str"
				key_cumu_start = node.key.span_ls[0]
				start = cumu_start - key_cumu_start + 1
				stop = cumu_stop - key_cumu_start + 1
				top_loci.append([chromosome, start, stop])
		return top_loci
コード例 #8
0
    def constructPeakOverlapGraph(self,
                                  resultPeakRBDictList=None,
                                  genomeRBDict=None,
                                  candidate_gene_set=None,
                                  outputFname=None):
        """
        Build a graph of overlapping association peaks and write one output
        row per (connected component, phenotype).

        Graph construction: for every pair (i, j), i < j, of trees in
        resultPeakRBDictList, each peak of tree i is added as a node (keyed
        by value[0].id) and joined by an edge to every peak of tree j that
        CNVCompare reports as overlapping. Edges record the chromosome, the
        union span, and the call_method_id / phenotype_method_id of both
        peaks.

        NOTE(review): a peak is only added as a node while its tree plays
        the "i" role. Peaks of the last tree that overlap nothing, and all
        peaks when only one tree is given, never enter the graph. Confirm
        whether that is intended.

        Output: for each connected component
            1. get the final span (chr, start, stop)
            2. look up genes in genomeRBDict; if a gene in
               candidate_gene_set is found, stretch the span to include
               the full gene and list its ncbi_gene_id in the file prefix
            3. mark the component 'olp' when it involves more than one call
               method, otherwise 'onlyCall<id>'
        Rows are written tab-delimited to outputFname with the columns
        chromosome, start, stop, phenotype_id, fileNamePrefix.

        Arguments:
            resultPeakRBDictList: list of peak RBDicts; None means empty.
            genomeRBDict: gene RBDict; node values are iterables of gene
                records exposing gene_id and ncbi_gene_id.
            candidate_gene_set: container of candidate gene_ids.
            outputFname: path of the file to write.

        2012.3.16
        """
        if resultPeakRBDictList is None:  # avoid a shared mutable default
            resultPeakRBDictList = []
        sys.stderr.write("Constructing result peak overlap graph ...")
        g = nx.Graph()

        compareIns = CNVCompare(
            min_reciprocal_overlap=0.0000001)  #any overlap is an overlap
        # The three tallies below are collected but nothing reads them.
        no_of_peaks_not_in_result2 = 0
        overlap_ls = []
        counter = 0
        no_of_results = len(resultPeakRBDictList)
        for i in xrange(no_of_results):
            result1_peakRBDict = resultPeakRBDictList[i]
            for j in xrange(i + 1, no_of_results):
                result2_peakRBDict = resultPeakRBDictList[j]
                for queryNode in result1_peakRBDict:
                    queryPeak = queryNode.value[0]
                    queryKey = queryNode.key
                    #add this node first, could be singleton
                    g.add_node(queryPeak.id, chromosome=queryKey.chromosome, \
                       span_ls=[queryKey.start, queryKey.stop], \
                       call_method_id_ls=[queryPeak.result.call_method_id],\
                       phenotype_method_id_ls=[queryPeak.result.phenotype_method_id])
                    counter += 1
                    #min_reciprocal_overlap doesn't matter here.
                    # it's decided by compareIns.
                    segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=queryKey.chromosome, \
                        span_ls=[queryKey.start, queryKey.stop], \
                        min_reciprocal_overlap=0.0000001, )
                    node_ls = []
                    result2_peakRBDict.findNodes(segmentKey,
                                                 node_ls=node_ls,
                                                 compareIns=compareIns)
                    total_perc_overlapped_by_result2 = 0.
                    for node in node_ls:
                        overlapData = get_overlap_ratio(
                            segmentKey.span_ls,
                            [node.key.start, node.key.stop])
                        total_perc_overlapped_by_result2 += overlapData.overlapFraction1
                        otherPeak = node.value[0]
                        g.add_edge(queryPeak.id, otherPeak.id, chromosome=queryKey.chromosome, \
                          span_ls=[min(queryKey.start, node.key.start), max(queryKey.stop, node.key.stop)], \
                          call_method_id_ls=[queryPeak.result.call_method_id, otherPeak.result.call_method_id],\
                          phenotype_method_id_ls=[queryPeak.result.phenotype_method_id, \
                                otherPeak.result.phenotype_method_id])
                    if total_perc_overlapped_by_result2 == 0:
                        no_of_peaks_not_in_result2 += 1
                        overlap_ls.append(-0.5)
                    else:
                        overlap_ls.append(total_perc_overlapped_by_result2)
        sys.stderr.write("%s nodes. %s edges. %s connected components.\n"%(g.number_of_nodes(), g.number_of_edges(), \
                     nx.number_connected_components(g)))

        sys.stderr.write("Outputting overlap regions ...")
        no_of_output = 0
        # The with-block closes (and flushes) the output file even when an
        # error occurs; the handle used to be left to garbage collection.
        with open(outputFname, 'w') as outf:
            writer = csv.writer(outf, delimiter='\t')
            header = [
                'chromosome', 'start', 'stop', 'phenotype_id', 'fileNamePrefix'
            ]
            writer.writerow(header)
            for cc in nx.connected_components(g):
                chromosome = None
                min_start = None
                max_stop = None
                call_method_id_set = set()
                phenotype_method_id_set = set()
                sg = nx.subgraph(g, cc)
                if len(cc) == 1:  #only one node, no edges
                    node_id = cc[0]
                    nodeData = sg.node[node_id]
                    min_start, max_stop = nodeData['span_ls']
                    chromosome = nodeData['chromosome']
                    call_method_id_set = set(nodeData['call_method_id_ls'])
                    phenotype_method_id_set = set(
                        nodeData['phenotype_method_id_ls'])
                else:
                    #data=True, return edge attribute dict in 3-tuple (u,v,data).
                    for e in sg.edges_iter(data=True):
                        edge_data = e[2]
                        chromosome = edge_data['chromosome']
                        call_method_id_set.update(
                            edge_data['call_method_id_ls'])
                        phenotype_method_id_set.update(
                            edge_data['phenotype_method_id_ls'])
                        span_ls = edge_data['span_ls']
                        if min_start is None:
                            min_start = span_ls[0]
                        else:
                            min_start = min(min_start, span_ls[0])
                        if max_stop is None:
                            max_stop = span_ls[1]
                        else:
                            max_stop = max(max_stop, span_ls[1])
                #2012.3.27 don't extend the box before checking for overlap with candidate genes.

                #check whether a candidate gene is within this
                #min_reciprocal_overlap doesn't matter here.
                # it's decided by compareIns.
                segmentKey = CNVSegmentBinarySearchTreeKey(chromosome=str(chromosome), \
                    span_ls=[min_start, max_stop], \
                    min_reciprocal_overlap=0.0000001, )
                node_ls = []
                genomeRBDict.findNodes(segmentKey,
                                       node_ls=node_ls,
                                       compareIns=compareIns)
                nearCandidateGene = False
                near_peak_candidate_gene_id_list = []
                for node in node_ls:
                    geneSegKey = node.key
                    for oneGeneData in node.value:
                        if oneGeneData.gene_id in candidate_gene_set:
                            nearCandidateGene = True
                            #use ncbi gene id instead
                            near_peak_candidate_gene_id_list.append(
                                oneGeneData.ncbi_gene_id)
                            #2012.3.27 adjust to include the full length of the gene
                            min_start = min(min_start, geneSegKey.span_ls[0])
                            max_stop = max(max_stop, geneSegKey.span_ls[1])
                near_peak_candidate_gene_id_list.sort()
                near_peak_candidate_gene_id_list = [
                    str(gene_id) for gene_id in near_peak_candidate_gene_id_list
                ]

                fileNamePrefixLs = []
                if len(call_method_id_set) > 1:
                    fileNamePrefixLs.append('olp')
                else:
                    call_method_id = call_method_id_set.pop()
                    fileNamePrefixLs.append('onlyCall%s' % (call_method_id))
                if nearCandidateGene:
                    fileNamePrefixLs.append(
                        'cand_%s' % ('_'.join(near_peak_candidate_gene_id_list)))
                else:
                    fileNamePrefixLs.append("nonCand")
                fileNamePrefixLs.append("chr%s_%s_%s" %
                                        (chromosome, min_start, max_stop))
                peak_id_ls_str = [str(peak_id) for peak_id in cc]
                fileNamePrefixLs.append("peak_id_%s" % ('_'.join(peak_id_ls_str)))

                fileNamePrefix = '_'.join(fileNamePrefixLs)
                for phenotype_id in phenotype_method_id_set:
                    data_row = [
                        chromosome, min_start, max_stop, phenotype_id,
                        'pheno_%s_%s' % (phenotype_id, fileNamePrefix)
                    ]
                    writer.writerow(data_row)
                    no_of_output += 1
        sys.stderr.write("%s lines outputted.\n" % (no_of_output))
コード例 #9
0
    def splitOverlappingOfTwoKeys(self,
                                  key1,
                                  key2,
                                  min_reciprocal_overlap=0.000000001):
        """
        Cut two overlapping segment keys into consecutive, non-overlapping
        keys.

        The result, ordered by position, holds up to three keys:
            - the part before the shared region (earlier key only),
            - the shared region, whose parent_cnv_id_ls is the concatenation
              of both parents' lists and whose frequency is the sum of both,
            - the part after the shared region (whichever key reaches
              further).
        Parts owned by a single key reuse that key's parent_cnv_id_ls and
        frequency. Keys on different chromosomes are returned unchanged as
        [key1, key2]. The two keys are assumed to overlap; this is not
        checked.

        2010-8-2
        """
        if key1.chromosome != key2.chromosome:  #no overlapping between two. it's a bug if here is reached.
            return [key1, key2]
        # "first" is the key that starts earlier (key1 on a tie).
        if key1.span_ls[0] > key2.span_ls[0]:
            first, second = key2, key1
        else:
            first, second = key1, key2

        def own_piece(owner, start, stop):
            # segment covered by one key only
            return CNVSegmentBinarySearchTreeKey(
                chromosome=first.chromosome,
                span_ls=[start, stop],
                min_reciprocal_overlap=min_reciprocal_overlap,
                parent_cnv_id_ls=owner.parent_cnv_id_ls,
                frequency=owner.frequency)

        def shared_piece(start, stop):
            # segment covered by both keys
            return CNVSegmentBinarySearchTreeKey(
                chromosome=first.chromosome,
                span_ls=[start, stop],
                min_reciprocal_overlap=min_reciprocal_overlap,
                parent_cnv_id_ls=first.parent_cnv_id_ls +
                second.parent_cnv_id_ls,
                frequency=first.frequency + second.frequency)

        first_end = first.span_ls[1]
        second_end = second.span_ls[1]
        if first_end == second_end:
            first_is_shorter = first_is_longer = False
        elif first_end < second_end:
            first_is_shorter, first_is_longer = True, False
        elif first_end > second_end:
            first_is_shorter, first_is_longer = False, True
        else:  # incomparable ends: nothing is produced
            return []
        same_start = first.span_ls[0] == second.span_ls[0]

        pieces = []
        if not same_start:
            pieces.append(own_piece(first, first.start, second.start - 1))
        if first_is_longer:
            # second lies entirely inside first
            pieces.append(shared_piece(second.start, second.stop))
            pieces.append(own_piece(first, second.stop + 1, first.stop))
        else:
            if same_start:
                shared_start = first.start
            else:
                shared_start = second.start
            pieces.append(shared_piece(shared_start, first.stop))
            if first_is_shorter:
                pieces.append(own_piece(second, first.stop + 1, second.stop))
        return pieces