Python CompareFusionsBySpanningGenesの例

プログラミング言語: Python

名前空間/パッケージ名: CompareFusionsBySpanningGenes

hotexamples.comのコード掲載数: 4

Python CompareFusionsBySpanningGenes - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのCompareFusionsBySpanningGenes.CompareFusionsBySpanningGenesの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示非表示

CompareFusionsBySpanningGenes(2)

find_overlap(1)

match_fusions(1)

コード例 #1

ファイルを表示

ファイル: FusionDetectionExperiment.py プロジェクト: xtmgah/fuma

	def remove_duplicates(self,args):
		"""
		Merge duplicate fusions inside this dataset and return the new size.
		
		- Fusions are compared pairwise within every (left, right) bucket
		  of `self.index`.
		- When `match_fusions` returns a truthy result, that result replaces
		  the first fusion and the second one is dropped. The first index is
		  queued again so the merged entry is re-compared against the rest.
		- Fusions without annotated genes on the left or the right side are
		  dropped and counted as non gene-spanning.
		- Finally the dataset is flushed and refilled with the survivors.
		
		Nothing is logged for datasets whose name contains "vs.".
		
		Parameters:
		  args: options object; `args.matching_method` must be "overlap",
		        "subset" or "egm". It is also handed to the comparator.
		
		Returns:
		  len(self) after the duplicates have been removed.
		
		Raises:
		  Exception: when no gene annotations are present on the dataset, or
		             when `args.matching_method` is not a known method.
		"""
		if(not self.genes_spanning_left_junction or not self.genes_spanning_right_junction):
			raise Exception("Gene annotations on dataset '"+self.name+"' were not found")
		
		old_count = len(self)
		if("vs." not in self.name):
			self.logger.info("Duplication removal: "+self.name+" ("+str(old_count)+" fusions)")
		
		if(args.matching_method not in ["overlap","subset","egm"]):
			raise Exception("Unknown overlap method for removing duplicates: '"+args.matching_method+"' for dataset "+self.name)
		
		# Function-local import: only reached once the method is validated
		from CompareFusionsBySpanningGenes import CompareFusionsBySpanningGenes
		overlap = CompareFusionsBySpanningGenes(False,False,args)
		
		stats_non_gene_spanning = 0
		fusions_to_add = []
		
		for _, right_index in self.index.items():
			for _, all_fusions in right_index.items():
				n = len(all_fusions)
				
				# Slots are set to False instead of being removed, so the
				# indices stay valid while the list is being processed
				queue = list(range(n))
				while(queue):
					duplicates = []
					for i in queue:
						fusion_1 = all_fusions[i]
						if(not fusion_1):
							continue
						
						if(len(fusion_1.get_annotated_genes_left()) == 0 or len(fusion_1.get_annotated_genes_right()) == 0):
							stats_non_gene_spanning += 1
							all_fusions[i] = False
							continue
						
						is_duplicate = False
						for j in range(i+1,n):
							fusion_2 = all_fusions[j]
							if(not fusion_2):
								continue
							
							# NOTE(review): meaning of the third argument (False)
							# is defined by match_fusions - confirm there
							match = overlap.match_fusions(fusion_1,fusion_2,False)
							if(match):
								fusion_1 = match
								all_fusions[i] = match
								all_fusions[j] = False
								is_duplicate = True
						
						# A merged entry may match fusions it did not match
						# before, so it gets another round
						if(is_duplicate):
							duplicates.append(i)
					queue = duplicates
				
				fusions_to_add.extend([fusion for fusion in all_fusions if fusion])
		
		self.flush()
		for fusion in fusions_to_add:
			self.add_fusion(fusion)
		
		if("vs." not in self.name):
			self.logger.info("* Full: "+str(old_count))
			self.logger.info("* Gene-spanning: "+str(old_count-stats_non_gene_spanning))
			self.logger.info("* Unique: "+str(len(self)))
		
		return len(self)

コード例 #2

ファイルを表示

ファイル: OverlapComplex.py プロジェクト: xflicsu/fuma

    def overlay_fusions(self, sparse=True, export_dir=False, args=None):
        """
        Overlay the datasets combination by combination and collect the
        matched fusions.

        Every dataset is stored in `self.matrix_tmp` under its 1-based
        index (as a string). For each combination yielded by
        `find_combination_table`, the two inputs named by `create_keys`
        are compared with `CompareFusionsBySpanningGenes.find_overlap()`.
        The first element of its result is stored in `self.matrix_tmp`
        and its length in `self.matches_total`, both under the third key.

        The SPARSE variable should only be True if the output format
        is 'summary', because all the overlap objects are removed.
        This makes the algorithm much more efficient (reduces space
        complexity from 0.5(n^2) => 2n).

        Parameters:
          sparse:     when False and `export_dir` is set, matches are
                      written as CG-junctions files in "extensive" format.
          export_dir: False to disable exporting. For format "list" it is
                      used as a writable handle (`.write` is called on it);
                      for format "extensive" it is used as a directory
                      path prefix.
          args:       options object; `args.format` is always read, so it
                      must be supplied despite the None default.
                      `args.long_gene_size` is read for format "list".

        Returns:
          The result of the last `find_overlap()` call, or None when the
          combination table yielded no comparison at all.
        """
        n = len(self.datasets)

        self.logger.info("Determining the overlap of fusion genes in " +
                         str(n) + " datasets")

        self.matrix_tmp = {}
        for i, dataset in enumerate(self.datasets):
            self.matrix_tmp[str(i + 1)] = dataset

        if (args.format == "list" and export_dir != False):
            if args.long_gene_size > 0:
                large_genes = "Spans large gene (>" + str(
                    args.long_gene_size) + "bp)"
            else:
                large_genes = "Spans large gene (feature disabled)"

            export_dir.write("Left-genes\tRight-genes\t" + large_genes + "\t" +
                             "\t".join(self.dataset_names) + "\n")

        # Both stay None when the combination table is empty, so the
        # code after the loop never touches an unbound name
        matches = None
        r_0 = None

        for ri, r in enumerate(self.find_combination_table(n)):
            r_0 = self.find_combination_table_r_i(n, ri, 0)
            matches_this_iteration = set()

            # Run the analysis for every combination of this round
            for c in r:
                keys = self.create_keys(c)

                comparison = CompareFusionsBySpanningGenes(
                    self.matrix_tmp[keys[0]], self.matrix_tmp[keys[1]], args)
                matches = comparison.find_overlap()
                matches_this_iteration = matches_this_iteration | matches[3]

                if (not sparse and export_dir
                        and args.format == "extensive"):
                    matches[0].export_to_CG_Junctions_file(
                        export_dir + "/" + matches[0].name +
                        ".CG-junctions.txt")

                self.matrix_tmp[keys[2]] = matches[0]
                self.matches_total[keys[2]] = len(matches[0])

            # Write those that are not marked to go to the next iteration
            # to a file
            if (args.format == "list"):
                if (len(r_0) > 2):
                    # Entries of the previous round are exported and then
                    # released from the temporary matrix
                    for export_key in self.find_combination_table_r(
                            n, ri - 1):
                        export_key = '.'.join([str(x) for x in export_key])

                        self.matrix_tmp[export_key].export_to_list(
                            export_dir, self.dataset_names,
                            matches_this_iteration, args)
                        del self.matrix_tmp[export_key]
                else:
                    # The input datasets themselves are exported but kept
                    for export_key in [str(i + 1) for i in range(n)]:
                        self.matrix_tmp[export_key].export_to_list(
                            export_dir, self.dataset_names,
                            matches_this_iteration, args)

        # Export the entry addressed by the last round's first combination
        if (args.format == "list" and export_dir != False
                and r_0 is not None):
            export_key = '.'.join([str(x) for x in r_0])
            self.matrix_tmp[export_key].export_to_list(
                export_dir, self.dataset_names, set(), args)

        return matches

コード例 #3

ファイルを表示

ファイル: OverlapComplex.py プロジェクト: yhoogstrate/fuma

	def overlay_fusions(self,sparse=True,export_dir=False,args=None):
		"""
		Overlay the datasets combination by combination and collect the
		matched fusions.
		
		Every dataset is stored in `self.matrix_tmp` under its 1-based
		index (as a string). For each combination yielded by
		`find_combination_table`, the two inputs named by `create_keys`
		are compared with `CompareFusionsBySpanningGenes.find_overlap()`.
		The first element of its result is stored in `self.matrix_tmp`
		and its length in `self.matches_total`, both under the third key.
		
		The SPARSE variable should only be True if the output format
		is 'summary', because all the overlap objects are removed.
		This makes the algorithm much more efficient (reduces space
		complexity from 0.5(n^2) => 2n).
		
		Parameters:
		  sparse:     when False and `export_dir` is set, matches are
		              written as CG-junctions files in "extensive" format.
		  export_dir: False to disable exporting. For format "list" it is
		              used as a writable handle (`.write` is called on it);
		              for format "extensive" it is used as a directory
		              path prefix.
		  args:       options object; `args.format` is always read, so it
		              must be supplied despite the None default.
		              `args.long_gene_size` is read for format "list".
		
		Returns:
		  The result of the last `find_overlap()` call, or None when the
		  combination table yielded no comparison at all.
		"""
		n = len(self.datasets)
		
		self.logger.info("Determining the overlap of fusion genes in "+str(n)+" datasets")
		
		self.matrix_tmp = {}
		for i,dataset in enumerate(self.datasets):
			self.matrix_tmp[str(i+1)] = dataset
		
		if(args.format=="list" and export_dir != False):
			if args.long_gene_size > 0:
				large_genes = "Spans large gene (>"+str(args.long_gene_size)+"bp)"
			else:
				large_genes = "Spans large gene (feature disabled)"
			
			export_dir.write("Left-genes\tRight-genes\t"+large_genes+"\t"+"\t".join(self.dataset_names)+"\n")
		
		# Both stay None when the combination table is empty, so the
		# code after the loop never touches an unbound name
		matches = None
		r_0 = None
		
		for ri,r in enumerate(self.find_combination_table(n)):
			r_0 = self.find_combination_table_r_i(n,ri,0)
			matches_this_iteration = set()
			
			# Run the analysis for every combination of this round
			for c in r:
				keys = self.create_keys(c)
				
				comparison = CompareFusionsBySpanningGenes(self.matrix_tmp[keys[0]],self.matrix_tmp[keys[1]],args)
				matches = comparison.find_overlap()
				matches_this_iteration = matches_this_iteration | matches[3]
				
				if(not sparse and export_dir and args.format=="extensive"):
					matches[0].export_to_CG_Junctions_file(export_dir+"/"+matches[0].name+".CG-junctions.txt")
				
				self.matrix_tmp[keys[2]] = matches[0]
				self.matches_total[keys[2]] = len(matches[0])
			
			# Write those that are not marked to go to the next iteration to a file
			if(args.format=="list"):
				if(len(r_0) > 2):
					# Entries of the previous round are exported and then
					# released from the temporary matrix
					for export_key in self.find_combination_table_r(n,ri-1):
						export_key = '.'.join([str(x) for x in export_key])
						
						self.matrix_tmp[export_key].export_to_list(export_dir,self.dataset_names,matches_this_iteration,args)
						del self.matrix_tmp[export_key]
				else:
					# The input datasets themselves are exported but kept
					for export_key in [str(i+1) for i in range(n)]:
						self.matrix_tmp[export_key].export_to_list(export_dir,self.dataset_names,matches_this_iteration,args)
		
		# Export the entry addressed by the last round's first combination
		if(args.format == "list" and export_dir != False and r_0 is not None):
			export_key = '.'.join([str(x) for x in r_0])
			self.matrix_tmp[export_key].export_to_list(export_dir,self.dataset_names,set(),args)
		
		return matches

コード例 #4

ファイルを表示

ファイル: FusionDetectionExperiment.py プロジェクト: xflicsu/fuma

    def remove_duplicates(self, args):
        """
        Merge duplicate fusions inside this dataset and return the new size.

        - Fusions are compared pairwise within every (left, right) bucket
          of `self.index`.
        - When `match_fusions` returns a truthy result, the first fusion is
          updated in place: its `matches` are combined (`|`) with those of
          the second fusion, and its direction, strands and annotated genes
          are taken from the match result. The second fusion is dropped
          and the temporary match object is released through
          `prepare_deletion()`.
        - A merged fusion is queued again so it is re-compared against the
          remaining ones.
        - Fusions without annotated genes on the left or the right side are
          dropped and counted as non gene-spanning.
        - Finally the dataset is flushed and refilled with the survivors.

        Nothing is logged for datasets whose name contains "vs.".

        Parameters:
          args: options object; `args.matching_method` must be "overlap",
                "subset" or "egm". It is also handed to the comparator.

        Returns:
          len(self) after the duplicates have been removed.

        Raises:
          Exception: when no gene annotations are present on the dataset, or
                     when `args.matching_method` is not a known method.
        """
        if (not self.genes_spanning_left_junction
                or not self.genes_spanning_right_junction):
            raise Exception("Gene annotations on dataset '" + self.name +
                            "' were not found")

        old_count = len(self)
        if ("vs." not in self.name):
            self.logger.info("Duplication removal: " + self.name + " (" +
                             str(old_count) + " fusions)")

        if (args.matching_method not in ["overlap", "subset", "egm"]):
            raise Exception(
                "Unknown overlap method for removing duplicates: '" +
                args.matching_method + "' for dataset " + self.name)

        # Function-local import: only reached once the method is validated
        from CompareFusionsBySpanningGenes import CompareFusionsBySpanningGenes
        overlap = CompareFusionsBySpanningGenes(False, False, args)

        stats_non_gene_spanning = 0
        fusions_to_add = []

        for _, right_index in self.index.items():
            for _, all_fusions in right_index.items():
                n = len(all_fusions)

                # Slots are set to False instead of being removed, so the
                # indices stay valid while the list is being processed
                queue = list(range(n))
                while (queue):
                    duplicates = []
                    for i in queue:
                        fusion_1 = all_fusions[i]
                        if (not fusion_1):
                            continue

                        # NOTE(review): meaning of the False argument is
                        # defined by the getters - confirm there
                        if (len(fusion_1.get_annotated_genes_left(False)) == 0
                                or len(fusion_1.get_annotated_genes_right(
                                    False)) == 0):
                            stats_non_gene_spanning += 1
                            all_fusions[i] = False
                            continue

                        is_duplicate = False
                        for j in range(i + 1, n):
                            fusion_2 = all_fusions[j]
                            if (not fusion_2):
                                continue

                            match = overlap.match_fusions(
                                fusion_1, fusion_2, False)
                            if (not match):
                                continue

                            # Fold the match result into fusion_1 so the
                            # existing object survives the merge
                            fusion_1.matches = (fusion_1.matches
                                                | fusion_2.matches)
                            fusion_1.acceptor_donor_direction = match.acceptor_donor_direction
                            fusion_1.left_strand = match.left_strand
                            fusion_1.right_strand = match.right_strand
                            fusion_1.annotated_genes_left = match.annotated_genes_left
                            fusion_1.annotated_genes_right = match.annotated_genes_right

                            all_fusions[i] = fusion_1
                            all_fusions[j] = False
                            is_duplicate = True

                            # The temporary match object is no longer needed
                            match.prepare_deletion()
                            del match

                        # A merged entry may match fusions it did not match
                        # before, so it gets another round
                        if (is_duplicate):
                            duplicates.append(i)
                    queue = duplicates

                fusions_to_add.extend(
                    [fusion for fusion in all_fusions if fusion])

        self.flush()
        for fusion in fusions_to_add:
            self.add_fusion(fusion)

        if ("vs." not in self.name):
            self.logger.debug("* Full: " + str(old_count))
            self.logger.debug("* Gene-spanning: " +
                              str(old_count - stats_non_gene_spanning))
            self.logger.debug("* Unique: " + str(len(self)))

        return len(self)