コード例 #1
0
ファイル: OverlapComplex.py プロジェクト: yhoogstrate/fuma
	def overlay_fusions(self,sparse=True,export_dir=False,args=None):
		"""
		Overlay the datasets, combination by combination, and store the
		overlap found for every combination.

		Each dataset is registered in self.matrix_tmp under its 1-based
		index ("1", "2", ...). For every row produced by
		self.find_combination_table() the two operands named by
		self.create_keys() are compared with CompareFusionsBySpanningGenes.
		The resulting overlap is stored in self.matrix_tmp and its size in
		self.matches_total, both under the combined key (keys[2]).

		sparse     -- when False, and export_dir is set and the format is
		              'extensive', every overlap is additionally written to
		              a CG-junctions file inside export_dir.
		export_dir -- False disables exporting. With the 'list' format it is
		              written to like a file object; with the 'extensive'
		              format it is used as a directory path.
		args       -- settings object; .format is always read and
		              .long_gene_size is read for the 'list' header. It is
		              required in practice despite the None default.

		Returns the result of the last comparison that was executed (the
		value returned by find_overlap()), or None when the combination
		table is empty and no comparison was made.
		"""
		n = len(self.datasets)
		
		self.logger.info("Determining the overlap of fusion genes in "+str(n)+" datasets")
		
		self.matrix_tmp = {}
		for i,dataset in enumerate(self.datasets,1):
			self.matrix_tmp[str(i)] = dataset
		
		is_list = (args.format == "list")
		
		# export_dir defaults to False, hence the explicit comparison
		if(is_list and export_dir != False):
			if args.long_gene_size > 0:
				large_genes = "Spans large gene (>"+str(args.long_gene_size)+"bp)"
			else:
				large_genes = "Spans large gene (feature disabled)"
			
			export_dir.write("Left-genes\tRight-genes\t"+large_genes+"\t"+"\t".join(self.dataset_names)+"\n")
		
		# Both remain None when the combination table yields no rows, so the
		# code after the loop never touches an unbound name.
		matches = None
		r_0 = None
		
		for ri,r in enumerate(self.find_combination_table(n)):
			# First combination of the current row; its length tells whether
			# this is the first (pairwise) row or a later one
			r_0 = self.find_combination_table_r_i(n,ri,0)
			
			# Union of the fourth element of all comparison results of this row
			matches_this_iteration = set()
			
			for c in r:
				keys = self.create_keys(c)
				
				comparison = CompareFusionsBySpanningGenes(self.matrix_tmp[keys[0]],self.matrix_tmp[keys[1]],args)
				matches = comparison.find_overlap()
				matches_this_iteration = matches_this_iteration | matches[3]
				
				if(not sparse and export_dir and args.format == "extensive"):
					matches[0].export_to_CG_Junctions_file(export_dir+"/"+matches[0].name+".CG-junctions.txt")
				
				self.matrix_tmp[keys[2]] = matches[0]
				self.matches_total[keys[2]] = len(matches[0])
			
			# NOTE(review): unlike the header and the final export, this
			# export is not guarded by export_dir - confirm that is intended.
			if(is_list):
				if(len(r_0) > 2):
					# Export the entries of the previous row and release them
					# afterwards to keep memory usage down
					for combination in self.find_combination_table_r(n,ri-1):
						export_key = '.'.join([str(x) for x in combination])
						
						self.matrix_tmp[export_key].export_to_list(export_dir,self.dataset_names,matches_this_iteration,args)
						del(self.matrix_tmp[export_key])
				else:
					# First row: export the input datasets; they are kept
					# because later comparisons still read them
					for export_key in [str(i+1) for i in range(n)]:
						self.matrix_tmp[export_key].export_to_list(export_dir,self.dataset_names,matches_this_iteration,args)
		
		# Finally export the entry of the last row, which has no later
		# iteration to compare against. Skipped when no row was processed.
		if(is_list and export_dir != False and r_0 is not None):
			export_key = '.'.join([str(x) for x in r_0])
			self.matrix_tmp[export_key].export_to_list(export_dir,self.dataset_names,set(),args)
		
		return matches
コード例 #2
0
ファイル: OverlapComplex.py プロジェクト: xflicsu/fuma
    def overlay_fusions(self, sparse=True, export_dir=False, args=None):
        """
        Overlay the datasets, combination by combination, and store the
        overlap found for every combination.

        Each dataset is registered in self.matrix_tmp under its 1-based
        index ("1", "2", ...). For every row produced by
        self.find_combination_table() the two operands named by
        self.create_keys() are compared with CompareFusionsBySpanningGenes.
        The resulting overlap is stored in self.matrix_tmp and its size in
        self.matches_total, both under the combined key (keys[2]).

        sparse     -- when False, and export_dir is set and the format is
                      'extensive', every overlap is additionally written to
                      a CG-junctions file inside export_dir.
        export_dir -- False disables exporting. With the 'list' format it is
                      written to like a file object; with the 'extensive'
                      format it is used as a directory path.
        args       -- settings object; .format is always read and
                      .long_gene_size is read for the 'list' header. It is
                      required in practice despite the None default.

        Returns the result of the last comparison that was executed (the
        value returned by find_overlap()), or None when the combination
        table is empty and no comparison was made.
        """
        n = len(self.datasets)

        self.logger.info("Determining the overlap of fusion genes in " +
                         str(n) + " datasets")

        # Register the input datasets under their 1-based index
        self.matrix_tmp = {}
        for position, dataset in enumerate(self.datasets, 1):
            self.matrix_tmp[str(position)] = dataset

        list_format = args.format == "list"
        # export_dir defaults to False, hence the explicit comparison
        write_list = list_format and export_dir != False

        if write_list:
            if args.long_gene_size > 0:
                large_genes = "Spans large gene (>" + str(
                    args.long_gene_size) + "bp)"
            else:
                large_genes = "Spans large gene (feature disabled)"

            export_dir.write("Left-genes\tRight-genes\t" + large_genes + "\t" +
                             "\t".join(self.dataset_names) + "\n")

        # Both remain None when the combination table yields no rows, so the
        # code after the loop never touches an unbound name.
        matches = None
        r_0 = None

        for ri, row in enumerate(self.find_combination_table(n)):
            # First combination of the current row; its length tells whether
            # this is the first (pairwise) row or a later one
            r_0 = self.find_combination_table_r_i(n, ri, 0)

            # Union of the fourth element of all comparison results of this row
            matches_this_iteration = set()

            for combination in row:
                keys = self.create_keys(combination)

                comparison = CompareFusionsBySpanningGenes(
                    self.matrix_tmp[keys[0]], self.matrix_tmp[keys[1]], args)
                matches = comparison.find_overlap()
                matches_this_iteration = matches_this_iteration | matches[3]

                if not sparse and export_dir and args.format == "extensive":
                    matches[0].export_to_CG_Junctions_file(
                        export_dir + "/" + matches[0].name +
                        ".CG-junctions.txt")

                self.matrix_tmp[keys[2]] = matches[0]
                self.matches_total[keys[2]] = len(matches[0])

            # NOTE(review): unlike the header and the final export, this
            # export is not guarded by export_dir - confirm that is intended.
            if list_format:
                if len(r_0) > 2:
                    # Export the entries of the previous row and release them
                    # afterwards to keep memory usage down
                    for previous in self.find_combination_table_r(n, ri - 1):
                        export_key = '.'.join([str(x) for x in previous])

                        self.matrix_tmp[export_key].export_to_list(
                            export_dir, self.dataset_names,
                            matches_this_iteration, args)
                        del self.matrix_tmp[export_key]
                else:
                    # First row: export the input datasets; they are kept
                    # because later comparisons still read them
                    for export_key in [str(i + 1) for i in range(n)]:
                        self.matrix_tmp[export_key].export_to_list(
                            export_dir, self.dataset_names,
                            matches_this_iteration, args)

        # Finally export the entry of the last row, which has no later
        # iteration to compare against. Skipped when no row was processed.
        if write_list and r_0 is not None:
            export_key = '.'.join([str(x) for x in r_0])
            self.matrix_tmp[export_key].export_to_list(
                export_dir, self.dataset_names, set(), args)

        return matches