def run(self):
    """Run the gene-level analysis and, when a mapping is given, the pathway one.

    Steps, in order:

    1. Load the optional label filter named by ``self.args.filter``.
    2. Load the data matrix from ``self.args.data_path``.
    3. Resolve ``self.args.slices`` (comma separated slice names) into
       slice indices; all slices are used when it is ``None``.
    4. Optionally save the raw data of every selected slice.
    5. Analyse the genes using a one-to-one mapping of the rows accepted
       by the filter, then combine and save the results.
    6. If ``self.args.mapping`` is set, load it and repeat the analysis
       at pathway level.

    Raises:
        Exception: if a requested slice name is not present in the matrix.
    """
    Command.run(self)

    # Load filter
    self.filter = LabelFilter()
    if self.args.filter is not None:
        self.log.info("Loading filter ...")
        self.log.debug(" > {0}".format(self.args.filter))
        self.filter.load(self.args.filter)
        self.log.debug(" {0} includes, {1} excludes".format(
            self.filter.include_count, self.filter.exclude_count))

    # Load data
    self.log.info("Loading data ...")
    self.log.debug(" > {0}".format(self.args.data_path))
    #TODO: Support loading plain matrices: /file.tsv#slice=SIFT
    self.matrix = tdm.load_matrix(self.args.data_path)
    self.log.debug(" {0} rows, {1} columns and {2} slices".format(
        self.matrix.num_rows, self.matrix.num_cols, self.matrix.num_slices))

    # Get selected slice indices
    if self.args.slices is not None:
        slices = []
        for name in self.args.slices.split(","):
            name = name.strip()
            if name not in self.matrix.slice_name_index:
                raise Exception("Slice not found: {0}".format(name))
            slices.append(self.matrix.slice_name_index[name])
    else:
        # Materialised as a list so that both branches hand the same
        # type to the analysis and to the save helpers.
        slices = list(range(self.matrix.num_slices))

    col_names = [self.matrix.slice_names[i] for i in slices]

    if self.args.save_data:
        for i in slices:
            slice_name = self.matrix.slice_names[i]
            self.log.info("Saving {0} data matrix ...".format(slice_name))
            self.save_matrix(
                self.args.output_path, self.args.analysis_name,
                self.args.output_format,
                self.matrix.row_names, self.matrix.col_names,
                self.matrix.data[i],
                suffix="data-{0}".format(slice_name))

    # GENES ---------------------------------------

    # One to one mapping for genes: every row accepted by the filter
    # becomes a group that contains only itself.
    row_groups = {}
    for row_name in self.matrix.row_names:
        if self.filter.valid(row_name):
            row_groups[row_name] = (row_name,)

    genes_mapping = MatrixMapping(self.matrix, row_groups)
    genes_method_name = "{0}-{1}".format(
        self.args.estimator, EmpiricalTest.NAME)

    self._analyse_level(
        "genes", genes_mapping, genes_method_name,
        self.args.mut_gene_threshold, slices, col_names)

    if self.args.mapping is None:
        return

    # PATHWAYS ---------------------------------------

    # Load pathways mapping
    self.log.info("Loading pathways mapping ...")
    self.log.debug(" > {0}".format(self.args.mapping))
    pathways_mapping = self.load_mapping(self.matrix, self.args.mapping)
    self.log.debug(" {0} pathways".format(pathways_mapping.num_groups))

    pathways_method_name = "{0}-{1}".format(
        self.args.estimator, ZscoreTest.NAME)

    self._analyse_level(
        "pathways", pathways_mapping, pathways_method_name,
        self.args.mut_pathway_threshold, slices, col_names)


def _analyse_level(self, level, mapping, method_name, mut_threshold,
                   slices, col_names):
    """Analyse one feature level, then combine and save its results.

    Holds the sequence that ``run`` applies identically to genes and to
    pathways: compute the per-slice results, optionally save them split
    by slice, combine them across slices and save the combined matrix.

    Parameters:
        level: "genes" or "pathways". Used in the log messages, in the
            analysis name ("oncodrivefm.<level>") and as output suffix.
        mapping: mapping object handed to the analysis; its
            ``group_names`` label the rows of the combined output.
        method_name: name given to ``analysis.compute`` and to
            ``create_method``.
        mut_threshold: forwarded to ``OncodriveFmAnalysis`` as
            ``mut_threshold``.
        slices: indices of the slices to analyse.
        col_names: names of those slices, stored in the "slices" output
            parameter of the combined results.
    """
    self.log.info("Analysing {0} with '{1}' ...".format(level, method_name))

    analysis = OncodriveFmAnalysis(
        "oncodrivefm.{0}".format(level),
        num_samplings=self.args.num_samplings,
        mut_threshold=mut_threshold,
        num_cores=self.args.num_cores)

    results = analysis.compute(self.matrix, mapping, method_name, slices)

    method = create_method(method_name)

    if self.args.save_analysis:
        self.log.info("Saving {0} analysis results ...".format(level))
        self.save_splited_results(
            self.args.output_path, self.args.analysis_name,
            self.args.output_format,
            self.matrix, mapping,
            method, results, slices, suffix=level)

    # Combination: invalid (NaN/inf) entries are masked before combining.
    self.log.info("Combining analysis results ...")
    combined_results = method.combine(np.ma.masked_invalid(results.T))

    self.log.info("Saving {0} combined results ...".format(level))
    self.save_matrix(
        self.args.output_path, self.args.analysis_name,
        self.args.output_format,
        mapping.group_names, method.combination_columns,
        combined_results.T,
        params=[("slices", ",".join(col_names)), ("method", method.name)],
        suffix=level,
        # A row is only valid when none of its values is NaN.
        valid_row=lambda row: not any(np.isnan(v) for v in row))
def run(self):
    """Run a single analysis pass and save its per-slice results.

    Steps, in order:

    1. Load the data matrix from ``self.args.data_path``.
    2. Load the optional label filter named by ``self.args.filter``.
    3. Build the feature mapping: the file named by ``self.args.mapping``
       when given (analysed with the ``ZscoreTest`` method), otherwise a
       one-to-one mapping of the rows accepted by the filter (analysed
       with the ``EmpiricalTest`` method).
    4. Resolve ``self.args.slices`` (comma separated slice names) into
       slice indices; unknown names are logged and skipped, and all
       slices are used when it is ``None``.
    5. Optionally save the raw data of every selected slice.
    6. Run the analysis and save the results split by slice.
    """
    Command.run(self)

    # Load data
    self.log.info("Loading data ...")
    self.log.debug(" > {0}".format(self.args.data_path))
    #TODO: Support loading plain matrices: /file.tsv#name=SIFT
    self.matrix = tdm.load_matrix(self.args.data_path)
    self.log.debug(" {0} rows, {1} columns and {2} slices".format(
        self.matrix.num_rows, self.matrix.num_cols, self.matrix.num_slices))

    # Load filter
    self.filter = LabelFilter()
    if self.args.filter is not None:
        self.log.info("Loading filter ...")
        self.log.debug(" > {0}".format(self.args.filter))
        self.filter.load(self.args.filter)
        self.log.debug(" {0} includes, {1} excludes".format(
            self.filter.include_count, self.filter.exclude_count))

    # Load mapping
    if self.args.mapping is not None:
        self.log.info("Loading mapping ...")
        self.log.debug(" > {0}".format(self.args.mapping))
        self.mapping = self.load_mapping(
            self.matrix, self.args.mapping, self.filter)
        self.log.debug(" {0} features".format(self.mapping.num_groups))

        method_name = "{0}-{1}".format(self.args.estimator, ZscoreTest.NAME)
    else:
        # One to one mapping: every row accepted by the filter becomes a
        # group that contains only itself.
        row_groups = {}
        for row_name in self.matrix.row_names:
            if self.filter.valid(row_name):
                row_groups[row_name] = (row_name,)
        self.mapping = MatrixMapping(self.matrix, row_groups)

        method_name = "{0}-{1}".format(
            self.args.estimator, EmpiricalTest.NAME)

    # Get selected slice indices
    if self.args.slices is not None:
        slices = []
        for name in self.args.slices.split(","):
            name = name.strip()
            if name not in self.matrix.slice_name_index:
                # `warning` replaces the deprecated `warn` alias.
                self.log.warning(
                    "Skipping slice not found: {0}".format(name))
                continue
            slices.append(self.matrix.slice_name_index[name])
        # NOTE(review): if none of the requested names exist, `slices`
        # is empty here and the analysis runs on no slices -- confirm
        # that this is the intended best-effort behaviour.
    else:
        # Materialised as a list so that both branches hand the same
        # type to the analysis and to the save helpers.
        slices = list(range(self.matrix.num_slices))

    col_names = [self.matrix.slice_names[i] for i in slices]

    if self.args.save_data:
        for i in slices:
            slice_name = self.matrix.slice_names[i]
            self.log.info("Saving {0} data matrix ...".format(slice_name))
            self.save_matrix(
                self.args.output_path, self.args.analysis_name,
                self.args.output_format,
                self.matrix.row_names, self.matrix.col_names,
                self.matrix.data[i],
                suffix="data-{0}".format(slice_name))

    # Run the analysis
    self.log.info("Running the analysing using '{0}' ...".format(method_name))

    analysis = OncodriveFmAnalysis(
        "oncodrivefm.compute",
        num_samplings=self.args.num_samplings,
        mut_threshold=self.args.mut_threshold,
        num_cores=self.args.num_cores)

    results = analysis.compute(self.matrix, self.mapping, method_name, slices)

    method = create_method(method_name)

    self.log.info("Saving results ...")

    #TODO: Have an option to save in TDM instead of splited
    self.save_splited_results(
        self.args.output_path, self.args.analysis_name,
        self.args.output_format,
        self.matrix, self.mapping,
        method, results, slices)