def run_iteration(self, iteration, force=False):
    """Run a single cMonkey iteration.

    Keyword arguments:
    iteration -- The iteration number to run
    force     -- Set to true to force recalculations (DEFAULT: False)
    """
    logging.info("Iteration # %d", iteration)
    iteration_result = {'iteration': iteration, 'score_means': {}}

    # row scores; compute_force bypasses any cached results
    if force:
        rscores = self.row_scoring.compute_force(iteration_result)
    else:
        rscores = self.row_scoring.compute(iteration_result)

    start_time = util.current_millis()
    if force:
        cscores = self.column_scoring.compute_force(iteration_result)
    else:
        cscores = self.column_scoring.compute(iteration_result)
    elapsed = util.current_millis() - start_time
    if elapsed > 0.0001:
        logging.debug("computed column_scores in %f s.", elapsed / 1000.0)

    self.membership().update(self.ratios, rscores, cscores,
                             self['num_iterations'], iteration_result)

    mean_net_score = 0.0
    if 'networks' in iteration_result:
        mean_net_score = iteration_result['networks']
    mean_mot_pvalue = "NA"
    if 'motif-pvalue' in iteration_result:
        # build a readable " 'seqtype' = pvalue" listing per sequence type
        # (join avoids the quadratic string concatenation of the old loop)
        mean_mot_pvalues = iteration_result['motif-pvalue']
        mean_mot_pvalue = "".join(" '%s' = %f" % (seqtype, pvalue)
                                  for seqtype, pvalue in mean_mot_pvalues.items())
    logging.debug('mean net = %s | mean mot = %s',
                  str(mean_net_score), mean_mot_pvalue)

    # Reduce I/O: write results to the database only at the configured frequency
    if not self['minimize_io']:
        if iteration == 1 or (iteration % self['result_freq'] == 0):
            self.write_results(iteration_result)

    # This should not be too much writing, so we can keep it OUT of the
    # minimize_io option...?
    if iteration == 1 or (iteration % self['stats_freq'] == 0):
        self.write_stats(iteration_result)

    self.update_iteration(iteration)

    if 'dump_results' in self['debug'] and (iteration == 1 or
                                            (iteration % self['debug_freq'] == 0)):
        # write complete result into a cmresults.tsv
        conn = self.__dbconn()
        path = os.path.join(self['output_dir'],
                            'cmresults-%04d.tsv.bz2' % iteration)
        with bz2.BZ2File(path, 'w') as outfile:
            debug.write_iteration(conn, outfile, iteration,
                                  self['num_clusters'], self['output_dir'])
def run_iteration(self, iteration, force=False):
    """Run a single cMonkey iteration.

    Keyword arguments:
    iteration -- The iteration number to run
    force     -- Set to true to force recalculations (DEFAULT: False)
    """
    logging.info("Iteration # %d", iteration)
    iteration_result = {'iteration': iteration, 'score_means': {}}

    # row scores; compute_force bypasses any cached results
    if force:
        rscores = self.row_scoring.compute_force(iteration_result)
    else:
        rscores = self.row_scoring.compute(iteration_result)

    start_time = util.current_millis()
    if force:
        cscores = self.column_scoring.compute_force(iteration_result)
    else:
        cscores = self.column_scoring.compute(iteration_result)
    elapsed = util.current_millis() - start_time
    if elapsed > 0.0001:
        logging.debug("computed column_scores in %f s.", elapsed / 1000.0)

    self.membership().update(self.ratios, rscores, cscores,
                             self.config_params['num_iterations'],
                             iteration_result)

    mean_net_score = 0.0
    if 'networks' in iteration_result:
        mean_net_score = iteration_result['networks']
    mean_mot_pvalue = "NA"
    if 'motif-pvalue' in iteration_result:
        # build a readable " 'seqtype' = pvalue" listing per sequence type
        # (join avoids the quadratic string concatenation of the old loop)
        mean_mot_pvalues = iteration_result['motif-pvalue']
        mean_mot_pvalue = "".join(" '%s' = %f" % (seqtype, pvalue)
                                  for seqtype, pvalue in mean_mot_pvalues.items())
    logging.debug('mean net = %s | mean mot = %s',
                  str(mean_net_score), mean_mot_pvalue)

    # Reduce I/O: write results to the database only at the configured frequency
    if not self.config_params['minimize_io']:
        if iteration == 1 or (iteration % self.config_params['result_freq'] == 0):
            self.write_results(iteration_result)

    # This should not be too much writing, so we can keep it OUT of the
    # minimize_io option...?
    if iteration == 1 or (iteration % self.config_params['stats_freq'] == 0):
        self.write_stats(iteration_result)

    self.update_iteration(iteration)

    if ('dump_results' in self.config_params['debug'] and
            (iteration == 1 or
             (iteration % self.config_params['debug_freq'] == 0))):
        # write complete result into a cmresults.tsv
        session = self.__dbsession()
        path = os.path.join(self.config_params['output_dir'],
                            'cmresults-%04d.tsv.bz2' % iteration)
        with bz2.BZ2File(path, 'w') as outfile:
            debug.write_iteration(session, outfile, iteration,
                                  self.config_params['num_clusters'],
                                  self.config_params['output_dir'])
def membership(self):
    """Return the cluster membership object, creating and seeding it
    lazily on first access.

    When the 'random_seed' debug flag is set, the freshly seeded state
    is also written out as the iteration-0 snapshot.
    """
    if self.__membership is None:
        logging.debug("creating and seeding memberships")
        self.__membership = self.__make_membership()

        # debug: write seed into an analytical file for iteration 0
        if 'random_seed' in self.config_params['debug']:
            self.write_memberships(0)

            # write complete result into a cmresults.tsv
            path = os.path.join(self.config_params['output_dir'],
                                'cmresults-0000.tsv.bz2')
            with bz2.BZ2File(path, 'w') as outfile:
                # BUG FIX: the session factory must be *called*; previously
                # the bound method itself was passed to write_iteration
                debug.write_iteration(self.__dbsession(), outfile, 0,
                                      self.config_params['num_clusters'],
                                      self.config_params['output_dir'])
    return self.__membership
def membership(self):
    """Lazily build and cache the cluster membership.

    The first call creates and seeds the memberships; if the
    'random_seed' debug flag is set, the seeded state is also dumped
    as the iteration-0 snapshot. Later calls return the cached object.
    """
    if self.__membership is not None:
        return self.__membership

    logging.debug("creating and seeding memberships")
    self.__membership = self.__make_membership()

    # debug: write seed into an analytical file for iteration 0
    if 'random_seed' in self.config_params['debug']:
        self.write_memberships(0)
        # write complete result into a cmresults.tsv
        out_dir = self.config_params['output_dir']
        snapshot_path = os.path.join(out_dir, 'cmresults-0000.tsv.bz2')
        with bz2.BZ2File(snapshot_path, 'w') as outfile:
            debug.write_iteration(self.dbsession(), outfile, 0,
                                  self.config_params['num_clusters'],
                                  out_dir)

    return self.__membership
def run_iterations(self, start_iter=None, num_iter=None):
    """Run the main iteration loop, then optional post-processing.

    Keyword arguments:
    start_iter -- first iteration to run (default: config 'start_iteration')
    num_iter   -- exclusive upper bound (default: 'num_iterations' + 1)
    """
    if start_iter is None:
        start_iter = self['start_iteration']
    if num_iter is None:
        num_iter = self['num_iterations'] + 1

    if self.config_params['interactive']:  # stop here in interactive mode
        return

    for iteration in range(start_iter, num_iter):
        start_time = util.current_millis()
        # on resume, the first iteration must recompute everything
        force = self['resume'] and iteration == start_iter
        self.run_iteration(iteration, force=force)

        # garbage collection after everything in iteration went out of scope
        gc.collect()
        elapsed = util.current_millis() - start_time
        logging.debug("performed iteration %d in %f s.",
                      iteration, elapsed / 1000.0)

        if 'profile_mem' in self['debug'] and (iteration == 1 or
                                               iteration % 100 == 0):
            with open(os.path.join(self['output_dir'], 'memprofile.tsv'),
                      'a') as outfile:
                self.write_mem_profile(outfile, iteration)

    # Run post processing after the last iteration. We store the results
    # in num_iterations + 1 to have a clean separation.
    if self['postadjust']:
        logging.info("Postprocessing: Adjusting the clusters....")
        # run combiner using the weights of the last iteration
        rscores = self.row_scoring.combine_cached(self['num_iterations'])
        rd_scores = memb.get_row_density_scores(self.membership(), rscores)
        logging.info("Recomputed combined + density scores.")
        memb.postadjust(self.membership(), rd_scores)

        BSCM_obj = self.column_scoring.get_BSCM()
        if BSCM_obj is not None:
            # NOTE(review): the return value was previously bound but never
            # used; presumably resplit_clusters mutates the membership
            # in place -- confirm
            BSCM_obj.resplit_clusters(self.membership(), cutoff=0.05)

        logging.info("Adjusted. Now re-run scoring (iteration: %d)",
                     self['num_iterations'])
        iteration_result = {'iteration': self['num_iterations'] + 1,
                            'score_means': {}}
        combined_scores = self.row_scoring.compute_force(iteration_result)

        # write the combined scores for benchmarking/diagnostics
        with open(self.combined_rscores_pickle_path(), 'wb') as outfile:
            pickle.dump(combined_scores, outfile)

        self.write_results(iteration_result)
        self.write_stats(iteration_result)
        self.update_iteration(iteration)

        # default behaviour: always write complete result into a
        # cmresults.tsv for R/cmonkey compatibility
        conn = self.__dbconn()
        path = os.path.join(self['output_dir'], 'cmresults-postproc.tsv.bz2')
        with bz2.BZ2File(path, 'w') as outfile:
            debug.write_iteration(conn, outfile, self['num_iterations'] + 1,
                                  self['num_clusters'], self['output_dir'])
        # TODO: Why is conn never closed? Where does it write to the db?

        # additionally: run tomtom on the motifs if requested
        if (self['MEME']['global_background'] == 'True' and
                self['Postprocessing']['run_tomtom'] == 'True'):
            meme.run_tomtom(conn, self['output_dir'], self['MEME']['version'])

    self.write_finish_info()
    logging.info("Done !!!!")
def run_iterations(self, start_iter=None, num_iter=None):
    """Run the main iteration loop and optional post-processing.

    Keyword arguments:
    start_iter -- first iteration to run (default: config 'start_iteration')
    num_iter   -- exclusive upper bound (default: 'num_iterations' + 1)
    """
    if start_iter is None:
        start_iter = self['start_iteration']
    if num_iter is None:
        num_iter = self['num_iterations'] + 1

    if self.config_params['interactive']:  # stop here in interactive mode
        return

    for iteration in range(start_iter, num_iter):
        start_time = util.current_millis()
        # on resume, the first iteration must recompute everything
        force = self['resume'] and iteration == start_iter
        self.run_iteration(iteration, force=force)

        # garbage collection after everything in iteration went out of scope
        gc.collect()
        elapsed = util.current_millis() - start_time
        logging.debug("performed iteration %d in %f s.",
                      iteration, elapsed / 1000.0)

        if 'profile_mem' in self['debug'] and (iteration == 1 or
                                               iteration % 100 == 0):
            with open(os.path.join(self['output_dir'], 'memprofile.tsv'),
                      'a') as outfile:
                self.write_mem_profile(outfile, iteration)

    # Run post processing after the last iteration. We store the results
    # in num_iterations + 1 to have a clean separation.
    if self['postadjust']:
        logging.info("Postprocessing: Adjusting the clusters....")
        # run combiner using the weights of the last iteration
        rscores = self.row_scoring.combine_cached(self['num_iterations'])
        rd_scores = memb.get_row_density_scores(self.membership(), rscores)
        logging.info("Recomputed combined + density scores.")
        memb.postadjust(self.membership(), rd_scores)

        BSCM_obj = self.column_scoring.get_BSCM()
        if BSCM_obj is not None:
            # NOTE(review): the return value was previously bound but never
            # used; presumably resplit_clusters mutates the membership
            # in place -- confirm
            BSCM_obj.resplit_clusters(self.membership(), cutoff=0.05)

        logging.info("Adjusted. Now re-run scoring (iteration: %d)",
                     self['num_iterations'])
        iteration_result = {'iteration': self['num_iterations'] + 1,
                            'score_means': {}}
        combined_scores = self.row_scoring.compute_force(iteration_result)

        # write the combined scores for benchmarking/diagnostics
        with open(self.combined_rscores_pickle_path(), 'wb') as outfile:
            pickle.dump(combined_scores, outfile)

        self.write_results(iteration_result)
        self.write_stats(iteration_result)
        self.update_iteration(iteration)

        # default behaviour: always write complete result into a
        # cmresults.tsv for R/cmonkey compatibility
        conn = self.__dbconn()
        path = os.path.join(self['output_dir'], 'cmresults-postproc.tsv.bz2')
        with bz2.BZ2File(path, 'w') as outfile:
            debug.write_iteration(conn, outfile, self['num_iterations'] + 1,
                                  self['num_clusters'], self['output_dir'])
        # TODO: Why is conn never closed? Where does it write to the db?

        # additionally: run tomtom on the motifs if requested
        if (self['MEME']['global_background'] == 'True' and
                self['Postprocessing']['run_tomtom'] == 'True'):
            meme.run_tomtom(conn, self['output_dir'], self['MEME']['version'])

    self.write_finish_info()
    logging.info("Done !!!!")