Example #1
    def run_iteration(self, iteration, force=False):
        """Run a single cMonkey iteration

             Keyword arguments:
             iteration -- The iteration number to run
             force     -- Set to True to force recalculations (default: False)
        """
        logging.info("Iteration # %d", iteration)
        iteration_result = {'iteration': iteration, 'score_means': {}}
        if force:
            rscores = self.row_scoring.compute_force(iteration_result)
        else:
            rscores = self.row_scoring.compute(iteration_result)
        start_time = util.current_millis()

        if force:
            cscores = self.column_scoring.compute_force(iteration_result)
        else:
            cscores = self.column_scoring.compute(iteration_result)

        elapsed = util.current_millis() - start_time
        if elapsed > 0.0001:
            logging.debug("computed column_scores in %f s.", elapsed / 1000.0)

        self.membership().update(self.ratios, rscores, cscores,
                                 self['num_iterations'], iteration_result)

        mean_net_score = 0.0
        mean_mot_pvalue = "NA"
        if 'networks' in iteration_result:
            mean_net_score = iteration_result['networks']
        if 'motif-pvalue' in iteration_result:
            mean_mot_pvalue = ""
            mean_mot_pvalues = iteration_result['motif-pvalue']
            for seqtype in mean_mot_pvalues:
                mean_mot_pvalue += " '%s' = %f" % (seqtype, mean_mot_pvalues[seqtype])

        logging.debug('mean net = %s | mean mot = %s', str(mean_net_score), mean_mot_pvalue)

        # Reduce I/O: results are only written to the database when minimize_io is not set
        if not self['minimize_io']:
            if iteration == 1 or (iteration % self['result_freq'] == 0):
                self.write_results(iteration_result)

        # This should not be too much writing, so it can stay outside the minimize_io option
        if iteration == 1 or (iteration % self['stats_freq'] == 0):
            self.write_stats(iteration_result)
            self.update_iteration(iteration)

        if 'dump_results' in self['debug'] and (iteration == 1 or
                                                (iteration % self['debug_freq'] == 0)):
            # write complete result into a cmresults.tsv
            conn = self.__dbconn()
            path = os.path.join(self['output_dir'], 'cmresults-%04d.tsv.bz2' % iteration)
            with bz2.BZ2File(path, 'w') as outfile:
                debug.write_iteration(conn, outfile, iteration,
                                      self['num_clusters'], self['output_dir'])
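
The force/compute dispatch above is repeated verbatim for the row and the column scoring function. A minimal sketch, not taken from the cMonkey sources (the helper name compute_scores is invented), of how that dispatch could be factored out, assuming the scoring objects expose compute() and compute_force() as shown:

    def compute_scores(scoring_fn, iteration_result, force=False):
        """Dispatch to compute_force() when a recalculation is forced,
        otherwise to the regular compute()."""
        if force:
            return scoring_fn.compute_force(iteration_result)
        return scoring_fn.compute(iteration_result)

    # hypothetical usage inside run_iteration:
    #   rscores = compute_scores(self.row_scoring, iteration_result, force)
    #   cscores = compute_scores(self.column_scoring, iteration_result, force)
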
Example #2
    def run_iteration(self, iteration, force=False):
        """Run a single cMonkey iteration

             Keyword arguments:
             iteration -- The iteration number to run
             force     -- Set to True to force recalculations (default: False)
        """
        logging.info("Iteration # %d", iteration)
        iteration_result = {'iteration': iteration, 'score_means': {}}
        if force:
            rscores = self.row_scoring.compute_force(iteration_result)
        else:
            rscores = self.row_scoring.compute(iteration_result)
        start_time = util.current_millis()

        if force:
            cscores = self.column_scoring.compute_force(iteration_result)
        else:
            cscores = self.column_scoring.compute(iteration_result)

        elapsed = util.current_millis() - start_time
        if elapsed > 0.0001:
            logging.debug("computed column_scores in %f s.", elapsed / 1000.0)

        self.membership().update(self.ratios, rscores, cscores,
                                 self.config_params['num_iterations'], iteration_result)

        mean_net_score = 0.0
        mean_mot_pvalue = "NA"
        if 'networks' in iteration_result:
            mean_net_score = iteration_result['networks']
        if 'motif-pvalue' in iteration_result:
            mean_mot_pvalue = ""
            mean_mot_pvalues = iteration_result['motif-pvalue']
            for seqtype in mean_mot_pvalues:
                mean_mot_pvalue += " '%s' = %f" % (seqtype, mean_mot_pvalues[seqtype])

        logging.debug('mean net = %s | mean mot = %s', str(mean_net_score), mean_mot_pvalue)

        # Reduce I/O: results are only written to the database when minimize_io is not set
        if not self.config_params['minimize_io']:
            if iteration == 1 or (iteration % self.config_params['result_freq'] == 0):
                self.write_results(iteration_result)

        # This should not be too much writing, so it can stay outside the minimize_io option
        if iteration == 1 or (iteration % self.config_params['stats_freq'] == 0):
            self.write_stats(iteration_result)
            self.update_iteration(iteration)

        if 'dump_results' in self.config_params['debug'] and (iteration == 1 or
                                                              (iteration % self.config_params['debug_freq'] == 0)):
            # write complete result into a cmresults.tsv
            session = self.__dbsession()
            path = os.path.join(self.config_params['output_dir'], 'cmresults-%04d.tsv.bz2' % iteration)
            with bz2.BZ2File(path, 'w') as outfile:
                debug.write_iteration(session, outfile, iteration,
                                      self.config_params['num_clusters'], self.config_params['output_dir'])
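
Example #1 indexes the pipeline object directly (self['num_iterations']) while Example #2 reads self.config_params; both spellings coexist if __getitem__ delegates to the config dictionary. A minimal sketch of that delegation, assuming config_params is a plain dict (the class name is hypothetical):

    class ConfigDelegate:
        """Allow obj['key'] as shorthand for obj.config_params['key']."""
        def __init__(self, config_params):
            self.config_params = config_params

        def __getitem__(self, key):
            return self.config_params[key]
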
Example #3
    def membership(self):
        if self.__membership is None:
            logging.debug("creating and seeding memberships")
            self.__membership = self.__make_membership()

            # debug: write seed into an analytical file for iteration 0
            if 'random_seed' in self.config_params['debug']:
                self.write_memberships(0)
                # write complete result into a cmresults.tsv
                path = os.path.join(self.config_params['output_dir'], 'cmresults-0000.tsv.bz2')
                with bz2.BZ2File(path, 'w') as outfile:
                    debug.write_iteration(self.__dbsession, outfile, 0,
                                          self.config_params['num_clusters'], self.config_params['output_dir'])

        return self.__membership
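
membership() builds and seeds the memberships only on first access and returns the cached object afterwards. A generic, self-contained sketch of that lazy-initialization pattern, with __make_membership standing in for the real (not shown) factory method:

    class MembershipHolder:
        def __init__(self):
            self.__membership = None  # created on first access

        def membership(self):
            if self.__membership is None:
                self.__membership = self.__make_membership()
            return self.__membership

        def __make_membership(self):
            # placeholder for the real seeding logic
            return {}
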
Example #4
    def membership(self):
        if self.__membership is None:
            logging.debug("creating and seeding memberships")
            self.__membership = self.__make_membership()

            # debug: write seed into an analytical file for iteration 0
            if 'random_seed' in self.config_params['debug']:
                self.write_memberships(0)
                # write complete result into a cmresults.tsv
                path = os.path.join(self.config_params['output_dir'], 'cmresults-0000.tsv.bz2')
                with bz2.BZ2File(path, 'w') as outfile:
                    debug.write_iteration(self.dbsession(), outfile, 0,
                                          self.config_params['num_clusters'], self.config_params['output_dir'])

        return self.__membership
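
Examples #3 and #4 differ only in how the database session reaches debug.write_iteration: the private __dbsession attribute versus a dbsession() accessor. A hedged sketch (the SessionHolder class and make_session factory are invented) of an accessor that creates the session lazily and reuses it:

    class SessionHolder:
        def __init__(self, make_session):
            self.__session = None
            self.__make_session = make_session  # e.g. a sessionmaker-style factory

        def dbsession(self):
            """Return the shared database session, creating it on first use."""
            if self.__session is None:
                self.__session = self.__make_session()
            return self.__session
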
Example #5
    def run_iterations(self, start_iter=None, num_iter=None):
        if start_iter is None:
            start_iter = self['start_iteration']
        if num_iter is None:
            num_iter = self['num_iterations'] + 1

        if self.config_params['interactive']:  # stop here in interactive mode
            return

        for iteration in range(start_iter, num_iter):
            start_time = util.current_millis()
            force = self['resume'] and iteration == start_iter
            self.run_iteration(iteration, force=force)

            # garbage collection after everything in iteration went out of scope
            gc.collect()
            elapsed = util.current_millis() - start_time
            logging.debug("performed iteration %d in %f s.", iteration, elapsed / 1000.0)

            if 'profile_mem' in self['debug'] and (iteration == 1 or iteration % 100 == 0):
                with open(os.path.join(self['output_dir'], 'memprofile.tsv'), 'a') as outfile:
                    self.write_mem_profile(outfile, iteration)

        # Run post-processing after the last iteration. We store the results
        # under num_iterations + 1 to have a clean separation.
        if self['postadjust']:
            logging.info("Postprocessing: Adjusting the clusters....")
            # run combiner using the weights of the last iteration

            rscores = self.row_scoring.combine_cached(self['num_iterations'])
            rd_scores = memb.get_row_density_scores(self.membership(), rscores)
            logging.info("Recomputed combined + density scores.")
            memb.postadjust(self.membership(), rd_scores)

            BSCM_obj = self.column_scoring.get_BSCM()
            if BSCM_obj is not None:
                new_membership = BSCM_obj.resplit_clusters(self.membership(), cutoff=0.05)

            logging.info("Adjusted. Now re-run scoring (iteration: %d)",
                         self['num_iterations'])
            iteration_result = {'iteration': self['num_iterations'] + 1,
                                'score_means': {}}

            combined_scores = self.row_scoring.compute_force(iteration_result)

            # write the combined scores for benchmarking/diagnostics
            with open(self.combined_rscores_pickle_path(), 'wb') as outfile:
                pickle.dump(combined_scores, outfile)

            self.write_results(iteration_result)
            self.write_stats(iteration_result)
            self.update_iteration(iteration)

            # default behaviour:
            # always write complete result into a cmresults.tsv for R/cmonkey
            # compatibility
            conn = self.__dbconn()
            path = os.path.join(self['output_dir'], 'cmresults-postproc.tsv.bz2')
            with bz2.BZ2File(path, 'w') as outfile:
                debug.write_iteration(conn, outfile,
                                      self['num_iterations'] + 1,
                                      self['num_clusters'], self['output_dir'])
            # TODO: Why is conn never closed?  Where does it write to the db?

            # additionally: run tomtom on the motifs if requested
            if (self['MEME']['global_background'] == 'True' and
                self['Postprocessing']['run_tomtom'] == 'True'):
                meme.run_tomtom(conn, self['output_dir'], self['MEME']['version'])

        self.write_finish_info()
        logging.info("Done !!!!")
Example #6
    def run_iterations(self, start_iter=None, num_iter=None):
        if start_iter is None:
            start_iter = self['start_iteration']
        if num_iter is None:
            num_iter = self['num_iterations'] + 1

        if self.config_params['interactive']:  # stop here in interactive mode
            return

        for iteration in range(start_iter, num_iter):
            start_time = util.current_millis()
            force = self['resume'] and iteration == start_iter
            self.run_iteration(iteration, force=force)

            # garbage collection after everything in iteration went out of scope
            gc.collect()
            elapsed = util.current_millis() - start_time
            logging.debug("performed iteration %d in %f s.", iteration,
                          elapsed / 1000.0)

            if 'profile_mem' in self['debug'] and (iteration == 1
                                                   or iteration % 100 == 0):
                with open(os.path.join(self['output_dir'], 'memprofile.tsv'),
                          'a') as outfile:
                    self.write_mem_profile(outfile, iteration)
        """run post processing after the last iteration. We store the results in
        num_iterations + 1 to have a clean separation"""
        if self['postadjust']:
            logging.info("Postprocessing: Adjusting the clusters....")
            # run combiner using the weights of the last iteration

            rscores = self.row_scoring.combine_cached(self['num_iterations'])
            rd_scores = memb.get_row_density_scores(self.membership(), rscores)
            logging.info("Recomputed combined + density scores.")
            memb.postadjust(self.membership(), rd_scores)

            BSCM_obj = self.column_scoring.get_BSCM()
            if BSCM_obj is not None:
                new_membership = BSCM_obj.resplit_clusters(self.membership(),
                                                           cutoff=0.05)

            logging.info("Adjusted. Now re-run scoring (iteration: %d)",
                         self['num_iterations'])
            iteration_result = {
                'iteration': self['num_iterations'] + 1,
                'score_means': {}
            }

            combined_scores = self.row_scoring.compute_force(iteration_result)

            # write the combined scores for benchmarking/diagnostics
            with open(self.combined_rscores_pickle_path(), 'wb') as outfile:
                pickle.dump(combined_scores, outfile)

            self.write_results(iteration_result)
            self.write_stats(iteration_result)
            self.update_iteration(iteration)

            # default behaviour:
            # always write complete result into a cmresults.tsv for R/cmonkey
            # compatibility
            conn = self.__dbconn()
            path = os.path.join(self['output_dir'],
                                'cmresults-postproc.tsv.bz2')
            with bz2.BZ2File(path, 'w') as outfile:
                debug.write_iteration(conn, outfile,
                                      self['num_iterations'] + 1,
                                      self['num_clusters'], self['output_dir'])
            # TODO: Why is conn never closed?  Where does it write to the db?

            # additionally: run tomtom on the motifs if requested
            if (self['MEME']['global_background'] == 'True'
                    and self['Postprocessing']['run_tomtom'] == 'True'):
                meme.run_tomtom(conn, self['output_dir'],
                                self['MEME']['version'])

        self.write_finish_info()
        logging.info("Done !!!!")