Ejemplo n.º 1
0
    def add_result(self, cfg, model, result):
        """
        Record the outcome of one model run for this subset.

        The parameter count for ``model`` is looked up through
        ``cfg.processor.models`` and the number of sites is the size of
        ``self.column_set``. AIC, BIC and AICc are computed from those two
        values and ``result.lnl``, then written onto ``result``. Finally
        ``result._data`` is copied into the next free slot of
        ``self.result_array`` and that slot is saved via ``cfg.database``.
        """
        num_params = float(cfg.processor.models.get_num_params(model))
        num_sites = float(len(self.column_set))
        log_likelihood = float(result.lnl)

        # All three scores are computed before ``result`` is modified.
        aic_score = get_aic(log_likelihood, num_params)
        bic_score = get_bic(log_likelihood, num_params, num_sites)
        aicc_score = get_aicc(log_likelihood, num_params, num_sites)

        # Annotate the result with the subset, model and the scores.
        annotations = (
            ("subset_id", self.subset_id),
            ("model_id", model),
            ("params", num_params),
            ("aic", aic_score),
            ("aicc", aicc_score),
            ("bic", bic_score),
        )
        for field, value in annotations:
            setattr(result, field, value)

        # Store the record locally, persist it, then advance the cursor.
        self.result_array[self.result_current] = result._data
        cfg.database.save_result(self, self.result_current)
        self.result_current += 1

        template = "Added model to subset. Model: %s, params: %d, sites:%d, lnL:%.2f, site_rate %f"
        log.debug(template % (
            model, num_params, num_sites, log_likelihood, result.site_rate))
Ejemplo n.º 2
0
    def add_result(self, cfg, model, result):
        """
        Attach scores to ``result`` and store it for this subset.

        ``result`` is annotated with this subset's id, the model, the
        model's parameter count and the AIC, AICc and BIC values. Its
        ``_data`` record is then placed in ``self.result_array`` at
        ``self.result_current``, saved through ``cfg.database`` and the
        cursor is moved on by one.
        """
        param_count = float(cfg.processor.models.get_num_params(model))
        site_count = float(len(self.column_set))
        lnl_value = float(result.lnl)

        # Compute the three information criteria up front.
        aic_value, bic_value, aicc_value = (
            get_aic(lnl_value, param_count),
            get_bic(lnl_value, param_count, site_count),
            get_aicc(lnl_value, param_count, site_count),
        )

        result.subset_id = self.subset_id
        result.model_id = model
        result.params = param_count
        result.aic = aic_value
        result.aicc = aicc_value
        result.bic = bic_value

        # Copy the underlying data record into the current slot and save it.
        self.result_array[self.result_current] = result._data
        cfg.database.save_result(self, self.result_current)
        self.result_current += 1

        log_values = (model, param_count, site_count, lnl_value,
                      result.site_rate)
        log.debug(
            "Added model to subset. Model: %s, params: %d, sites:%d, lnL:%.2f, site_rate %f"
            % log_values)
Ejemplo n.º 3
0
def subset_list_score(list_of_subsets, the_config, alignment):
    """Return the AIC, AICc or BIC score for a list of subsets.

    The criterion is selected by ``the_config.model_selection`` ('aic',
    'aicc' or 'bic'). Any other value yields ``None``.
    """
    log_likelihood, param_total, site_total = subset_list_stats(
        list_of_subsets, the_config, alignment)

    if the_config.model_selection == 'aic':
        return get_aic(log_likelihood, param_total)
    if the_config.model_selection == 'aicc':
        return get_aicc(log_likelihood, param_total, site_total)
    if the_config.model_selection == 'bic':
        return get_bic(log_likelihood, param_total, site_total)

    # Unrecognised criterion: no score is produced.
    return None
Ejemplo n.º 4
0
def subset_list_score(list_of_subsets, the_config, alignment):
    """Score a list of subsets with the configured information criterion.

    ``the_config.model_selection`` picks the criterion: 'aic', 'aicc' or
    'bic'. The result is ``None`` when it matches none of these.
    """
    stats = subset_list_stats(list_of_subsets, the_config, alignment)
    lnl_total, k_total, n_sites = stats

    score = None
    if the_config.model_selection == 'aic':
        score = get_aic(lnl_total, k_total)
    elif the_config.model_selection == 'aicc':
        score = get_aicc(lnl_total, k_total, n_sites)
    elif the_config.model_selection == 'bic':
        score = get_bic(lnl_total, k_total, n_sites)
    return score
Ejemplo n.º 5
0
    def __init__(self, sch, nseq, branchlengths, model_selection):
        """
        Summarise a scheme: parameter count, lnL, sites and AIC/BIC/AICc.

        Args:
            sch: The scheme. It must expose ``name`` and ``subsets`` and be
                iterable over subsets that provide ``best_params``,
                ``best_lnl`` and ``column_set``.
            nseq: Used to derive the branch-length parameter count,
                ``(2 * nseq) - 3`` (presumably the number of sequences in
                the alignment -- confirm against the caller).
            branchlengths: ``'linked'`` or ``'unlinked'``.
            model_selection: Stored unchanged on the instance.

        Raises:
            PartitionFinderError: If ``branchlengths`` is neither
                ``'linked'`` nor ``'unlinked'``.
        """
        self.scheme_name = sch.name
        self.scheme = sch
        self.model_selection = model_selection

        # Calculate AIC, BIC, AICc for each scheme.
        # How you do this depends on whether brlens are linked or not.
        self.nsubs = len(sch.subsets)  # number of subsets

        # Sum of the number of parameters in the best model of each subset.
        sum_subset_k = sum(s.best_params for s in sch)

        log.debug("""Calculating number of parameters in scheme.
                  Total parameters from subset models: %d""", sum_subset_k)

        if branchlengths == 'linked':
            # Linked brlens: one shared set of branch lengths plus one
            # multiplier per subset beyond the first.
            brlen_params = (2 * nseq) - 3
            multiplier_params = self.nsubs - 1
            self.sum_k = sum_subset_k + multiplier_params + brlen_params
            log.debug("Total parameters from brlens: %d", brlen_params)
            log.debug("Parameters from subset multipliers: %d",
                      multiplier_params)

        elif branchlengths == 'unlinked':
            # Unlinked brlens: every subset has its own set of brlens.
            # The product is computed first; formatting with
            # ``"..." % x * nsubs`` would repeat the message nsubs times
            # because % and * share precedence and group left to right.
            brlen_params = ((2 * nseq) - 3) * self.nsubs
            self.sum_k = sum_subset_k + brlen_params
            log.debug("Total parameters from brlens: %d", brlen_params)

        else:
            log.error("Unknown option for branchlengths: %s", branchlengths)
            raise PartitionFinderError

        log.debug("Grand total parameters: %d", self.sum_k)

        self.lnl = sum(s.best_lnl for s in sch)
        self.nsites = sum(len(s.column_set) for s in sch)

        K = float(self.sum_k)
        n = float(self.nsites)
        lnL = float(self.lnl)

        log.debug("n: %d\tK: %d\tlnL: %d", n, K, lnL)

        self.aic = get_aic(lnL, K)
        self.bic = get_bic(lnL, K, n)
        self.aicc = get_aicc(lnL, K, n)
Ejemplo n.º 6
0
    def __init__(self, sch, nseq, branchlengths, model_selection):
        """
        Compute the total parameter count, lnL, site count and the
        AIC/BIC/AICc scores for a scheme.

        Args:
            sch: The scheme. ``sch.name`` and ``sch.subsets`` are read, and
                iterating over it must yield subsets with ``best_params``,
                ``best_lnl`` and ``column_set``.
            nseq: Feeds the branch-length parameter count
                ``(2 * nseq) - 3`` (presumably the number of sequences --
                confirm against the caller).
            branchlengths: ``'linked'`` or ``'unlinked'``.
            model_selection: Kept on the instance as given.

        Raises:
            PartitionFinderError: For any other ``branchlengths`` value.
        """
        self.scheme_name = sch.name
        self.scheme = sch
        self.model_selection = model_selection

        # Calculate AIC, BIC, AICc for each scheme.
        # How you do this depends on whether brlens are linked or not.
        self.nsubs = len(sch.subsets)  # number of subsets

        # Sum of the number of parameters in the best model of each subset.
        sum_subset_k = sum(s.best_params for s in sch)

        log.debug("""Calculating number of parameters in scheme.
                  Total parameters from subset models: %d""", sum_subset_k)

        if branchlengths == 'linked':
            # Linked brlens - only one extra parameter per subset.
            per_tree_brlens = (2 * nseq) - 3
            self.sum_k = sum_subset_k + (self.nsubs - 1) + per_tree_brlens
            log.debug("Total parameters from brlens: %d", per_tree_brlens)
            log.debug("Parameters from subset multipliers: %d",
                      self.nsubs - 1)

        elif branchlengths == 'unlinked':
            # Unlinked brlens - every subset has its own set of brlens.
            # Multiply before formatting: ``"..." % x * nsubs`` groups as
            # ``("..." % x) * nsubs`` and repeats the string instead.
            total_brlens = self.nsubs * ((2 * nseq) - 3)
            self.sum_k = sum_subset_k + total_brlens
            log.debug("Total parameters from brlens: %d", total_brlens)

        else:
            log.error("Unknown option for branchlengths: %s", branchlengths)
            raise PartitionFinderError

        log.debug("Grand total parameters: %d", self.sum_k)

        self.lnl = sum(s.best_lnl for s in sch)
        self.nsites = sum(len(s.column_set) for s in sch)

        K = float(self.sum_k)
        n = float(self.nsites)
        lnL = float(self.lnl)

        log.debug("n: %d\tK: %d\tlnL: %d", n, K, lnL)

        self.aic = get_aic(lnL, K)
        self.bic = get_bic(lnL, K, n)
        self.aicc = get_aicc(lnL, K, n)