Esempio n. 1
0
    def get_samples(self, typ, init, prop_sd, p, data=None, save=False):
        """
        Run the MCMC sampler for theta and q under the log(sigma)
        parametrisation, targeting either the prior or the posterior, and
        store the draws on this object.
        -----------------------------------------------------------------------
        typ:     "prior" or "post"; also the name of the attribute (a dict)
                 that receives the results.
        init:    Initialisation for MCMC algorithm.
        prop_sd: Proposal normal standard deviations for MCMC algorithm.
        p:       [p_1, p_2, p_3]; quantiles are evaluated at 1.0 - p.
        data:    GEVData for posterior.
        save:    Passed through to the MCMC results call.
        """

        sampling_posterior = typ == "post"

        # Chain length and length of the burn-in
        n_iterations = 501000
        n_burnin = 1000

        if sampling_posterior:
            self.post["data"] = data

            # Transformation M -> M*
            def prior(X):
                return self.pdf(X) * data.theta_annual_det(X)

            def target(X):
                return util.logpost(X, prior, data)
        else:
            target = self.pdf

        # Transformation sigma -> log(sigma); sigma is component 1 of X
        def pick_sigma(X):
            return X[1]

        log_sigma_target = util.log_transform(target, util.sig_trans,
                                              pick_sigma)

        mcmc_obj = util.MCMCSample(log_sigma_target, init, prop_sd,
                                   n_iterations, n_burnin)

        axis_labels = [
            r"$\tilde{\mu}$", r"$\log\tilde{\sigma}$", r"$\tilde{\xi}$"
        ]
        name_parts = (self.inst_name, *self.hyperpara, typ)
        mcmc_obj.results(para_names=axis_labels,
                         colour=self.colour,
                         save=save,
                         save_name="%s-%s-%s-%s" % name_parts)

        if sampling_posterior:
            # Transformation M* -> M
            sample_theta = np.array(
                [data.theta_annual(draw) for draw in mcmc_obj.sample])
        else:
            sample_theta = mcmc_obj.sample

        # self.prior / self.post, depending on typ
        store = getattr(self, typ)
        store["theta"]["sample"] = sample_theta

        # Transformation log(sigma) -> sigma.  This writes into the array
        # that was just stored; in the prior case that array is
        # mcmc_obj.sample itself, so the sampler's copy changes as well.
        sample_theta[:, 1] = np.exp(sample_theta[:, 1])

        store["q"]["sample"] = np.array(
            [util.quantile(draw, 1.0 - p) for draw in sample_theta])

        store["mcmc"] = mcmc_obj
Esempio n. 2
0
 def compute_substitution(cluster_column_scores):
     """calculate substitution value for missing column scores

     cluster_column_scores is indexed by (cluster - 1); every entry is
     either None or a (colnames, scores) pair.  For each cluster, the
     scores of the columns that are members of that cluster are gathered,
     and the 0.95 quantile of everything gathered is returned.

     num_clusters and membership are not parameters; they are taken from
     the surrounding scope.
     """
     membership_values = []
     # range() instead of the Python-2-only xrange()
     for cluster in range(1, num_clusters + 1):
         columns = membership.columns_for_cluster(cluster)
         column_scores = cluster_column_scores[cluster - 1]
         if column_scores is None:
             continue  # no scores for this cluster
         colnames, scores = column_scores
         membership_values.extend(
             scores[col] for col, name in enumerate(colnames)
             if name in columns)
     return util.quantile(membership_values, 0.95)
Esempio n. 3
0
 def test_quantile(self):
     """tests the quantile function

     Checks the minimum, an interpolated value, the quartiles, the
     maximum, and that an empty input yields NaN.
     """
     data = [1, 2, 3, 4, 5]
     # assertEqual: the assertEquals alias is deprecated and no longer
     # exists in recent unittest versions
     self.assertEqual(1, util.quantile(data, 0))
     self.assertEqual(1.8, util.quantile(data, 0.2))
     self.assertEqual(2, util.quantile(data, 0.25))
     self.assertEqual(3, util.quantile(data, 0.5))
     self.assertEqual(4, util.quantile(data, 0.75))
     self.assertEqual(5, util.quantile(data, 1))
     self.assertTrue(np.isnan(util.quantile([], 0.99)))
Esempio n. 4
0
 def compute_substitution(cluster_column_scores):
     """calculate substitution value for missing column scores

     Entry (cluster - 1) of cluster_column_scores is None or a
     (colnames, scores) pair.  The scores belonging to columns that are
     members of the respective cluster are collected over all clusters and
     their 0.95 quantile is returned.

     num_clusters and membership are free names resolved in the
     surrounding scope.
     """
     membership_values = []
     # range() works on Python 2 and 3; xrange() only exists on Python 2
     for cluster in range(1, num_clusters + 1):
         columns = membership.columns_for_cluster(cluster)
         column_scores = cluster_column_scores[cluster - 1]
         if column_scores is None:
             continue  # nothing was scored for this cluster
         colnames, scores = column_scores
         for col, colname in enumerate(colnames):
             if colname in columns:
                 membership_values.append(scores[col])
     return util.quantile(membership_values, 0.95)
Esempio n. 5
0
 def compute_substitution(cluster_column_scores):
     """calculate substitution value for missing column scores

     Entry (cluster - 1) of cluster_column_scores is None or a matrix
     object exposing num_rows, num_columns, column_names and values.  All
     values found in columns that are members of the respective cluster
     are collected, and the 0.95 quantile of that collection is returned.

     num_clusters and membership are free names resolved in the
     surrounding scope.
     """
     membership_values = []
     # range() works on Python 2 and 3; xrange() only exists on Python 2
     for cluster in range(1, num_clusters + 1):
         columns = membership.columns_for_cluster(cluster)
         column_scores = cluster_column_scores[cluster - 1]
         # identity test: "!= None" would go through the object's own
         # comparison operator
         if column_scores is None:
             continue
         # whether a column is a member does not depend on the row, so
         # the member columns are determined once per cluster
         member_cols = [col for col in range(column_scores.num_columns)
                        if column_scores.column_names[col] in columns]
         for row in range(column_scores.num_rows):
             row_values = column_scores.values[row]
             membership_values.extend(row_values[col] for col in member_cols)
     return util.quantile(membership_values, 0.95)
Esempio n. 6
0
def __quantile_normalize_scores(cluster_row_scores,
                                row_names,
                                membership,
                                num_clusters):
    """Return the 0.95 quantile of the row scores in cluster_row_scores
    that are not NaN or +/-Inf and belong to a row that is a member of
    the respective cluster.

    cluster_row_scores: indexed by (cluster - 1); each entry is None or a
                        sequence of scores indexed in parallel with
                        row_names
    row_names:          row (gene) name for each score position
    membership:         object providing rows_for_cluster(cluster)
    num_clusters:       clusters are numbered 1..num_clusters
    """
    values_for_quantile = []
    # range() works on Python 2 and 3; xrange() only exists on Python 2
    for cluster in range(1, num_clusters + 1):
        row_scores_for_cluster = cluster_row_scores[cluster - 1]
        cluster_rows = membership.rows_for_cluster(cluster)
        # identity test: "!= None" on an array-like compares elementwise
        # and cannot be used as a condition
        if row_scores_for_cluster is None:
            continue
        for row, score in enumerate(row_scores_for_cluster):
            gene_name = row_names[row]
            if np.isfinite(score) and (gene_name in cluster_rows):
                values_for_quantile.append(score)
    return util.quantile(values_for_quantile, 0.95)
Esempio n. 7
0
def combine(result_matrices, score_scalings, membership, iteration,
            config_params):
    """This is the combining function, taking n result matrices and scalings

    Each matrix first gets fix_extreme_values() and
    subtract_with_quantile(0.99) applied (the matrices passed in are
    modified).  The matrices are then brought onto a common scale, either
    by dm.quantile_normalize_scores() or by rescaling relative to
    matrix 0, and summed with the weights in score_scalings.

    result_matrices: list of score matrices; matrix 0 is assumed to be the
                     gene expression score
    score_scalings:  weights, indexed in parallel with result_matrices
    membership:      provides num_clusters() and rows_for_cluster(cluster)
    iteration:       current iteration, only used for the debug dump
    config_params:   configuration dictionary; 'quantile_normalize' and
                     'debug' are always read, 'debug_freq', 'pipeline' and
                     'output_dir' only when scores are dumped

    Returns a dm.DataMatrix with the dimensions and names of matrix 0, or
    None if result_matrices is empty.
    """
    quantile_normalize = config_params['quantile_normalize']

    # Without this guard an empty input fails in the rescaling branch
    # (which needs matrix 0) before the "return None" could be reached.
    if len(result_matrices) == 0:
        return None

    for i, m in enumerate(result_matrices):
        m.fix_extreme_values()
        m.subtract_with_quantile(0.99)

        # debug mode: print scoring matrices before combining
        if ('dump_scores' in config_params['debug']
                and (iteration == 1 or
                     (iteration % config_params['debug_freq'] == 0))):
            funs = config_params['pipeline']['row-scoring']['args'][
                'functions']
            m.write_tsv_file(os.path.join(
                config_params['output_dir'],
                'score-%s-%04d.tsv' % (funs[i]['id'], iteration)),
                             compressed=False)

    if quantile_normalize:
        if len(result_matrices) > 1:
            start_time = util.current_millis()
            result_matrices = dm.quantile_normalize_scores(
                result_matrices, score_scalings)
            elapsed = util.current_millis() - start_time
            logging.debug("quantile normalize in %f s.", elapsed / 1000.0)

        in_matrices = [m.values for m in result_matrices]

    else:
        in_matrices = []
        num_clusters = membership.num_clusters()
        mat = result_matrices[0]
        index_map = {name: index for index, name in enumerate(mat.row_names)}
        # we assume matrix 0 is always the gene expression score
        # we also assume that the matrices are already extreme value
        # fixed
        # rsm: scores of matrix 0 restricted to (member row, cluster) pairs
        rsm = []
        for cluster in range(1, num_clusters + 1):
            row_members = sorted(membership.rows_for_cluster(cluster))
            rsm.extend([
                mat.values[index_map[row], cluster - 1] for row in row_members
            ])
        scale = util.mad(rsm)
        if scale == 0:  # avoid that we are dividing by 0
            scale = util.r_stddev(rsm)
        if scale != 0:
            median_rsm = util.median(rsm)
            rsvalues = (mat.values - median_rsm) / scale
            num_rows, num_cols = rsvalues.shape
            rscores = dm.DataMatrix(num_rows,
                                    num_cols,
                                    mat.row_names,
                                    mat.column_names,
                                    values=rsvalues)
            rscores.fix_extreme_values()
        else:
            # logging.warning: logging.warn is a deprecated alias
            logging.warning("combiner scaling -> scale == 0 !!!")
            rscores = mat
        in_matrices.append(rscores.values)

        if len(result_matrices) > 1:
            # the remaining matrices are rescaled so that their 0.01
            # quantile matches the one of the rescaled matrix 0
            rs_quant = util.quantile(rscores.values, 0.01)
            logging.debug("RS_QUANT = %f", rs_quant)
            for i in range(1, len(result_matrices)):
                values = result_matrices[i].values
                qqq = abs(util.quantile(values, 0.01))
                if qqq == 0:
                    logging.debug(
                        'SPARSE SCORES - %d attempt 1: pick from sorted values',
                        i)
                    # NOTE(review): takes the 10th smallest value without
                    # abs() and assumes at least 10 values - confirm that
                    # both are intended
                    qqq = sorted(values.ravel())[9]
                if qqq == 0:
                    logging.debug(
                        'SPARSE SCORES - %d attempt 2: pick minimum value', i)
                    qqq = abs(values.min())
                if qqq != 0:
                    values = values / qqq * abs(rs_quant)
                else:
                    logging.debug('SPARSE SCORES - %d not normalizing!', i)
                in_matrices.append(values)

    # result_matrices may have been replaced by the quantile normalized
    # list above, so the emptiness check is repeated here
    if len(result_matrices) == 0:
        return None

    start_time = util.current_millis()
    # assuming same format of all matrices
    combined_score = np.zeros(in_matrices[0].shape)
    for i, matrix in enumerate(in_matrices):
        combined_score += matrix * score_scalings[i]

    elapsed = util.current_millis() - start_time
    logging.debug("combined score in %f s.", elapsed / 1000.0)
    matrix0 = result_matrices[0]  # as reference for names
    return dm.DataMatrix(matrix0.num_rows,
                         matrix0.num_columns,
                         matrix0.row_names,
                         matrix0.column_names,
                         values=combined_score)
Esempio n. 8
0
 def test_quantile_nan(self):
     """checks the 0.01 quantile of a sample that contains a NaN"""
     values_with_nan = [0.2, 0.1, np.nan, 0.3]
     result = util.quantile(values_with_nan, 0.01)
     self.assertAlmostEqual(0.102, result)
Esempio n. 9
0
 def quantile(self, probability):
     """flattens all contained values and returns the result of
     util.quantile for the given probability"""
     flattened = self.values.ravel()
     return util.quantile(flattened, probability)
Esempio n. 10
0
def combine(result_matrices, score_scalings, membership, quantile_normalize):
    """This is the combining function, taking n result matrices and scalings

    Each matrix first gets fix_extreme_values() applied (the matrices
    passed in are modified).  The matrices are then brought onto a common
    scale, either by dm.quantile_normalize_scores() or by rescaling
    relative to matrix 0, and summed with the weights in score_scalings.

    result_matrices:    list of score matrices; matrix 0 is assumed to be
                        the gene expression score
    score_scalings:     weights, indexed in parallel with result_matrices
    membership:         provides num_clusters() and
                        rows_for_cluster(cluster)
    quantile_normalize: if true, use dm.quantile_normalize_scores()
                        instead of the rescaling relative to matrix 0

    Returns a dm.DataMatrix with the dimensions and names of matrix 0, or
    None if result_matrices is empty.
    """
    # Without this guard an empty input fails in the rescaling branch
    # (which needs matrix 0) before the "return None" could be reached.
    if len(result_matrices) == 0:
        return None

    for m in result_matrices:
        m.fix_extreme_values()

    if quantile_normalize:
        if len(result_matrices) > 1:
            start_time = util.current_millis()
            result_matrices = dm.quantile_normalize_scores(result_matrices,
                                                           score_scalings)
            elapsed = util.current_millis() - start_time
            logging.info("quantile normalize in %f s.", elapsed / 1000.0)

        in_matrices = [m.values for m in result_matrices]

    else:
        in_matrices = []
        num_clusters = membership.num_clusters()
        mat = result_matrices[0]
        index_map = {name: index for index, name in enumerate(mat.row_names)}
        # we assume matrix 0 is always the gene expression score
        # we also assume that the matrices are already extreme value
        # fixed
        # rsm: scores of matrix 0 restricted to (member row, cluster) pairs
        rsm = []
        for cluster in range(1, num_clusters + 1):
            row_members = sorted(membership.rows_for_cluster(cluster))
            rsm.extend([mat.values[index_map[row]][cluster - 1]
                        for row in row_members])
        scale = util.mad(rsm)
        if scale == 0:  # avoid that we are dividing by 0
            scale = util.r_stddev(rsm)
        if scale != 0:
            median_rsm = util.median(rsm)
            rsvalues = (mat.values - median_rsm) / scale
            num_rows, num_cols = rsvalues.shape
            rscores = dm.DataMatrix(num_rows, num_cols,
                                    mat.row_names,
                                    mat.column_names,
                                    values=rsvalues)
            rscores.fix_extreme_values()
        else:
            # logging.warning: logging.warn is a deprecated alias
            logging.warning("combiner scaling -> scale == 0 !!!")
            rscores = mat
        in_matrices.append(rscores.values)

        if len(result_matrices) > 1:
            # the remaining matrices are rescaled so that their 0.01
            # quantile matches the one of the rescaled matrix 0
            rs_quant = util.quantile(rscores.values, 0.01)
            logging.info("RS_QUANT = %f", rs_quant)
            for i in range(1, len(result_matrices)):
                values = result_matrices[i].values
                qqq = abs(util.quantile(values, 0.01))
                if qqq == 0:
                    # dividing by a zero quantile would turn the scores
                    # into inf/nan, so this matrix is used unscaled
                    logging.error("very sparse score !!!")
                else:
                    values = values / qqq * abs(rs_quant)
                in_matrices.append(values)

    # result_matrices may have been replaced by the quantile normalized
    # list above, so the emptiness check is repeated here
    if len(result_matrices) == 0:
        return None

    start_time = util.current_millis()
    # assuming same format of all matrices
    combined_score = np.zeros(in_matrices[0].shape)
    for i, matrix in enumerate(in_matrices):
        combined_score += matrix * score_scalings[i]

    elapsed = util.current_millis() - start_time
    logging.info("combined score in %f s.", elapsed / 1000.0)
    matrix0 = result_matrices[0]  # as reference for names
    return dm.DataMatrix(matrix0.num_rows, matrix0.num_columns,
                         matrix0.row_names, matrix0.column_names,
                         values=combined_score)
Esempio n. 11
0
# Chooses optimal value of M
# (optimal_M is given theta[2] only and its result replaces `data`;
# presumably theta[2] is the shape parameter - TODO confirm)
data = data.optimal_M(theta[2])

# Plots fit of annual maxima
# 365 observations per year; theta is converted with theta_annual() and
# the converted value is what fit_GEV() receives
data.set_obs_in_year(365)
theta_annual = data.theta_annual(theta)
data.fit_GEV(theta=theta_annual, save=save_all)

# Constructing priors #-------------------------------------------------------#
#=============================================================================#

# The quantiles below are evaluated at 1.0 - p, i.e. 0.9, 0.99 and 0.999
p = np.array([0.1, 0.01, 0.001])

# qu:  quantiles of theta at 1.0 - p
# var: the value 27 repeated three times (one entry per element of p)
# NOTE(review): the result is indexed like a sequence of prior objects
# further down in the script - confirm the return type of all_priors
pi = priors.all_priors(p,
                       qu=util.quantile(theta, 1.0 - p),
                       var=[27] * 3,
                       name=study_name)

# MCMC sampling #-------------------------------------------------------------#
#=============================================================================#

for i in range(4):
    if pi[i].prior["proper"]:
        pi[i].get_samples(
            "prior",
            [
                [25.0, 2.0, 0.2],  # k = 3, I copula
                None,
                [25.0, 2.0, 0.0],  # k = 3, ME copula
                None
Esempio n. 12
0
 def quantile(self, probability):
     """computes the quantile for the given probability over every value
     held by this object, treating the values as one flat sequence"""
     all_values = self.values.ravel()
     return util.quantile(all_values, probability)
Esempio n. 13
0
def combine(result_matrices, score_scalings, membership, iteration, config_params):
    """This is the combining function, taking n result matrices and scalings

    Each matrix first gets fix_extreme_values() and
    subtract_with_quantile(0.99) applied (the matrices passed in are
    modified).  The matrices are then brought onto a common scale, either
    by dm.quantile_normalize_scores() or by rescaling relative to
    matrix 0, and summed with the weights in score_scalings.

    result_matrices: list of score matrices; matrix 0 is assumed to be the
                     gene expression score
    score_scalings:  weights, indexed in parallel with result_matrices
    membership:      provides num_clusters() and rows_for_cluster(cluster)
    iteration:       current iteration, only used for the debug dump
    config_params:   configuration dictionary; 'quantile_normalize' and
                     'debug' are always read, 'debug_freq', 'pipeline' and
                     'output_dir' only when scores are dumped

    Returns a dm.DataMatrix with the dimensions and names of matrix 0, or
    None if result_matrices is empty.
    """
    quantile_normalize = config_params['quantile_normalize']

    # Without this guard an empty input fails in the rescaling branch
    # (which needs matrix 0) before the "return None" could be reached.
    if len(result_matrices) == 0:
        return None

    for i, m in enumerate(result_matrices):
        m.fix_extreme_values()
        m.subtract_with_quantile(0.99)

        # debug mode: print scoring matrices before combining
        dump_now = (iteration == 1 or
                    (iteration % config_params['debug_freq'] == 0))
        if 'dump_scores' in config_params['debug'] and dump_now:
            funs = config_params['pipeline']['row-scoring']['args']['functions']
            dump_path = os.path.join(
                config_params['output_dir'],
                'score-%s-%04d.tsv' % (funs[i]['id'], iteration))
            m.write_tsv_file(dump_path, compressed=False)

    if quantile_normalize:
        if len(result_matrices) > 1:
            start_time = util.current_millis()
            result_matrices = dm.quantile_normalize_scores(result_matrices,
                                                           score_scalings)
            elapsed = util.current_millis() - start_time
            logging.debug("quantile normalize in %f s.", elapsed / 1000.0)

        in_matrices = [m.values for m in result_matrices]

    else:
        in_matrices = []
        num_clusters = membership.num_clusters()
        mat = result_matrices[0]
        index_map = {name: index for index, name in enumerate(mat.row_names)}
        # we assume matrix 0 is always the gene expression score
        # we also assume that the matrices are already extreme value
        # fixed
        # rsm: scores of matrix 0 restricted to (member row, cluster) pairs
        rsm = []
        for cluster in range(1, num_clusters + 1):
            row_members = sorted(membership.rows_for_cluster(cluster))
            rsm.extend([mat.values[index_map[row], cluster - 1]
                        for row in row_members])
        scale = util.mad(rsm)
        if scale == 0:  # avoid that we are dividing by 0
            scale = util.r_stddev(rsm)
        if scale != 0:
            median_rsm = util.median(rsm)
            rsvalues = (mat.values - median_rsm) / scale
            num_rows, num_cols = rsvalues.shape
            rscores = dm.DataMatrix(num_rows, num_cols,
                                    mat.row_names,
                                    mat.column_names,
                                    values=rsvalues)
            rscores.fix_extreme_values()
        else:
            # logging.warning: logging.warn is a deprecated alias
            logging.warning("combiner scaling -> scale == 0 !!!")
            rscores = mat
        in_matrices.append(rscores.values)

        if len(result_matrices) > 1:
            # the remaining matrices are rescaled so that their 0.01
            # quantile matches the one of the rescaled matrix 0
            rs_quant = util.quantile(rscores.values, 0.01)
            logging.debug("RS_QUANT = %f", rs_quant)
            for i in range(1, len(result_matrices)):
                values = result_matrices[i].values
                qqq = abs(util.quantile(values, 0.01))
                if qqq == 0:
                    logging.warning(
                        'SPARSE SCORES - %d attempt 1: pick from sorted values',
                        i)
                    # NOTE(review): takes the 10th smallest value without
                    # abs() and assumes at least 10 values - confirm that
                    # both are intended
                    qqq = sorted(values.ravel())[9]
                if qqq == 0:
                    logging.warning(
                        'SPARSE SCORES - %d attempt 2: pick minimum value', i)
                    qqq = abs(values.min())
                if qqq != 0:
                    values = values / qqq * abs(rs_quant)
                else:
                    logging.warning('SPARSE SCORES - %d not normalizing!', i)
                in_matrices.append(values)

    # result_matrices may have been replaced by the quantile normalized
    # list above, so the emptiness check is repeated here
    if len(result_matrices) == 0:
        return None

    start_time = util.current_millis()
    # assuming same format of all matrices
    combined_score = np.zeros(in_matrices[0].shape)
    for i, matrix in enumerate(in_matrices):
        combined_score += matrix * score_scalings[i]

    elapsed = util.current_millis() - start_time
    logging.debug("combined score in %f s.", elapsed / 1000.0)
    matrix0 = result_matrices[0]  # as reference for names
    return dm.DataMatrix(matrix0.num_rows, matrix0.num_columns,
                         matrix0.row_names, matrix0.column_names,
                         values=combined_score)