コード例 #1
0
    def _perform_mcmc(self, sustainData, seq_init, f_init, n_iterations,
                      seq_sigma, f_sigma):
        """Take MCMC samples of the uncertainty in the SuStaIn model parameters.

        Each iteration after the first proposes, for every sequence (visited
        in random order), moving one randomly chosen event to a new position.
        The new position is restricted to lie between the events carrying the
        next-lower and next-higher z-score of the same biomarker, and is drawn
        with weights given by a normal PDF of the distance moved. The
        fractions are perturbed by one shared standard-normal draw scaled by
        ``f_sigma``, then made non-negative and renormalised. The proposal is
        kept when ``exp(new_loglik - old_loglik)`` is at least a uniform
        random draw; otherwise the previous sample is copied forward.

        Parameters:
            sustainData: data object forwarded to ``self._calculate_likelihood``.
            seq_init: initial sequences, first axis of length N_S; written to
                sample 0.
            f_init: initial fractions, written to sample 0.
            n_iterations: number of MCMC samples to store (must be >= 1).
            seq_sigma: proposal width for event moves. A scalar (int, float,
                NumPy scalar or 0-d array) is used for every move; anything
                else is indexed as ``seq_sigma[sequence, event]``.
            f_sigma: proposal width for the fraction perturbation.

        Returns:
            Tuple ``(ml_sequence, ml_f, ml_likelihood, samples_sequence,
            samples_f, samples_likelihood)``. ``samples_sequence`` is
            (N_S, N, n_iterations), ``samples_f`` is (N_S, n_iterations) and
            ``samples_likelihood`` is (n_iterations, 1). ``ml_likelihood`` is
            a length-1 array. ``ml_sequence`` and ``ml_f`` keep a trailing
            axis with one entry per sample that attains the maximum
            likelihood (rejected proposals repeat the previous likelihood, so
            there can be several).
        """
        N = self.stage_zscore.shape[1]
        N_S = seq_init.shape[0]

        if isinstance(f_sigma, float):  # wrap so the perturbation below is an array product
            f_sigma = np.array([f_sigma])

        # Any zero-dimensional width is a single shared value. The previous
        # isinstance(seq_sigma, int) test sent float scalars down the indexing
        # path, where they failed.
        seq_sigma_is_scalar = np.ndim(seq_sigma) == 0

        samples_sequence = np.zeros((N_S, N, n_iterations))
        samples_f = np.zeros((N_S, n_iterations))
        samples_likelihood = np.zeros((n_iterations, 1))
        # don't need to copy as we don't write to 0 index
        samples_sequence[:, :, 0] = seq_init
        samples_f[:, 0] = f_init

        # Reduce frequency of tqdm update to 0.1% of total for larger iteration numbers
        tqdm_update_iters = int(n_iterations /
                                1000) if n_iterations > 100000 else None

        # Loop invariant: the event indices 0..N-1.
        events = np.arange(N)

        for i in tqdm(range(n_iterations),
                      "MCMC Iteration",
                      n_iterations,
                      miniters=tqdm_update_iters):
            if i > 0:
                # this function returns different random numbers to Matlab
                seq_order = self.global_rng.permutation(N_S)
                for s in seq_order:
                    move_event_from = int(np.ceil(
                        N * self.global_rng.random())) - 1
                    current_sequence = samples_sequence[s, :, i - 1]

                    # current_location[event] = position of that event in the sequence
                    current_location = np.zeros(N, dtype=int)
                    current_location[current_sequence.astype(int)] = events

                    selected_event = int(current_sequence[move_event_from])
                    this_stage_zscore = self.stage_zscore[0, selected_event]
                    selected_biomarker = self.stage_biomarker_index[
                        0, selected_event]
                    possible_zscores_biomarker = self.stage_zscore[
                        self.stage_biomarker_index == selected_biomarker]
                    same_biomarker = (
                        self.stage_biomarker_index[0] == selected_biomarker)

                    # slightly different conditional check to matlab version to
                    # protect python from calling min,max on an empty array
                    min_filter = possible_zscores_biomarker < this_stage_zscore
                    max_filter = possible_zscores_biomarker > this_stage_zscore

                    if np.any(min_filter):
                        # Event with the next-lower z-score of this biomarker:
                        # the moved event must stay after it.
                        min_zscore_bound = max(
                            possible_zscores_biomarker[min_filter])
                        min_zscore_bound_event = events[
                            (self.stage_zscore[0] == min_zscore_bound)
                            & same_biomarker]
                        # .item() yields a plain int (and raises unless exactly
                        # one event matches) instead of a 1-element array.
                        move_event_to_lower_bound = current_location[
                            min_zscore_bound_event].item() + 1
                    else:
                        move_event_to_lower_bound = 0

                    if np.any(max_filter):
                        # Event with the next-higher z-score of this biomarker:
                        # the moved event must stay before it.
                        max_zscore_bound = min(
                            possible_zscores_biomarker[max_filter])
                        max_zscore_bound_event = events[
                            (self.stage_zscore[0] == max_zscore_bound)
                            & same_biomarker]
                        move_event_to_upper_bound = current_location[
                            max_zscore_bound_event].item()
                    else:
                        move_event_to_upper_bound = N

                    # Kept from the original port: np.arange(a, a) is empty,
                    # so equal bounds fall back to the single position 0.
                    if move_event_to_lower_bound == move_event_to_upper_bound:
                        possible_positions = np.array([0])
                    else:
                        possible_positions = np.arange(
                            move_event_to_lower_bound,
                            move_event_to_upper_bound)

                    distance = possible_positions - move_event_from

                    if seq_sigma_is_scalar:
                        this_seq_sigma = seq_sigma
                    else:
                        this_seq_sigma = seq_sigma[s, selected_event]

                    # use own normal PDF because stats.norm is slow
                    weight = AbstractSustain.calc_coeff(
                        this_seq_sigma) * AbstractSustain.calc_exp(
                            distance, 0., this_seq_sigma)
                    weight /= np.sum(weight)
                    # Call kept exactly as before so the random stream is
                    # unchanged; it returns a length-1 array.
                    index = self.global_rng.choice(
                        range(len(possible_positions)),
                        1,
                        replace=True,
                        p=weight)

                    # Explicit int: passing a 1-element array to np.arange
                    # relied on a conversion deprecated since NumPy 1.25.
                    move_event_to = int(possible_positions[index[0]])

                    # Remove the event, then re-insert it at the new position.
                    remaining = np.delete(current_sequence, move_event_from, 0)
                    samples_sequence[s, :, i] = np.concatenate([
                        remaining[:move_event_to],
                        [selected_event],
                        remaining[move_event_to:]
                    ])

                # One shared normal draw perturbs every fraction; fold negative
                # values back with fabs and renormalise to sum to 1.
                new_f = samples_f[:, i - 1] + (
                    f_sigma * self.global_rng.standard_normal())
                new_f = (np.fabs(new_f) / np.sum(np.fabs(new_f)))
                samples_f[:, i] = new_f

            S = samples_sequence[:, :, i]
            f = samples_f[:, i]
            likelihood_sample, _, _, _, _ = self._calculate_likelihood(
                sustainData, S, f)
            samples_likelihood[i] = likelihood_sample

            if i > 0:
                # Metropolis step on the log-likelihood difference.
                ratio = np.exp(samples_likelihood[i] -
                               samples_likelihood[i - 1])
                if ratio < self.global_rng.random():
                    # Rejected: carry the previous sample forward.
                    samples_likelihood[i] = samples_likelihood[i - 1]
                    samples_sequence[:, :, i] = samples_sequence[:, :, i - 1]
                    samples_f[:, i] = samples_f[:, i - 1]

        # Compute the maximum once and reuse it for the index lookup.
        ml_likelihood = max(samples_likelihood)
        perm_index = np.where(samples_likelihood == ml_likelihood)[0]
        ml_sequence = samples_sequence[:, :, perm_index]
        ml_f = samples_f[:, perm_index]

        return ml_sequence, ml_f, ml_likelihood, samples_sequence, samples_f, samples_likelihood
コード例 #2
0
    def _perform_mcmc(self, sustainData, seq_init, f_init, n_iterations,
                      seq_sigma, f_sigma):
        """Take MCMC samples of the uncertainty in the SuStaIn model parameters.

        Each iteration after the first proposes, for all sequences at once,
        swapping one randomly chosen event with the event at a new position
        drawn with weights given by a normal PDF of the distance moved. The
        fractions are perturbed by one shared standard-normal draw scaled by
        ``f_sigma``, then made non-negative and renormalised. The proposal is
        kept when ``exp(new_loglik - old_loglik)`` is at least a uniform
        random draw; otherwise the previous sample is copied forward.

        Parameters:
            sustainData: data object; must provide ``getNumSamples()`` and
                ``getNumStages()`` and is forwarded to
                ``self._calculate_likelihood_stage``.
            seq_init: initial sequences, first axis of length N_S; written to
                sample 0.
            f_init: initial fractions, written to sample 0.
            n_iterations: number of MCMC samples to store (must be >= 1).
            seq_sigma: proposal width for event moves. An array of shape
                (N_S, N) is read as ``seq_sigma[sequence, event]`` for the
                event being moved; any other value (e.g. a scalar) is passed
                through unchanged and broadcast.
            f_sigma: proposal width for the fraction perturbation.

        Returns:
            Tuple ``(ml_sequence, ml_f, ml_likelihood, samples_sequence,
            samples_f, samples_likelihood)``. ``samples_sequence`` is
            (N_S, N, n_iterations), ``samples_f`` is (N_S, n_iterations) and
            ``samples_likelihood`` is (n_iterations, 1). The ``ml_*`` values
            are taken from the first sample with the highest likelihood.
        """
        M = sustainData.getNumSamples()
        N = sustainData.getNumStages()
        N_S = seq_init.shape[0]

        if isinstance(f_sigma, float):  # wrap so the perturbation below is an array product
            f_sigma = np.array([f_sigma])

        # A full (N_S, N) width table is looked up per moved event. Broadcasting
        # it directly (as before) paired row r of the permuted sequences with
        # row r of the table and indexed columns by position, not by event.
        sigma_table = np.asarray(seq_sigma)
        sigma_is_per_event = sigma_table.shape == (N_S, N)

        samples_sequence = np.zeros((N_S, N, n_iterations))
        samples_f = np.zeros((N_S, n_iterations))
        samples_likelihood = np.zeros((n_iterations, 1))
        # don't need to copy as we don't write to 0 index
        samples_sequence[:, :, 0] = seq_init
        samples_f[:, 0] = f_init

        # Reduce frequency of tqdm update to 0.1% of total for larger iteration numbers
        tqdm_update_iters = int(n_iterations /
                                1000) if n_iterations > 100000 else None

        # Loop invariant: candidate positions 0..N-1.
        positions = np.arange(N)

        for i in tqdm(range(n_iterations),
                      "MCMC Iteration",
                      n_iterations,
                      miniters=tqdm_update_iters):
            if i > 0:
                # this function returns different random numbers to Matlab
                seq_order = self.global_rng.permutation(N_S)

                # One source position per sequence, drawn in a single call.
                move_event_from = np.ceil(
                    N * self.global_rng.random(len(seq_order))).astype(int) - 1
                current_sequence = samples_sequence[seq_order, :, i - 1]

                rows = np.arange(current_sequence.shape[0])
                selected_event = current_sequence[rows, move_event_from]

                # distance[r, j]: offset of position j from row r's source position
                distance = (positions -
                            move_event_from[:, np.newaxis]).astype(float)

                if sigma_is_per_event:
                    # Column vector so each row uses the width of its own
                    # (sequence, moved event) pair.
                    this_seq_sigma = sigma_table[
                        seq_order, selected_event.astype(int)][:, np.newaxis]
                else:
                    this_seq_sigma = seq_sigma

                weight = AbstractSustain.calc_coeff(
                    this_seq_sigma) * AbstractSustain.calc_exp(
                        distance, 0., this_seq_sigma)
                weight = np.divide(weight, weight.sum(1)[:, None])

                # One categorical draw per row; call kept as before so the
                # random stream is unchanged.
                index = [
                    self.global_rng.choice(np.arange(len(row)),
                                           1,
                                           replace=True,
                                           p=row)[0] for row in weight
                ]

                move_event_to = positions[index]

                # Swap the selected event with the event at the target position.
                # Don't need to copy, but doing it for clarity
                new_seq = current_sequence.copy()
                new_seq[rows, move_event_from] = new_seq[rows, move_event_to]
                new_seq[rows, move_event_to] = selected_event

                samples_sequence[seq_order, :, i] = new_seq

                # One shared normal draw perturbs every fraction; fold negative
                # values back with fabs and renormalise to sum to 1.
                new_f = samples_f[:, i - 1] + (
                    f_sigma * self.global_rng.standard_normal())
                new_f = (np.fabs(new_f) / np.sum(np.fabs(new_f)))
                samples_f[:, i] = new_f
            S = samples_sequence[:, :, i]

            p_perm_k = np.zeros((M, N + 1, N_S))
            for s in range(N_S):
                p_perm_k[:, :, s] = self._calculate_likelihood_stage(
                    sustainData, S[s, :])

            # Weight each sequence's stage probabilities by its fraction.
            # Broadcasting over the last axis replaces an explicit
            # tiled-and-transposed (M, N+1, N_S) copy of the fractions.
            total_prob_stage = np.sum(p_perm_k * samples_f[:, i], 2)
            total_prob_subj = np.sum(total_prob_stage, 1)

            # The 1e-250 offset keeps log() finite when a probability is zero.
            likelihood_sample = np.sum(np.log(total_prob_subj + 1e-250))

            samples_likelihood[i] = likelihood_sample

            if i > 0:
                # Metropolis step on the log-likelihood difference.
                ratio = np.exp(samples_likelihood[i] -
                               samples_likelihood[i - 1])
                if ratio < self.global_rng.random():
                    # Rejected: carry the previous sample forward.
                    samples_likelihood[i] = samples_likelihood[i - 1]
                    samples_sequence[:, :, i] = samples_sequence[:, :, i - 1]
                    samples_f[:, i] = samples_f[:, i - 1]

        # First sample attaining the maximum likelihood.
        ml_likelihood = np.max(samples_likelihood)
        perm_index = np.where(samples_likelihood == ml_likelihood)[0][0]
        ml_sequence = samples_sequence[:, :, perm_index]
        ml_f = samples_f[:, perm_index]

        return ml_sequence, ml_f, ml_likelihood, samples_sequence, samples_f, samples_likelihood
コード例 #3
0
ファイル: MixtureSustain.py プロジェクト: isaac-6/pySuStaIn
    def _perform_mcmc(self, sustainData, seq_init, f_init, n_iterations, seq_sigma, f_sigma):
        """Take MCMC samples of the uncertainty in the SuStaIn model parameters.

        Each iteration after the first proposes, for every sequence (visited
        in random order), removing one randomly chosen event and re-inserting
        it at a position drawn with weights given by a normal PDF of the
        distance moved. The fractions are perturbed by one shared
        standard-normal draw scaled by ``f_sigma``, then made non-negative and
        renormalised. The proposal is kept when
        ``exp(new_loglik - old_loglik)`` is at least a uniform random draw;
        otherwise the previous sample is copied forward. Progress is printed
        roughly every tenth of the run.

        Parameters:
            sustainData: data object; must provide ``getNumSamples()`` and
                ``getNumStages()`` and is forwarded to
                ``self._calculate_likelihood_stage``.
            seq_init: initial sequences, first axis of length N_S; written to
                sample 0.
            f_init: initial fractions, written to sample 0.
            n_iterations: number of MCMC samples to store (must be >= 1).
            seq_sigma: proposal width for event moves. A scalar (int, float,
                NumPy scalar or 0-d array) is used for every move; anything
                else is indexed as ``seq_sigma[sequence, event]``.
            f_sigma: proposal width for the fraction perturbation.

        Returns:
            Tuple ``(ml_sequence, ml_f, ml_likelihood, samples_sequence,
            samples_f, samples_likelihood)``. ``samples_sequence`` is
            (N_S, N, n_iterations), ``samples_f`` is (N_S, n_iterations) and
            ``samples_likelihood`` is (n_iterations, 1). ``ml_likelihood`` is
            a length-1 array; ``ml_sequence`` and ``ml_f`` come from the first
            sample with the highest likelihood.
        """
        M = sustainData.getNumSamples()
        N = sustainData.getNumStages()
        N_S = seq_init.shape[0]

        if isinstance(f_sigma, float):  # wrap so the perturbation below is an array product
            f_sigma = np.array([f_sigma])

        # Any zero-dimensional width is a single shared value. The previous
        # isinstance(seq_sigma, int) test sent float scalars down the indexing
        # path, where they failed.
        seq_sigma_is_scalar = np.ndim(seq_sigma) == 0

        samples_sequence = np.zeros((N_S, N, n_iterations))
        samples_f = np.zeros((N_S, n_iterations))
        samples_likelihood = np.zeros((n_iterations, 1))
        # don't need to copy as we don't write to 0 index
        samples_sequence[:, :, 0] = seq_init
        samples_f[:, 0] = f_init

        # Integer reporting interval (ceil of a tenth, at least 1). The old
        # float modulus i % (n_iterations / 10) skipped reports erratically
        # when n_iterations was not a multiple of 10.
        progress_step = max(1, (n_iterations + 9) // 10)

        # Loop invariant: every position 0..N-1 is a candidate target.
        possible_positions = np.arange(N)

        for i in range(n_iterations):
            if i % progress_step == 0:
                print('Iteration', i, 'of', n_iterations, ',', int(float(i) / float(n_iterations) * 100.), '% complete')
            if i > 0:
                # this function returns different random numbers to Matlab
                seq_order = MixtureSustain.randperm_local(N_S)
                for s in seq_order:
                    move_event_from = int(np.ceil(N * np.random.rand())) - 1

                    current_sequence = samples_sequence[s, :, i - 1]

                    # current_location[event] = position of that event in the sequence
                    current_location = np.zeros(N, dtype=int)
                    current_location[current_sequence.astype(int)] = possible_positions

                    # select an event in the sequence to move
                    selected_event = int(current_sequence[move_event_from])

                    distance = possible_positions - move_event_from

                    if seq_sigma_is_scalar:
                        this_seq_sigma = seq_sigma
                    else:
                        this_seq_sigma = seq_sigma[s, selected_event]

                    # use own normal PDF because stats.norm is slow
                    weight = AbstractSustain.calc_coeff(this_seq_sigma) * AbstractSustain.calc_exp(distance, 0., this_seq_sigma)
                    weight /= np.sum(weight)

                    # Call kept exactly as before so the random stream is
                    # unchanged; it returns a length-1 array.
                    index = np.random.choice(range(len(possible_positions)), 1, replace=True, p=weight)

                    # Explicit int: passing a 1-element array to np.arange
                    # relied on a conversion deprecated since NumPy 1.25.
                    move_event_to = int(possible_positions[index[0]])

                    # Remove the event, then re-insert it at the new position.
                    remaining = np.delete(current_sequence, move_event_from, 0)
                    samples_sequence[s, :, i] = np.concatenate([remaining[:move_event_to], [selected_event], remaining[move_event_to:]])

                # One shared normal draw perturbs every fraction; fold negative
                # values back with fabs and renormalise to sum to 1.
                new_f = samples_f[:, i - 1] + f_sigma * np.random.randn()
                new_f = (np.fabs(new_f) / np.sum(np.fabs(new_f)))
                samples_f[:, i] = new_f
            S = samples_sequence[:, :, i]

            p_perm_k = np.zeros((M, N + 1, N_S))
            for s in range(N_S):
                p_perm_k[:, :, s] = self._calculate_likelihood_stage(sustainData, S[s, :])

            # Weight each sequence's stage probabilities by its fraction.
            # Broadcasting over the last axis replaces an explicit
            # tiled-and-transposed (M, N+1, N_S) copy of the fractions.
            total_prob_stage = np.sum(p_perm_k * samples_f[:, i], 2)
            total_prob_subj = np.sum(total_prob_stage, 1)

            # The 1e-250 offset keeps log() finite when a probability is zero.
            # Built-in sum is kept so the accumulation order is unchanged.
            likelihood_sample = sum(np.log(total_prob_subj + 1e-250))

            samples_likelihood[i] = likelihood_sample

            if i > 0:
                # Metropolis step on the log-likelihood difference.
                ratio = np.exp(samples_likelihood[i] - samples_likelihood[i - 1])
                if ratio < np.random.rand():
                    # Rejected: carry the previous sample forward.
                    samples_likelihood[i] = samples_likelihood[i - 1]
                    samples_sequence[:, :, i] = samples_sequence[:, :, i - 1]
                    samples_f[:, i] = samples_f[:, i - 1]

        # Compute the maximum once; take the first sample that attains it.
        ml_likelihood = max(samples_likelihood)
        perm_index = np.where(samples_likelihood == ml_likelihood)[0][0]
        ml_sequence = samples_sequence[:, :, perm_index]
        ml_f = samples_f[:, perm_index]

        return ml_sequence, ml_f, ml_likelihood, samples_sequence, samples_f, samples_likelihood