Example #1
0
    def post_sample(self):
        """Finalize a sampling run: normalize scores, report timings, save.

        Stores a normalized copy of each accumulated score matrix in
        ``self.v`` under ``<name>_normalized``, prints the timing totals kept
        in ``self.v`` and finally calls ``save_history()``.
        """
        for score in ('r_avg_diff_matrix', 'pr_avg_diff_matrix'):
            self.v[score + '_normalized'] = h.normalize(self.v[score])

        t2 = h.now()
        self.print('Finished model generation at', t2)
        self.print('Warmup total time', self.v['time_delta_total_warmup'])
        self.print('Post-warmup total time',
                   self.v['time_delta_total_post_warmup'])
        self.print('Total time', self.v['time_delta_total'])

        # Back-date the last save by twice the minimum interval so that the
        # elapsed-time check inside save_history() is satisfied (assuming a
        # positive SAVE_TIME_DELTA_MIN) and the final state is written.
        self.previous_save_time = h.now() - 2 * self.s.SAVE_TIME_DELTA_MIN
        self.save_history()
Example #2
0
def get_p_value(c, p, r):
    """Return the negative-binomial CDF evaluated at ``c``, logging progress.

    Each call prints the running call counter ``C``, the current time, the
    time elapsed since the module-level ``t1`` and the three arguments, then
    increments ``C``.

    Args:
        c: Value at which the CDF is evaluated.
        p: Passed (through ``F64``) as ``probs`` of the distribution.
        r: Passed (through ``F64``) as ``total_count`` of the distribution.

    Returns:
        The result of ``.cdf(...).numpy()`` on the distribution.
    """
    global C, t1
    # NOTE(review): hard-coded expected number of calls; presumably the
    # dimensions of one specific data set - confirm before reuse.
    expected_calls = 10556 * 119
    print("Processing count %d/%d" % (C, expected_calls))
    print("Time now", h.now())
    elapsed = h.now() - t1
    print("Time since beginning", elapsed)
    C = C + 1

    print(c, p, r)

    distribution = tfpd.NegativeBinomial(total_count=F64(r), probs=F64(p))
    return distribution.cdf(F64(c)).numpy()
Example #3
0
    def set_final_values_from_trace(self, genes=None):
        """Summarize per-gene traces into ``self.v`` and pickle the result.

        For every gene index in ``genes`` the p and r traces are loaded, the
        mean and sum of p, r and of the derived mu / var arrays are stored,
        and the negative-binomial CDF of that gene's data row is evaluated
        for every trace sample to obtain the mean, variance and mode of the
        per-entry CDF values.

        Args:
            genes: Iterable of row indices of ``self.data`` to process. When
                ``None``, all rows are processed and every output array in
                ``self.v`` is first re-created as zeros. When given, the
                output arrays must already exist in ``self.v``; only the
                listed rows are overwritten.

        Side effects:
            Prints progress and writes ``self.v`` to ``variables.pickle``
            inside ``self.trace_data_folder``.
        """
        print('Warning: skipping first %d iterations' % self.skip)
        if genes is None:
            genes = range(self.data.shape[0])

            # Full run: start every output array from zeros.
            per_gene_shape = (self.data.shape[0], 1)
            for name in (
                    'p_j_final',
                    'r_j_final',
                    'mu_j_final',
                    'var_j_final',
                    'p_j_cumulative',
                    'r_j_cumulative',
                    'mu_j_cumulative',
                    'var_j_cumulative',
            ):
                self.v[name] = np.zeros(per_gene_shape)

            for name in ('p_values_mean', 'p_values_mode', 'p_values_var'):
                self.v[name] = np.zeros(self.data.shape)

        for position, j in enumerate(genes):
            # Replace the previous progress line on every pass but the first.
            # This is keyed on the loop position rather than the gene index so
            # that a subset of genes not starting at 0 does not remove the
            # line printed before the loop.
            # (presumably delete_print_lines erases printed lines - confirm)
            if position != 0:
                h.delete_print_lines(1)
            print('%s: Processing trace directory %s gene %d' %
                  (h.now(), self.trace_data_folder, j))

            # Load the traces and view them as column vectors (in place).
            p_j_array = self.get_p_j_trace(j)
            p_j_array.shape = (p_j_array.size, 1)
            r_j_array = self.get_r_j_trace(j)
            r_j_array.shape = (r_j_array.size, 1)
            mu_j_array = h.get_mu(p_j_array, r_j_array)
            var_j_array = h.get_var(p_j_array, r_j_array)

            self.v['p_j_final'][j, 0] = p_j_array.mean()
            self.v['r_j_final'][j, 0] = r_j_array.mean()
            self.v['mu_j_final'][j, 0] = mu_j_array.mean()
            self.v['var_j_final'][j, 0] = var_j_array.mean()

            self.v['p_j_cumulative'][j, 0] = p_j_array.sum()
            self.v['r_j_cumulative'][j, 0] = r_j_array.sum()
            self.v['mu_j_cumulative'][j, 0] = mu_j_array.sum()
            self.v['var_j_cumulative'][j, 0] = var_j_array.sum()

            # One row per trace sample, one column per data column.
            probs = F64(np.tile(p_j_array, self.data.shape[1]))
            total_count = F64(np.tile(r_j_array, self.data.shape[1]))
            nb = tfpd.NegativeBinomial(total_count=total_count, probs=probs)

            # Repeat the gene's data row once per retained iteration so it
            # lines up with the tiled parameters.
            # NOTE(review): assumes the trace length equals
            # iteration + 1 - skip - confirm against get_p_j_trace.
            counts = F64(
                np.tile(self.data[j],
                        (self.v['iteration'] + 1 - self.skip, 1)))
            cdf_ = nb.cdf(counts).numpy()
            self.v['p_values_mean'][j] = cdf_.mean(axis=0)
            self.v['p_values_var'][j] = cdf_.var(axis=0)
            for i in range(self.data.shape[1]):
                self.v['p_values_mode'][j][i] = h.mode_of_continuous(
                    cdf_[:, i], 10)

        print('Saving...')
        with open(os.path.join(self.trace_data_folder, 'variables.pickle'),
                  'wb') as f_variables:
            pickle.dump(self.v, f_variables)
        print('Finished!')
Example #4
0
def get_p_value_matrix(c__, p__, r__):
    """Evaluate ``h.get_p_value`` for every entry of three aligned matrices.

    Progress (row index, timestamps, elapsed time) is printed while the
    matrix is processed row by row.

    Args:
        c__: 2-D array passed element-wise as the first argument.
        p__: Array of the same shape, passed element-wise as the second.
        r__: Array of the same shape, passed element-wise as the third.

    Returns:
        A float64 array shaped like ``c__`` holding the per-entry results.

    Raises:
        AssertionError: If the three shapes differ.
    """
    assert c__.shape == p__.shape == r__.shape
    # Allocate the output as float64 explicitly. With a bare zeros_like an
    # integer-typed ``c__`` would yield an integer array, and every
    # fractional value stored into it would be silently truncated.
    result = np.zeros_like(c__, dtype=np.float64)
    t1 = h.now()
    print("Started at", t1)
    for j in range(c__.shape[0]):
        print(j)
        t_j = h.now()
        print(t_j)
        print("Total time since beginning", t_j - t1)
        for i in range(c__.shape[1]):
            result[j, i] = h.get_p_value(c__[j, i], p__[j, i], r__[j, i])
    t2 = h.now()
    print("Finished at", t2)
    print("Total time it took", t2 - t1)
    return result
Example #5
0
    def run(self):
        """Run all outstanding iterations and then finalize the results.

        The number of iterations handed to ``sample`` is the configured
        warmup plus post-warmup total minus the iterations already recorded
        in ``self.v['iteration']``; ``post_sample`` is called afterwards.
        """
        started_at = h.now()
        self.print('Start train run', h.format_time(started_at))
        for banner in ("Iterative posterior updating...",
                       "========================="):
            self.print(banner)

        remaining = self.s.ITERATIONS + self.s.WARMUP - self.v['iteration']
        self.sample(remaining)
        self.post_sample()
Example #6
0
    def save_history(self):
        """Persist sampler state (and buffered history) to disk, rate-limited.

        Nothing is written unless more than ``self.s.SAVE_TIME_DELTA_MIN``
        has elapsed since ``self.previous_save_time``. Otherwise a non-empty
        existing variables file is copied to ``<name>~`` and ``self.v``
        (with the current PRNG state stored under ``'numpy_random_state'``)
        is pickled to ``self.v_fname``. When ``self.should_save_history()``
        is true, the samples buffered in ``self.history_j`` are additionally
        stacked, passed to ``self.append_history`` gene by gene, and the
        buffers are emptied.

        Raises:
            Exception: If the non-empty history buffers hold differing
                numbers of iterations.
        """
        t1 = h.now()
        with_history = self.should_save_history()
        if not ((t1 - self.previous_save_time) > self.s.SAVE_TIME_DELTA_MIN):
            # Too soon since the last save; skip this call entirely.
            return

        self.print(
            "Started saving results to disk, please don't stop the program now...",
            t1)
        self.print(" * Backing up existing files...")
        # Only back up a file that actually contains data.
        if os.path.isfile(self.v_fname) and os.stat(self.v_fname).st_size != 0:
            shutil.copyfile(self.v_fname, self.v_fname + '~')

        with open(self.v_fname, 'wb') as f_variables:
            if with_history:
                self.print(' * Saving current iteration data...')
            else:
                self.print(' * Saving (only) current iteration data...')
            # Read the generator state directly from self.prng, as every
            # other use of the generator in this class does.
            self.v['numpy_random_state'] = self.prng.get_state()
            pickle.dump(self.v, f_variables)

        if not with_history:
            self.previous_save_time = h.now()
            self.print(
                'Saving finished at %s. It took' % self.previous_save_time,
                self.previous_save_time - t1)
            return

        # Stack each non-empty buffer along a new leading (iteration) axis
        # and collect the distinct buffer lengths.
        history_j = {}
        lengths = set()
        for key in self.history_j:
            if self.history_j[key]:
                history_j[key] = np.stack(self.history_j[key])
                lengths.add(len(history_j[key]))

        if (len(lengths) == 1) and (next(iter(lengths)) > 0):
            self.print(
                ' * Saving sampling history data (%d iterations since last save)...'
                % (next(iter(lengths))))
            for i in range(self.first_gene, self.first_gene + self.J):
                for key in self.history_j:
                    # Column i - first_gene of the stacked buffer is handed
                    # over as the new history slice for gene i.
                    self.append_history(
                        i, key, history_j[key][:, i - self.first_gene])
        elif len(lengths) == 0:
            # Nothing was buffered since the last save.
            pass
        else:
            print(lengths)
            raise Exception("This shouldn't happen")

        # Empty the buffers that were just handed over.
        for key in history_j:
            self.history_j[key] = []
        self.previous_save_time = h.now()
        self.print('Saving finished at %s. It took' %
                   self.previous_save_time,
                   self.previous_save_time - t1,
                   end='.')
        if len(lengths) != 0:
            _length = next(iter(lengths))
            self.print(' %s per iteration.' %
                       ((self.previous_save_time - t1) / _length))
        else:
            self.print()
Example #7
0
    def sample(self, iterations):
        """Run ``iterations`` sampling iterations, updating ``self.v`` in place.

        Each iteration redraws ``self.v['p_j']`` (beta) and ``self.v['r_j']``
        (gamma) from parameters derived from the current values, optionally
        buffers the draws in ``self.history_j``, accumulates post-warmup sums
        and score matrices, updates the timing totals, prints a progress
        report and calls ``save_history()``.

        Args:
            iterations: Number of iterations to run, counted from the current
                value of ``self.v['iteration']``.

        Returns:
            The tuple ``(self.v, self.history_j)``.
        """
        s = self.s

        # First beta parameter for the per-entry draws made after SCORE_WARMUP.
        a_ji_matrix = self.s.A0 + self.data

        _t_iter_begin = h.now()

        # Restart the save throttle: save_history() compares against this.
        self.previous_save_time = _t_iter_begin

        _a_j_array = self.v['a_j_array']

        # Buffers handed to (and replaced by) the hn.crt_p_j_r_j call below.
        _e_j_array = np.zeros((self.J, 1))
        _e_ji_matrix = np.zeros(self.data.shape)
        _first_iteration = self.v['iteration']
        for _iteration in range(self.v['iteration'],
                                self.v['iteration'] + iterations):
            self.print('=========================')
            self.print('Starting iteration %d/%d(%d + %d)' %
                       (self.v['iteration'] + 1, self.s.WARMUP +
                        self.s.ITERATIONS, self.s.WARMUP, self.s.ITERATIONS))
            self.print('=========================')

            p_j = self.v['p_j']
            r_j = self.v['r_j']

            # Computed before r_j is reshaped below, so it keeps r_j's
            # incoming shape.
            _b_j_array = (r_j * self.N) + s.B0

            # In-place reshape: r_j aliases self.v['r_j'], so the stored
            # array becomes a (J, 1) column as well.
            r_j.shape = (self.J, 1)
            # Row vector of length N in which every entry equals N.
            N_vector = np.repeat(self.N, self.N)
            N_vector.shape = (1, self.N)
            _b_ji_matrix = (r_j * N_vector) + s.B0

            # Built in place, step by step: 1 / (F0 - N * log(1 - p_j)).
            _f_j_array = np.negative(p_j) + 1
            np.log(_f_j_array, _f_j_array)
            _f_j_array *= self.N
            np.negative(_f_j_array, _f_j_array)
            _f_j_array += s.F0
            np.reciprocal(_f_j_array, _f_j_array)

            # Column copy of the above, repeated N times along the columns.
            _f_j_column = _f_j_array.copy()
            _f_j_column.shape = (self.J, 1)
            _f_ji_matrix = np.tile(_f_j_column, self.N)

            # NOTE(review): presumably sync_prng_state keeps the helper's
            # random stream consistent with self.prng - confirm in hn.
            _e_j_array, _e_ji_matrix = hn.sync_prng_state(self.prng)(
                hn.crt_p_j_r_j)(self.data, self.J, self.N, r_j, self.s.E0,
                                _e_j_array, _e_ji_matrix)

            # New per-gene draws; these replace (not mutate) the stored arrays.
            self.v['p_j'] = self.prng.beta(
                _a_j_array,
                _b_j_array,
            )
            # NOTE(review): with a numpy-style gamma(shape, scale) signature,
            # _e_j_array is the shape and _f_j_array the scale - confirm.
            self.v['r_j'] = self.prng.gamma(
                _e_j_array,
                _f_j_array,
            )

            if self.should_save_history():

                # Buffered here; save_history() stacks and empties the lists.
                self.history_j['p'].append(self.v['p_j'])
                self.history_j['r'].append(self.v['r_j'])

            # Column-shaped copies so the stored draws keep their own shape.
            _p_j = self.v['p_j'].copy()
            _p_j.shape = (self.J, 1)

            _r_j = self.v['r_j'].copy()
            _r_j.shape = (self.J, 1)

            if _iteration >= self.s.WARMUP:

                # Running sums over post-warmup iterations.
                self.v['p_j_cumulative'] += _p_j
                self.v['r_j_cumulative'] += _r_j

            if _iteration >= self.s.SCORE_WARMUP:
                # In-place reshapes; _e_j_array keeps this (J, 1) shape when
                # it is passed to hn.crt_p_j_r_j on the next iteration.
                _b_j_array.shape = (self.J, 1)
                _e_j_array.shape = (self.J, 1)
                _f_j_array.shape = (self.J, 1)

                # Per-entry draws using the matrix-shaped parameters.
                p_matrix = self.prng.beta(
                    a_ji_matrix,
                    _b_ji_matrix,
                )

                r_matrix = self.prng.gamma(
                    _e_ji_matrix,
                    _f_ji_matrix,
                )

                # Accumulate |r_matrix - r_j| per entry.
                r_diff_matrix = np.abs(r_matrix - _r_j)
                self.v['r_avg_diff_matrix'] += r_diff_matrix

                # Accumulate the squared (p, r) distance of each entry from
                # its gene's draw, divided by p_j^2 + r_j^2.
                self.v['pr_avg_diff_matrix'] += (
                    (p_matrix - _p_j)**2 +
                    (r_matrix - _r_j)**2) / (_p_j**2 + _r_j**2)

                if (_iteration -
                        self.s.SCORE_WARMUP) % self.s.SCORE_SAVE_INTERVAL == 0:
                    # Currently a no-op: the loop body is only `pass`.
                    for score in ['r_avg_diff_matrix', 'pr_avg_diff_matrix']:
                        pass

            # Time this iteration and restart the clock for the next one.
            _t_iter_end = h.now()
            _t_iter_delta = _t_iter_end - _t_iter_begin
            _t_iter_begin = _t_iter_end

            self.v['time_delta_total'] += _t_iter_delta

            if _iteration < self.s.WARMUP:
                _phase = 'warmup'
                self.v['time_delta_total_warmup'] += _t_iter_delta
                self.v['time_delta_iteration_warmup'] = _t_iter_delta
            else:
                _phase = 'post-warmup'
                self.v['time_delta_total_post_warmup'] += _t_iter_delta
                self.v['time_delta_iteration_post_warmup'] = _t_iter_delta

            self.v['iteration'] = _iteration + 1

            # NOTE(review): presumably erases the previously printed status
            # lines (3 on the first pass, 8 afterwards) - confirm the counts
            # against what is actually printed per iteration.
            if _iteration == _first_iteration:
                self.delete_print_lines(3)
            else:
                self.delete_print_lines(8)
            self.print(
                'Iteration %d/%d (%s phase) took' %
                (_iteration + 1, (self.s.ITERATIONS + self.s.WARMUP), _phase),
                _t_iter_delta)
            self.print('Total time till now', self.v['time_delta_total'])
            if (_iteration + 1) < self.s.WARMUP:
                # Linear extrapolation: mean warmup iteration time * WARMUP.
                total_time_estimated_warmup = self.v[
                    'time_delta_total_warmup'] / (_iteration +
                                                  1) * self.s.WARMUP
                self.print(
                    'Estimated total warmup time remaining',
                    total_time_estimated_warmup -
                    self.v['time_delta_total_warmup'])
                self.print('Estimated total warmup time',
                           total_time_estimated_warmup)
                self.print(
                    'Warning: still in warmup phase, post warmup time-length estimation is not available.'
                )
            else:

                # Warmup is complete, so its measured total is used as-is.
                total_time_estimated_warmup = self.v['time_delta_total_warmup']
                self.print('Warmup took', total_time_estimated_warmup)
                if (_iteration + 1) > self.s.WARMUP:
                    # Mean post-warmup iteration time * ITERATIONS.
                    total_time_estimated_post_warmup = self.v[
                        'time_delta_total_post_warmup'] / (
                            (_iteration + 1) -
                            self.s.WARMUP) * self.s.ITERATIONS
                    total_time_estimated = total_time_estimated_warmup + total_time_estimated_post_warmup
                    # The remaining-time report is skipped once the final
                    # iteration has completed.
                    if self.v['iteration'] < (self.s.ITERATIONS +
                                              self.s.WARMUP):
                        self.print('Post-warmup time till now',
                                   self.v['time_delta_total_post_warmup'])
                        self.print(
                            'Remaining time %s/%s' %
                            (total_time_estimated - self.v['time_delta_total'],
                             total_time_estimated))

            # Reset the within-iteration bookkeeping keys.
            # NOTE(review): presumably these support resuming a partially
            # completed iteration - confirm against hn.crt_p_j_r_j.
            self.v.update({
                'j': 0,
                'e_j_incomplete': None,
                'e_ji_matrix_incomplete': None,
            })
            # Snapshot the generator state, then save (save_history() applies
            # its own elapsed-time check).
            self.v['numpy_random_state'] = self.prng.get_state()
            self.save_history()

        self.delete_print_lines(3)
        return self.v, self.history_j
Example #8
0
        raise Exception("This shouldn't happen.")

    def get_z_score_matrix(self):
        """Return row-standardized log2 ratios of the data to its fitted mean.

        The final per-gene ``p`` and ``r`` values are tiled across the
        columns of ``self.data`` and passed to ``h.get_mu``. The matrix
        ``log2((data + 1) / (mu + 1))`` is then centred and scaled row by row
        using each row's mean and standard deviation.
        """
        n_rows, n_cols = self.data.shape[0], self.data.shape[1]

        p_tiled = np.tile(self.v['p_j_final'], n_cols)
        r_tiled = np.tile(self.v['r_j_final'], n_cols)
        expected = h.get_mu(p_tiled, r_tiled)

        # The +1 on both sides keeps the ratio defined for zero entries.
        log_ratio = np.log2((self.data + 1) / (expected + 1))

        # Per-row statistics as columns so they broadcast across each row.
        row_mean = log_ratio.mean(axis=1).reshape(n_rows, 1)
        row_std = log_ratio.std(axis=1).reshape(n_rows, 1)
        return (log_ratio - row_mean) / row_std


# Module-level state read and updated by get_p_value() for progress logging.
# Reference timestamp, taken when this module-level code runs.
t1 = h.now()
# Running count of get_p_value() calls.
C = 0


def get_p_value(c, p, r):
    """Return the negative-binomial CDF evaluated at ``c``, logging progress.

    Each call prints the running call counter ``C``, the current time, the
    time elapsed since the module-level ``t1`` and the three arguments, then
    increments ``C``.

    Args:
        c: Value at which the CDF is evaluated.
        p: Passed (through ``F64``) as ``probs`` of the distribution.
        r: Passed (through ``F64``) as ``total_count`` of the distribution.

    Returns:
        The result of ``.cdf(...).numpy()`` on the distribution.
    """
    global C, t1
    # NOTE(review): hard-coded expected number of calls; presumably the
    # dimensions of one specific data set - confirm before reuse.
    expected_calls = 10556 * 119
    print("Processing count %d/%d" % (C, expected_calls))
    print("Time now", h.now())
    elapsed = h.now() - t1
    print("Time since beginning", elapsed)
    C = C + 1

    print(c, p, r)

    distribution = tfpd.NegativeBinomial(total_count=F64(r), probs=F64(p))
    return distribution.cdf(F64(c)).numpy()