def post_sample(self):
    """Finalize a sampling run: normalize scores, report timings, save.

    For each accumulated score matrix in ``self.v`` a normalized copy is
    stored under ``<name>_normalized`` (computed by ``h.normalize``). The
    timing counters kept in ``self.v`` are then printed and a final
    ``save_history()`` is triggered.

    The original body also contained two loops that did nothing (one whose
    body was ``pass`` and one iterating over an empty list); they have been
    removed.
    """
    for variable in ('r_avg_diff_matrix', 'pr_avg_diff_matrix'):
        self.v[variable + '_normalized'] = h.normalize(self.v[variable])

    t2 = h.now()
    self.print('Finished model generation at', t2)
    self.print('Warmup total time', self.v['time_delta_total_warmup'])
    self.print('Post-warmup total time',
               self.v['time_delta_total_post_warmup'])
    self.print('Total time', self.v['time_delta_total'])

    # save_history() only writes when more than SAVE_TIME_DELTA_MIN has
    # elapsed since previous_save_time; backdating the timestamp by twice
    # that interval makes sure this final save is not skipped.
    self.previous_save_time = h.now() - 2 * self.s.SAVE_TIME_DELTA_MIN
    self.save_history()
def get_p_value(c, p, r):
    """Print progress information, then return a negative-binomial CDF value.

    Evaluates ``tfpd.NegativeBinomial(total_count=r, probs=p).cdf(c)`` (all
    three inputs are passed through ``F64``) and returns the ``.numpy()``
    result.

    Side effects: prints the running call counter, the current time, the
    time elapsed since the module-level ``t1`` and the raw arguments, and
    increments the module-level counter ``C``. The progress denominator is
    the hard-coded product ``10556 * 119``.
    """
    global C, t1
    expected_calls = 10556 * 119
    print("Processing count %d/%d" % (C, expected_calls))
    print("Time now", h.now())
    elapsed = h.now() - t1
    print("Time since beginning", elapsed)
    C += 1
    print(c, p, r)
    distribution = tfpd.NegativeBinomial(total_count=F64(r), probs=F64(p))
    return distribution.cdf(F64(c)).numpy()
def set_final_values_from_trace(self, genes=None):
    """Summarize stored traces into ``self.v`` and pickle the result.

    For every requested row index ``j`` the p and r traces are loaded via
    ``self.get_p_j_trace(j)`` / ``self.get_r_j_trace(j)``, the derived mu and
    var traces are computed with ``h.get_mu`` / ``h.get_var``, and the mean
    (``*_final``) and sum (``*_cumulative``) of each trace are stored in row
    ``j`` of the corresponding ``self.v`` array. The negative-binomial CDF of
    ``self.data[j]`` is then evaluated for every trace sample, and its
    per-column mean, variance and mode are stored in ``p_values_mean``,
    ``p_values_var`` and ``p_values_mode``.

    Finally ``self.v`` is pickled to ``variables.pickle`` inside
    ``self.trace_data_folder``.

    Args:
        genes: iterable of row indices into ``self.data`` to process.
            Defaults to every row. Rows that are not listed keep zeros in
            all output arrays.

    Note:
        The count matrix is tiled ``self.v['iteration'] + 1 - self.skip``
        times, so the traces returned by the ``get_*_trace`` helpers are
        presumably expected to have that many samples -- TODO confirm.
    """
    print('Warning: skipping first %d iterations' % self.skip)
    n_rows = self.data.shape[0]
    n_cols = self.data.shape[1]
    if genes is None:
        genes = range(n_rows)

    # Allocate the per-row summaries (one column vector each) ...
    for suffix in ('final', 'cumulative'):
        for name in ('p_j', 'r_j', 'mu_j', 'var_j'):
            self.v['%s_%s' % (name, suffix)] = np.zeros((n_rows, 1))
    # ... and the per-entry CDF summaries (same shape as the data).
    for name in ('p_values_mean', 'p_values_mode', 'p_values_var'):
        self.v[name] = np.zeros(self.data.shape)

    n_kept = self.v['iteration'] + 1 - self.skip

    for position, j in enumerate(genes):
        # Erase the previous progress line. This is keyed on the loop
        # position rather than on the gene index (the original tested
        # ``j != 0``), so a custom ``genes`` list that does not start at 0
        # no longer erases a line before any progress line was printed.
        if position != 0:
            h.delete_print_lines(1)
        print('%s: Processing trace directory %s gene %d' %
              (h.now(), self.trace_data_folder, j))

        # Shapes are assigned in place (as in the original) to turn each
        # trace into a column vector without copying.
        p_j_array = self.get_p_j_trace(j)
        p_j_array.shape = (p_j_array.size, 1)
        r_j_array = self.get_r_j_trace(j)
        r_j_array.shape = (r_j_array.size, 1)
        mu_j_array = h.get_mu(p_j_array, r_j_array)
        var_j_array = h.get_var(p_j_array, r_j_array)

        traces = (
            ('p_j', p_j_array),
            ('r_j', r_j_array),
            ('mu_j', mu_j_array),
            ('var_j', var_j_array),
        )
        for name, trace in traces:
            self.v[name + '_final'][j, 0] = trace.mean()
            self.v[name + '_cumulative'][j, 0] = trace.sum()

        # One CDF value per (trace sample, data column).
        P = F64(np.tile(p_j_array, n_cols))
        R = F64(np.tile(r_j_array, n_cols))
        nb = tfpd.NegativeBinomial(total_count=R, probs=P)
        counts = F64(np.tile(self.data[j], (n_kept, 1)))
        cdf_ = nb.cdf(counts).numpy()

        self.v['p_values_mean'][j] = cdf_.mean(axis=0)
        self.v['p_values_var'][j] = cdf_.var(axis=0)
        for i in range(n_cols):
            self.v['p_values_mode'][j, i] = h.mode_of_continuous(
                cdf_[:, i], 10)

    print('Saving...')
    variables_path = os.path.join(self.trace_data_folder, 'variables.pickle')
    with open(variables_path, 'wb') as f_variables:
        pickle.dump(self.v, f_variables)
    print('Finished!')
def get_p_value_matrix(c__, p__, r__):
    """Compute ``h.get_p_value`` element-wise over three same-shaped matrices.

    Args:
        c__: 2-D array passed element-wise as the first argument.
        p__: 2-D array passed element-wise as the second argument.
        r__: 2-D array passed element-wise as the third argument.

    Returns:
        A float64 array with the shape of ``c__`` where entry ``[j, i]`` is
        ``h.get_p_value(c__[j, i], p__[j, i], r__[j, i])``.

    Raises:
        AssertionError: if the three inputs do not share one shape.

    Progress (row index and timing) is printed for every row.
    """
    assert c__.shape == p__.shape == r__.shape
    # The result must be floating point: the original ``np.zeros_like(c__)``
    # inherited the dtype of ``c__``, so an integer count matrix would have
    # truncated every stored value on assignment.
    result = np.zeros_like(c__, dtype=np.float64)

    t1 = h.now()
    print("Started at", t1)
    for j in range(c__.shape[0]):
        print(j)
        t_j = h.now()
        print(t_j)
        print("Total time since beginning", t_j - t1)
        for i in range(c__.shape[1]):
            result[j, i] = h.get_p_value(c__[j, i], p__[j, i], r__[j, i])

    t2 = h.now()
    print("Finished at", t2)
    print("Total time it took", t2 - t1)
    return result
def run(self):
    """Run the remaining sampling iterations, then the post-sampling step.

    The number of iterations requested from ``sample()`` is the configured
    total (``ITERATIONS + WARMUP``) minus the iteration counter already
    stored in ``self.v['iteration']``.
    """
    start_label = h.format_time(h.now())
    self.print('Start train run', start_label)
    for banner in ("Iterative posterior updating...",
                   "========================="):
        self.print(banner)

    remaining = self.s.ITERATIONS + self.s.WARMUP - self.v['iteration']
    self.sample(remaining)
    self.post_sample()
def save_history(self):
    """Write ``self.v`` (and buffered history, if enabled) to disk.

    Nothing is written unless more than ``self.s.SAVE_TIME_DELTA_MIN`` has
    elapsed since ``self.previous_save_time``. When a save happens:

    1. An existing, non-empty ``self.v_fname`` is copied to
       ``self.v_fname + '~'``.
    2. The current PRNG state is stored in ``self.v['numpy_random_state']``
       and ``self.v`` is pickled to ``self.v_fname``.
    3. If ``self.should_save_history()`` is true, every non-empty buffer in
       ``self.history_j`` is stacked, passed column by column to
       ``self.append_history`` and then reset to an empty list.

    ``self.previous_save_time`` is updated after a successful save.

    Raises:
        Exception: if the non-empty history buffers have differing lengths.
    """
    t1 = h.now()
    with_history = self.should_save_history()
    if (t1 - self.previous_save_time) <= self.s.SAVE_TIME_DELTA_MIN:
        return

    self.print(
        "Started saving results to disk, please don't stop the program now...",
        t1)
    self.print(" * Backing up existing files...")
    if os.path.isfile(self.v_fname) and os.stat(self.v_fname).st_size != 0:
        shutil.copyfile(self.v_fname, self.v_fname + '~')

    with open(self.v_fname, 'wb') as f_variables:
        if with_history:
            self.print(' * Saving current iteration data...')
        else:
            self.print(' * Saving (only) current iteration data...')
        # Both paths take the state from ``self.prng.get_state()``. The
        # original no-history path used ``self.prng.random.get_state()``,
        # unlike every other call site in this file.
        self.v['numpy_random_state'] = self.prng.get_state()
        pickle.dump(self.v, f_variables)

    if not with_history:
        self.previous_save_time = h.now()
        self.print(
            'Saving finished at %s. It took' % self.previous_save_time,
            self.previous_save_time - t1)
        return

    # Stack each non-empty buffer and record how many iterations it holds;
    # all non-empty buffers are expected to have the same length.
    history_j = {}
    lengths = set()
    for key in self.history_j:
        if self.history_j[key]:
            history_j[key] = np.stack(self.history_j[key])
            lengths.add(len(history_j[key]))

    if (len(lengths) == 1) and (next(iter(lengths)) > 0):
        self.print(
            ' * Saving sampling history data (%d iterations since last save)...'
            % (next(iter(lengths))))
        for i in range(self.first_gene, self.first_gene + self.J):
            for key in self.history_j:
                self.append_history(i, key,
                                    history_j[key][:, i - self.first_gene])
    elif len(lengths) != 0:
        print(lengths)
        raise Exception("This shouldn't happen")

    # Only the buffers that were flushed are reset.
    for key in history_j:
        self.history_j[key] = []

    self.previous_save_time = h.now()
    self.print(
        'Saving finished at %s. It took' % self.previous_save_time,
        self.previous_save_time - t1,
        end='.')
    if len(lengths) != 0:
        _length = next(iter(lengths))
        self.print(' %s per iteration.' %
                   ((self.previous_save_time - t1) / _length))
    else:
        self.print()
def sample(self, iterations):
    """Run ``iterations`` sampling iterations and return the updated state.

    Starting at ``self.v['iteration']``, each iteration:

    * derives the parameter arrays from the current ``self.v['p_j']`` and
      ``self.v['r_j']``;
    * replaces ``self.v['p_j']`` with a ``self.prng.beta`` draw and
      ``self.v['r_j']`` with a ``self.prng.gamma`` draw;
    * appends both draws to ``self.history_j`` when
      ``self.should_save_history()`` is true;
    * from iteration ``WARMUP`` on, adds the draws to the ``*_cumulative``
      arrays; from iteration ``SCORE_WARMUP`` on, draws per-entry matrices
      and accumulates the two difference scores;
    * updates the timing counters, prints progress, stores the PRNG state
      and calls ``self.save_history()``.

    Returns:
        The tuple ``(self.v, self.history_j)``.

    NOTE(review): the block structure below was reconstructed from a source
    whose indentation had been lost; the nesting of the ``if`` blocks should
    be confirmed against the upstream file.
    """
    s = self.s
    a_ji_matrix = self.s.A0 + self.data
    _t_iter_begin = h.now()
    self.previous_save_time = _t_iter_begin
    _a_j_array = self.v['a_j_array']
    _e_j_array = np.zeros((self.J, 1))
    _e_ji_matrix = np.zeros(self.data.shape)
    _first_iteration = self.v['iteration']
    for _iteration in range(self.v['iteration'],
                            self.v['iteration'] + iterations):
        self.print('=========================')
        self.print('Starting iteration %d/%d(%d + %d)' %
                   (self.v['iteration'] + 1,
                    self.s.WARMUP + self.s.ITERATIONS, self.s.WARMUP,
                    self.s.ITERATIONS))
        self.print('=========================')
        p_j = self.v['p_j']
        r_j = self.v['r_j']
        # Computed before r_j is reshaped on the next line.
        _b_j_array = (r_j * self.N) + s.B0
        # r_j is the same object as self.v['r_j'], so this reshapes the
        # stored array in place.
        r_j.shape = (self.J, 1)
        # A (1, N) row in which every entry is the value N.
        N_vector = np.repeat(self.N, self.N)
        N_vector.shape = (1, self.N)
        _b_ji_matrix = (r_j * N_vector) + s.B0
        # The ufunc calls below write into _f_j_array itself (second
        # argument is the output array). Net effect:
        #   _f_j_array = 1 / (F0 - N * log(1 - p_j))
        _f_j_array = np.negative(p_j) + 1
        np.log(_f_j_array, _f_j_array)
        _f_j_array *= self.N
        np.negative(_f_j_array, _f_j_array)
        _f_j_array += s.F0
        np.reciprocal(_f_j_array, _f_j_array)
        _f_j_column = _f_j_array.copy()
        _f_j_column.shape = (self.J, 1)
        _f_ji_matrix = np.tile(_f_j_column, self.N)
        # hn.sync_prng_state(self.prng) wraps hn.crt_p_j_r_j and the wrapped
        # callable is invoked immediately. Presumably the wrapper keeps the
        # helper's random stream in step with self.prng -- TODO confirm.
        _e_j_array, _e_ji_matrix = hn.sync_prng_state(self.prng)(
            hn.crt_p_j_r_j)(self.data, self.J, self.N, r_j, self.s.E0,
                            _e_j_array, _e_ji_matrix)
        self.v['p_j'] = self.prng.beta(
            _a_j_array,
            _b_j_array,
        )
        self.v['r_j'] = self.prng.gamma(
            _e_j_array,
            _f_j_array,
        )
        if self.should_save_history():
            self.history_j['p'].append(self.v['p_j'])
            self.history_j['r'].append(self.v['r_j'])
        # Column-vector copies of the new draws, used for the accumulators
        # below without touching the arrays stored in self.v.
        _p_j = self.v['p_j'].copy()
        _p_j.shape = (self.J, 1)
        _r_j = self.v['r_j'].copy()
        _r_j.shape = (self.J, 1)
        if _iteration >= self.s.WARMUP:
            self.v['p_j_cumulative'] += _p_j
            self.v['r_j_cumulative'] += _r_j
        if _iteration >= self.s.SCORE_WARMUP:
            _b_j_array.shape = (self.J, 1)
            _e_j_array.shape = (self.J, 1)
            _f_j_array.shape = (self.J, 1)
            # Per-entry draws using the matrix-shaped parameters.
            p_matrix = self.prng.beta(
                a_ji_matrix,
                _b_ji_matrix,
            )
            r_matrix = self.prng.gamma(
                _e_ji_matrix,
                _f_ji_matrix,
            )
            r_diff_matrix = np.abs(r_matrix - _r_j)
            self.v['r_avg_diff_matrix'] += r_diff_matrix
            self.v['pr_avg_diff_matrix'] += (
                (p_matrix - _p_j)**2 +
                (r_matrix - _r_j)**2) / (_p_j**2 + _r_j**2)
            # NOTE(review): this loop body is `pass`, so the periodic
            # score-save branch currently does nothing.
            if (_iteration -
                    self.s.SCORE_WARMUP) % self.s.SCORE_SAVE_INTERVAL == 0:
                for score in ['r_avg_diff_matrix', 'pr_avg_diff_matrix']:
                    pass
        # Timing bookkeeping: total plus per-phase counters.
        _t_iter_end = h.now()
        _t_iter_delta = _t_iter_end - _t_iter_begin
        _t_iter_begin = _t_iter_end
        self.v['time_delta_total'] += _t_iter_delta
        if _iteration < self.s.WARMUP:
            _phase = 'warmup'
            self.v['time_delta_total_warmup'] += _t_iter_delta
            self.v['time_delta_iteration_warmup'] = _t_iter_delta
        else:
            _phase = 'post-warmup'
            self.v['time_delta_total_post_warmup'] += _t_iter_delta
            self.v['time_delta_iteration_post_warmup'] = _t_iter_delta
        self.v['iteration'] = _iteration + 1
        # Presumably erases the 3 header lines printed above, plus (after
        # the first iteration) the status lines of the previous iteration
        # -- TODO confirm the line counts.
        if _iteration == _first_iteration:
            self.delete_print_lines(3)
        else:
            self.delete_print_lines(8)
        self.print(
            'Iteration %d/%d (%s phase) took' %
            (_iteration + 1, (self.s.ITERATIONS + self.s.WARMUP), _phase),
            _t_iter_delta)
        self.print('Total time till now', self.v['time_delta_total'])
        if (_iteration + 1) < self.s.WARMUP:
            # Linear extrapolation of the warmup time from the average
            # iteration time so far.
            total_time_estimated_warmup = self.v[
                'time_delta_total_warmup'] / (_iteration +
                                              1) * self.s.WARMUP
            self.print(
                'Estimated total warmup time remaining',
                total_time_estimated_warmup -
                self.v['time_delta_total_warmup'])
            self.print('Estimated total warmup time',
                       total_time_estimated_warmup)
            self.print(
                'Warning: still in warmup phase, post warmup time-length estimation is not available.'
            )
        else:
            total_time_estimated_warmup = self.v['time_delta_total_warmup']
            self.print('Warmup took', total_time_estimated_warmup)
            if (_iteration + 1) > self.s.WARMUP:
                # Same extrapolation for the post-warmup phase.
                total_time_estimated_post_warmup = self.v[
                    'time_delta_total_post_warmup'] / (
                        (_iteration + 1) -
                        self.s.WARMUP) * self.s.ITERATIONS
                total_time_estimated = total_time_estimated_warmup + total_time_estimated_post_warmup
                if self.v['iteration'] < (self.s.ITERATIONS +
                                          self.s.WARMUP):
                    self.print('Post-warmup time till now',
                               self.v['time_delta_total_post_warmup'])
                    self.print(
                        'Remaining time %s/%s' %
                        (total_time_estimated - self.v['time_delta_total'],
                         total_time_estimated))
        self.v.update({
            'j': 0,
            'e_j_incomplete': None,
            'e_ji_matrix_incomplete': None,
        })
        # Store the generator state so it is included in what
        # save_history() pickles.
        self.v['numpy_random_state'] = self.prng.get_state()
        self.save_history()
    self.delete_print_lines(3)
    return self.v, self.history_j
    # NOTE(review): this statement follows the return above and is
    # unreachable as laid out here; its intended placement could not be
    # determined from the flattened source.
    raise Exception("This shouldn't happen.")


def get_z_score_matrix(self):
    """Return row-standardized log2 ratios of the data to the fitted mean.

    ``p_j_final`` and ``r_j_final`` are tiled across ``self.data.shape[1]``
    columns and passed to ``h.get_mu``. The matrix
    ``log2((data + 1) / (mu + 1))`` is then centered by its row mean and
    divided by its row standard deviation.

    NOTE(review): a row whose standard deviation is zero is divided by zero
    here and will produce non-finite values.
    """
    p__ = np.tile(self.v['p_j_final'], self.data.shape[1])
    r__ = np.tile(self.v['r_j_final'], self.data.shape[1])
    mu__ = h.get_mu(p__, r__)
    l_ji__ = np.log2((self.data + 1) / (mu__ + 1))
    # Row statistics are reshaped to column vectors so they line up with
    # the rows of l_ji__ in the arithmetic below.
    l_j_ = l_ji__.mean(axis=1)
    l_j_.shape = (self.data.shape[0], 1)
    l_j_std_ = l_ji__.std(axis=1)
    l_j_std_.shape = (self.data.shape[0], 1)
    return (l_ji__ - l_j_) / l_j_std_


# Module-level state used by get_p_value(): reference start time and the
# running call counter.
t1 = h.now()
C = 0


def get_p_value(c, p, r):
    """Print progress information, then return a negative-binomial CDF value.

    Evaluates ``tfpd.NegativeBinomial(total_count=r, probs=p).cdf(c)`` with
    all inputs passed through ``F64`` and returns the ``.numpy()`` result.
    Each call prints the call counter, the current time, the time since the
    module-level ``t1`` and the arguments, and increments the module-level
    counter ``C``.

    NOTE(review): a function with this name and body also appears earlier
    in this file.
    """
    global C, t1
    # The progress denominator is a hard-coded product.
    print("Processing count %d/%d" % (C, 10556 * 119))
    print("Time now", h.now())
    print("Time since beginning", h.now() - t1)
    C += 1
    print(c, p, r)
    return tfpd.NegativeBinomial(total_count=F64(r),
                                 probs=F64(p)).cdf(F64(c)).numpy()