def generate_conditional_ps(self):
    """Accumulate p(TiN|Somatic) and p(TiN|Germline) per site and TiN value.

    Reads ``af``, ``number_of_sites``, ``t_alt_count``, ``t_ref_count``,
    ``skew``, ``n_depth``, ``normal_f``, ``CN_ratio`` and ``TiN_range`` from
    ``self``.

    Side effects:
        * adds, in place, to column ``TiN_idx`` of ``self.p_TiN_given_S`` and
          ``self.p_TiN_given_het`` for every entry of ``TiN_range`` (both
          arrays must already exist, since they are only incremented here);
        * sets ``self.afexp`` (``af`` repeated once per site),
          ``self.p_artifact`` and ``self.p_TiN_given_G``.

    The per-site arrays are used as 1-D vectors that get a trailing axis
    added for broadcasting against the allele-fraction grid; ``CN_ratio`` is
    indexed as ``[site, TiN index]``.
    """
    # p(TiN|Somatic) and p(TiN|Germline)
    # NOTE(review): the private ``beta._cdf`` is kept on purpose; the public
    # ``beta.cdf`` clamps out-of-support arguments and would change results.
    beta_cdf = beta._cdf
    n_af = len(self.af)

    # Sign applied to the het shift: -1 for the first half of the AF grid,
    # +1 for the rest.  Builtin ``int`` replaces ``np.int`` (removed in
    # NumPy 1.24).
    first_half = int(np.round(np.true_divide(n_af, 2)))
    t_het_direction = np.ones([self.number_of_sites, n_af])
    t_het_direction[:, 0:first_half] = -1

    self.afexp = np.repeat(np.expand_dims(self.af, 1), self.number_of_sites, axis=1).T

    # Weight of each AF grid point: Beta(alt + 1, ref + 1) mass in the bin
    # ending at that AF value (0.005 is presumably the grid spacing of
    # ``self.af`` -- confirm against where ``af`` is built).
    t_alt_col = np.expand_dims(self.t_alt_count + 1, 1)
    t_ref_col = np.expand_dims(self.t_ref_count + 1, 1)
    t_af_w = beta_cdf(self.afexp, t_alt_col, t_ref_col) - beta_cdf(self.afexp - 0.005, t_alt_col, t_ref_col)

    depth_col = self.n_depth[:, np.newaxis]
    f_t_af = self.skew - np.abs(self.skew - self.afexp)
    t_af = np.multiply(self.afexp, np.expand_dims(self.n_depth, 1))
    psi_t_af = self.skew - f_t_af
    psi_t_af = np.multiply(psi_t_af, t_het_direction)

    # Loop invariants: the normal allele-fraction window [normal_f, normal_f + .01].
    normal_lo = np.expand_dims(self.normal_f[:], 1)
    normal_hi = np.expand_dims(self.normal_f[:] + .01, 1)

    for TiN_idx in range(len(self.TiN_range)):
        cn_col = np.expand_dims(self.CN_ratio[:, TiN_idx], 1)

        # Alt counts implied in the normal for a somatic event at this TiN.
        n_ac_given_tin = np.multiply(t_af, cn_col)
        # Alt counts implied in the normal for a het site shifted away from skew.
        exp_f = self.skew + np.multiply(psi_t_af, cn_col)
        n_het_ac_given_tin = np.multiply(exp_f, depth_col)

        somatic_a = n_ac_given_tin + 1
        somatic_b = depth_col - n_ac_given_tin + 1
        somatic_mass = beta_cdf(normal_hi, somatic_a, somatic_b) - beta_cdf(normal_lo, somatic_a, somatic_b)
        self.p_TiN_given_S[:, TiN_idx] += np.sum(np.multiply(somatic_mass, t_af_w), axis=1)

        het_a = n_het_ac_given_tin + 1
        het_b = depth_col - n_het_ac_given_tin + 1
        het_mass = beta_cdf(normal_hi, het_a, het_b) - beta_cdf(normal_lo, het_a, het_b)
        self.p_TiN_given_het[:, TiN_idx] += np.sum(np.multiply(het_mass, t_af_w), axis=1)

    # Mass of the tumor Beta posterior inside the normal AF window.
    self.p_artifact = beta_cdf(self.normal_f + .01, self.t_alt_count + 1, self.t_ref_count + 1) - beta_cdf(
        self.normal_f, self.t_alt_count + 1, self.t_ref_count + 1)
    artifact_col = self.p_artifact[:, np.newaxis]
    self.p_TiN_given_G = np.multiply(1 - artifact_col, self.p_TiN_given_het) + np.multiply(
        artifact_col, 1 - self.p_TiN_given_het)
def calculate_TiN_likelihood(self):
    """Compute per-het and per-segment TiN likelihoods and posteriors.

    Reads ``hets`` (columns ``ALT_COUNT_T``, ``REF_COUNT_T``,
    ``ALT_COUNT_N``, ``REF_COUNT_N``, ``d``, ``seg_id``), ``segs``, ``af``,
    ``mu_af_n``, ``CN_ratio``, ``resolution`` and ``TiN_range`` from ``self``.

    Side effects:
        * sets ``self.t_alt_count`` / ``self.t_ref_count`` as
          ``(n_hets, 1)`` arrays and ``self.n_alt_count`` /
          ``self.n_ref_count`` as 1-D arrays of length ``n_hets``;
        * sets ``self.afexp`` and ``self.p_TiN`` (``n_hets x resolution``);
        * stores each segment's summed log-likelihood in
          ``self.seg_likelihood[seg_id]`` (the container must already exist);
        * sets ``self.TiN_post_seg`` and ``self.TiN_likelihood_matrix``
          (``n_segs x resolution``);
        * writes the ``TiN_var`` and ``TiN_MAP`` columns of ``self.segs``.

    ``DataFrame.as_matrix`` (removed in pandas 1.0) is replaced by
    ``.values``.  The normal counts are read as 1-D columns directly, so a
    single het no longer collapses to a 0-d array.
    """
    # NOTE(review): the private ``beta._cdf`` / ``beta._pdf`` are kept on
    # purpose; the public methods clamp out-of-support arguments.
    beta_cdf = beta._cdf
    beta_pdf = beta._pdf
    n_hets = len(self.hets)
    n_segs = len(self.segs)

    # Tumor counts stay (n_hets, 1) so they broadcast against the AF grid.
    self.t_alt_count = self.hets[['ALT_COUNT_T']].values
    self.t_ref_count = self.hets[['REF_COUNT_T']].values
    self.afexp = np.repeat(np.expand_dims(self.af, 1), n_hets, axis=1).T

    # Weight of each AF grid point: Beta(alt + 1, ref + 1) mass in the bin
    # ending at that AF value (0.005 is presumably the grid spacing of
    # ``self.af`` -- confirm against where ``af`` is built).
    t_alt = self.t_alt_count + 1
    t_ref = self.t_ref_count + 1
    t_af_w = beta_cdf(self.afexp, t_alt, t_ref) - beta_cdf(self.afexp - 0.005, t_alt, t_ref)

    # Distance of each grid AF from mu_af_n, signed by the per-het ``d`` column.
    f_t_af = self.mu_af_n - np.abs(self.mu_af_n - self.afexp)
    psi_t_af = self.mu_af_n - f_t_af
    psi_t_af = np.multiply(psi_t_af, self.hets['d'].values[:, np.newaxis])

    self.n_alt_count = self.hets['ALT_COUNT_N'].values
    self.n_ref_count = self.hets['REF_COUNT_N'].values
    n_alt = np.expand_dims(self.n_alt_count + 1, 1)
    n_ref = np.expand_dims(self.n_ref_count + 1, 1)

    # Sum over the tumor AF grid: one column of shift and weight per grid point.
    self.p_TiN = np.zeros([n_hets, self.resolution])
    for psi_col, weight_col in zip(psi_t_af.T, t_af_w.T):
        exp_f = self.mu_af_n + np.multiply(psi_col[:, np.newaxis], self.CN_ratio)
        exp_f[exp_f < 0] = 0  # an allele fraction cannot be negative
        # 0.01 scales the density to a probability (presumably a 1% AF bin).
        self.p_TiN += np.multiply(beta_pdf(exp_f, n_alt, n_ref) * 0.01, weight_col[:, np.newaxis])

    seg_var = np.zeros(n_segs)
    TiN_MAP = np.zeros(n_segs, dtype=int)
    TiN_likelihood = np.zeros([n_segs, self.resolution])
    TiN_post = np.zeros([n_segs, self.resolution])

    # Uniform prior over the TiN grid; identical for every segment.
    log_prior = np.log(np.true_divide(np.ones(self.resolution), self.resolution))
    het_seg_ids = self.hets['seg_id']

    for row, seg_id in enumerate(self.segs.index):
        in_seg = np.asarray(het_seg_ids == seg_id, dtype=bool)
        seg_p = self.p_TiN[in_seg]
        seg_loglik = np.sum(np.log(seg_p), axis=0)

        self.seg_likelihood[seg_id] = seg_loglik
        # NOTE(review): argmax over axis 0 yields, for each TiN bin, the row
        # of the het with the largest probability; a variance of per-het MAP
        # bins would need axis=1.  Behaviour kept as-is -- confirm intent.
        seg_var[row] = np.nanvar(np.argmax(seg_p, axis=0))
        TiN_MAP[row] = np.nanargmax(seg_loglik)
        TiN_likelihood[row, :] = seg_loglik

        # Shift so the largest log-posterior equals 1 before exponentiating,
        # which avoids underflow; the normalisation removes the shift.
        log_post = seg_loglik + log_prior
        log_post = log_post + (1 - np.max(log_post))
        post = np.exp(log_post)
        TiN_post[row, :] = np.true_divide(post, np.sum(post))

    self.TiN_post_seg = TiN_post
    self.segs.loc[:, 'TiN_var'] = seg_var
    self.segs.loc[:, 'TiN_MAP'] = self.TiN_range[TiN_MAP] * 100
    self.TiN_likelihood_matrix = TiN_likelihood