def call_cn_var(cnvtag, var_alt, var_ref, alt_forward, alt_reverse, var_list,
                var_db):
    """
    Call CN for variant sites in non-homology regions.

    Each site is called with call_reg1_cn using a minimum read cutoff that
    depends on which set the variant belongs to: 2 for CLEAN_VAR, 7 for
    NOISY_VAR and 4 for everything else. The per-site total CN (looked up
    from cnvtag / var_db) is used both for the raw call and during the
    final filtering step.

    For NOISY_VAR sites that have variant reads, the forward/reverse split
    of the variant reads is compared against an even split with Fisher's
    exact test. The variant read count is reset to zero when the split is
    significantly uneven (p < P_CUTOFF) or when either strand has at most
    one supporting read.

    Arguments are parallel per-site sequences, indexed in the order of
    alt_forward: var_alt / var_ref hold the variant and reference read
    counts, alt_forward / alt_reverse the per-strand variant read counts,
    and var_list the variant names.

    Returns the output of process_raw_call_denovo for all sites.
    """
    total_cn = get_total_cn_per_site(cnvtag, var_db, var_list)
    assert total_cn is not None

    site_probs = []
    for idx, fwd_reads in enumerate(alt_forward):
        rev_reads = alt_reverse[idx]
        ref_reads = var_ref[idx]
        alt_reads = var_alt[idx]
        variant = var_list[idx]

        if alt_reads > 0 and variant in NOISY_VAR:
            strand_total = fwd_reads + rev_reads
            # NOTE(review): the expected row is passed as floats
            # (strand_total / 2); scipy documents the table as integer
            # counts -- confirm how odd totals are meant to be handled.
            even_split = [strand_total / 2, strand_total / 2]
            _, pvalue = fisher_exact([[fwd_reads, rev_reads], even_split])
            if pvalue < P_CUTOFF or fwd_reads <= 1 or rev_reads <= 1:
                # Do not trust the variant reads at this site.
                alt_reads = 0

        # CLEAN_VAR takes precedence over NOISY_VAR when picking the cutoff.
        if variant in CLEAN_VAR:
            min_reads = 2
        elif variant in NOISY_VAR:
            min_reads = 7
        else:
            min_reads = 4
        site_probs.append(
            call_reg1_cn(total_cn[idx], alt_reads, ref_reads, min_reads))

    return process_raw_call_denovo(site_probs, 0.8, 0.65, total_cn)
def call_exon9gc(d6_count, d7_count, full_length_cn):
    """
    Call exon 9 conversion from per-site read counts.

    d6_count and d7_count are parallel per-site sequences of read counts;
    full_length_cn is the full-length copy number, or None if it could not
    be called.

    A call is returned only when all of the following hold:
      * full_length_cn is not None;
      * the stringent per-site calls (posterior cutoff 0.88) that are not
        None all agree on a single value;
      * a call made from the mean counts across sites (posterior cutoff
        0.75) is not None and equals that value;
      * if that value is 1, the per-site raw estimates
        full_length_cn * d6 / (d6 + d7) have a minimum below 1.2 and a
        maximum below 1.3.
    Otherwise None is returned.
    """
    if full_length_cn is None:
        return None
    total_cn = int(full_length_cn)

    d6_cn_estimates = []
    site_probs = []
    for idx, d6_reads in enumerate(d6_count):
        d7_reads = d7_count[idx]
        # Raises ZeroDivisionError if a site has no reads at all.
        d6_cn_estimates.append(total_cn * d6_reads / (d6_reads + d7_reads))
        site_probs.append(call_reg1_cn(total_cn, d6_reads, d7_reads, 3))

    stringent_calls = process_raw_call_gc(site_probs, 0.88)
    distinct_calls = list(
        {call for call in stringent_calls if call is not None})
    if len(distinct_calls) != 1:
        # Either no site was callable or the sites disagree.
        return None

    # Cross-check the consensus against a call on the averaged counts.
    mean_d6 = np.mean(d6_count)
    mean_d7 = np.mean(d7_count)
    averaged_call = process_raw_call_gc(
        [call_reg1_cn(total_cn, mean_d6, mean_d7, 3)], 0.75)[0]
    if averaged_call is None or not averaged_call == distinct_calls[0]:
        return None

    if averaged_call == 1:
        # A call of 1 additionally requires every raw estimate to be low.
        if min(d6_cn_estimates) < 1.2 and max(d6_cn_estimates) < 1.3:
            return averaged_call
        return None
    return averaged_call
def call_cn_var(cnvtag, lsnp1, lsnp2, var_list, var_db):
    """
    Call CN for variant sites in non-homology regions.

    Sites whose variant is in CLEAN_VAR are called with a minimum read
    cutoff of 2; all other sites use a cutoff of 4. The per-site total CN
    (looked up from cnvtag / var_db) is used both for the raw call and
    during the final filtering step.

    lsnp1 and lsnp2 are parallel per-site read-count sequences, passed to
    call_reg1_cn as its second and third arguments; var_list holds the
    variant name for each site.

    Returns the output of process_raw_call_denovo for all sites.
    """
    total_cn = get_total_cn_per_site(cnvtag, var_db, var_list)
    assert total_cn is not None

    site_probs = []
    for idx, reads1 in enumerate(lsnp1):
        reads2 = lsnp2[idx]
        min_reads = 2 if var_list[idx] in CLEAN_VAR else 4
        site_probs.append(
            call_reg1_cn(total_cn[idx], reads1, reads2, min_reads))

    return process_raw_call_denovo(site_probs, 0.8, 0.65, total_cn)
def call_cn_snp(total_cn, lsnp1, lsnp2, threshold=0.6):
    """
    Call CN for SNP sites between CYP2D6 and CYP2D7.

    Every site is called with call_reg1_cn against the same total_cn, and
    the raw calls are then filtered by process_raw_call_gc with
    `threshold` as the posterior cutoff. The default cutoff is loose
    because this is used for CNV/hybrid group calling.

    lsnp1 and lsnp2 are parallel per-site read-count sequences.
    """
    site_probs = [
        call_reg1_cn(total_cn, reads1, lsnp2[idx])
        for idx, reads1 in enumerate(lsnp1)
    ]
    return process_raw_call_gc(site_probs, threshold)
def call_cn_var_homo(total_cn, lsnp1, lsnp2):
    """
    Call CN for variant sites in homology regions.

    Every site is called with call_reg1_cn (minimum read cutoff 4) against
    the same total_cn and filtered with process_raw_call_denovo. Each
    resulting call is capped at total_cn - 2; sites without a call stay
    None.

    lsnp1 and lsnp2 are parallel per-site read-count sequences.
    Returns a list with one entry (int or None) per filtered site call.
    """
    site_probs = [
        call_reg1_cn(total_cn, reads1, lsnp2[idx], 4)
        for idx, reads1 in enumerate(lsnp1)
    ]
    filtered_calls = process_raw_call_denovo(site_probs, 0.8, 0.65)
    return [
        None if call is None else min(call, total_cn - 2)
        for call in filtered_calls
    ]
def call_exon9gc(d6_count, d7_count, full_length_cn):
    """
    Call exon 9 conversion from a single pair of read counts.

    d6_count and d7_count are the two read counts passed to call_reg1_cn
    (minimum read cutoff 3); full_length_cn is the full-length copy
    number, or None if it could not be called.

    Returns None when full_length_cn is None. Otherwise returns the single
    call produced by process_raw_call_gc with a stringent posterior cutoff
    of 0.9, which may itself be None.
    """
    if full_length_cn is None:
        # Without a full-length CN there is nothing to call against; bail
        # out explicitly instead of forwarding None to the CN caller.
        return None

    site_prob = call_reg1_cn(int(full_length_cn), d6_count, d7_count, 3)
    return process_raw_call_gc([site_prob], 0.9)[0]
def get_smn12_call(raw_cn_call, lsnp1, lsnp2, var_ref, var_alt, mdepth):
    """Return the copy number call of SMN1, SMN2 and SMNstar.

    raw_cn_call is first passed through update_full_length_cn; its
    exon78_cn, exon78_depth and exon16_cn attributes are then read.
    lsnp1 / lsnp2 are parallel per-site read-count sequences, var_alt /
    var_ref the read counts for the targeted variant(s), and mdepth is
    forwarded to smn1_cn_zero.

    Returns an `smn_call` namedtuple with fields SMN1, SMN2, SMNstar,
    isCarrier, isSMA, SMN1_CN_raw, Info, Confidence, g27134TG_raw and
    g27134TG_CN.

    Raises Exception if the CN derived from exon16_cn is smaller than the
    full-length CN.
    """
    smn1_fraction = get_fraction(lsnp1, lsnp2)
    smn_call = namedtuple(
        'smn_call', 'SMN1 SMN2 SMNstar isCarrier isSMA \
        SMN1_CN_raw Info Confidence g27134TG_raw g27134TG_CN')
    raw_cn_call = update_full_length_cn(raw_cn_call)
    full_length_cn = raw_cn_call.exon78_cn

    if full_length_cn is None:
        # No-call for full-length CN: fall back to the exon78 depth value.
        tag = 'FLCNnoCall'
        depth_cn = raw_cn_call.exon78_depth
        raw_smn1_cn = get_raw_smn1_cn(depth_cn, smn1_fraction)
        # In cases where full length copy number is no-call, test for zero
        # copy of SMN1 at the splice variant site. If true, report a range
        # for SMN2 CN.
        sma_likelihood_ratio = smn1_cn_zero(
            lsnp1[SPLICE_INDEX], lsnp2[SPLICE_INDEX], mdepth)

        cn_smn1 = cn_smn2 = is_carrier = is_sma = None
        if sma_likelihood_ratio > 1 / SMA_CUTOFF:
            cn_smn1 = 0
            cn_smn2 = '%i-%i' % (math.floor(depth_cn), math.ceil(depth_cn))
            is_carrier = False
            is_sma = True
        elif sma_likelihood_ratio < SMA_CUTOFF:
            # SMA status is set to False; everything else stays no-call.
            is_sma = False
        return smn_call(cn_smn1, cn_smn2, None, is_carrier, is_sma,
                        raw_smn1_cn, tag, [None] * TOTAL_NUM_SITES,
                        None, None)

    full_length_cn = int(full_length_cn)
    raw_smn1_cn = get_raw_smn1_cn(full_length_cn, smn1_fraction)

    # Most likely SMN1 CN (or the best two if posterior probability is low)
    # at each site.
    cn_prob = [
        call_reg1_cn(full_length_cn, lsnp1[site], lsnp2[site])
        for site in range(TOTAL_NUM_SITES)
    ]

    # Pool the read counts over the selected sites and make a combined call.
    pooled_reads1 = sum(lsnp1[site] for site in SELECTED_SITES_INDEX)
    pooled_reads2 = sum(lsnp2[site] for site in SELECTED_SITES_INDEX)
    combined_call = call_reg1_cn(full_length_cn, pooled_reads1, pooled_reads2)
    tag, cn_smn1, lsitecall_loose = get_smn1_call_and_tag(
        cn_prob, combined_call)

    sma_likelihood_ratio = smn1_cn_zero(
        lsnp1[SPLICE_INDEX], lsnp2[SPLICE_INDEX], mdepth)
    is_sma = get_sma_status(
        lsitecall_loose, cn_prob, cn_smn1, sma_likelihood_ratio)
    is_carrier = get_carrier_status(
        lsitecall_loose, cn_prob, cn_smn1, sma_likelihood_ratio)

    # Targeted variant(s): only the first variant is called.
    var_fraction = get_fraction(var_alt, var_ref)
    raw_var_cn = get_raw_smn1_cn(full_length_cn, var_fraction)[0]
    var_cn = [call_reg1_cn(full_length_cn, var_alt[0], var_ref[0])]
    var_cn_filtered = process_raw_call_denovo(
        var_cn, POSTERIOR_CUTOFF_MEDIUM, POSTERIOR_CUTOFF_LOOSE,
        keep_none=False)
    var_cn_confident = None
    if var_cn_filtered != []:
        var_cn_confident = var_cn_filtered[0]
        if var_cn_confident is not None and cn_smn1 is not None:
            # The variant CN is capped at the SMN1 CN.
            if cn_smn1 < var_cn_confident:
                var_cn_confident = cn_smn1

    # Call CN for SMN2 and SMN*.
    cn_smnstar = None
    if raw_cn_call.exon16_cn is not None:
        cn_smnstar = int(raw_cn_call.exon16_cn) - full_length_cn
        if cn_smnstar < 0:
            raise Exception(
                'Total SMN CN is smaller than full-length SMN CN.')
    cn_smn2 = None if cn_smn1 is None else full_length_cn - cn_smn1

    return smn_call(cn_smn1, cn_smn2, cn_smnstar, is_carrier, is_sma,
                    raw_smn1_cn, tag, cn_prob, raw_var_cn,
                    var_cn_confident)