class TestTrioModel(unittest.TestCase):
    """Sanity checks on the probability matrices exposed by TrioModel.

    Every test runs against a model built from three identical read-count
    vectors ``[30, 0, 0, 0]`` and compares a summed matrix (or the final
    trio probability) against a fixed expected value.
    """

    def setUp(self):
        # Three identical read-count vectors, one per trio member.
        identical_reads = [[30, 0, 0, 0] for _ in range(3)]
        self.trio_model = TrioModel(reads=identical_reads)

    def test_pop_sample(self):
        # The matrix returned by pop_sample() is expected to sum to 1.
        total = np.sum(self.trio_model.pop_sample())
        self.assertAlmostEqual(total, 1)

    def test_germ_muta(self):
        # child_prob_mat is expected to sum to 256.
        total = np.sum(self.trio_model.child_prob_mat)
        self.assertAlmostEqual(total, 256)

    def test_soma_muta(self):
        # soma_prob_mat is expected to sum to 16.
        total = np.sum(self.trio_model.soma_prob_mat)
        self.assertAlmostEqual(total, 16)

    def test_seq_err(self):
        # The matrix returned by seq_err_all() is expected to sum to 3.
        total = np.sum(self.trio_model.seq_err_all())
        self.assertAlmostEqual(total, 3)

    def test_trio(self):
        # With identical reads for all three members, trio() should be ~0.
        self.assertAlmostEqual(self.trio_model.trio(), 0)
def __init__(self, coverage, germ_muta_rate, soma_muta_rate):
    """
    Set up a TrioModel and record the requested coverage.

    A mutation rate that is None is not forwarded, so TrioModel falls back
    to its own default for that rate.

    Args:
        coverage: Coverage value, stored unchanged on ``self.cov``.
        germ_muta_rate: Germline mutation rate, or None to use the
            TrioModel default.
        soma_muta_rate: Somatic mutation rate, or None to use the
            TrioModel default.
    """
    requested_rates = {
        'germ_muta_rate': germ_muta_rate,
        'soma_muta_rate': soma_muta_rate,
    }
    # Forward only the rates that were actually supplied.
    overrides = {
        name: rate
        for name, rate in requested_rates.items()
        if rate is not None
    }
    self.trio_model = TrioModel(**overrides)
    self.cov = coverage
    self.has_muta = False
def write_proba(child, mother, father, filename):
    """
    Write the probability of each qualifying site on a new line of a text file.

    The three inputs are passed through trim_header and then consumed in
    lockstep. For every site a TrioModel is built from the three read
    vectors; when its trio() probability is at least THRESHOLD, one line
    ``<field>\\t<probability>`` is written, where ``<field>`` is the second
    tab-separated column of the child line.

    Args:
        child: Iterable of the content in the child pileup file.
        mother: Iterable of the content in the mother pileup file.
        father: Iterable of the content in the father pileup file.
        filename: String representing the name of the output file.
    """
    child_lines = trim_header(child)
    mother_lines = trim_header(mother)
    father_lines = trim_header(father)

    # The context manager closes (and flushes) the output file even when
    # parsing or the model raises part-way through the inputs.
    with open(filename, 'w') as fout:
        # NOTE(review): zip_longest pads shorter inputs with None, so inputs
        # of unequal length reach get_reads()/split() as None -- confirm the
        # three pileups are always the same length.
        for c, m, f in itertools.zip_longest(child_lines, mother_lines,
                                             father_lines):
            reads = [get_reads(c), get_reads(m), get_reads(f)]
            proba = TrioModel(reads=reads).trio()
            if proba >= THRESHOLD:
                fout.write('%s\t%s\n' % (c.split("\t")[1], str(proba)))
def setUp(self):
    """Build the TrioModel fixture from three identical read-count vectors."""
    read_counts = [30, 0, 0, 0]
    # Each trio member gets its own copy of the same counts.
    self.trio_model = TrioModel(reads=[list(read_counts) for _ in range(3)])
# Each input line is one tab-separated record:
#   columns 0-11   three groups of four integer read counts
#                  (child, mother, father)
#   columns 12-15  population, germline, somatic mutation rates and
#                  sequencing error rate
#   column 16      dispersion (empty -> 1000)
#   column 17      bias (empty -> None)
for line in handle:
    values = line.strip('\n').split('\t')

    child_read = list(map(int, values[:4]))
    mom_read = list(map(int, values[4:8]))
    dad_read = list(map(int, values[8:12]))
    reads = [child_read, mom_read, dad_read]

    rates = list(map(float, values[12:16]))

    if values[16]:
        disp = float(values[16])
    else:
        disp = 1000  # default dispersion value

    if values[17]:
        bias = float(values[17])
    else:
        bias = None

    trio_model = TrioModel(
        reads=reads,
        pop_muta_rate=rates[0],
        germ_muta_rate=rates[1],
        soma_muta_rate=rates[2],
        seq_err_rate=rates[3],
        dm_disp=disp,
        dm_bias=bias
    )
    proba = trio_model.trio()
    # One output row per record: the reads, a tab, then the probability.
    print(reads, proba, sep='\t')
handle.close()
# run python driver.py <parameters.txt>
#
# Each line of the parameter file is one tab-separated record:
#   columns 0-11   three groups of four integer read counts
#                  (child, mother, father)
#   columns 12-15  population, germline, somatic mutation rates and
#                  sequencing error rate
#   column 16      dispersion (empty -> 1000)
#   column 17      bias (empty -> None)
# For every record the reads and the resulting trio probability are printed,
# separated by a tab.

# The context manager closes the parameter file even if a malformed record
# or the model raises part-way through.
with open(sys.argv[1]) as handle:
    for line in handle:
        values = line.strip('\n').split('\t')

        child_read = [int(count) for count in values[:4]]
        mom_read = [int(count) for count in values[4:8]]
        dad_read = [int(count) for count in values[8:12]]
        reads = [child_read, mom_read, dad_read]

        rates = [float(rate) for rate in values[12:16]]
        disp = float(values[16]) if values[16] else 1000  # default dispersion value
        bias = float(values[17]) if values[17] else None

        trio_model = TrioModel(
            reads=reads,
            pop_muta_rate=rates[0],
            germ_muta_rate=rates[1],
            soma_muta_rate=rates[2],
            seq_err_rate=rates[3],
            dm_disp=disp,
            dm_bias=bias
        )
        proba = trio_model.trio()
        print(reads, end='\t')
        print(proba)