예제 #1
0
class TestTrioModel(unittest.TestCase):
    """Check the totals of the matrices and the result exposed by TrioModel.

    Every test runs against a model built from three identical read vectors,
    [30, 0, 0, 0], and compares a summed matrix (or the value returned by
    trio()) against a fixed expected number.
    """

    def setUp(self):
        # A fresh inner list per entry, so the three read vectors are
        # distinct objects just like in a literal nested list.
        identical_reads = [[30, 0, 0, 0] for _ in range(3)]
        self.trio_model = TrioModel(reads=identical_reads)

    def test_pop_sample(self):
        # The matrix returned by pop_sample() must sum to ~1.
        total = np.sum(self.trio_model.pop_sample())
        self.assertAlmostEqual(total, 1)

    def test_germ_muta(self):
        # The child_prob_mat attribute must sum to ~256.
        total = np.sum(self.trio_model.child_prob_mat)
        self.assertAlmostEqual(total, 256)

    def test_soma_muta(self):
        # The soma_prob_mat attribute must sum to ~16.
        total = np.sum(self.trio_model.soma_prob_mat)
        self.assertAlmostEqual(total, 16)

    def test_seq_err(self):
        # The matrix returned by seq_err_all() must sum to ~3.
        total = np.sum(self.trio_model.seq_err_all())
        self.assertAlmostEqual(total, 3)

    def test_trio(self):
        # For three identical read vectors trio() must return ~0.
        result = self.trio_model.trio()
        self.assertAlmostEqual(result, 0)
예제 #2
0
    def __init__(self, coverage, germ_muta_rate, soma_muta_rate):
        """
        Create the underlying TrioModel and record the requested coverage.

        Coverage and the germline and somatic mutation rates are adjustable
        via the command line. A rate passed as None is left out of the
        TrioModel call entirely rather than being forwarded.

        Args:
            coverage: Coverage value, stored unchanged as self.cov.
            germ_muta_rate: Germline mutation rate, or None to omit it.
            soma_muta_rate: Somatic mutation rate, or None to omit it.
        """
        candidate_rates = (
            ('germ_muta_rate', germ_muta_rate),
            ('soma_muta_rate', soma_muta_rate),
        )
        # Forward only the rates that were actually supplied.
        supplied_rates = {
            keyword: rate
            for keyword, rate in candidate_rates
            if rate is not None
        }
        self.trio_model = TrioModel(**supplied_rates)
        self.cov = coverage
        self.has_muta = False
예제 #3
0
def write_proba(child, mother, father, filename):
    """
    Write the probability of each site on a new line to a text file.

    The three inputs are passed through trim_header and then walked in
    lockstep. For every site a TrioModel is built from the three read sets
    and its trio() result is compared with THRESHOLD; only sites at or above
    the threshold are written. Each output line is the second tab-separated
    field of the child line, a tab, and the probability.

    Args:
        child: Iterable of the content in the child pileup file.
        mother: Iterable of the content in the mother pileup file.
        father: Iterable of the content in the father pileup file.
        filename: String representing the name of the output file.
    """
    child_lines = trim_header(child)
    mother_lines = trim_header(mother)
    father_lines = trim_header(father)
    # The context manager guarantees the output file is flushed and closed
    # even when parsing or the model raises part-way through the inputs.
    with open(filename, 'w') as fout:
        # NOTE(review): zip_longest pads the shorter inputs with None, so
        # inputs of unequal length hand None to get_reads -- confirm that
        # get_reads (and the c.split below) can cope with that case.
        for c, m, f in itertools.zip_longest(child_lines, mother_lines,
                                             father_lines):
            reads = [get_reads(c), get_reads(m), get_reads(f)]
            proba = TrioModel(reads=reads).trio()
            if proba >= THRESHOLD:
                fout.write('%s\t%s\n' % (c.split("\t")[1], str(proba)))
예제 #4
0
def write_proba(child, mother, father, filename):
    """
    Write the probability of each site on a new line to a text file.

    Headers are removed from the three inputs with trim_header, after which
    the inputs are iterated together. A TrioModel is created per site from
    the reads of all three members; when its trio() value is at least
    THRESHOLD, a line "<second tab-separated field of the child line>\t
    <probability>" is appended to the output file.

    Args:
        child: Iterable of the content in the child pileup file.
        mother: Iterable of the content in the mother pileup file.
        father: Iterable of the content in the father pileup file.
        filename: String representing the name of the output file.
    """
    child_lines = trim_header(child)
    mother_lines = trim_header(mother)
    father_lines = trim_header(father)
    # Use a with-block so the file handle is released on every exit path,
    # including an exception raised while processing a site.
    with open(filename, 'w') as fout:
        # NOTE(review): zip_longest fills exhausted inputs with None; verify
        # that get_reads accepts None, otherwise inputs of different lengths
        # fail here (or at c.split when the child input is the short one).
        sites = itertools.zip_longest(child_lines, mother_lines,
                                      father_lines)
        for c, m, f in sites:
            reads = [get_reads(c), get_reads(m), get_reads(f)]
            trio_model = TrioModel(reads=reads)
            proba = trio_model.trio()
            if proba >= THRESHOLD:
                site_proba = '%s\t%s\n' % (c.split("\t")[1], str(proba))
                fout.write(site_proba)
예제 #5
0
 def setUp(self):
     # Build the model under test from three identical read vectors; each
     # inner list is a separate object, as in a literal nested list.
     identical_reads = [[30, 0, 0, 0] for _ in range(3)]
     self.trio_model = TrioModel(reads=identical_reads)
예제 #6
0
# Each input line is one tab-separated parameter set; the resulting
# probability is printed next to the reads it was computed from.
for row in handle:
    fields = row.strip('\n').split('\t')

    # Columns 0-11: three consecutive groups of four integer counts, in the
    # order child, mother, father.
    reads = [[int(count) for count in fields[start:start + 4]]
             for start in (0, 4, 8)]

    # Columns 12-15: the four rates handed to TrioModel below.
    rates = [float(rate) for rate in fields[12:16]]

    # Columns 16 and 17 may be empty strings, in which case a fallback
    # value is used instead of parsing the field.
    if fields[16]:
        disp = float(fields[16])
    else:
        disp = 1000  # default dispersion value
    if fields[17]:
        bias = float(fields[17])
    else:
        bias = None

    trio_model = TrioModel(
        reads=reads,
        pop_muta_rate=rates[0],
        germ_muta_rate=rates[1],
        soma_muta_rate=rates[2],
        seq_err_rate=rates[3],
        dm_disp=disp,
        dm_bias=bias
    )
    proba = trio_model.trio()
    # Output format: "<reads>\t<probability>\n".
    print(reads, end='\t')
    print(proba)

handle.close()
예제 #7
0
# Usage: python driver.py <parameters.txt>
#
# Every line of the parameter file is one tab-separated parameter set:
#   columns 0-3    child read counts (integers)
#   columns 4-7    mother read counts (integers)
#   columns 8-11   father read counts (integers)
#   columns 12-15  population, germline and somatic mutation rates and the
#                  sequencing error rate (floats)
#   column 16      dispersion; an empty field selects the default of 1000
#   column 17      bias; an empty field is passed on as None
# For each line the reads and the value returned by TrioModel.trio() are
# printed, separated by a tab.

# The with-block closes the parameter file even if a malformed line or the
# model raises before the end of the file is reached.
with open(sys.argv[1]) as handle:
    for line in handle:
        values = line.strip('\n').split('\t')
        child_read = [int(count) for count in values[:4]]
        mom_read = [int(count) for count in values[4:8]]
        dad_read = [int(count) for count in values[8:12]]
        reads = [child_read, mom_read, dad_read]
        rates = [float(rate) for rate in values[12:16]]
        disp = float(values[16]) if values[16] else 1000  # default dispersion value
        bias = float(values[17]) if values[17] else None

        trio_model = TrioModel(
            reads=reads,
            pop_muta_rate=rates[0],
            germ_muta_rate=rates[1],
            soma_muta_rate=rates[2],
            seq_err_rate=rates[3],
            dm_disp=disp,
            dm_bias=bias
        )
        proba = trio_model.trio()
        print(reads, end='\t')
        print(proba)