Exemple #1
0
 def test_loci_mutations_indel(self):
     """Check the mutation labels that make_indel records on a locus.

     With ``ti_td=0`` the first mutation recorded on the locus at index 3
     must contain 'del4'. With ``ti_td=10`` a fresh sequence is mutated
     repeatedly until the recorded mutation contains 'ins'; it must then
     contain 'ins4' and the locus must report ``loc_aa == 4``.
     """
     # Deletion case.
     deletion_seq = models.Sequence(self.old_sequence)
     loci = deletion_seq.loci
     models.make_indel(loci[3], ti_td=0)
     self.assertIn('del4', loci[3].mutations[0])

     # Insertion case: the outcome is presumably random, so rebuild the
     # sequence and retry until an insertion is actually recorded.
     while True:
         insertion_seq = models.Sequence(self.old_sequence)
         loci = insertion_seq.loci
         models.make_indel(loci[3], ti_td=10)
         if 'ins' in loci[3].mutations[0]:
             break

     self.assertIn('ins4', loci[3].mutations[0])
     self.assertEqual(loci[3].loc_aa, 4)
Exemple #2
0
def load_data(hic_dir, seq_path, histone_path, seq_format='fasta', test=False):
    """
    Returns a list of (Sequence, InteractionMatrix) tuples given the Hi-C data,
    the DNA sequences, and histone modifications in the paths provided. The
    list is ordered by sequence id.

    hic_dir: a path to a directory containing files named "loops_<NUM>.pickle",
            where each file contains a series of pickled tuples as written by
            load_submatrices.py. Files are read starting at NUM = 0 until the
            first missing number.
    seq_path: a path to a sequence file that is parsed with SeqIO.parse using
            seq_format.
    histone_path: a path to a pickle containing histone modifications as written
            by epigenome_seq_extraction.py
    seq_format: the format name handed to SeqIO.parse (default 'fasta').
    test: if True, stop reading early: at most 100 histone entries are loaded,
            and each later stage stops once it has as many entries as the
            previous one.
    """

    sequences = {}
    histones = {}
    # Read histone data
    # NOTE(review): load_pickles presumably unpickles these files; only feed
    # it trusted inputs.
    print("Reading histone data...")
    for raw_id, histone_data in load_pickles(histone_path):
        # Ids are stored without the 'chr' prefix so both sources agree.
        histones[raw_id.replace('chr', '')] = histone_data
        if test and len(histones) == 100:
            break

    # Read sequence data
    print("Reading sequence data...")
    for record in SeqIO.parse(seq_path, seq_format):
        seq_id = record.id.replace('chr', '')
        if seq_id not in histones:
            print("Histone modifications not found for sequence {}".format(
                seq_id))
            continue

        sequences[seq_id] = models.Sequence(seq_id, str(record.seq),
                                            histones[seq_id])
        if test and len(sequences) == len(histones):
            break

    # Read Hi-C data
    print("Reading Hi-C data...")
    results = {}
    file_idx = 0
    num_omitted = 0
    while True:
        path = os.path.join(hic_dir, "loops_{}.pickle".format(file_idx))
        if not os.path.exists(path):
            break
        for hic_id, data in load_pickles(path):
            # The lookup below is into `sequences`, so that is the mapping
            # to test; an id with histones but no sequence record would
            # otherwise raise an uncaught KeyError.
            if hic_id not in sequences:
                print("Sequence object not found for sequence {}".format(
                    hic_id))
                continue
            try:
                matrix = models.InteractionMatrix(hic_id, data)
            except ValueError:
                # Count the entry as omitted rather than aborting the load.
                num_omitted += 1
            else:
                results[hic_id] = (sequences[hic_id], matrix)
        file_idx += 1
        if test and len(results) == len(sequences):
            break

    print("Loaded {} items - {} had missing data.".format(
        len(results), num_omitted))
    return [results[key] for key in sorted(results)]
Exemple #3
0
def plot_p():
    """Plot codon 'aaa' probabilities over time for two Goldman Q matrices.

    The first plot uses F1x4 codon frequencies computed from the sequence
    stored in '../../data/split/Seq2_Sus'; the second uses goldman_Q()
    with its default arguments.
    """
    with open('../../data/split/Seq2_Sus', 'r') as handle:
        raw_text = handle.read()

    # Drop the first 10 characters (presumably a header - TODO confirm),
    # join the lines and normalise to lower case.
    nucleotides = raw_text[10:].replace('\n', '').lower()
    seq = models.Sequence(nucleotides)

    empirical_q = models.goldman_Q(codon_freq=codon_frequencies.F1x4(seq))
    models.plot_p_over_time(empirical_q, t=0.1, codon='aaa', logscale=False)

    default_q = models.goldman_Q()
    models.plot_p_over_time(default_q, t=0.1, codon='aaa', logscale=False)
Exemple #4
0
 def test_compile_histories(self):
     """compile_histories returns a dict with an entry for every tree node."""
     root = models.Sequence(self.old_sequence)
     tree = evolve.evolve_tree(root, taxa=10, t=0.1, omega=1.1, kappa=1.5)

     histories = evolve.compile_histories(tree)

     self.assertIsInstance(histories, dict)
     for node in trees.get_list_of_tree_nodes(tree):
         self.assertIn(node.id, histories)
Exemple #5
0
 def test_evolve_tree(self):
     """evolve_tree with taxa=10 yields 10 leaves, at least one changed."""
     root = models.Sequence(self.old_sequence)
     tree = evolve.evolve_tree(root, taxa=10, t=0.1, omega=1.1, kappa=1.5)

     leaf_values = [
         leaf.value for leaf in trees.get_list_of_tree_leaves(tree)
     ]
     self.assertEqual(len(leaf_values), 10)

     # All ten leaves matching the input has a vanishing probability.
     some_leaf_changed = any(
         value.seq != self.old_sequence for value in leaf_values)
     self.assertTrue(some_leaf_changed)
Exemple #6
0
    def test_evolve_sequence_with_q(self):
        """Two independent evolutions differ from the input and each other."""
        q = models.goldman_Q(scale_q=True)

        # Evolve the same starting sequence twice, each from a fresh object.
        evolved = [
            evolve.evolve_sequence_with_q(
                models.Sequence(self.old_sequence), q, t=1)
            for _ in range(2)
        ]
        first_seq = evolved[0].seq
        second_seq = evolved[1].seq

        # Each of these has a very low probability of failing.
        self.assertNotEqual(self.old_sequence, first_seq)
        self.assertNotEqual(self.old_sequence, second_seq)
        self.assertNotEqual(first_seq, second_seq)
Exemple #7
0
def benchmark_evolve_tree():
    """Time evolve_tree for 3, 6 and 9 taxa and print one line per run.

    The starting sequence is read from '../../data/split/Seq2_Sus'. Each
    printed line reports the taxa count, the number of nodes in the
    resulting tree and the elapsed wall-clock seconds.
    """
    with open('../../data/split/Seq2_Sus', 'r') as handle:
        raw_text = handle.read()

    # Drop the first 10 characters (presumably a header - TODO confirm),
    # join the lines and normalise to lower case.
    root = models.Sequence(raw_text[10:].replace('\n', '').lower())

    for taxa in (3, 6, 9):
        started = time.time()
        tree = evolve.evolve_tree(root, taxa=taxa)
        node_count = len(trees.get_list_of_tree_nodes(tree))
        elapsed = time.time() - started
        print("taxa:{} (nodes={}), sec:{}".format(taxa, node_count, elapsed))
Exemple #8
0
def do_sequence(request,
                seqid,
                rightanswer=None,
                lastanswer=None,
                **ignored_args):
    """Render the next quiz question for the sequence identified by seqid.

    Starting from the SequenceItem with ``prev=None``, the chain of items
    is followed through their ``prev`` links. A SequenceItemEvaluation is
    built for each item visited, and the walk stops at the first
    evaluation that is not completed or when no further item exists. If
    the last evaluation visited is completed, a random evaluation from
    those visited is used instead.

    Args:
        request: the incoming request; ``request.user`` is evaluated.
        seqid: identifier passed to models.Sequence and embedded in the
            form action URL.
        rightanswer: forwarded to generate_quiz_question_params.
        lastanswer: forwarded to generate_quiz_question_params.
        **ignored_args: accepted and ignored.

    Returns:
        The response rendered from 'simple_quiz.html' for multiple-choice
        items, or from 'open_response.html' otherwise.
    """
    seq = models.Sequence(seqid)
    firstitem = models.SequenceItem.objects.get(sequence=seq, prev=None)
    sie = SequenceItemEvaluation(request.user, firstitem)
    evals = [sie]
    while sie.completed:
        try:
            nxt = models.SequenceItem.objects.get(sequence=seq,
                                                  prev=sie.seqitem)
        except models.SequenceItem.DoesNotExist:
            # End of the chain. Only the "no such row" case is handled
            # here so that unrelated errors are no longer silently hidden.
            print('No next')
            break
        sie = SequenceItemEvaluation(request.user, nxt)
        evals.append(sie)

    if sie.completed:
        # Every visited item is completed: pick any one of them.
        sie = random.choice(evals)

    params = generate_quiz_question_params(
        request,
        sie.seqitem.category,
        rightanswer=rightanswer,
        lastanswer=lastanswer,
        reverse=sie.seqitem.reverse,
        question_type=sie.seqitem.question_type,
    )
    # The template receives the history with the latest item first.
    evals.reverse()
    params['seq_history'] = evals

    if sie.seqitem.question_type == models.MULTIPLE_CHOICE:
        params['action'] = '/seq/answer/%s/' % seqid
        return render_to_response('simple_quiz.html', params)

    params['action'] = '/seq/answer/%s/open' % seqid
    return render_to_response('open_response.html', params)
Exemple #9
0
def new_sequence(request, errors=None):
    """Create a Sequence from a POSTed form, or render the creation form.

    On a POST with valid data the form is saved and the client is
    redirected to '/new/'. On a POST with invalid data, or on any other
    request method, a blank SequenceForm is rendered with 'form.html'.

    Args:
        request: the incoming request.
        errors: errors to show on the rendered form; replaced by the
            form's own errors when a POST fails validation.

    Returns:
        A redirect to '/new/' after a successful save, otherwise the
        rendered 'form.html' response.
    """
    if request.method == 'POST':
        instance = models.Sequence()
        mf = SequenceForm(request.POST, instance=instance)
        if mf.is_valid():
            mf.save()
            print('SAVE!')
            return HttpResponseRedirect('/new/')
        print('ERROR!')
        # Fall through to the shared render below with the validation
        # errors, instead of mutating request.method and recursing.
        errors = mf.errors

    mf = SequenceForm()
    return render_to_response(
        'form.html', {
            'title': 'Sequence',
            'form': mf,
            'errors': errors,
            'desc': 'Create new sequence',
            'action': '/new/sequence/'
        })
Exemple #10
0
def evolve(
    sequence,
    taxa=10,
    t=1e-2,
    omega=1.0,
    kappa=2.0,
    lmbda=1e-4,
    ti_td=0.1,
    codon_freq='F1x4',
    scale_q=True,
    strip_deletions=False,
    log=False,
    verbose=False,
):
    """
    Wrapper around evolve_tree(). Returns a list of evolved sequences.

    Args:
        sequence: string of DNA nucleotides
        taxa: number of daughter sequences to evolve
        t: evolution time or branch length
        omega: dN/dS
        kappa: ratio of transition to transversion rates
        lmbda: probability of indel at codon
        ti_td: ratio of insertions to deletions
        codon_freq: codon frequency model, also known as equilibrium
        frequencies (default is F1x4)
        scale_q: scales Q so that the average rate of substitution at
        equilibrium equals 1. Branch lengths are thus expected number of nucleotide
        substitutions per codon. See Goldman (1994).
        strip_deletions: if True, removes '-' characters from the returned
        sequences
        log: if True, returns list of evolved sequences AND list of mutations
        verbose: if True, prints parameters

    Returns:
        A list with the sequence of every leaf of the evolved tree, or a
        (sequences, mutations) tuple when log is True.

    Raises:
        TypeError: if sequence is not a str.
    """
    # Snapshot the call arguments before any other local exists, so that
    # only the parameters (and not loop variables from the verbose block)
    # are forwarded to evolve_tree().
    tree_kwargs = dict(locals())

    if not isinstance(sequence, str):
        raise TypeError('sequence must be a string')

    if verbose:
        print('\n\n---------------------------------------')
        print('evolving new sequences with parameters:')
        print('---------------------------------------')
        for name, value in zip([
                'taxa',
                't',
                'omega',
                'kappa',
                'lmbda',
                'ti_td',
                'scale_q',
                'codon_freq',
                'strip_deletions',
                'log',
        ], [
                taxa, t, omega, kappa, lmbda, ti_td, scale_q, codon_freq,
                strip_deletions, log
        ]):
            print('{0:<10} {1:<15}'.format(name, value))
        print('---------------------------------------\n\n')

    # evolve_tree() receives a Sequence object built from the lower-cased
    # string in place of the raw string.
    tree_kwargs['sequence'] = models.Sequence(seq=sequence.lower())
    tree = evolve_tree(**tree_kwargs)
    leaves = trees.get_list_of_tree_leaves(tree)
    sequences = [leaf.value.seq for leaf in leaves]
    if strip_deletions:
        sequences = [seq.replace('-', '') for seq in sequences]
    if log:
        mutations = [leaf.value.mutations for leaf in leaves]
        return sequences, mutations
    return sequences
Exemple #11
0
 def test_codon_freq_FEqual_F3x4(self):
     """FEqual and F3x4 give different frequencies for the same sequence."""
     # Each model gets its own freshly built Sequence object.
     equal_freqs = codon_frequencies.FEqual(
         models.Sequence(self.old_sequence))
     f3x4_freqs = codon_frequencies.F3x4(
         models.Sequence(self.old_sequence))
     self.assertNotEqual(equal_freqs, f3x4_freqs)