예제 #1
0
def expectation_generator(args, model, alignment_filename, annotations):
    """Yield Baum-Welch counts for every alignment loaded from a file.

    For each alignment the first two sequences are taken, repeat generators
    are built from the supplied annotations, and the model is asked for its
    Baum-Welch counts over the full length of both sequences.

    Args:
        args: Options object; this function reads ``alignment_regexp``,
            ``sequence_regexp``, ``beam_width`` and ``repeat_width``.
        model: Object providing ``statenameToID``, ``states`` and
            ``getBaumWelchCounts``.
        alignment_filename: Path handed to ``Fasta.load``.
        annotations: Mapping indexed first by repeat source (``'trf'`` and
            ``'original_repeats'`` are consulted when present) and then by
            sequence name.

    Yields:
        dict: ``{"probability": ..., "transitions": ..., "emissions": ...}``
        as returned by ``model.getBaumWelchCounts``.

    Raises:
        ValueError: If an alignment contains fewer than two sequences.
    """
    for aln in Fasta.load(alignment_filename,
                          args.alignment_regexp,
                          Alignment,
                          sequence_selectors=args.sequence_regexp):
        if len(aln.sequences) < 2:
            message = "ERROR: not enough sequences in file"
            sys.stderr.write(message + "\n")
            # Raising a bare string is itself a TypeError; raise a real
            # exception so the caller sees the intended diagnostic.
            raise ValueError(message)
        seq1, seq2 = tuple(map(Fasta.alnToSeq, aln.sequences[:2]))
        # Materialised as a list before being handed to the model.
        positionGenerator = list(AlignmentBeamGenerator(aln, args.beam_width))

        RX = RepeatGenerator(None, args.repeat_width)
        RY = RepeatGenerator(None, args.repeat_width)
        for rt in ['trf', 'original_repeats']:
            if rt in annotations:
                RX.addRepeats(annotations[rt][aln.names[0]])
                RY.addRepeats(annotations[rt][aln.names[1]])
        RX.buildRepeatDatabase()
        RY.buildRepeatDatabase()
        # Only models that define a 'Repeat' state receive the generators.
        if 'Repeat' in model.statenameToID:
            model.states[model.statenameToID['Repeat']].addRepeatGenerator(
                RX, RY)

        (transitions, emissions), probability = model.getBaumWelchCounts(
            seq1,
            0,
            len(seq1),
            seq2,
            0,
            len(seq2),
            positionGenerator=positionGenerator)
        yield {
            "probability": probability,
            "transitions": transitions,
            "emissions": emissions,
        }
예제 #2
0
 def precomputeRepeatGenerators(self, realigner):
     """Create the X and Y repeat generators and register them on self.

     Repeats found under the 'trf', 'original_repeats' and 'hmm' annotation
     sources are added to both generators and also passed to the
     realigner's drawer. When 'trf_cons' annotations are present and yield
     at least one consensus, a repeat is added for every (start, end) pair
     with start < end within each sequence's length.

     Args:
         realigner: Object supplying ``repeat_width``, ``cons_count``,
             ``annotations``, ``X_name``, ``Y_name``, ``X``, ``Y`` and
             ``drawer``.
     """
     # Colour tuple passed to every drawer annotation call.
     highlight = (255, 0, 0, 255)
     width = realigner.repeat_width
     consensus_count = realigner.cons_count
     RX = RepeatGenerator(None, width, consensus_count)
     RY = RepeatGenerator(None, width, consensus_count)

     sources = (('trf', 'R'), ('original_repeats', 'r'), ('hmm', 'h'))
     for source, symbol in sources:
         if source not in realigner.annotations:
             continue
         found = realigner.annotations[source]
         x_repeats = found[realigner.X_name]
         RX.addRepeats(x_repeats)
         y_repeats = found[realigner.Y_name]
         RY.addRepeats(y_repeats)
         draw = realigner.drawer.add_repeat_finder_annotation
         draw('X', symbol, x_repeats, highlight)
         draw('Y', symbol, y_repeats, highlight)

     if 'trf_cons' in realigner.annotations:
         x_len = len(realigner.X)
         y_len = len(realigner.Y)
         by_name = realigner.annotations['trf_cons']
         # Union of the consensuses recorded for both sequences.
         cons = list(by_name[realigner.X_name] | by_name[realigner.Y_name])
         if cons:
             n_cons = len(cons)

             def every_span(length):
                 # One Repeat per (start, end) pair with start < end; the
                 # consensus is picked by cycling through cons on start.
                 spans = []
                 for start in range(length):
                     consensus = cons[start % n_cons]
                     for end in range(start + 1, length):
                         spans.append(Repeat(start, end, 0, consensus, ""))
                 return spans

             RX.addRepeats(every_span(x_len))
             RY.addRepeats(every_span(y_len))

     RX.buildRepeatDatabase()
     RY.buildRepeatDatabase()
     self.addRepeatGenerator(RX, RY)