def expectation_generator(args, model, alignment_filename, annotations):
    """Yield Baum-Welch expectation counts for each alignment in a file.

    For every alignment loaded from ``alignment_filename`` the first two
    sequences are taken, repeat generators are built from the available
    annotations and (when the model has a state named ``'Repeat'``) attached
    to that state, and ``model.getBaumWelchCounts`` is run over the full
    length of both sequences.

    Args:
        args: Options object; this function reads ``alignment_regexp``,
            ``sequence_regexp``, ``beam_width`` and ``repeat_width`` from it.
        model: Model exposing ``statenameToID``, ``states`` and
            ``getBaumWelchCounts``.
        alignment_filename: Path passed through to ``Fasta.load``.
        annotations: Mapping indexed first by annotation type (``'trf'``,
            ``'original_repeats'``) and then by sequence name.

    Yields:
        dict: One dictionary per alignment with the keys ``"probability"``,
        ``"transitions"`` and ``"emissions"``.

    Raises:
        ValueError: If an alignment contains fewer than two sequences.
    """
    alignments = Fasta.load(
        alignment_filename,
        args.alignment_regexp,
        Alignment,
        sequence_selectors=args.sequence_regexp,
    )
    for aln in alignments:
        if len(aln.sequences) < 2:
            message = "ERROR: not enough sequences in file"
            sys.stderr.write(message + "\n")
            # A bare string cannot be raised (it only produces an unrelated
            # TypeError), so raise a real exception carrying the message.
            raise ValueError(message)

        seq1, seq2 = map(Fasta.alnToSeq, aln.sequences[:2])
        # Materialised into a list before being handed to the model.
        positionGenerator = list(AlignmentBeamGenerator(aln, args.beam_width))

        RX = RepeatGenerator(None, args.repeat_width)
        RY = RepeatGenerator(None, args.repeat_width)
        for rt in ['trf', 'original_repeats']:
            if rt in annotations:
                RX.addRepeats(annotations[rt][aln.names[0]])
                RY.addRepeats(annotations[rt][aln.names[1]])
        RX.buildRepeatDatabase()
        RY.buildRepeatDatabase()

        # Only models that define a 'Repeat' state receive the generators.
        if 'Repeat' in model.statenameToID:
            repeat_state = model.states[model.statenameToID['Repeat']]
            repeat_state.addRepeatGenerator(RX, RY)

        (transitions, emissions), probability = model.getBaumWelchCounts(
            seq1, 0, len(seq1),
            seq2, 0, len(seq2),
            positionGenerator=positionGenerator,
        )

        yield {
            "probability": probability,
            "transitions": transitions,
            "emissions": emissions,
        }
def precomputeRepeatGenerators(self, realigner):
    """Build the X and Y repeat generators and register them on ``self``.

    Two ``RepeatGenerator`` objects are created from
    ``realigner.repeat_width`` and ``realigner.cons_count``.  Repeats from
    the ``'trf'``, ``'original_repeats'`` and ``'hmm'`` annotations (when
    present) are added to them and also passed to
    ``realigner.drawer.add_repeat_finder_annotation``.  If ``'trf_cons'``
    annotations are present and the combined consensus collection of both
    sequences is non-empty, a ``Repeat`` is added for every position pair
    ``(i, j)`` with ``i < j`` in each sequence.  Finally both repeat
    databases are built and handed to ``self.addRepeatGenerator``.

    Args:
        realigner: Object providing ``repeat_width``, ``cons_count``,
            ``annotations``, ``X_name``, ``Y_name``, ``X``, ``Y`` and
            ``drawer``.
    """

    def make_generator():
        return RepeatGenerator(
            None,
            realigner.repeat_width,
            realigner.cons_count,
        )

    gen_x = make_generator()
    gen_y = make_generator()

    colour = (255, 0, 0, 255)
    sources = (('trf', 'R'), ('original_repeats', 'r'), ('hmm', 'h'))
    for source, symbol in sources:
        if source not in realigner.annotations:
            continue
        # Feed the generators first, then the drawer, X before Y each time.
        found_x = realigner.annotations[source][realigner.X_name]
        gen_x.addRepeats(found_x)
        found_y = realigner.annotations[source][realigner.Y_name]
        gen_y.addRepeats(found_y)
        realigner.drawer.add_repeat_finder_annotation(
            'X', symbol, found_x, colour
        )
        realigner.drawer.add_repeat_finder_annotation(
            'Y', symbol, found_y, colour
        )

    if 'trf_cons' in realigner.annotations:
        x_len = len(realigner.X)
        y_len = len(realigner.Y)
        consensus = realigner.annotations['trf_cons']
        cons = list(consensus[realigner.X_name] | consensus[realigner.Y_name])

        if cons:
            def all_pair_repeats(length):
                # One Repeat per (start, end) pair with start < end; the
                # consensus entry is picked by start position, cycling
                # through the list.
                built = []
                for start in range(length):
                    for end in range(start + 1, length):
                        built.append(
                            Repeat(start, end, 0, cons[start % len(cons)], "")
                        )
                return built

            gen_x.addRepeats(all_pair_repeats(x_len))
            gen_y.addRepeats(all_pair_repeats(y_len))

    gen_x.buildRepeatDatabase()
    gen_y.buildRepeatDatabase()
    self.addRepeatGenerator(gen_x, gen_y)