def realign_file(args, model, output_filename, alignment_filename):
    """Realign every alignment in ``alignment_filename`` and save the results.

    Annotations are computed once for the whole input file, using
    ``args.annotation_model`` when it is set and ``model`` otherwise.  Each
    alignment loaded from the file is then masked with ``args.mask_repeats``,
    realigned with ``args.algorithm`` over the full length of its first two
    sequences, unmasked again and written to ``output_filename``.  When
    ``args.draw`` is non-empty an image of the realignment is drawn as well.

    Args:
        args: Options object.  The attributes read here are ``expand_model``,
            ``tracks``, ``annotation_model``, ``alignment_regexp``,
            ``sequence_regexp``, ``draw``, ``mask_repeats`` and ``algorithm``.
        model: Model used for the realignment.  When ``args.expand_model`` is
            set it is expanded per alignment with the union of the
            'trf_cons' consensuses of the two sequences.
        output_filename: Path of the file the realigned alignments go to.
        alignment_filename: Path of the input alignment file.

    Returns:
        ``1`` as soon as an alignment with fewer than two sequences is
        encountered (processing stops there), otherwise ``None``.
    """
    # HACK: model expansion needs the 'trf_cons' annotation track, so it is
    # requested temporarily while the annotations are computed.
    added_trf_cons = False
    if args.expand_model:
        # Membership has to be checked *before* adding: keeping a reference
        # to args.tracks and checking afterwards always sees the new entry,
        # so the temporary track would never be removed again.
        added_trf_cons = 'trf_cons' not in args.tracks
        args.tracks.add('trf_cons')
    annotation_model = args.annotation_model or model
    try:
        annotations = compute_annotations(
            args, alignment_filename, annotation_model)
    finally:
        # args.tracks is used as a set (see .add above); discard() restores
        # the caller's selection even if computing the annotations failed.
        if added_trf_cons:
            args.tracks.discard('trf_cons')
    if args.expand_model:
        consensuses = annotations['trf_cons']
    # end of HACK

    with Open(output_filename, 'w') as output_file_object:
        alignments = Fasta.load(
            alignment_filename,
            args.alignment_regexp,
            Alignment,
            sequence_selectors=args.sequence_regexp)
        for aln in alignments:
            if len(aln.sequences) < 2:
                sys.stderr.write("ERROR: not enough sequences in file\n")
                return 1

            # Without a drawing request a "brainwashed" canvas is used in
            # place of a real one.
            if len(args.draw) == 0:
                drawer = brainwash(AlignmentCanvas)()
            else:
                drawer = AlignmentCanvas()
                drawer.add_original_alignment(aln)

            aln, unmask_repeats = args.mask_repeats(aln, annotations)
            seq1, seq2 = tuple(map(Fasta.alnToSeq, aln.sequences[:2]))
            name1, name2 = aln.names[0], aln.names[1]
            perf.msg("Data loaded in {time} seconds.")
            perf.replace()

            if args.expand_model:
                # The model is expanded with the consensuses found in either
                # of the two sequences.
                cons = list(consensuses[name1].union(consensuses[name2]))
                real_model = model.expandModel({'consensus': cons})
            else:
                real_model = model

            realigner = args.algorithm()
            realigner.setDrawer(drawer)
            realigner.prepareData(seq1, name1, seq2, name2, aln,
                                  real_model, annotations, args)
            aln = realigner.realign(0, len(seq1), 0, len(seq2))
            aln = unmask_repeats(aln)
            perf.msg("Sequence was realigned in {time} seconds.")
            perf.replace()

            if len(args.draw) > 0:
                drawer.add_sequence('X', seq1)
                drawer.add_sequence('Y', seq2)
                # Rows 0 and 2 of the realigned result are the ones drawn
                # as the alignment line.
                drawer.add_alignment_line(
                    101, (255, 0, 255, 255), 2,
                    AlignmentPositionGenerator(Alignment([aln[0], aln[2]])))
                drawer.draw(args.draw, 2000, 2000)
                perf.msg("Image was drawn in {time} seconds.")

            # Save output_file
            Fasta.saveAlignmentPiece(aln, output_file_object)
def main(files_filename, output_filename, suffix, base_dir):
    """Merge alignment pieces back into whole alignments.

    ``files_filename`` is a JSON file whose entries are iterated in order.
    A non-empty entry names one alignment piece; an empty string closes the
    current group, at which point the concatenated rows are written to
    ``output_filename`` and the accumulators are reset.

    For a piece whose name does not contain "keep", the last two characters
    of the name are replaced by ``suffix`` and, when ``base_dir`` is not
    ``None``, the file is looked up in ``base_dir`` under its base name.  If
    that file cannot be opened or contains only whitespace, the original
    piece is loaded instead and treated as "kept".  Kept pieces contribute
    dots instead of their middle row.

    Args:
        files_filename: Path of the JSON file listing the pieces.
        output_filename: Path of the merged output file.
        suffix: Replacement for the last two characters of a piece name.
        base_dir: Directory holding the substituted pieces, or ``None`` to
            look next to the original piece.
    """
    # Pieces are collected in lists and joined once per group; repeated
    # string concatenation would copy the growing rows on every piece.
    x_parts, y_parts, a_parts = [], [], []
    with Open(output_filename, "w") as ff:
        # Close the listing as soon as it is parsed instead of leaking it.
        with Open(files_filename) as files_file:
            files = json.load(files_file)
        total = len(files)
        done = 0
        for filename in files:
            if done % 100 == 0:
                # "{:.2f}" gives a fixed two-decimal percentage; a bare
                # "{:.2}" means two significant digits (12.5 -> "1.2e+01").
                print("{}/{} {:.2f}%".format(
                    done, total, 100.0 * done / total))
            if filename == "":
                # End of a group: rows are written in the order X, Y, A.
                Fasta.saveAlignmentPiece(
                    [
                        (X_name, "".join(x_parts)),
                        (Y_name, "".join(y_parts)),
                        (A_name, "".join(a_parts)),
                    ],
                    ff)
                x_parts, y_parts, a_parts = [], [], []
                continue
            done += 1

            old_filename = filename
            keep = False
            if "keep" not in filename:
                filename = filename[:-2] + suffix
                if base_dir is not None:
                    filename = base_dir + "/" + filename.split("/")[-1]
                try:
                    with Open(filename, "r") as f:
                        is_empty = len("".join(f).strip()) == 0
                except IOError:
                    # A missing or unreadable substitute is handled like an
                    # empty one.
                    is_empty = True
                if is_empty:
                    filename = old_filename
                    keep = True
            if "keep" in filename:
                keep = True

            aln = list(Fasta.load(filename, ""))[0]
            assert len(aln) == 3
            assert len(aln[0][1]) == len(aln[1][1]) == len(aln[2][1])
            x_parts.append(aln[0][1])
            if keep:
                a_parts.append("." * len(aln[0][1]))
            else:
                a_parts.append(aln[1][1])
            y_parts.append(aln[2][1])
            # The names of the most recent piece label the merged rows.
            X_name = aln[0][0]
            A_name = aln[1][0]
            Y_name = aln[2][0]
def main(files_filename, output_filename, suffix, base_dir):
    """Merge alignment pieces back into whole alignments.

    ``files_filename`` is a JSON file whose entries are iterated in order.
    A non-empty entry names one alignment piece; an empty string closes the
    current group, at which point the concatenated rows are written to
    ``output_filename`` and the accumulators are reset.

    For a piece whose name does not contain 'keep', the last two characters
    of the name are replaced by ``suffix`` and, when ``base_dir`` is not
    ``None``, the file is looked up in ``base_dir`` under its base name.  If
    that file cannot be opened or contains only whitespace, the original
    piece is loaded instead and treated as "kept".  Kept pieces contribute
    dots instead of their middle row.

    Args:
        files_filename: Path of the JSON file listing the pieces.
        output_filename: Path of the merged output file.
        suffix: Replacement for the last two characters of a piece name.
        base_dir: Directory holding the substituted pieces, or ``None`` to
            look next to the original piece.
    """
    # Pieces are collected in lists and joined once per group; repeated
    # string concatenation would copy the growing rows on every piece.
    x_parts, y_parts, a_parts = [], [], []
    with Open(output_filename, 'w') as ff:
        # Close the listing as soon as it is parsed instead of leaking it.
        with Open(files_filename) as files_file:
            files = json.load(files_file)
        total = len(files)
        done = 0
        for filename in files:
            if done % 100 == 0:
                # '{:.2f}' gives a fixed two-decimal percentage; a bare
                # '{:.2}' means two significant digits (12.5 -> '1.2e+01').
                print('{}/{} {:.2f}%'.format(
                    done, total, 100.0 * done / total))
            if filename == "":
                # End of a group: rows are written in the order X, Y, A.
                Fasta.saveAlignmentPiece(
                    [
                        (X_name, ''.join(x_parts)),
                        (Y_name, ''.join(y_parts)),
                        (A_name, ''.join(a_parts)),
                    ],
                    ff)
                x_parts, y_parts, a_parts = [], [], []
                continue
            done += 1

            old_filename = filename
            keep = False
            if 'keep' not in filename:
                filename = filename[:-2] + suffix
                if base_dir is not None:
                    filename = base_dir + '/' + filename.split('/')[-1]
                try:
                    with Open(filename, 'r') as f:
                        is_empty = len(''.join(f).strip()) == 0
                except IOError:
                    # A missing or unreadable substitute is handled like an
                    # empty one.
                    is_empty = True
                if is_empty:
                    filename = old_filename
                    keep = True
            if 'keep' in filename:
                keep = True

            aln = list(Fasta.load(filename, ''))[0]
            assert len(aln) == 3
            assert len(aln[0][1]) == len(aln[1][1]) == len(aln[2][1])
            x_parts.append(aln[0][1])
            if keep:
                a_parts.append('.' * len(aln[0][1]))
            else:
                a_parts.append(aln[1][1])
            y_parts.append(aln[2][1])
            # The names of the most recent piece label the merged rows.
            X_name = aln[0][0]
            A_name = aln[1][0]
            Y_name = aln[2][0]
def main(input_file, output_file):
    """Write the alignments of ``input_file`` together with repeat tracks.

    A TRF driver is created for the paths in ``trf_paths`` that exist and
    run once on ``input_file``.  Every two-sequence alignment loaded from
    ``input_file`` is then saved to ``output_file`` as five rows: the first
    sequence, the repeat consensus of the first sequence, the repeat
    annotation ('R' in every column covered by a repeat of either sequence,
    '.' elsewhere), the repeat consensus of the second sequence, and the
    second sequence.  In both sequence rows '.' is replaced by '-'.
    Alignments that do not have exactly two sequences are reported with
    ``error`` on standard output and skipped.

    Args:
        input_file: Path of the alignment file to annotate.
        output_file: Path of the annotated output file.

    Raises:
        RuntimeError: If none of the paths in ``trf_paths`` exists.
    """
    trf = None
    for trf_executable in trf_paths:
        if os.path.exists(trf_executable):
            # There is deliberately no ``break``: the last existing path
            # in ``trf_paths`` is the one that ends up being used.
            trf = TRFDriver(trf_executable)
    if not trf:
        raise RuntimeError("No trf found")
    repeats = trf.run(input_file)
    # The driver is not used after this point; release the reference.
    trf = None

    with open(output_file, 'w') as f:
        for alignment in Fasta.load(input_file, r'\.[0-9]*$', Alignment):
            if len(alignment.sequences) != 2:
                print('error')
                continue

            width = len(alignment.sequences[0])
            annotation = ['.'] * width
            annotationX = ['.'] * width
            annotationY = ['.'] * width

            for index, seq_name in enumerate(alignment.names):
                translator = alignment.seq_to_aln[index]
                revtranslator = alignment.aln_to_seq[index]
                # The first sequence fills the X consensus row, any other
                # one the Y consensus row.
                consensus_row = annotationX if index == 0 else annotationY
                for repeat in repeats[seq_name]:
                    first_column = translator[repeat.start]
                    consensus = repeat.consensus
                    for i in range(first_column, translator[repeat.end]):
                        annotation[i] = 'R'
                        j = i - first_column
                        # NOTE(review): aln_to_seq is indexed with the
                        # repeat-relative column j, not the alignment
                        # column i -- confirm that this is intended.
                        consensus_row[i] = consensus[
                            revtranslator[j] % len(consensus)]

            nm = alignment.names[0]
            aln = [
                (alignment.names[0],
                 alignment.sequences[0].replace('.', '-')),
                ('consensusX' + nm, ''.join(annotationX)),
                ('annotation' + nm, ''.join(annotation)),
                ('consensusY' + nm, ''.join(annotationY)),
                (alignment.names[1],
                 alignment.sequences[1].replace('.', '-')),
            ]
            Fasta.saveAlignmentPiece(aln, f, -1)
def main(input_file, output_file):
    """Write the alignments of ``input_file`` together with repeat tracks.

    A TRF driver is created for the paths in ``trf_paths`` that exist and
    run once on ``input_file``.  Every two-sequence alignment loaded from
    ``input_file`` is then saved to ``output_file`` as five rows: the first
    sequence, the repeat consensus of the first sequence, the repeat
    annotation ('R' in every column covered by a repeat of either sequence,
    '.' elsewhere), the repeat consensus of the second sequence, and the
    second sequence.  In both sequence rows '.' is replaced by '-'.
    Alignments that do not have exactly two sequences are reported with
    ``error`` on standard output and skipped.

    Args:
        input_file: Path of the alignment file to annotate.
        output_file: Path of the annotated output file.

    Raises:
        RuntimeError: If none of the paths in ``trf_paths`` exists.
    """
    trf = None
    for trf_executable in trf_paths:
        if os.path.exists(trf_executable):
            # There is deliberately no ``break``: the last existing path
            # in ``trf_paths`` is the one that ends up being used.
            trf = TRFDriver(trf_executable)
    if not trf:
        raise RuntimeError("No trf found")
    repeats = trf.run(input_file)
    # The driver is not used after this point; release the reference.
    trf = None

    with open(output_file, 'w') as f:
        for alignment in Fasta.load(input_file, r'\.[0-9]*$', Alignment):
            if len(alignment.sequences) != 2:
                print('error')
                continue

            width = len(alignment.sequences[0])
            annotation = ['.'] * width
            annotationX = ['.'] * width
            annotationY = ['.'] * width

            for index, seq_name in enumerate(alignment.names):
                translator = alignment.seq_to_aln[index]
                revtranslator = alignment.aln_to_seq[index]
                # The first sequence fills the X consensus row, any other
                # one the Y consensus row.
                consensus_row = annotationX if index == 0 else annotationY
                for repeat in repeats[seq_name]:
                    first_column = translator[repeat.start]
                    consensus = repeat.consensus
                    for i in range(first_column, translator[repeat.end]):
                        annotation[i] = 'R'
                        j = i - first_column
                        # NOTE(review): aln_to_seq is indexed with the
                        # repeat-relative column j, not the alignment
                        # column i -- confirm that this is intended.
                        consensus_row[i] = consensus[
                            revtranslator[j] % len(consensus)]

            nm = alignment.names[0]
            aln = [
                (alignment.names[0],
                 alignment.sequences[0].replace('.', '-')),
                ('consensusX' + nm, ''.join(annotationX)),
                ('annotation' + nm, ''.join(annotation)),
                ('consensusY' + nm, ''.join(annotationY)),
                (alignment.names[1],
                 alignment.sequences[1].replace('.', '-')),
            ]
            Fasta.saveAlignmentPiece(aln, f, -1)