def screen_maf(qa_file, maf_file):
    """
    Screen the .maf file based on the cluster info in the qa_file.

    Every anchor whose last element (its score) is non-zero, in any cluster
    returned by ``read_clusters(qa_file)``, is collected. Each MAF record is
    converted with ``alignment_to_cluster`` and written to
    ``<maf_file>.filtered`` when its first anchor is in that collection.
    """
    clusters = read_clusters(qa_file)
    filtered_maf = maf_file + ".filtered"

    screened_alignments = set()
    for cluster in clusters:
        for anchor in cluster:
            score = anchor[-1]
            if score != 0:
                screened_alignments.add(anchor)

    # Context managers make sure both handles are flushed and closed, also on
    # errors; previously the output handle was never closed at all.
    with open(maf_file) as fp:
        reader = maf.Reader(fp)
        with open(filtered_maf, "w") as fw:
            writer = maf.Writer(fw)
            for rec in reader:
                # One (src, start, end, strand, score) tuple per component;
                # the record score is shared by all of its components.
                alignment = [
                    (c.src, c.forward_strand_start, c.forward_strand_end,
                     c.strand, rec.score)
                    for c in rec.components
                ]
                cluster = alignment_to_cluster(alignment)
                if cluster[0] in screened_alignments:
                    writer.write(rec)

    # NOTE(review): the count reported is the number of screened anchors, not
    # the number of records actually written -- kept as in the original.
    sys.stderr.write("write (%d) alignments to '%s'\n" %
                     (len(screened_alignments), filtered_maf))
def __main__():
    """Split every MAF block by species and write the pieces to a new file.

    Command line: ``argv[1]`` is the input MAF, ``argv[2]`` the output MAF and
    ``argv[3]`` a boolean-like string; when true, all-gap columns are removed
    from each split block before it is written.
    """
    try:
        maf_reader = maf.Reader(open(sys.argv[1]))
    except Exception as err:
        maf_utilities.tool_fail("Error opening MAF: %s" % err)

    try:
        out = maf.Writer(open(sys.argv[2], "w"))
    except Exception as err:
        maf_utilities.tool_fail("Error opening file for output: %s" % err)

    try:
        collapse_columns = string_as_bool(sys.argv[3])
    except Exception as err:
        maf_utilities.tool_fail("Error determining collapse columns value: %s" % err)

    last_index = 0  # zero-based index of the last original block read
    written = 0     # number of split blocks written
    for last_index, original in enumerate(maf_reader):
        for piece in maf_utilities.iter_blocks_split_by_species(original):
            if collapse_columns:
                piece.remove_all_gap_columns()
            out.write(piece)
            written += 1
    out.close()

    if not written:
        print("No alignment blocks were created.")
    else:
        print("%i alignment blocks created from %i original blocks." % (written, last_index + 1))
def main(in_file):
    """Write ``<base>-sorted<ext>`` holding the records of `in_file`, largest first.

    Records are ordered by descending ``(text_size, file offset)``. An index
    file ``<in_file>.index`` is built with ``build_index`` when it is missing,
    and is then used to fetch each record by its offset.
    """
    stem, suffix = os.path.splitext(in_file)
    out_file = "%s-sorted%s" % (stem, suffix)
    index_file = in_file + ".index"
    if not os.path.exists(index_file):
        build_index(in_file, index_file)

    # First pass: remember the size of each record and where it starts.
    sized_offsets = []
    with open(in_file) as in_handle:
        reader = maf.Reader(in_handle)
        while True:
            # The offset has to be taken *before* the record is consumed.
            offset = reader.file.tell()
            record = reader.next()
            if record is None:
                break
            sized_offsets.append((record.text_size, offset))
    sized_offsets = sorted(sized_offsets, reverse=True)

    # Second pass: emit the records in sorted order through the index.
    index = maf.Indexed(in_file, index_file)
    with open(out_file, "w") as out_handle:
        writer = maf.Writer(out_handle)
        for _, offset in sized_offsets:
            writer.write(index.get_at_offset(offset))
def test_writer(): val = StringIO() writer = maf.Writer(val, {'scoring': 'foobar'}) a = align.Alignment() a.score = 7009 a.components.append( align.Component(src="human_hoxa", start=100, size=9, strand="+", src_size=1000257, text="ACA-TTACT")) a.components.append( align.Component(src="horse_hoxa", start=120, size=10, strand="-", src_size=98892, text="ACAATTGCT")) check_component(a.components[0], "human_hoxa", 100, 9, "+", 1000257, "ACA-TTACT") check_component(a.components[1], "horse_hoxa", 120, 10, "-", 98892, "ACAATTGCT") writer.write(a) assert val.getvalue() == """##maf version=1 scoring=foobar
def __main__():
    """Filter MAF blocks from stdin to stdout.

    A block is dropped when ``--component_count`` is given and its number of
    components differs, when ``--min_cols`` is given and its ``text_size`` is
    smaller, or when ``--expr`` is given and evaluates falsy with the block
    bound to both ``m`` and ``maf``.
    """
    parser = OptionParser()
    parser.add_option("--component_count", action="store", default=None, type="int", help="")
    parser.add_option("--min_cols", action="store", default=None, type="int", help="")
    parser.add_option("-e", "--expr", action="store", default=None)
    options, _ = parser.parse_args()

    wanted_components = options.component_count
    min_width = options.min_cols

    # Compile the expression once so it is not re-parsed for every block.
    # NOTE(review): the expression is run with eval(); it comes straight from
    # the command line, so this tool must only be given trusted arguments.
    code = compile(options.expr, '<expr arg>', 'eval') if options.expr else None

    reader = maf.Reader(sys.stdin)
    writer = maf.Writer(sys.stdout)

    for block in reader:
        # Short-circuit order matters: later checks only run when the earlier
        # ones did not already reject the block.
        rejected = (
            (wanted_components and len(block.components) != wanted_components)
            or (min_width and block.text_size < min_width)
            or (code is not None and not eval(code, {"m": block, "maf": block}))
        )
        if not rejected:
            writer.write(block)
def main():
    """Process the intervals listed on stdin with ``do_interval``.

    Positional arguments: a species list (passed through ``tree_tx`` and split
    on whitespace), a 2bit reference file, and one or more MAF files for a
    ``maf.MultiIndexed``. Each non-blank stdin line must carry at least
    ``src start end``; when the strand option is set and the line has more than
    five fields, field 6 is used as the strand, otherwise ``'+'``.
    """
    options, args = doc_optparse.parse(__doc__)
    try:
        sources = args[0].translate(tree_tx).split()
        ref_2bit = bx.seq.twobit.TwoBitFile(open(args[1]))
        index = maf.MultiIndexed(args[2:])
        out = maf.Writer(sys.stdout)
        missing_data = bool(options.missingData)
        use_strand = bool(options.strand)
    except Exception:
        # Only real errors end up here; a bare ``except:`` would also have
        # swallowed KeyboardInterrupt and SystemExit.
        doc_optparse.exception()

    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline) instead of
            # failing on the three-way unpack below.
            continue
        ref_src, start, end = fields[0:3]
        if use_strand and len(fields) > 5:
            strand = fields[5]
        else:
            strand = '+'
        do_interval(sources, index, out, ref_src, int(start), int(end),
                    ref_2bit, missing_data, strand)

    out.close()
def test_write_with_synteny():
    """Check that the first block of ``test_maf_2`` (read with e rows) is written back as expected."""
    # parse_e_rows=True is passed so the reader also handles the "e" lines.
    reader = maf.Reader(StringIO(test_maf_2), parse_e_rows=True)
    a = reader.next()
    val = StringIO()
    # The attribute dict is passed to the writer; the expected text below
    # starts with a matching "scoring=foobar" header.
    writer = maf.Writer(val, {'scoring': 'foobar'})
    writer.write(a)
    actual = val.getvalue()
    # NOTE(review): in this view the expected literal has no line breaks or
    # column padding; it presumably spanned one MAF line per row in the
    # original file -- restore from version control before relying on it.
    expected = """##maf version=1 scoring=foobar a score=3656.0 s hg17.chr1 2005 34 + 245522847 TGTAACTTAATACCACAACCAGGCATAGGGG--AAA------------- s rheMac2.chr11 9625228 31 + 134511895 TGTAACCTCTTACTGCAACAAGGCACAGGGG------------------ i rheMac2.chr11 C 0 I 1678 s panTro1.chr1 2014 34 + 229575298 TGTAACTTAATACCACAACCAGGCATGGGGG--AAA------------- i panTro1.chr1 C 0 C 0 s bosTau2.chr5 64972365 47 + 76426644 TCCAGCCATGTGTTGTGATCAG--CCAGGGGCTAAAGCCATGGCGGTAG i bosTau2.chr5 C 0 I 1462 s canFam2.chr27 45129665 31 + 48908698 TTTGACTCTGTGCTCTTATCAGGCCCAAGGG------------------ i canFam2.chr27 C 0 I 1664 e danRer3.chr18 2360867 428 + 50308305 I e oryCun1.scaffold_139397 643 1271 - 4771 I e loxAfr1.scaffold_5603 58454 1915 + 68791 I e echTel1.scaffold_212365 4641 1430 + 9822 I e echTel1.scaffold_212365 4641 1430 + 9822 I e rn3.chr4 29161032 1524 - 187371129 I e mm7.chr6 28091695 3290 - 149646834 I """
    # Print both texts so a failing run shows them side by side.
    print actual
    print "---"
    print expected
    assert actual == expected
def main():
    """Copy MAF blocks from stdin to stdout, translating each component's text.

    The text of every component is passed through ``str.translate`` with the
    module-level ``table`` before the block is written.
    """
    reader = maf.Reader(sys.stdin)
    writer = maf.Writer(sys.stdout)

    for block in reader:
        for component in block.components:
            translated = component.text.translate(table)
            component.text = translated
        writer.write(block)

    writer.close()
def main(options, args):
    """Write ``<base>-filtered<ext>`` with the records selected by ``interval_chain``.

    Every component of every record in ``args[0]`` becomes a
    ``Weighted_interval`` weighted by the record score. Per chromosome, the
    interval endpoints are handed to ``interval_chain`` and the ids it returns
    are accumulated; records whose id is in that set are copied to the output
    through the MAF index (built with ``build_index`` if missing).

    NOTE(review): a record's id is ``j // 2`` where ``j`` counts components,
    which presumably assumes pairwise (two-component) records -- confirm.
    """
    in_file = args[0]
    base, ext = os.path.splitext(in_file)
    out_file = "%s-filtered%s" % (base, ext)
    index_file = in_file + ".index"
    if not os.path.exists(index_file):
        build_index(in_file, index_file)
    index = maf.Indexed(in_file, index_file)

    intervals = []  # give each interval a unique id
    endpoints = collections.defaultdict(list)  # chromosome => list of endpoints
    filtered_rec = set()
    rec_info = []  # (record id, file offset) for every record actually read

    j = 0
    with open(in_file) as fp:
        reader = maf.Reader(fp)
        while True:
            # The offset must be taken before the record is consumed.
            pos = reader.file.tell()
            rec = reader.next()
            if rec is None:
                break
            # Recorded only after a record was really read, so no phantom
            # entry pointing at end-of-file is stored. Floor division keeps
            # the id an int on Python 3 as well.
            rec_info.append((j // 2, pos))
            for c in rec.components:
                chromosome, left, right, weight = (
                    c.src, c.forward_strand_start, c.forward_strand_end, rec.score)
                intervals.append(Weighted_interval(chromosome, left, right, weight))
                endpoints[chromosome].append((left, j, -weight))  # left end
                endpoints[chromosome].append((right, j, weight))  # right end
                j += 1

    for chromosome in sorted(endpoints):
        v = endpoints[chromosome]
        # Two endpoints were stored per interval.
        print("%s : start with %d intervals" % (chromosome, len(v) // 2))
        filtered_rec |= interval_chain(intervals, v)

    print("filtered alignment size %d" % len(filtered_rec))

    with open(out_file, "w") as fw:
        writer = maf.Writer(fw)
        for rec_id, pos in rec_info:
            if rec_id in filtered_rec:
                writer.write(index.get_at_offset(pos))
def main():
    """Prefix the source names of the first two components in every MAF file.

    For each ``*.maf`` in ``args.indir`` a ``<name>.rename.maf`` is written to
    ``args.outdir`` in which component 0's ``src`` is prefixed with
    ``args.target`` and component 1's ``src`` with ``args.query``.
    """
    args = get_args()
    for f in glob.glob(os.path.join(args.indir, '*.maf')):
        outname = os.path.splitext(os.path.basename(f))[0] + ".rename.maf"
        outpth = os.path.join(args.outdir, outname)
        # Context managers close both handles for every file processed, even
        # on errors; the input handle used to be left open for each file.
        with open(f) as in_handle:
            # The reader is created before the output is opened, so an
            # unreadable input does not leave an empty output file behind.
            inmaf = maf.Reader(in_handle)
            with open(outpth, 'w') as out_handle:
                outf = maf.Writer(out_handle)
                for aln in inmaf:
                    # change name
                    target, query = aln.components[0], aln.components[1]
                    target.src = args.target + target.src
                    query.src = args.query + query.src
                    outf.write(aln)
def main():
    """Pass through MAF blocks that have enough columns with few ``*`` entries.

    ``argv[1]`` is the minimum number of "good" columns a block needs and
    ``argv[2]`` the maximum number of ``'*'`` entries a column may contain to
    count as good. Blocks are read from stdin and written to stdout.
    """
    min_good = int(sys.argv[1])
    min_species = int(sys.argv[2])

    reader = maf.Reader(sys.stdin)
    writer = maf.Writer(sys.stdout)

    for block in reader:
        good_columns = sum(
            1 for column in block.column_iter()
            if column.count('*') <= min_species
        )
        if good_columns >= min_good:
            writer.write(block)
def main(options, args):
    """Keep only the records aligning `chr1` with `chr2`, in either order.

    ``args`` must be exactly ``(in_file, chr1, chr2)``. Records whose first
    two component sources are ``(chr1, chr2)`` or ``(chr2, chr1)`` are written
    to ``<base>.<chr1>_vs_<chr2>_filtered<ext>``.
    """
    in_file, chr1, chr2 = args
    base, ext = os.path.splitext(in_file)
    out_file = "%s.%s_vs_%s_filtered%s" % (base, chr1, chr2, ext)

    wanted = ((chr1, chr2), (chr2, chr1))

    # Both handles are closed (and the output flushed) on exit; previously
    # neither file was ever closed.
    with open(in_file) as fp:
        with open(out_file, "w") as fw:
            reader = maf.Reader(fp)
            writer = maf.Writer(fw)
            for rec in reader:
                pair = (rec.components[0].src, rec.components[1].src)
                if pair in wanted:
                    writer.write(rec)
def __main__():
    """Write a random sample of the MAF blocks read from stdin to stdout.

    ``argv[1]`` (optional) is the number of blocks to emit. When it is absent
    or zero, all blocks are written in shuffled order; when it exceeds the
    number of blocks available, all of them are written.
    """
    # Default to 0 ("everything") so the name is always bound; without an
    # argument the original raised UnboundLocalError further down.
    sample_size = int(sys.argv[1]) if len(sys.argv) > 1 else 0

    maf_reader = maf.Reader(sys.stdin)
    maf_writer = maf.Writer(sys.stdout)

    mafs = list(maf_reader)
    random.shuffle(mafs)

    if not sample_size:
        sample_size = len(mafs)
    # Never index past the end when more blocks are requested than exist.
    sample_size = min(sample_size, len(mafs))

    for i in range(sample_size):
        maf_writer.write(mafs[i])
def MafScorer(pwm,species,inmaf):
    """Generate ``(scoremax, index, headers)`` tuples for the blocks of a MAF stream.

    Each block read from `inmaf` is passed to ``MafBlockScorer(pwm, species,
    block)`` and every ``(scoremax, width, headers)`` it produces is re-yielded
    with the running ``index`` in place of the width.

    NOTE(review): the nesting below was reconstructed from a whitespace-mangled
    view; in particular the placement of the final ``index += width`` / ``yield``
    inside the block loop should be confirmed against version control.
    """
    index = 0
    scoremax,width = None,None
    for maf in align_maf.Reader( inmaf ):
        # A former ``try:`` was replaced by an always-true ``if``.
        if True:
            val = MafBlockScorer(pwm,species,maf)
            for scoremax,width,headers in val: yield scoremax,index,headers
            #scoremax,width,headers = MafBlockScorer(pwm,species,maf)
        # NOTE(review): the try body is only ``pass``, so this handler can
        # never run; it looks like leftover error reporting for the scoring
        # call above.
        try: pass
        except:
            print >>sys.stderr, "Failed on:"
            syserr = align_maf.Writer( sys.stderr )
            syserr.write( maf )
            #print >>sys.stderr,headers
            if width: print >>sys.stderr,width
            if scoremax: print >>sys.stderr,len(scoremax)
            syserr.close()
            sys.exit(1)
        # Advance by the width of the last tuple produced for this block.
        # NOTE(review): if MafBlockScorer yields nothing for the first block,
        # ``width`` is still None and ``headers`` is unbound here.
        index += width
        yield scoremax,index,headers
def __main__():
    """Copy MAF blocks from stdin to stdout until enough columns were written.

    Blocks are written whole; copying stops after the first block that brings
    the accumulated ``text_size`` up to the value of ``-c/--cols``.

    Raises:
        Exception: if ``--cols`` is missing or empty.
    """
    parser = OptionParser()
    parser.add_option("-c", "--cols", action="store")
    options, _ = parser.parse_args()

    reader = maf.Reader(sys.stdin)
    writer = maf.Writer(sys.stdout)

    if not options.cols:
        raise Exception("Cols argument is required")
    column_limit = int(options.cols)

    columns_written = 0
    for block in reader:
        writer.write(block)
        columns_written += block.text_size
        if columns_written >= column_limit:
            break
def MafScorer(pwm, species, inmaf):
    """Generate ``(scoremax, index, headers)`` tuples for the blocks of a MAF stream.

    Each block read from `inmaf` is passed to ``MafBlockScorer(pwm, species,
    block)`` and every ``(scoremax, width, headers)`` it produces is re-yielded
    with the running ``index`` in place of the width.

    NOTE(review): the nesting below was reconstructed from a whitespace-mangled
    view; in particular the placement of the final ``index += width`` / ``yield``
    inside the block loop should be confirmed against version control.
    """
    index = 0
    scoremax, width = None, None
    for maf in align_maf.Reader(inmaf):
        # A former ``try:`` was replaced by an always-true ``if``.
        if True:
            val = MafBlockScorer(pwm, species, maf)
            for scoremax, width, headers in val:
                yield scoremax, index, headers
        # NOTE(review): the try body is only ``pass``, so this handler can
        # never run; it looks like leftover error reporting for the scoring
        # call above.
        try:
            pass
        except Exception:
            print("Failed on:", file=sys.stderr)
            syserr = align_maf.Writer(sys.stderr)
            syserr.write(maf)
            if width:
                print(width, file=sys.stderr)
            if scoremax:
                print(len(scoremax), file=sys.stderr)
            syserr.close()
            sys.exit(1)
        # Advance by the width of the last tuple produced for this block.
        # NOTE(review): if MafBlockScorer yields nothing for the first block,
        # ``width`` is still None and ``headers`` is unbound here.
        index += width
        yield scoremax, index, headers
def _write_maf_file(self, out_file, aligns):
    """Write every alignment in `aligns` to `out_file` as MAF and return `out_file`."""
    with open(out_file, "w") as handle:
        maf_out = maf.Writer(handle)
        for block in aligns:
            maf_out.write(block)
    return out_file
import sys
from galaxy import eggs
# The requirement is declared before the bx import that follows it.
import pkg_resources; pkg_resources.require( "bx-python" )
from bx.align import maf
from galaxy.tools.util import maf_utilities
from galaxy.util import string_as_bool

# Refuse to run on interpreters older than Python 2.4.
assert sys.version_info[:2] >= ( 2, 4 )

def __main__():
    """Split every MAF block by species and write the pieces to a new file.

    Command line: ``argv[1]`` is the input MAF, ``argv[2]`` the output MAF and
    ``argv[3]`` a boolean-like string; when true, all-gap columns are removed
    from each split block before it is written.
    """
    try:
        maf_reader = maf.Reader( open( sys.argv[1] ) )
    except Exception, e:
        maf_utilities.tool_fail( "Error opening MAF: %s" % e )
    try:
        out = maf.Writer( open( sys.argv[2], "w") )
    except Exception, e:
        maf_utilities.tool_fail( "Error opening file for output: %s" % e )
    try:
        collapse_columns = string_as_bool( sys.argv[3] )
    except Exception, e:
        maf_utilities.tool_fail( "Error determining collapse columns value: %s" % e )

    # start_count ends up as the zero-based index of the last input block;
    # end_count is the number of split blocks written.
    start_count = 0
    end_count = 0
    for start_count, start_block in enumerate( maf_reader ):
        for block in maf_utilities.iter_blocks_split_by_species( start_block ):
            if collapse_columns:
                block.remove_all_gap_columns()
            out.write( block )
            end_count += 1
    # NOTE(review): in this view the function ends here without closing `out`
    # or reporting the counts -- the remainder may lie outside the visible chunk.