Ejemplo n.º 1
0
def screen_maf(qa_file, maf_file):
    """
    Screen the .maf file based on the cluster info in the qa_file.

    Every anchor whose last field (its score) is non-zero in the clusters
    returned by ``read_clusters(qa_file)`` is retained.  A record of
    ``maf_file`` is copied to ``maf_file + ".filtered"`` when the first
    anchor of the cluster built from its components is one of the retained
    anchors.  A summary line giving the number of retained anchors (not the
    number of records written) is sent to stderr.
    """
    clusters = read_clusters(qa_file)
    filtered_maf = maf_file + ".filtered"

    # Anchors with a non-zero score survive the screen.
    screened_alignments = set()
    for cluster in clusters:
        screened_alignments.update(
            anchor for anchor in cluster if anchor[-1] != 0)

    # Context managers close both handles even if a record fails to parse;
    # the output handle in particular must be closed so that buffered
    # records reach the disk.
    with open(maf_file) as fp:
        reader = maf.Reader(fp)
        with open(filtered_maf, "w") as fw:
            writer = maf.Writer(fw)

            for rec in reader:
                # One (src, start, end, strand, score) tuple per component.
                alignment = [
                    (c.src, c.forward_strand_start, c.forward_strand_end,
                     c.strand, rec.score)
                    for c in rec.components]

                cluster = alignment_to_cluster(alignment)
                if cluster[0] in screened_alignments:
                    writer.write(rec)

    sys.stderr.write("write (%d) alignments to '%s'\n" %
                     (len(screened_alignments), filtered_maf))
def __main__():
    """Split every MAF block by species and write the pieces to a new file.

    Command line: ``sys.argv[1]`` is the input MAF, ``sys.argv[2]`` the output
    path and ``sys.argv[3]`` a boolean-like string; when it is true, all-gap
    columns are removed from each piece before it is written.  A failure
    while handling any of the three arguments is reported through
    ``maf_utilities.tool_fail``.  A one-line summary is printed at the end.
    """
    try:
        maf_reader = maf.Reader( open( sys.argv[1] ) )
    except Exception as err:
        maf_utilities.tool_fail( "Error opening MAF: %s" % err )
    try:
        out = maf.Writer( open( sys.argv[2], "w") )
    except Exception as err:
        maf_utilities.tool_fail( "Error opening file for output: %s" % err )
    try:
        collapse_columns = string_as_bool( sys.argv[3] )
    except Exception as err:
        maf_utilities.tool_fail( "Error determining collapse columns value: %s" % err )

    last_index = 0      # zero-based index of the last original block read
    pieces_written = 0  # number of blocks produced by the split
    for last_index, original in enumerate( maf_reader ):
        for piece in maf_utilities.iter_blocks_split_by_species( original ):
            if collapse_columns:
                piece.remove_all_gap_columns()
            out.write( piece )
            pieces_written += 1
    out.close()

    if not pieces_written:
        print( "No alignment blocks were created." )
        return
    print( "%i alignment blocks created from %i original blocks." % ( pieces_written, last_index + 1 ) )
Ejemplo n.º 3
0
def main(in_file):
    """Write the records of ``in_file`` to ``<base>-sorted<ext>``, largest first.

    An index file ``in_file + ".index"`` is built with ``build_index`` when it
    does not exist yet.  The input is scanned once to collect the
    ``text_size`` and file offset of every record; the records are then
    fetched through the index in descending ``(text_size, offset)`` order and
    written to the output file.
    """
    stem, suffix = os.path.splitext(in_file)
    out_file = "%s-sorted%s" % (stem, suffix)
    index_file = in_file + ".index"
    if not os.path.exists(index_file):
        build_index(in_file, index_file)

    # First pass: remember (size, offset) for each record in the file.
    sized_offsets = []
    with open(in_file) as in_handle:
        reader = maf.Reader(in_handle)
        while True:
            # The offset must be taken before the record is consumed.
            offset = reader.file.tell()
            rec = reader.next()
            if rec is None:
                break
            sized_offsets.append((rec.text_size, offset))
    sized_offsets = sorted(sized_offsets, reverse=True)

    # Second pass: emit the records in sorted order via the index.
    index = maf.Indexed(in_file, index_file)
    with open(out_file, "w") as out_handle:
        writer = maf.Writer(out_handle)
        for _, offset in sized_offsets:
            writer.write(index.get_at_offset(offset))
Ejemplo n.º 4
0
def test_writer():

    val = StringIO()
    writer = maf.Writer(val, {'scoring': 'foobar'})

    a = align.Alignment()
    a.score = 7009

    a.components.append(
        align.Component(src="human_hoxa",
                        start=100,
                        size=9,
                        strand="+",
                        src_size=1000257,
                        text="ACA-TTACT"))
    a.components.append(
        align.Component(src="horse_hoxa",
                        start=120,
                        size=10,
                        strand="-",
                        src_size=98892,
                        text="ACAATTGCT"))

    check_component(a.components[0], "human_hoxa", 100, 9, "+", 1000257,
                    "ACA-TTACT")
    check_component(a.components[1], "horse_hoxa", 120, 10, "-", 98892,
                    "ACAATTGCT")

    writer.write(a)

    assert val.getvalue() == """##maf version=1 scoring=foobar
Ejemplo n.º 5
0
def __main__():
    """Copy MAF blocks from stdin to stdout, dropping those that fail a filter.

    Options:
      --component_count  keep only blocks with exactly this many components
      --min_cols         keep only blocks whose ``text_size`` is at least this
      -e / --expr        Python expression evaluated per block with the block
                         bound to both ``m`` and ``maf``; falsy results drop
                         the block

    An option that is missing (or zero / empty) disables its filter.
    """
    # Parse command line arguments
    parser = OptionParser()
    parser.add_option( "--component_count", action="store", default=None, type="int", help="" )
    parser.add_option( "--min_cols", action="store", default=None, type="int", help="" )
    parser.add_option( "-e", "--expr", action="store", default=None )

    options, _ = parser.parse_args()
    component_count, min_cols, expr = (
        options.component_count, options.min_cols, options.expr )

    # Compile the expression once so it is not re-parsed for every block.
    # NOTE(review): the expression comes straight from the command line and
    # is passed to eval(); only run this tool with trusted arguments.
    if expr:
        expr = compile( expr, '<expr arg>', 'eval' )

    maf_reader = maf.Reader( sys.stdin )
    maf_writer = maf.Writer( sys.stdout )

    for block in maf_reader:
        if component_count and len( block.components ) != component_count:
            continue
        if min_cols and block.text_size < min_cols:
            continue
        if expr and not eval( expr, { "m": block, "maf": block } ):
            continue
        maf_writer.write( block )
Ejemplo n.º 6
0
def main():
    """Run ``do_interval`` for every interval listed on stdin.

    Positional arguments (after option parsing by ``doc_optparse``):
    ``args[0]`` is a source list that is passed through ``tree_tx`` and split
    on whitespace, ``args[1]`` is the reference 2bit file and ``args[2:]``
    are the MAF files handed to ``maf.MultiIndexed``.  Any failure while
    setting these up is reported through ``doc_optparse.exception()``.

    Each non-blank stdin line supplies a source name, start and end in its
    first three whitespace-separated fields.  When the ``strand`` option is
    set and the line has more than five fields, the sixth field is used as
    the strand; otherwise ``'+'`` is used.
    """
    options, args = doc_optparse.parse(__doc__)
    try:
        sources = args[0].translate(tree_tx).split()
        # 2bit is a binary format: open it in binary mode so the bytes are
        # never decoded or newline-translated.
        ref_2bit = bx.seq.twobit.TwoBitFile(open(args[1], "rb"))
        index = maf.MultiIndexed(args[2:])

        out = maf.Writer(sys.stdout)
        missing_data = bool(options.missingData)
        use_strand = bool(options.strand)
    except Exception:
        # Not a bare ``except:`` so Ctrl-C and sys.exit() are not reported
        # as argument errors.
        doc_optparse.exception()

    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # Blank line: nothing to extract.
            continue
        ref_src, start, end = fields[0:3]
        if use_strand and len(fields) > 5:
            strand = fields[5]
        else:
            strand = '+'
        do_interval(sources, index, out, ref_src, int(start), int(end),
                    ref_2bit, missing_data, strand)

    out.close()
Ejemplo n.º 7
0
def test_write_with_synteny():
    """Check that a block read with ``parse_e_rows=True`` is written back verbatim.

    The first block of ``test_maf_2`` is read, written through a
    ``maf.Writer`` created with a ``scoring=foobar`` header attribute, and the
    resulting text is compared with the expected MAF text below.
    """
    reader = maf.Reader(StringIO(test_maf_2), parse_e_rows=True)
    # Only the first block of the fixture is exercised.
    a = reader.next()
    val = StringIO()
    writer = maf.Writer(val, {'scoring': 'foobar'})
    writer.write(a)
    actual = val.getvalue()
    # The comparison is exact, so the column padding and trailing blanks in
    # this literal are significant.
    expected = """##maf version=1 scoring=foobar
a score=3656.0
s hg17.chr1                   2005   34 + 245522847 TGTAACTTAATACCACAACCAGGCATAGGGG--AAA------------- 
s rheMac2.chr11            9625228   31 + 134511895 TGTAACCTCTTACTGCAACAAGGCACAGGGG------------------ 
i rheMac2.chr11                                     C 0 I 1678                                        
s panTro1.chr1                2014   34 + 229575298 TGTAACTTAATACCACAACCAGGCATGGGGG--AAA------------- 
i panTro1.chr1                                      C 0 C 0                                           
s bosTau2.chr5            64972365   47 +  76426644 TCCAGCCATGTGTTGTGATCAG--CCAGGGGCTAAAGCCATGGCGGTAG 
i bosTau2.chr5                                      C 0 I 1462                                        
s canFam2.chr27           45129665   31 +  48908698 TTTGACTCTGTGCTCTTATCAGGCCCAAGGG------------------ 
i canFam2.chr27                                     C 0 I 1664                                        
e danRer3.chr18            2360867  428 +  50308305 I                                                 
e oryCun1.scaffold_139397      643 1271 -      4771 I                                                 
e loxAfr1.scaffold_5603      58454 1915 +     68791 I                                                 
e echTel1.scaffold_212365     4641 1430 +      9822 I                                                 
e echTel1.scaffold_212365     4641 1430 +      9822 I                                                 
e rn3.chr4                29161032 1524 - 187371129 I                                                 
e mm7.chr6                28091695 3290 - 149646834 I                                                 

"""
    # Dump both texts so a failing comparison can be inspected by eye.
    print actual
    print "---"
    print expected
    assert actual == expected
Ejemplo n.º 8
0
def main():
    """Copy MAF blocks from stdin to stdout with translated component text.

    The text of every component of every block is passed through
    ``str.translate`` with the module-level ``table`` before the block is
    written.
    """
    reader = maf.Reader(sys.stdin)
    writer = maf.Writer(sys.stdout)

    for block in reader:
        for component in block.components:
            translated = component.text.translate(table)
            component.text = translated
        writer.write(block)

    writer.close()
Ejemplo n.º 9
0
def main(options, args):
    """Write the records selected by ``interval_chain`` to ``<base>-filtered<ext>``.

    ``args[0]`` is the input MAF file; an index ``<input>.index`` is built
    with ``build_index`` when missing.  Every component of every record
    becomes a ``Weighted_interval`` (weight = record score) and contributes
    a left and a right endpoint to its chromosome's endpoint list.
    ``interval_chain`` is then run chromosome by chromosome, and the union
    of the ids it returns decides which records are copied, in file order,
    to the output.

    ``options`` is accepted for interface compatibility and is not used.
    """
    in_file = args[0]
    base, ext = os.path.splitext(in_file)
    out_file = "%s-filtered%s" % (base, ext)
    index_file = in_file + ".index"
    if not os.path.exists(index_file):
        build_index(in_file, index_file)
    index = maf.Indexed(in_file, index_file)

    intervals = []  # give each interval a unique id
    endpoints = collections.defaultdict(list)  # chromosome => endpoints
    filtered_rec = set()
    rec_info = []  # (record id, file offset) for every record
    j = 0  # running id of the next interval

    with open(in_file) as fp:
        reader = maf.Reader(fp)
        while True:
            # The offset has to be captured before the record is consumed.
            pos = reader.file.tell()
            rec = reader.next()
            if rec is None:
                break
            # Recorded only for real records, so no phantom entry pointing at
            # end-of-file is stored.
            # NOTE(review): j // 2 presumes two components per record
            # (pairwise alignments) -- confirm against the input data.
            rec_info.append((j // 2, pos))
            for c in rec.components:
                chromosome, left, right, weight = c.src, \
                    c.forward_strand_start, c.forward_strand_end, rec.score

                intervals.append(
                    Weighted_interval(chromosome, left, right, weight))
                endpoints[chromosome].append((left, j, -weight))  # left end
                endpoints[chromosome].append((right, j, weight))  # right end
                j += 1

    for chromosome in sorted(endpoints):
        v = endpoints[chromosome]
        print("%s : start with %d intervals" % (chromosome, len(v) // 2))
        filtered_rec |= interval_chain(intervals, v)

    print("filtered alignment size %d" % len(filtered_rec))

    with open(out_file, "w") as fw:
        writer = maf.Writer(fw)
        for rec_id, pos in rec_info:
            if rec_id in filtered_rec:
                writer.write(index.get_at_offset(pos))
Ejemplo n.º 10
0
def main():
    """Prefix the source names of the first two components in every MAF file.

    For each ``*.maf`` file in ``args.indir`` a copy named
    ``<basename>.rename.maf`` is written to ``args.outdir``.  In every block,
    ``args.target`` is prepended to the ``src`` of component 0 and
    ``args.query`` to the ``src`` of component 1.
    """
    args = get_args()
    for f in glob.glob(os.path.join(args.indir, '*.maf')):
        outname = os.path.splitext(os.path.basename(f))[0] + ".rename.maf"
        outpth = os.path.join(args.outdir, outname)
        # Both handles are closed deterministically for each input file, even
        # when a block fails to parse, so a large directory cannot exhaust
        # the process's file descriptors.
        with open(f) as in_handle:
            inmaf = maf.Reader(in_handle)
            with open(outpth, 'w') as out_handle:
                outf = maf.Writer(out_handle)
                for aln in inmaf:
                    # change name
                    target, query = aln.components[0], aln.components[1]
                    target.src = args.target + target.src
                    query.src = args.query + query.src
                    outf.write(aln)
Ejemplo n.º 11
0
def main():
    """Copy to stdout the MAF blocks from stdin that have enough good columns.

    ``sys.argv[1]`` is the minimum number of good columns a block needs and
    ``sys.argv[2]`` the maximum number of ``'*'`` characters a column may
    contain and still count as good.
    """
    min_good = int(sys.argv[1])
    min_species = int(sys.argv[2])

    maf_reader = maf.Reader(sys.stdin)
    maf_writer = maf.Writer(sys.stdout)

    for block in maf_reader:
        good_columns = sum(
            1 for column in block.column_iter()
            if column.count('*') <= min_species)
        if good_columns < min_good:
            continue
        maf_writer.write(block)
Ejemplo n.º 12
0
def main(options, args):
    """Keep only the records that align ``chr1`` with ``chr2``.

    ``args`` must be ``(in_file, chr1, chr2)``.  A record is written to
    ``<base>.<chr1>_vs_<chr2>_filtered<ext>`` when the sources of its first
    two components are ``chr1`` and ``chr2`` in either order.

    ``options`` is accepted for interface compatibility and is not used.
    """
    in_file, chr1, chr2 = args

    base, ext = os.path.splitext(in_file)
    out_file = "%s.%s_vs_%s_filtered%s" % (base, chr1, chr2, ext)

    # Either orientation of the pair is accepted.
    wanted = ((chr1, chr2), (chr2, chr1))

    # Both handles are closed on exit so the filtered output is flushed to
    # disk.
    with open(in_file) as fp:
        with open(out_file, "w") as fw:
            reader = maf.Reader(fp)
            writer = maf.Writer(fw)
            for rec in reader:
                pair = (rec.components[0].src, rec.components[1].src)
                if pair in wanted:
                    writer.write(rec)
Ejemplo n.º 13
0
def __main__():
    """Write a random sample of the MAF blocks on stdin to stdout.

    ``sys.argv[1]``, when given, is the number of blocks to write.  Without
    it (or when it is 0, or larger than the number of blocks available)
    every block is written.  The blocks are shuffled first, so the output
    order is random in all cases.
    """
    # Default to 0 ("write everything") so the name is always bound, even
    # when no argument is given.
    sample_size = int(sys.argv[1]) if len(sys.argv) > 1 else 0

    maf_reader = maf.Reader(sys.stdin)
    maf_writer = maf.Writer(sys.stdout)

    mafs = list(maf_reader)

    random.shuffle(mafs)

    # Never ask for more blocks than were read.
    if not sample_size or sample_size > len(mafs):
        sample_size = len(mafs)

    for i in range(sample_size):
        maf_writer.write(mafs[i])
def MafScorer(pwm,species,inmaf):
    """Generate ``(scoremax, index, headers)`` tuples for each block of a MAF.

    For every block read from ``inmaf`` the triples produced by
    ``MafBlockScorer(pwm, species, block)`` are yielded with the current
    ``index``.  ``index`` is then advanced by the last ``width`` seen and the
    last triple is yielded once more with the advanced index.
    """
    index = 0
    scoremax,width = None,None
    for block in align_maf.Reader( inmaf ):
        for scoremax,width,headers in MafBlockScorer(pwm,species,block):
            yield scoremax,index,headers
        # NOTE(review): if MafBlockScorer yields nothing for the very first
        # block, ``width`` is still None and ``headers`` is unbound, so the
        # two lines below raise; for a later empty block the values left
        # over from the previous block are reused.  Confirm whether empty
        # results can occur.
        index += width
        yield scoremax,index,headers
Ejemplo n.º 15
0
def __main__():
    """Copy MAF blocks from stdin to stdout until enough columns are written.

    The required ``-c`` / ``--cols`` option gives the column budget.  Blocks
    are copied whole; copying stops after the first block that brings the
    running total of ``text_size`` to at least that budget.

    Raises:
        Exception: when the ``--cols`` option is missing or empty.
    """
    parser = OptionParser()
    parser.add_option("-c", "--cols", action="store")
    options, _ = parser.parse_args()

    maf_reader = maf.Reader(sys.stdin)
    maf_writer = maf.Writer(sys.stdout)

    if not options.cols:
        raise Exception("Cols argument is required")
    column_budget = int(options.cols)

    columns_written = 0
    for block in maf_reader:
        maf_writer.write(block)
        columns_written += block.text_size
        if columns_written >= column_budget:
            break
Ejemplo n.º 16
0
def MafScorer(pwm, species, inmaf):
    """Generate ``(scoremax, index, headers)`` tuples for each block of a MAF.

    For every block read from ``inmaf`` the triples produced by
    ``MafBlockScorer(pwm, species, block)`` are yielded together with the
    current ``index``.  Afterwards ``index`` is advanced by the last
    ``width`` seen and the last triple is yielded again with the new index.
    """
    index = 0
    scoremax, width = None, None
    for block in align_maf.Reader(inmaf):
        block_scores = MafBlockScorer(pwm, species, block)
        for scoremax, width, headers in block_scores:
            yield scoremax, index, headers
        # NOTE(review): when MafBlockScorer produces nothing for the first
        # block, ``width`` is still None and ``headers`` is unbound, so the
        # statements below raise; for a later empty block the previous
        # block's values are reused.  Confirm whether that can happen.
        index += width
        yield scoremax, index, headers
Ejemplo n.º 17
0
 def _write_maf_file(self, out_file, aligns):
     """Write every alignment in ``aligns`` to ``out_file`` as MAF.

     Returns the path that was written, i.e. ``out_file`` itself.
     """
     with open(out_file, "w") as handle:
         maf_writer = maf.Writer(handle)
         for block in aligns:
             maf_writer.write(block)
     return out_file
Ejemplo n.º 18
0
import sys
from galaxy import eggs
import pkg_resources; pkg_resources.require( "bx-python" )
from bx.align import maf
from galaxy.tools.util import maf_utilities
from galaxy.util import string_as_bool

assert sys.version_info[:2] >= ( 2, 4 )

def __main__():    
    try:
        maf_reader = maf.Reader( open( sys.argv[1] ) )
    except Exception, e:
        maf_utilities.tool_fail( "Error opening MAF: %s" % e )
    try:
        out = maf.Writer( open( sys.argv[2], "w") )
    except Exception, e:
        maf_utilities.tool_fail( "Error opening file for output: %s" % e )
    try:
        collapse_columns = string_as_bool( sys.argv[3] )
    except Exception, e:
        maf_utilities.tool_fail( "Error determining collapse columns value: %s" % e )
    
    start_count = 0
    end_count = 0
    for start_count, start_block in enumerate( maf_reader ):
        for block in maf_utilities.iter_blocks_split_by_species( start_block ):
            if collapse_columns:
                block.remove_all_gap_columns()
            out.write( block )
            end_count += 1