def read_dir(self, d):
    """Load interval annotations and score arrays from directory `d`.

    If `d` does not exist as given, the first entry of DATA_SEARCH_PATH
    that contains it is used instead.

    Every file ending in ``.match`` is parsed line by line as
    whitespace-separated ``chrom start end what`` records; lines starting
    with ``#`` and lines with fewer than four fields are skipped.  Each
    record is added to a per-chromosome ``intervals.Intersecter``.

    Every other file is wrapped in a ``FileBinnedArray`` keyed by the part
    of its name before the first ``.``; when several files share a key only
    the first one returned by ``os.listdir`` is used.

    Results are stored on the instance:
        self.lookup -- dict mapping chrom -> Intersecter, or None when no
                       interval record was loaded
        self.scores -- dict mapping key -> FileBinnedArray
    """
    lookup = {}
    scores_by_what = {}

    # Fall back to the search path when `d` is not directly reachable.
    if not os.path.exists(d):
        for base in DATA_SEARCH_PATH:
            candidate = os.path.join(base, d)
            if os.path.exists(candidate):
                d = candidate
                break

    for fname in os.listdir(d):
        path = os.path.join(d, fname)
        if fname.endswith(".match"):
            # Close each match file as soon as it has been parsed.
            with open(path) as handle:
                for line in handle:
                    if line.startswith("#"):
                        continue
                    fields = line.split()
                    if len(fields) < 4:
                        continue
                    chrom, start, end, what = fields[:4]
                    if chrom not in lookup:
                        lookup[chrom] = intervals.Intersecter()
                    lookup[chrom].add_interval(
                        intervals.Interval(int(start), int(end), what))
        else:
            key = fname.split('.')[0]
            if key not in scores_by_what:
                # NOTE(review): the handle is deliberately left open because
                # it is handed to FileBinnedArray, which presumably reads
                # from it later -- confirm before adding a close here.
                scores_by_what[key] = FileBinnedArray(open(path))

    # An empty lookup is reported as None rather than as an empty dict.
    self.lookup = lookup or None
    self.scores = scores_by_what
예제 #2
0
def __main__():
    """Slice axt blocks read from stdin down to a set of intervals.

    Command line (parsed with doc_optparse from the module docstring):
        args[0]          -- file of whitespace-separated ``start end`` lines
        args[1]          -- integer index of the reference component
        options.mincols  -- optional; defaults to 10

    For every axt block on stdin, each interval overlapping the reference
    component is clipped to that component and the block is sliced to the
    clipped range.  A slice is written to stdout only when every one of its
    components has size >= 1 and its ``text_size`` exceeds ``mincols``.
    Any problem with the arguments ends in ``doc_optparse.exit()``.
    """

    # Parse Command Line

    options, args = doc_optparse.parse(__doc__)

    try:
        range_filename = args[0]
        refindex = int(args[1])
        mincols = int(options.mincols) if options.mincols else 10
    except Exception:
        # `Exception` rather than a bare except, so that SystemExit and
        # KeyboardInterrupt are not swallowed.
        doc_optparse.exit()

    # Load Intervals

    intersecter = intervals.Intersecter()
    with open(range_filename) as range_file:
        for line in range_file:
            # Skip comments and blank lines instead of crashing on them.
            if line.startswith("#") or line.isspace():
                continue
            fields = line.split()
            intersecter.add_interval(
                intervals.Interval(int(fields[0]), int(fields[1])))

    # Start axt on stdout

    out = bx.align.axt.Writer(sys.stdout)

    # Iterate over input axt

    for axt in bx.align.axt.Reader(sys.stdin):
        ref_component = axt.components[refindex]
        # Find overlap with reference component
        intersections = intersecter.find(ref_component.start,
                                         ref_component.end)
        # Keep output axt ordered
        intersections.sort()
        # Write each intersecting block
        for interval in intersections:
            start = max(interval.start, ref_component.start)
            end = min(interval.end, ref_component.end)
            sliced = axt.slice_by_component(refindex, start, end)
            # Drop slices in which any component ended up empty.
            good = all(c.size >= 1 for c in sliced.components)
            if good and sliced.text_size > mincols:
                out.write(sliced)

    # Close output axt

    out.close()
def main():
    """Count intervals of the second file that hit an interval of the first.

    ``sys.argv[1]`` and ``sys.argv[2]`` name two interval files.  Each is
    opened with ``misc.open_compressed`` and iterated through
    ``read_intervals``, which yields ``(chr, start, end)`` triples.  The
    first file fills one ``intervals.Intersecter`` per chromosome; for each
    record of the second file the matching intersecter is queried, and the
    number of records with a non-empty result is printed.
    """

    intersecters = {}

    # Read ranges

    for chrom, start, end in read_intervals(misc.open_compressed(sys.argv[1])):
        if chrom not in intersecters:
            intersecters[chrom] = intervals.Intersecter()
        intersecters[chrom].add_interval(intervals.Interval(start, end))

    # Count intersection

    total = 0

    for chrom, start, end in read_intervals(misc.open_compressed(sys.argv[2])):
        # Chromosomes absent from the first file can never intersect.
        if chrom in intersecters and intersecters[chrom].find(start, end):
            total += 1

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(total)
def __main__():
    """Copy MAF blocks from stdin to stdout unless they overlap an interval.

    Every positional argument names a file of whitespace-separated
    ``start end`` lines (``#`` comments and blank lines are skipped); all of
    them are loaded into a single ``intervals.Intersecter``.  A MAF block is
    written only when its first component (``components[0]``) overlaps none
    of the loaded intervals.  With no positional arguments
    ``doc_optparse.exit()`` is called.
    """

    # Parse Command Line

    options, args = doc_optparse.parse(__doc__)

    # Explicit check: an `assert` would be stripped when running with -O.
    if not args:
        doc_optparse.exit()

    # Load Intervals

    intersector = intervals.Intersecter()

    for fname in args:
        with open(fname) as handle:
            for line in handle:
                if line.startswith("#") or line.isspace():
                    continue
                fields = line.split()
                intersector.add_interval(
                    intervals.Interval(int(fields[0]), int(fields[1])))

    # Start MAF on stdout

    out = bx.align.maf.Writer(sys.stdout)

    # Iterate over input MAF

    for maf in bx.align.maf.Reader(sys.stdin):
        # Find overlap with reference component
        ref_component = maf.components[0]
        intersections = intersector.find(ref_component.start,
                                         ref_component.end)
        # Write only if no overlap
        if not intersections:
            out.write(maf)

    # Close output MAF

    out.close()
def __main__():
    """Slice MAF blocks read from stdin down to per-source intervals.

    Command line (parsed with doc_optparse from the module docstring):
        args[0]          -- file of whitespace-separated ``src start end``
                            lines (``#`` comments and blank lines skipped)
        args[1]          -- reference component: an integer index, or
                            otherwise a name compared against the part of
                            each component's ``src`` before the first ``.``
        options.mincols  -- optional; defaults to 10
        options.prefix   -- optional; prepended to every ``src`` read from
                            the range file, defaults to ""

    For each MAF block the reference component is located (blocks lacking
    the named reference are skipped), the intervals registered for its
    ``src`` are queried, and the block is sliced to each overlapping
    interval clipped to the component.  A slice is written to stdout only
    when every component has size >= 1 and its ``text_size`` exceeds
    ``mincols``.  Any problem with the arguments ends in
    ``doc_optparse.exit()``.
    """

    # Parse Command Line

    options, args = doc_optparse.parse(__doc__)

    try:
        range_filename = args[0]
        try:
            refindex = int(args[1])
            refname = None
        except ValueError:
            # Not an integer: treat the argument as a source name.
            refindex = None
            refname = args[1]
        mincols = int(options.mincols) if options.mincols else 10
        prefix = options.prefix or ""
    except Exception:
        # `Exception` rather than a bare except, so that SystemExit and
        # KeyboardInterrupt are not swallowed.
        doc_optparse.exit()

    # Load Intervals

    intersecters = {}
    with open(range_filename) as range_file:
        for line in range_file:
            # Skip comments and blank lines instead of crashing on them.
            if line.startswith("#") or line.isspace():
                continue
            fields = line.split()
            src = prefix + fields[0]
            if src not in intersecters:
                intersecters[src] = intervals.Intersecter()
            intersecters[src].add_interval(
                intervals.Interval(int(fields[1]), int(fields[2])))

    # Start MAF on stdout

    out = bx.align.maf.Writer(sys.stdout)

    # Iterate over input MAF

    for maf in bx.align.maf.Reader(sys.stdin):
        # `is not None` so that an empty name is still handled as a name
        # instead of falling through with refindex left as None.
        if refname is not None:
            sourcenames = [comp.src.split('.')[0] for comp in maf.components]
            try:
                refindex = sourcenames.index(refname)
            except ValueError:
                # This block has no component for the reference; skip it.
                continue

        ref_component = maf.components[refindex]
        # Find overlap with reference component
        if ref_component.src not in intersecters:
            continue
        intersections = intersecters[ref_component.src].find(
            ref_component.start, ref_component.end)
        # Keep output maf ordered
        intersections.sort()
        # Write each intersecting block
        for interval in intersections:
            start = max(interval.start, ref_component.start)
            end = min(interval.end, ref_component.end)
            sliced = maf.slice_by_component(refindex, start, end)
            # Drop slices in which any component ended up empty.
            good = all(c.size >= 1 for c in sliced.components)
            if good and sliced.text_size > mincols:
                out.write(sliced)

    # Close output MAF

    out.close()
예제 #6
0
        stop_err(
            'Invalid genome build, this tool currently only works with data from build hg17.  Click the pencil icon in your history item to correct the build if appropriate.'
        )

    # Open the h5 file
    h5 = openFile(h5_fname, mode="r")
    # Load intervals and names for the subregions
    intersecters = {}
    for i, line in enumerate(file(mapping_fname)):
        line = line.rstrip('\r\n')
        if line and not line.startswith('#'):
            chr, start, end, name = line.split()[0:4]
            if not intersecters.has_key(chr):
                intersecters[chr] = intervals.Intersecter()
            intersecters[chr].add_interval(
                intervals.Interval(int(start), int(end), name))

    # Find the subregion containing each input interval
    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = ''
    out_file = open(out_fname, "w")
    warnings = []
    warning = ''
    for i, line in enumerate(file(in_fname)):
        line = line.rstrip('\r\n')
        if line.startswith('#'):
            if i == 0:
                out_file.write("%s\tscore\n" % line)
            else:
                out_file.write("%s\n" % line)