def read_dir(self, d):
    """Load interval lookups and binned score files from directory ``d``.

    If ``d`` does not exist as given, each entry of ``DATA_SEARCH_PATH`` is
    tried in order and the first existing ``<entry>/<d>`` is used instead.

    Files ending in ``.match`` are read as whitespace-separated records
    whose first four fields are ``chrom start end what``.  Lines starting
    with ``#`` and lines with fewer than four fields are skipped.  Each
    record is added to a per-chromosome ``intervals.Intersecter``.

    Every other file is wrapped in a ``FileBinnedArray`` and stored under
    the part of its name before the first ``.``; when several files share
    that key only the first one listed is kept.

    Results are stored on the instance rather than returned:

    * ``self.lookup`` -- dict of chromosome -> ``Intersecter``, or ``None``
      when no interval was loaded.
    * ``self.scores`` -- dict of key -> ``FileBinnedArray``.
    """
    lookup = {}
    scores_by_what = {}

    # Fall back to the search path when the directory is not found as given.
    if not os.path.exists(d):
        for p in DATA_SEARCH_PATH:
            candidate = os.path.join(p, d)
            if os.path.exists(candidate):
                d = candidate
                break

    for fname in os.listdir(d):
        path = os.path.join(d, fname)
        if fname.endswith(".match"):
            # Close each match file as soon as it is parsed instead of
            # leaving the handle to the garbage collector.
            with open(path) as match_file:
                for line in match_file:
                    if line.startswith("#"):
                        continue
                    fields = line.split()
                    if len(fields) < 4:
                        continue
                    chrom, start, end, what = fields[0:4]
                    if chrom not in lookup:
                        lookup[chrom] = intervals.Intersecter()
                    lookup[chrom].add_interval(
                        intervals.Interval(int(start), int(end), what))
        else:
            key = fname.split('.')[0]
            if key not in scores_by_what:
                # NOTE(review): the handle is passed to FileBinnedArray and
                # left open, presumably because it reads from it later --
                # confirm before adding a close here.
                scores_by_what[key] = FileBinnedArray(open(path))

    # An empty lookup is reported as None.
    self.lookup = lookup if lookup else None
    self.scores = scores_by_what
def __main__():
    """Slice axt blocks read from stdin down to a set of intervals.

    Command line: ``args[0]`` is the interval file and ``args[1]`` is the
    integer index of the reference component.  ``options.mincols`` sets the
    column threshold and defaults to 10 when not given.  If the arguments
    cannot be read or converted, ``doc_optparse.exit()`` is called.

    The interval file holds whitespace-separated ``start end`` pairs in its
    first two columns.  Blank lines and lines starting with ``#`` are
    ignored.

    For every input block, each interval overlapping the reference
    component is clipped to that component and the block is sliced to the
    clipped range.  A slice is written to stdout only when every component
    has size >= 1 and ``text_size`` is greater than ``mincols``.
    """
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    try:
        range_filename = args[0]
        refindex = int(args[1])
        mincols = int(options.mincols) if options.mincols else 10
    except Exception:
        # Missing or malformed arguments: hand over to doc_optparse.exit().
        doc_optparse.exit()

    # Load Intervals
    intersecter = intervals.Intersecter()
    with open(range_filename) as range_file:
        for line in range_file:
            # Skip comments and blank lines rather than crashing on them.
            if line.startswith("#") or line.isspace():
                continue
            fields = line.split()
            intersecter.add_interval(
                intervals.Interval(int(fields[0]), int(fields[1])))

    # Start axt on stdout
    out = bx.align.axt.Writer(sys.stdout)

    # Iterate over input axt
    for axt in bx.align.axt.Reader(sys.stdin):
        ref_component = axt.components[refindex]
        # Find overlap with reference component
        intersections = intersecter.find(ref_component.start,
                                         ref_component.end)
        # Keep output axt ordered
        intersections.sort()
        # Write each intersecting block
        for interval in intersections:
            start = max(interval.start, ref_component.start)
            end = min(interval.end, ref_component.end)
            sliced = axt.slice_by_component(refindex, start, end)
            # Drop slices in which any component ended up empty.
            has_empty = any(c.size < 1 for c in sliced.components)
            if not has_empty and sliced.text_size > mincols:
                out.write(sliced)

    # Close output axt
    out.close()
def main():
    """Print how many intervals of the second file overlap the first file.

    ``sys.argv[1]`` and ``sys.argv[2]`` are each opened with
    ``misc.open_compressed`` and passed to ``read_intervals``, which is
    iterated as ``(chrom, start, end)`` triples.  Intervals from the first
    file are indexed per chromosome in an ``intervals.Intersecter``.  Each
    interval of the second file counts once if ``find`` returns a non-empty
    result on its chromosome.  The total is printed to stdout.
    """
    intersecters = {}

    # Read ranges
    for chrom, start, end in read_intervals(
            misc.open_compressed(sys.argv[1])):
        if chrom not in intersecters:
            intersecters[chrom] = intervals.Intersecter()
        intersecters[chrom].add_interval(intervals.Interval(start, end))

    # Count intersection
    total = 0
    for chrom, start, end in read_intervals(
            misc.open_compressed(sys.argv[2])):
        # Chromosomes absent from the first file can never intersect.
        if chrom in intersecters and intersecters[chrom].find(start, end):
            total += 1

    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(total)
def __main__():
    """Copy MAF blocks from stdin to stdout unless they overlap an interval.

    Every positional argument is an interval file holding whitespace-
    separated ``start end`` pairs in its first two columns.  Blank lines
    and lines starting with ``#`` are ignored.  With no positional
    arguments, ``doc_optparse.exit()`` is called.

    A block is written only when its first component
    (``maf.components[0]``) overlaps none of the loaded intervals.
    """
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    # Explicit check instead of ``assert``, which is stripped under ``-O``.
    if not args:
        doc_optparse.exit()

    # Load Intervals
    intersector = intervals.Intersecter()
    for fname in args:
        with open(fname) as interval_file:
            for line in interval_file:
                if line.startswith("#") or line.isspace():
                    continue
                fields = line.split()
                intersector.add_interval(
                    intervals.Interval(int(fields[0]), int(fields[1])))

    # Start MAF on stdout
    out = bx.align.maf.Writer(sys.stdout)

    # Iterate over input MAF
    for maf in bx.align.maf.Reader(sys.stdin):
        # Find overlap with reference component
        ref = maf.components[0]
        intersections = intersector.find(ref.start, ref.end)
        # Write only if no overlap
        if len(intersections) == 0:
            out.write(maf)

    # Close output MAF
    out.close()
def __main__():
    """Slice MAF blocks read from stdin down to a set of intervals.

    Command line: ``args[0]`` is the interval file.  ``args[1]`` selects
    the reference component: if it parses as an integer it is used as a
    component index; otherwise it is treated as a source name and matched
    against the part of each component's ``src`` before the first ``.``.
    ``options.mincols`` sets the column threshold (default 10) and
    ``options.prefix`` is prepended to the source column of the interval
    file (default empty).  If the arguments cannot be read or converted,
    ``doc_optparse.exit()`` is called.

    The interval file holds whitespace-separated ``src start end`` records
    in its first three columns.  Blank lines and lines starting with ``#``
    are ignored.

    Blocks are skipped when the named reference is not among their
    components, or when the reference component's ``src`` has no intervals.
    Otherwise each overlapping interval is clipped to the reference
    component and the block is sliced to the clipped range.  A slice is
    written to stdout only when every component has size >= 1 and
    ``text_size`` is greater than ``mincols``.
    """
    # Parse Command Line
    options, args = doc_optparse.parse(__doc__)
    try:
        range_filename = args[0]
        try:
            refindex = int(args[1])
            refname = None
        except ValueError:
            # Not an integer: treat the argument as a source name.
            refindex = None
            refname = args[1]
        mincols = int(options.mincols) if options.mincols else 10
        prefix = options.prefix if options.prefix else ""
    except Exception:
        # Missing or malformed arguments: hand over to doc_optparse.exit().
        doc_optparse.exit()

    # Load Intervals
    intersecters = {}
    with open(range_filename) as range_file:
        for line in range_file:
            # Skip comments and blank lines rather than crashing on them.
            if line.startswith("#") or line.isspace():
                continue
            fields = line.split()
            src = prefix + fields[0]
            if src not in intersecters:
                intersecters[src] = intervals.Intersecter()
            intersecters[src].add_interval(
                intervals.Interval(int(fields[1]), int(fields[2])))

    # Start MAF on stdout
    out = bx.align.maf.Writer(sys.stdout)

    # Iterate over input MAF
    for maf in bx.align.maf.Reader(sys.stdin):
        if refname:
            # Resolve the reference by name for this block.
            sourcenames = [comp.src.split('.')[0] for comp in maf.components]
            try:
                refindex = sourcenames.index(refname)
            except ValueError:
                # Reference species not present in this block.
                continue
        ref_component = maf.components[refindex]
        # Find overlap with reference component
        if ref_component.src not in intersecters:
            continue
        intersections = intersecters[ref_component.src].find(
            ref_component.start, ref_component.end)
        # Keep output maf ordered
        intersections.sort()
        # Write each intersecting block
        for interval in intersections:
            start = max(interval.start, ref_component.start)
            end = min(interval.end, ref_component.end)
            sliced = maf.slice_by_component(refindex, start, end)
            # Drop slices in which any component ended up empty.
            has_empty = any(c.size < 1 for c in sliced.components)
            if not has_empty and sliced.text_size > mincols:
                out.write(sliced)

    # Close output MAF
    out.close()
stop_err( 'Invalid genome build, this tool currently only works with data from build hg17. Click the pencil icon in your history item to correct the build if appropriate.' ) # Open the h5 file h5 = openFile(h5_fname, mode="r") # Load intervals and names for the subregions intersecters = {} for i, line in enumerate(file(mapping_fname)): line = line.rstrip('\r\n') if line and not line.startswith('#'): chr, start, end, name = line.split()[0:4] if not intersecters.has_key(chr): intersecters[chr] = intervals.Intersecter() intersecters[chr].add_interval( intervals.Interval(int(start), int(end), name)) # Find the subregion containing each input interval skipped_lines = 0 first_invalid_line = 0 invalid_line = '' out_file = open(out_fname, "w") warnings = [] warning = '' for i, line in enumerate(file(in_fname)): line = line.rstrip('\r\n') if line.startswith('#'): if i == 0: out_file.write("%s\tscore\n" % line) else: out_file.write("%s\n" % line)