def regions_complement(regions):
    """Return the complement of the feature intervals of every region.

    For each named region in ``regions`` a new ``encode.feature_region`` of
    the same length and name is created, holding the intervals of that
    region that are NOT covered by any feature interval.

    Coordinates are treated as zero-based and inclusive: a region of length
    ``n`` spans positions ``0 .. n-1``, and a feature interval covers
    ``fi.start .. fi.end``.

    NOTE(review): this assumes ``iter_feature_regions()`` yields intervals
    in ascending order of ``start`` -- confirm against ``encode``.

    regions -- mapping of region name to a region object exposing
               ``length`` and ``iter_feature_regions()``.

    Returns an ``encode.regions`` object keyed by the same region names.
    """
    comp_regions = encode.regions()

    for region_name in regions.keys():
        # alias for the region called region_name - purely for readability
        data = regions[region_name]
        comp_regions[region_name] = encode.feature_region(
            data.length, (), region_name
        )
        complement = comp_regions[region_name]

        # last position known to be covered by a feature; -1 means that
        # nothing has been covered yet
        prev_end = -1

        # for each feature interval, store the uncovered gap *before* it
        for fi in data.iter_feature_regions():
            if fi.start - prev_end > 1:
                complement.add(interval(prev_end + 1, fi.start - 1))
            # never move the cursor backwards: an interval that overlaps or
            # is nested inside an earlier one must not re-open positions
            # that are already covered
            prev_end = max(prev_end, fi.end)

        # store the uncovered gap after the last feature interval
        if prev_end < data.length - 1:
            complement.add(interval(prev_end + 1, data.length - 1))

    return comp_regions
if verbose: print >> output, "Region files parsed." # close all of the open files for fp in split_files: fp.close() lengths_file.close() if verbose: import time startTime = time.time() import encode baseRegions = [] for i in xrange(len(regions_s_to_split)): test = encode.regions() baseRegions.append(test) # for each named region in for key in regions_s_to_split[0].keys(): # store the region boundaries for each area region_boundaries = [] # a list of region's for this key region_list = [ regions[key] for regions in regions_s_to_split ] # make sure all of the lengths are the same assert len(set( region.length for region in region_list )) == 1 # for each regions in the regions # note that we require the regions 'line up'