def interval2genes(interval, skip=('CGH', '-')):
    """Squash intervals into named genes.

    Rows of (chrom, start, end, name) are read from
    ``ngfrills.parse_regions(interval)``. Each run of consecutive rows that
    share both chromosome and name is merged into one region spanning from
    the start of the run's first row to the end of its last row.

    Rows whose name is in `skip` are discarded before grouping, so they do
    not split a run. A run consisting of a single row is not emitted; a
    message naming it is printed to stderr instead.

    Yields (chrom, start, end, name) tuples, one per multi-row run.
    """
    def close_run(chrom, start, end, name, n_rows):
        """Return the finished run as a 0- or 1-item list of gene tuples."""
        if name is None:
            # A name of None marks "no run in progress"; nothing to report.
            return []
        if n_rows > 1:
            return [(chrom, start, end, name)]
        print("Single-probe gene is probably CGH:", name, file=sys.stderr)
        return []

    # State of the run currently being accumulated
    run_chrom = None
    run_name = None
    run_start = None
    run_end = None
    run_size = 0

    for chrom, start, end, name in ngfrills.parse_regions(interval):
        if name in skip:
            continue

        starts_new_run = (chrom != run_chrom or name != run_name)
        if starts_new_run:
            # Flush whatever was accumulated, then begin a fresh run here
            for gene in close_run(run_chrom, run_start, run_end, run_name,
                                  run_size):
                yield gene
            run_chrom, run_name, run_start = chrom, name, start
            run_size = 0

        # Every kept row extends the current run to its own end coordinate
        run_end = end
        run_size += 1

    # The final run is still pending once the input is exhausted
    for gene in close_run(run_chrom, run_start, run_end, run_name, run_size):
        yield gene
def exclude_regions(bed_fname, access_rows):
    """Yield accessible regions after applying the exclusions in a BED file.

    The exclusions are loaded with ``parse_regions(bed_fname,
    coord_only=True)`` and grouped per chromosome into a dict.

    - If that dict is empty, every row of `access_rows` is yielded unchanged.
    - Otherwise `access_rows` is grouped by chromosome. For a chromosome with
      no exclusions, each (start, end) pair is yielded as
      (chrom, start, end). For a chromosome with exclusions, whatever
      ``exclude_in_region`` produces for each (start, end) pair is yielded
      as-is.
    """
    excluded_by_chrom = dict(group_regions_by_chromosome(
        parse_regions(bed_fname, coord_only=True)))

    if not excluded_by_chrom:
        # Nothing to exclude -> emit the input regions unmodified
        for region in access_rows:
            yield region
        return

    # Check if each input region overlaps an excluded region
    for chrom, spans in group_regions_by_chromosome(access_rows):
        if chrom not in excluded_by_chrom:
            logging.info("%s: No excluded regions", chrom)
            for span_start, span_end in spans:
                yield (chrom, span_start, span_end)
            continue

        logging.info("%s: Subtracting excluded regions", chrom)
        exclusions = iter(excluded_by_chrom[chrom])
        # NOTE(review): the first exclusion is fetched once per chromosome
        # and the same ex_start/ex_end are passed for every span below, even
        # though `exclusions` is shared with exclude_in_region. Confirm that
        # helper does not rely on the caller tracking the current exclusion.
        ex_start, ex_end = next_or_inf(exclusions)
        for span_start, span_end in spans:
            pieces = exclude_in_region(exclusions, chrom, span_start,
                                       span_end, ex_start, ex_end)
            for piece in pieces:
                yield piece
def exclude_regions(bed_fname, access_rows):
    """Yield accessible regions after applying the exclusions in a BED file.

    The exclusions are loaded with ``parse_regions(bed_fname,
    coord_only=True)`` and grouped per chromosome into a dict.

    - If that dict is empty, every row of `access_rows` is yielded unchanged.
    - Otherwise `access_rows` is grouped by chromosome. For a chromosome with
      no exclusions, each (start, end) pair is yielded as
      (chrom, start, end). For a chromosome with exclusions, whatever
      ``exclude_in_region`` produces for each (start, end) pair is yielded
      as-is.
    """
    excluded_by_chrom = dict(group_regions_by_chromosome(
        parse_regions(bed_fname, coord_only=True)))

    if not excluded_by_chrom:
        # Nothing to exclude -> emit the input regions unmodified
        for region in access_rows:
            yield region
        return

    # Check if each input region overlaps an excluded region
    for chrom, spans in group_regions_by_chromosome(access_rows):
        if chrom not in excluded_by_chrom:
            logging.info("%s: No excluded regions", chrom)
            for span_start, span_end in spans:
                yield (chrom, span_start, span_end)
            continue

        logging.info("%s: Subtracting excluded regions", chrom)
        exclusions = iter(excluded_by_chrom[chrom])
        # NOTE(review): the first exclusion is fetched once per chromosome
        # and the same ex_start/ex_end are passed for every span below, even
        # though `exclusions` is shared with exclude_in_region. Confirm that
        # helper does not rely on the caller tracking the current exclusion.
        ex_start, ex_end = next_or_inf(exclusions)
        for span_start, span_end in spans:
            pieces = exclude_in_region(exclusions, chrom, span_start,
                                       span_end, ex_start, ex_end)
            for piece in pieces:
                yield piece