Exemple #1
0
    def __init__(self,
                 root,
                 train=True,
                 normalize=True,
                 window=16384,
                 translate=False,
                 pitch_shift=0,
                 jitter=0.,
                 stride=512):
        """Load the recordings and labelled segments listed in a TSV file.

        Each distinct wav file named in the label file is read once,
        linearly resampled to 44.1 kHz and kept in ``self.data``.  Labelled
        segments are stored per file in ``self.labels`` as an
        ``IntervalTree`` whose bounds are resampled sample indices.

        Args:
            root: Directory containing ``whois/train_data_09222019/``; a
                leading ``~`` is expanded.
            train: Read ``train.tsv`` when true, ``dev.tsv`` otherwise.
            normalize: Stored on the instance; not used in this method.
            window: Stored on the instance; not used in this method.
            translate: Stored on the instance; not used in this method.
            pitch_shift: Stored on the instance; not used in this method.
            jitter: Stored on the instance; not used in this method.
            stride: Each file adds ``len(audio) // stride`` to ``self.size``.
        """
        # Plain option storage.
        self.normalize = normalize
        self.window = window
        self.pitch_shift = pitch_shift
        self.jitter = jitter
        self.translate = translate
        self.stride = stride

        self.root = os.path.join(os.path.expanduser(root),
                                 'whois/train_data_09222019/')
        label_name = 'train.tsv' if train else 'dev.tsv'
        labelfile = os.path.join(self.root, label_name)

        target_rate = 44100.
        self.size = 0
        self.data = {}
        self.labels = {}
        self._base_idx = {}   # wav name -> running size before the file
        self._cumsize = {}    # running size before the file -> wav name
        with open(labelfile) as handle:
            rows = csv.reader(handle, delimiter='\t')
            for row_no, (wav, start, dur, loc, date,
                         master) in enumerate(rows):
                if row_no == 0:
                    continue  # first row is skipped (header)

                if wav not in self.data:
                    # First time this file is seen: read and resample it.
                    src_rate, samples = wavfile.read(
                        os.path.join(self.root, 'wav', wav))
                    out_idx = np.arange((target_rate / src_rate) * len(samples),
                                        dtype=np.float32)
                    resampled = np.interp(
                        (src_rate / target_rate) * out_idx,
                        np.arange(len(samples), dtype=np.float32),
                        samples).astype(np.float32)
                    self.data[wav] = resampled
                    self.labels[wav] = IntervalTree()
                    self._base_idx[wav] = self.size
                    self._cumsize[self.size] = wav
                    self.size += len(resampled) // self.stride

                # Segments shorter than 0.1 are not labelled.
                if float(dur) < 0.1:
                    continue
                begin = int(float(start) * target_rate)
                end = int((float(start) + float(dur)) * target_rate)
                self.labels[wav][begin:end] = 1

        self._sorted_base = sorted(self._cumsize.keys())
        print('Loaded dataset with {} datapoints'.format(self.size))
Exemple #2
0
def make_gaps_tree(in_file):
    """Build one interval tree of gap coordinates per FASTA record.

    Records are read with ``SeqReader(in_file).parse_fasta()``.  The key of
    each tree is the record header without its first character, cut at the
    first space.  Every match returned by
    ``GapSequence(sequence).get_gap_coords()`` is stored as the interval
    ``[start, end)`` with the ``(start, end)`` tuple as its data.

    Returns:
        dict mapping header name to ``IntervalTree``.
    """
    trees_by_header = {}
    reader = SeqReader(in_file)
    for raw_header, sequence in reader.parse_fasta():
        # Drop the leading '>' and keep only the first space-delimited token.
        name = raw_header[1:].split(' ')[0]
        tree = IntervalTree()
        trees_by_header[name] = tree
        spans = [(match.start(0), match.end(0))
                 for match in GapSequence(sequence).get_gap_coords()]
        for begin, end in spans:
            tree[begin:end] = (begin, end)
    return trees_by_header
Exemple #3
0
def get_unique_loci(intervals):
    """Merge overlapping intervals that share a (genome, seqid) pair.

    Args:
        intervals: iterable of ``(genome, seqid, start, end)`` tuples.

    Returns:
        list of ``(genome, seqid, begin, end)`` tuples, one per interval
        left in the tree after ``merge_overlaps()``.
    """
    by_sequence = defaultdict(list)
    for genome, seqid, start, end in intervals:
        by_sequence[genome, seqid].append(Interval(start, end))

    merged_loci = []
    for (genome, seqid), members in by_sequence.items():
        tree = IntervalTree(members)
        tree.merge_overlaps()
        merged_loci.extend(
            (genome, seqid, merged.begin, merged.end) for merged in tree)

    return merged_loci
Exemple #4
0
def make_intervaltree(df: pd.DataFrame) -> IntervalTree:
    """Turn the rows of ``df`` into an ``IntervalTree``.

    Each row becomes the interval ``[offset - response_size, offset)`` with
    ``(kind, row index)`` as its data.  Rows whose start equals their
    offset are reported on stdout and skipped.

    Raises:
        Exception: if ``df`` is empty.
    """
    if df.empty:
        raise Exception(
            "Error! Try to make intervaltree from empty dataframe.")

    collected = []
    for row_label, row in df.iterrows():
        begin = row['offset'] - row['response_size']
        if begin == row['offset']:
            print("Emtpy interval! .. skip!")
            continue
        collected.append(
            Interval(begin, row['offset'], (row['kind'], row_label)))
    return IntervalTree(collected)
Exemple #5
0
    def __init__(self, *args, **kwargs) -> None:
        """Initialize the EdgeCollection object."""
        # The base class is set up first and receives every argument as given.
        super().__init__(*args, **kwargs)

        # Multi-edge indicator, read from the 'multiedges' keyword
        # (False when the keyword is absent).
        multiedges = kwargs.pop('multiedges', False)
        self._multiple: bool = multiedges

        # Events are stored in an interval tree that starts out empty.
        self._events = IntervalTree()

        # Class used for the objects of this collection.
        self._default_class: Any = TemporalEdge
Exemple #6
0
def find_clusters(index_tree):
    """
    Define a root cluster for each smoothing maximum

    For every maximum (entries whose first element is 1) a cluster dict with
    the keys 'max', 'left_m' and 'right_m' is created. Margins are taken from
    the neighbouring entries:

    - interior maximum: the neighbour on each side, unless that neighbour is
      itself a maximum, in which case the maximum is its own margin;
    - first entry: the maximum is its own left margin, the next entry is the
      right margin;
    - last entry: the previous entry is the left margin, the maximum is its
      own right margin;
    - only entry: the maximum is both margins.

    Args:
        index_tree (IntervalTree): data are lists of tuples of 4 elements (min or max, cds region, genomic position,
                       smoothing score).

    Returns:
        clusters_tree (IntervalTree): data are dict of dict
    """

    def _payload(entry):
        # Keep (cds region, genomic position, smoothing score); drop the flag.
        return entry[1], entry[2], entry[3]

    clusters_tree = IntervalTree()

    for interval in index_tree:
        clusters = defaultdict(dict)
        indexes = interval.data
        last = len(indexes) - 1
        j = 0

        # enumerate() gives the true position of each maximum; list.index()
        # was quadratic and returned the first match for duplicate entries.
        for i, entry in enumerate(indexes):
            if entry[0] != 1:
                continue
            own = _payload(entry)
            has_left = i > 0
            has_right = i < last

            if has_left and has_right:
                left_entry, right_entry = indexes[i - 1], indexes[i + 1]
                left_m = _payload(left_entry) if left_entry[0] != 1 else own
                right_m = _payload(right_entry) if right_entry[0] != 1 else own
            elif has_right:
                # First position
                left_m = own
                right_m = _payload(indexes[i + 1])
            elif has_left:
                # Last position
                left_m = _payload(indexes[i - 1])
                right_m = own
            else:
                # Only entry: there is no neighbour on either side
                left_m = right_m = own

            clusters[j]['max'] = own
            clusters[j]['left_m'] = left_m
            clusters[j]['right_m'] = right_m
            j += 1
        clusters_tree.addi(interval[0], interval[1], clusters)

    return clusters_tree
Exemple #7
0
    def _augment(self, source, times=1, gain=-8):
        """Overlay audio from ``source`` onto every sample in ``self.samples``.

        The augmentation samples loaded from ``source`` are laid end to end
        on a millisecond timeline.  Each original sample is written, placed
        on its own end-to-end timeline, and then mixed with whatever
        augmentation audio overlaps its position.  This is repeated
        ``times`` times, advancing the position by the total original
        duration (modulo the total augmentation duration) each time.

        Args:
            source: Passed to ``self._load_samples``.
            times: Number of overlay passes per sample.
            gain: Added to the level difference between the original and the
                overlay before the overlay is mixed in.
        """
        def duration_ms(sample):
            return int(math.ceil(sample.file.duration * 1000.0))

        aug_samples = self._load_samples(source)
        timeline = IntervalTree()

        aug_durs = self._map(aug_samples, lambda s: duration_ms(s))
        total_aug_dur = sum(aug_durs)
        cursor = 0
        for aug_sample, aug_dur in zip(aug_samples, aug_durs):
            timeline[cursor:cursor + aug_dur] = aug_sample
            cursor += aug_dur

        def write_and_measure(sample):
            sample.write()
            return duration_ms(sample)

        orig_durs = self._map(self.samples, write_and_measure)
        total_orig_dur = sum(orig_durs)

        # Start offset of every original sample on its own timeline.
        placed = []
        cursor = 0
        for orig_sample, orig_dur in zip(self.samples, orig_durs):
            placed.append((cursor, orig_sample))
            cursor += orig_dur

        def mix_one(pos_sample):
            start_pos, sample = pos_sample
            original = sample.read_audio_segment()
            span = len(original)
            mix = AudioSegment.silent(duration=span)
            window_start = start_pos
            for _ in range(times):
                for hit in timeline[window_start:window_start + span]:
                    clip = hit.data.read_audio_segment()
                    shift = hit.begin - window_start
                    if shift < 0:
                        # Clip begins before the window: trim its head.
                        clip = clip[-shift:]
                        shift = 0
                    mix = mix.overlay(clip, position=shift)
                window_start = (window_start + total_orig_dur) % total_aug_dur
            mix = mix + (original.dBFS - mix.dBFS + gain)
            sample.write_audio_segment(original.overlay(mix))

        self._map(placed, mix_one)
        print('Augmented %d samples in buffer.' % len(self.samples))
Exemple #8
0
def get_exon_locs(protfile=PROTFILE, chrom=CHROMOSOMES):
  """
  Parse exon locations from the header lines of a (optionally gzipped) FASTA file.

  Only lines starting with '>' are used. The location is the token following
  'chromosome:<GENOME_BUILD>:' and the gene ID is the token following 'gene:'.

  :param protfile: Path to the FASTA file; read through gzip when the name ends in 'gz'
  :param chrom: Collection of chromosome IDs (with 'chr' prefix) to keep
  :return: dict mapping each chromosome ID in chrom to an IntervalTree of exon
           locations whose data is '<geneID>_<start>-<end>'; the trees are empty
           when protfile does not exist
  """

  genelocs = {chromID: IntervalTree() for chromID in chrom}

  if not os.path.isfile(protfile):
    sys.stderr.write('Could not open ' + protfile + '\n')
    return genelocs

  locstart = 'chromosome:' + GENOME_BUILD + ':'
  geneintervals = {}

  # Text mode so gzip yields str lines (its default is binary); the with-block
  # closes the handle even when a header fails to parse.
  opener = gzip.open if protfile.endswith('gz') else open
  with opener(protfile, 'rt') as handle:
    for line in handle:
      if not line.startswith('>'):
        continue

      loc = line[line.find(locstart) + len(locstart):].split()[0]
      loc_fields = loc.split(':')
      chromID = loc_fields[0]
      chromID = ('' if chromID.startswith('chr') else 'chr') + chromID

      if chromID not in chrom: continue

      exons = loc_fields[1].replace('join(', '').replace('complement(', '').replace(')', '').split(',')
      exons = [tuple(sorted([int(a.split('..')[0]), int(a.split('..')[1])])) for a in exons]

      geneID = line[line.find('gene:') + 5:].split()[0]  # e.g., ENSG00000000457.9

      gene_exons = geneintervals.setdefault(chromID, {}).setdefault(geneID, set())

      # Add each POSITIVE exon SEPARATELY!
      # NOTE(review): the original comment says these are 1-indexed and need
      # to be 0-indexed, but no conversion is applied -- confirm intent.
      for exon_start, exon_end in exons:
        if exon_start > -1 and exon_end > -1:
          gene_exons.add((exon_start, exon_end))

  # Store all the information as interval trees:
  for chromID, genes_on_chrom in geneintervals.items():
    for geneID, gene_exons in genes_on_chrom.items():
      for exon_start, exon_end in gene_exons:
        # A single-position exon is widened by one so the interval is not empty.
        genelocs[chromID].addi(exon_start, exon_end + (1 if exon_end == exon_start else 0),
                               geneID + '_' + str(exon_start) + '-' + str(exon_end))

  return genelocs
Exemple #9
0
def score(clusters_tree, regions, mutations_element):
    """
    Score clusters with fraction of mutations formula and number of cluster's mutations

    For each mutated position that falls in ``regions`` the contribution is
    ``(count / mutations_element) * 100 / sqrt(2) ** distance``, where
    ``distance`` is the distance from the position to the cluster's smoothing
    maximum. The sum of contributions times the number of mutations in the
    cluster is stored under the cluster's 'score' key.

    Args:
        clusters_tree( IntervalTree): genomic regions are intervals, data are trimmed clusters (dict of dict)
        regions (IntervalTree): IntervalTree where intervals are genomic positions of an element
        mutations_element (int): number of mutations in the element

    Returns:
        score_clusters_tree (IntervalTree): genomic regions are intervals, data are scored clusters (dict of dict)
    """
    score_clusters_tree = IntervalTree()
    root = m.sqrt(2)

    for interval in clusters_tree:
        clusters = interval.data.copy()
        for cluster, values in clusters.items():
            # Number of mutations on each mutated position
            hits_per_position = defaultdict(int)
            for mutation in values['mutations']:
                hits_per_position[mutation.position] += 1

            total = 0
            for position, count in hits_per_position.items():
                position_regions = regions[position]
                if not position_regions:
                    continue

                # Region holding the mutated position and region holding the
                # smoothing maximum (the last one iterated wins in each case).
                peak = values['max'][1]
                mut_region = set()
                for candidate in position_regions:
                    mut_region = candidate
                max_region = set()
                for candidate in regions[peak]:
                    max_region = candidate

                # Distance of position to smoothing maximum
                if mut_region[0] == max_region[0]:
                    distance_to_max = abs(position - peak)
                elif mut_region[0] < max_region[0]:
                    distance_to_max = (mut_region[1] - position) + (peak - max_region[0])
                else:
                    distance_to_max = (max_region[1] - peak) + (position - mut_region[0])

                fraction = (count / mutations_element) * 100
                total += (fraction / m.pow(root, distance_to_max))

            clusters[cluster]['score'] = total * len(values['mutations'])
        score_clusters_tree.addi(interval[0], interval[1], clusters)

    return score_clusters_tree
Exemple #10
0
 def __init__(self, interval_tuples:Iterator[Tuple[Chrom,int,int,GeneName]]):
     '''Index gene intervals per chromosome.

     interval_tuples is like [('22', 12321, 12345, 'APOL1'), ...]

     For every chromosome this builds an IntervalTree of the genes plus two
     BisectFinder objects, one over (start, gene) and one over (end, gene).
     '''
     self._its: Dict[Chrom,IntervalTree] = {}
     starts_by_chrom: Dict[Chrom,List[Tuple[int,GeneName]]] = {}
     ends_by_chrom: Dict[Chrom,List[Tuple[int,GeneName]]] = {}
     for chrom, begin, end, gene in interval_tuples:
         if chrom not in self._its:
             # First gene seen on this chromosome: create its containers.
             self._its[chrom] = IntervalTree()
             starts_by_chrom[chrom] = []
             ends_by_chrom[chrom] = []
         self._its[chrom].add(Interval(begin, end, gene))
         starts_by_chrom[chrom].append((begin, gene))
         ends_by_chrom[chrom].append((end, gene))
     self._gene_starts = {
         chrom: BisectFinder(pairs) for chrom, pairs in starts_by_chrom.items()
     }
     self._gene_ends = {
         chrom: BisectFinder(pairs) for chrom, pairs in ends_by_chrom.items()
     }
    def get_interval_cu(self, cu_id):
        """Collect interval trees and cycle values for one compute unit.

        ``get_interval_cu_cond`` is queried three times (``MEM LD``,
        ``MEM ST`` and everything that is not ``MEM LD``); each returned
        interval list is turned into an ``IntervalTree``.  The overall cycle
        value comes from ``get_max`` over ``start + length``.

        Returns:
            dict with the keys 'mem_ld', 'mem_st', 'other' (interval trees)
            and 'cycle_all', 'cycle_mem_ld', 'cycle_mem_st', 'cycle_other'.
        """
        def query(condition):
            cycle_value, spans = self.get_interval_cu_cond(cu_id, condition)
            return cycle_value, IntervalTree(Interval(*iv) for iv in spans)

        # MEM LD
        mem_ld_cycle, mem_ld_tree = query('LIKE "%MEM LD%"')
        # MEM ST
        mem_st_cycle, mem_st_tree = query('LIKE "%MEM ST%"')
        # OTHER
        # NOTE(review): this condition only excludes MEM LD, so MEM ST rows
        # presumably also match here -- confirm that is intended.
        other_cycle, other_tree = query('NOT LIKE "%MEM LD%"')

        cycle = self.get_max('inst', 'start + length',
                             ' WHERE cu=' + str(cu_id))

        return {
            'mem_ld': mem_ld_tree,
            'mem_st': mem_st_tree,
            'other': other_tree,
            'cycle_all': cycle,
            'cycle_mem_ld': mem_ld_cycle,
            'cycle_mem_st': mem_st_cycle,
            'cycle_other': other_cycle,
        }
Exemple #12
0
def generate_interval_tree(peak_properties):
    """Construct an interval tree containing the elution windows of the analytes.

    Args:
        peak_properties (dict): Maps a key to a dict holding
            "scan_start_time" and "peak_width".

    Returns:
        IntervalTree: One interval
            ``[scan_start_time, scan_start_time + peak_width)`` per entry,
            with the entry's key as data.
    """
    windows = IntervalTree()
    for analyte, properties in peak_properties.items():
        begin = properties["scan_start_time"]
        windows.addi(begin, begin + properties["peak_width"], analyte)
    return windows
def scan_tree(intervals):
    """Return the data of all intervals involved in an overlap.

    An interval tree is built from ``intervals`` and each input interval is
    queried against it.  Whenever a query hits two or more intervals (i.e.
    itself + 1 other), the data of every hit is added to the result set.
    """
    tree = IntervalTree(Interval(*iv) for iv in intervals)

    overlapping_data = set()
    for candidate in intervals:
        hits = tree.overlap(candidate[0], candidate[1])
        if len(hits) > 1:
            overlapping_data.update(hit.data for hit in hits)

    return overlapping_data
Exemple #14
0
def test_copy_cast():
    """Copy-constructing a tree and casting it to list/set keeps its members."""
    original = trees['ivs1']()

    duplicate = IntervalTree(original)
    duplicate.verify()
    assert original == duplicate

    # list() round trip: membership holds in both directions.
    as_list = list(original)
    assert all(iv in original for iv in as_list)
    assert all(iv in as_list for iv in original)

    # set() yields the same members as items().
    as_set = set(original)
    assert as_set == original.items()
    def calc_current_cnv_lineage(self, start, end, cluster_num, phylogeny):
        """Build paternal and maternal trees restricted to a cluster lineage.

        For each parental tree a copy is sliced at ``start`` and ``end``; the
        intervals enveloped by ``[start, end)`` whose data's ``cluster_num``
        is in the lineage of ``cluster_num`` are collected, split at their
        overlaps and merged again with ``self.sum_levels`` as data reducer.

        Returns:
            tuple ``(pat_tree, mat_tree)`` of ``IntervalTree`` objects.
        """
        lineage_clusters, _ = phylogeny.get_lineage(cluster_num)

        def lineage_tree(parent_tree):
            working = parent_tree.copy()
            working.slice(start)
            working.slice(end)
            selected = IntervalTree()
            for iv in working.envelop(start, end):
                if iv.data.cluster_num in lineage_clusters:
                    selected.add(iv)
            selected.split_overlaps()
            selected.merge_overlaps(data_reducer=self.sum_levels)
            return selected

        pat_tree = lineage_tree(self.paternal_tree)
        mat_tree = lineage_tree(self.maternal_tree)
        return pat_tree, mat_tree
    def generate_phase_switching(self):
        """Draw random phase-switch segments for every chromosome.

        For each ``(chrom, size)`` in ``self.csize`` the range starting at 1
        is tiled with consecutive intervals whose lengths are floored draws
        from an exponential distribution with scale 1e6.  The interval data
        alternates between True and False, starting with True.  The last
        interval may extend past ``size``.

        Returns:
            dict mapping chromosome to its ``IntervalTree``.
        """
        phase_switches = {}
        for chrom, size in self.csize.items():
            tree = IntervalTree()
            start = 1
            correct_phase = True
            while start < size:
                # A draw below 1 floors to 0, which would be a null interval;
                # IntervalTree rejects those with ValueError, so use at
                # least length 1.
                interval_len = max(1.0, np.floor(np.random.exponential(1e6)))
                tree[start:start + interval_len] = correct_phase
                correct_phase = not correct_phase
                start += interval_len

            phase_switches[chrom] = tree

        return phase_switches
Exemple #17
0
def read_targets(in_bed, targets):
    """Add the regions of a BED file to per-chromosome interval trees.

    Args:
        in_bed: Path of the BED file. Lines with fewer than three
            whitespace-separated fields are ignored.
        targets: dict mapping chromosome name (without a leading 'chr') to
            ``IntervalTree``; missing chromosomes are created. Modified in
            place.
    """
    with open(in_bed, 'rt') as bed:
        for record in bed:
            columns = record.rstrip().split()
            if len(columns) < 3:
                continue
            name = columns[0]
            chrom = name[3:] if name.startswith('chr') else name
            # BED starts are 0-based, so shift to the 1-based first position.
            first = int(columns[1]) + 1
            # The BED end needs no shift to be the 1-based last position.
            last = int(columns[2])
            # IntervalTree excludes the end coordinate, hence last + 1.
            targets.setdefault(chrom, IntervalTree()).addi(first, last + 1)
Exemple #18
0
def test_original_sequence():
    """Replay a fixed sequence of insertions and removals on one tree."""
    steps = (
        ('addi', 17.89, 21.89),
        ('addi', 11.53, 16.53),
        ('removei', 11.53, 16.53),
        ('removei', 17.89, 21.89),
        ('addi', -0.62, 4.38),
        ('addi', 9.24, 14.24),
        ('addi', 4.0, 9.0),
        ('removei', -0.62, 4.38),
        ('removei', 9.24, 14.24),
        ('removei', 4.0, 9.0),
        ('addi', 12.86, 17.86),
        ('addi', 16.65, 21.65),
        ('removei', 12.86, 17.86),
    )
    t = IntervalTree()
    for method_name, begin, end in steps:
        getattr(t, method_name)(begin, end)
Exemple #19
0
def get_single_iv_tree(curr_path):
    """Read the snapshot bitmap at ``curr_path`` into a new interval tree.

    ``read_snapshot_bitmap`` is called with ``add_by_lba_cb`` and the new
    tree.  A falsy result is reported through
    ``xlogging.raise_and_logging_error``; should that call return, ``None``
    is returned.  Otherwise the tree's size is logged and the tree returned.
    """
    log_info_msg("[get_single_iv_tree] enter")
    tree = IntervalTree()
    succeeded = read_snapshot_bitmap(curr_path, add_by_lba_cb, tree)
    if not succeeded:
        xlogging.raise_and_logging_error(
            r'读取位图文件失败',
            r'[get_single_iv_tree] get read_snapshot_bitmap failed')
        return None

    log_dbg_msg("[get_single_iv_tree] count={}".format(len(tree)))
    return tree
def test_chop():
    """``chop`` removes a range from the single interval [0, 10)."""
    def chopped(begin, end):
        tree = IntervalTree([Interval(0, 10)])
        tree.chop(begin, end)
        return sorted(tree)

    # Middle removed: two pieces remain.
    assert chopped(3, 7) == [Interval(0, 3), Interval(7, 10)]
    # Head removed.
    assert chopped(0, 7) == [Interval(7, 10)]
    # Tail removed.
    assert chopped(5, 10) == [Interval(0, 5)]
    # Range larger than the interval: nothing remains.
    assert chopped(-5, 15) == []
    # Range equal to the interval: nothing remains.
    assert chopped(0, 10) == []
Exemple #21
0
 def __init__(self,
              scheduler: Scheduler,
              name: str,
              id: int,
              resources_list: Resources = None,
              capacity_bytes: int = 0):
     """Create the resource with an empty allocation table and interval tree.

     ``scheduler``, ``name``, ``id`` and ``resources_list`` are forwarded to
     the base class, always with ``resource_sharing=True``.
     ``capacity_bytes`` is stored as ``self.capacity``.
     """
     super().__init__(
         scheduler, name, id, resources_list, resource_sharing=True)
     self.capacity = capacity_bytes
     # job_id -> [(start, end, num_bytes)]
     self._job_allocations: Dict[JobId, Interval] = {}
     self._interval_tree = IntervalTree()
Exemple #22
0
def test_update():
    """``update`` adds intervals given as a set and as a list."""
    tree = IntervalTree()

    # Update from a set.
    first = Interval(0, 1)
    tree.update({first})
    assert isinstance(tree, IntervalTree)
    assert len(tree) == 1
    assert set(tree).pop() == first

    # Update from a list.
    second = Interval(2, 3)
    tree.update([second])
    assert isinstance(tree, IntervalTree)
    assert len(tree) == 2
    assert sorted(tree)[1] == second
def find_remaining(
    itrees: Mapping[str, IntervalTree],
    nstretches: Mapping[str, IntervalTree],
    scaffolds: Mapping[str, SeqRecord],
) -> None:
    """Add the uncovered parts of every scaffold to its tree in ``itrees``.

    For each scaffold, the intervals of ``itrees[scaffold]`` and
    ``nstretches[scaffold]`` are merged into a "covered" tree.  The parts of
    ``[0, len(seq))`` outside that tree are then added to
    ``itrees[scaffold]`` in place.
    """
    for name, record in scaffolds.items():
        contig_tree = itrees[name]
        gap_tree = nstretches[name]

        # Data-free copies: intervals carrying data would not be removed by
        # the difference below.
        bare = [Interval(iv.begin, iv.end) for iv in contig_tree]
        bare += [Interval(iv.begin, iv.end) for iv in gap_tree]
        covered = IntervalTree(bare)
        # strict=False also merges adjacent, non-overlapping intervals.
        covered.merge_overlaps(strict=False)

        whole = IntervalTree([Interval(0, len(record))])
        remaining = whole | covered
        remaining.split_overlaps()
        remaining.difference_update(covered)

        itrees[name].update(remaining)
Exemple #24
0
def mouse_gene_intervals():
    """Build one interval tree of protein-coding genes per (chromosome, strand).

    Rows of the GTF dataframe read from ``GENCODE_MM10_FILE`` are kept when
    ``feature`` is 'gene' and ``feature_type`` is 'protein_coding'.  Each
    kept row with ``end > start`` is stored as ``[start - 1, end)`` with its
    ``gene_id`` as data.

    Returns:
        dict keyed by ``(chromosome, strand)`` tuples, one entry for every
        combination of ``MOUSE_CHROMOSOMES`` and '+' / '-'.
    """
    df = read_gtf_as_dataframe(GENCODE_MM10_FILE)
    # Each comparison needs its own parentheses: '&' binds tighter than '=='.
    df = df[(df.feature == 'gene') & (df.feature_type == 'protein_coding')]
    print(len(df))
    trees = {chromosome_strand: IntervalTree() for chromosome_strand in product(MOUSE_CHROMOSOMES, ['+', '-'])}
    for _, row in df.iterrows():
        if row['end'] > row['start']:
            # end is included, start count at 0 instead of 1
            # product() yields tuples, so the lookup key must be a tuple too.
            key = (row['seqname'], row['strand'])
            trees[key][row['start'] - 1:row['end']] = row['gene_id']

    logging.info('Built mouse exon tree with {} nodes'
                 .format(sum([len(tree) for tree in trees.values()])))

    return trees
Exemple #25
0
def create_trees():
    """
    Makes a dict of callables that create the trees named.

    For every name in ``intervals.ivs`` the callable is the ``tree``
    attribute of the matching ``test.data`` module when it has one;
    otherwise it is the ``copy`` method of a tree built from the interval
    list.
    """
    pbar = ProgressBar(len(intervals.ivs))
    print('Creating trees from interval lists...')
    factories = {}
    for name, ivs in intervals.ivs.items():
        pbar()
        module = from_import('test.data', name)
        factories[name] = (module.tree if hasattr(module, 'tree')
                           else IntervalTree(ivs).copy)
    return factories
Exemple #26
0
def create_interval_tree():
    """Return an ``IntervalTree`` mapping rating ranges to label strings.

    NOTE(review): the bounds are half-open, so e.g. 250 falls in the
    interval labelled '251:500' and 500 in the one labelled '501:750' --
    confirm whether the labels or the bounds are the intended ones.
    """
    buckets = (
        (0, 250, '0:250'),
        (250, 500, '251:500'),
        (500, 750, '501:750'),
        (750, 1000, '751:1000'),
        (1000, 1250, '1001:1250'),
        (1250, 1500, '1251:1500'),
        (1500, 1750, '1501:1750'),
        (1750, 2000, '1751:2000'),
        (2000, 2250, '2001:2250'),
        (2250, 2500, '2251:2500'),
        (2500, 4000, '2501+'),
    )
    rating_intervals = IntervalTree()
    for low, high, label in buckets:
        rating_intervals[low:high] = label

    return rating_intervals
Exemple #27
0
def load_coverage_df(exon_padding, tx_accession, samples):
    """Assemble a per-position coverage table for one transcript.

    The "chrom", "pos" and "exon_no" columns come from the first sample's
    frame; one further column (positional column 3 of each frame returned by
    ``load_coverage``) is appended per sample.  The result is sorted by
    "pos".

    Args:
        exon_padding: Not used in this function.
        tx_accession: Key into ``genes.load_transcripts()``.
        samples: Samples passed one by one to ``load_coverage``.
    """
    transcript = genes.load_transcripts()[tx_accession]
    exon_tree = IntervalTree(
        [Interval(exon.begin, exon.end) for exon in transcript.exons])
    per_sample = [
        load_coverage(sample, transcript.chrom, exon_tree, transcript)
        for sample in samples
    ]
    first = per_sample[0]
    columns = [first["chrom"], first["pos"], first["exon_no"]]
    columns += [frame.iloc[:, 3] for frame in per_sample]
    df_coverage = pd.concat(columns, axis="columns")
    df_coverage.sort_values("pos", inplace=True)
    return df_coverage
Exemple #28
0
def test_copy_cast():
    """Copy-constructing a tree and casting it to list/set keeps its members."""
    original = IntervalTree.from_tuples(data.ivs1.data)

    duplicate = IntervalTree(original)
    duplicate.verify()
    assert original == duplicate

    # list() round trip: membership holds in both directions.
    as_list = list(original)
    assert all(iv in original for iv in as_list)
    assert all(iv in as_list for iv in original)

    # set() yields the same members as items().
    as_set = set(original)
    assert as_set == original.items()
Exemple #29
0
 def _build_regions(self):
     """Rebuild ``self._tree`` from the ELF sections located in flash.

     Sections whose ``region`` is set and flagged ``is_flash`` are added as
     ``[start, start + length)`` with the section object as data; sections
     of length zero are skipped.
     """
     self._tree = IntervalTree()
     flash_sections = [
         s for s in self._elf.sections
         if (s.region and s.region.is_flash)
     ]
     for sect in flash_sections:
         begin = sect.start
         size = sect.length
         # Skip empty sections.
         if size == 0:
             continue
         sect.data  # Go ahead and read the data from the file.
         end = begin + size
         self._tree.addi(begin, end, sect)
         LOG.debug("created flash section [%x:%x] for section %s", begin,
                   end, sect.name)
Exemple #30
0
    def read(self, length, offset, fh):
        """
        Read data from this GhostFile.

        Only the rewritten intervals overlapping the requested range are read
        from ``fh``; every other byte of the range is returned as zero. The
        result is truncated at the file size.

        :param length: number of bytes requested
        :param offset: position of the first requested byte
        :param fh: file descriptor used with ``os.lseek`` / ``os.read``
        :return: the bytes read; empty when ``offset`` is at or past the file
                 size or ``length`` is 0
        """
        if offset >= self.__filesize or length == 0:
            return b''

        read_end = offset + length

        # Rewritten intervals overlapping the request, merged and clipped to
        # the requested range.
        intervals = IntervalTree(self.__rewritten_intervals[offset:read_end])
        intervals.merge_overlaps()
        intervals.slice(offset)
        intervals.slice(read_end)
        intervals = sorted(intervals[offset:read_end])
        assert offset < self.__filesize
        assert intervals[0].begin >= offset and intervals[-1].end <= read_end if len(intervals) > 0 else True

        if len(intervals) == 0:
            return b'\x00' * min(length, self.__filesize - offset)

        # Pieces are collected and joined once; repeated ``bytes +=`` copies
        # the whole buffer on every step.
        chunks = []

        # Used to fill any hole at the start of the read range
        end_prev_interval = offset

        # Read the data
        for interv in intervals:
            # Fill any hole before this interval
            chunks.append(b'\x00' * (interv.begin - end_prev_interval))

            os.lseek(fh, interv.begin, os.SEEK_SET)
            chunks.append(os.read(fh, interv.length()))

            end_prev_interval = interv.end

        # Fill any hole at the end of the read range
        chunks.append(b'\x00' * (read_end - intervals[-1].end))
        data = b''.join(chunks)

        if read_end > self.__filesize:
            data = data[0:self.__filesize-offset]

        assert len(data) <= length
        assert offset + len(data) <= self.__filesize
        return data