Python IntervalTree.envelop Examples

Programming Language: Python

Namespace/Package Name: intervaltree

Class/Type: IntervalTree

Method/Function: envelop

Examples at hotexamples.com: 9

Python IntervalTree.envelop - 9 examples found. These are the top-rated real-world Python examples of intervaltree.IntervalTree.envelop, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

IntervalTree(30)

from_tuples(30)

overlap(30)

merge_overlaps(30)

search(30)

addi(30)

add(24)

chop(21)

end(16)

items(15)

at(14)

begin(14)

overlaps(14)

copy(10)

merge_equals(9)

removei(9)

clear(8)

envelop(8)

remove_overlap(6)

extend(6)

discard(4)

remove(3)

remove_envelop(3)

chop_intervals_that_envelope_range(3)

insert_interval(3)

is_empty(3)

discardi(2)

print_structure(2)

overlaps_point(2)

append(2)

add_interval(2)

merge_neighbors(2)

range(1)

pop(1)

score(1)

after(1)

find_nested(1)

iter(1)

intersection(1)

intersect(1)

insert(1)

after_interval(1)

find(1)

__iter__(1)

downstream_of_interval(1)

difference_update(1)

containsi(1)

computeUtilizationHistogram(1)

computeCountHistogram(1)

before_interval(1)

Example #1

Show file

File: aslr_oracle.py Project: tarun-hacker/SkCodecFuzzer

class AslrOracle:
  """Remembers which address ranges were already found good or bad.

  Two interval trees back the cache: ``good_regions`` (merged after every
  insertion) and ``bad_regions``. ``CheckRange`` is called by
  ``CheckAddress`` but is not defined in this excerpt.
  """

  def __init__(self):
    # Counter initialised here; nothing in this excerpt increments it.
    self.queries = 0

    self.InitCache()

  def CheckAddress(self, address):
    """Delegate to ``CheckRange`` with ``0x1000`` as the second argument."""
    return self.CheckRange(address, 0x1000)

  def InitCache(self):
    """Reset the cache-hit counter and start with two empty trees."""
    self.cached_queries = 0
    self.good_regions = IntervalTree()
    self.bad_regions = IntervalTree()

  def InsertToCache(self, start, end, valid):
    """Record ``start``..``end`` as a good (``valid``) or bad region."""
    if not valid:
      self.bad_regions.add(Interval(start, end))
      return

    # Good regions are stored with the upper bound pushed out by one and
    # are coalesced immediately so later lookups see one merged interval.
    self.good_regions.add(Interval(start, end + 1))
    self.good_regions.merge_overlaps()

  def CheckCache(self, start, end):
    """Answer a range query from the cache.

    Returns ``True`` when one good region spans ``start``..``end``,
    ``False`` when a bad region lies inside that range, and ``None`` when
    the cache cannot decide.
    """
    fully_covered = any(
        region[0] <= start and region[1] >= end
        for region in self.good_regions.overlap(start, end))
    if fully_covered:
      self.cached_queries += 1
      return True

    known_bad = self.bad_regions.envelop(start, end)
    if len(known_bad) > 0:
      self.cached_queries += 1
      return False

    return None

Example #2

Show file

class SimpleDnMedium(DnMedium):
    """Downlink medium that keeps pending messages in an interval tree.

    Each message is stored under the tick interval computed in ``add_dn``.
    """

    def __init__(self) -> None:
        self.msgs = IntervalTree()

    def add_dn(self, msg: LoraMsg) -> None:
        """Store ``msg`` under the interval [xbeg, xbeg + tpreamble) in ticks."""
        t0 = Simulation.time2ticks(msg.xbeg)
        t1 = t0 + Simulation.time2ticks(msg.tpreamble())
        self.msgs[t0:t1] = msg

    @staticmethod
    def overlap(i1: Interval, i2: Interval) -> int:
        """Return the length shared by two intervals (negative if disjoint)."""
        return min(i1.end, i2.end) - max(i1.begin, i2.begin)  # type: ignore

    def get_dn(self,
               rxon: int,
               rxtout: int,
               freq: int,
               rps: int,
               nsym: int = 4) -> Optional[LoraMsg]:
        """Take one stored message receivable in the given RX window.

        A message qualifies when ``m.match(freq, rps)`` is true and its
        interval shares at least ``nsym`` symbol times (in ticks) with the
        window ``[rxon, rxon + rxtout)``. The first qualifying message is
        removed from the tree and returned; ``None`` is returned otherwise.
        """
        rxw = Interval(rxon, rxon + rxtout)
        tpn = Simulation.time2ticks(LoraMsg.symtime(rps, nsym))
        for i in self.msgs.overlap(rxw[0], rxw[1]):
            m = i.data  # type: LoraMsg
            if m.match(freq, rps) and SimpleDnMedium.overlap(i, rxw) >= tpn:
                break
        else:
            # Loop finished without a break: nothing matched.
            return None
        self.msgs.remove(i)
        return m

    def prune(self, ticks: int) -> set:
        """Drop every interval lying entirely inside ``[0, ticks]``.

        Returns the set of intervals that were removed (possibly empty).
        The previous ``-> None`` annotation contradicted this return value.
        """
        exp = self.msgs.envelop(0, ticks)
        if exp:
            self.msgs.remove_envelop(0, ticks)
        return exp

Example #3

Show file

def find_candidate(Interval_list,
                   window=10,
                   min_primary=0,
                   min_support=0,
                   secondary_thres=0.0,
                   primary_thres=1.0):
    '''
    Group supported boundary pairs around the best-supported ones.

    The intervals are loaded into an IntervalTree and consumed greedily:
    the interval with the largest ``data`` (support) is taken out, then
    every remaining interval whose both ends lie within ``window`` of it
    is taken out as well and, if sufficiently supported, paired with it.

    Parameter:
        Interval_list:
            iterable of intervals exposing ``begin``, ``end`` and ``data``;
            ``data`` is used as the support count
        window:
            maximum distance allowed at each end between the best interval
            and an interval grouped with it
        min_primary:
            the search stops (returning what was collected so far) as soon
            as the best remaining support is below this value
        min_support:
            a group is kept only when the summed support of its members
            reaches this value
        secondary_thres:
            a neighbour joins the group only when its support is greater
            than secondary_thres * support of the best interval
        primary_thres:
            a group is kept only when
            best support / summed support <= primary_thres
    Return:
        list of (best_interval, member_interval) tuples; each kept group
        also contains the pair (best_interval, best_interval). A best
        interval without any accepted neighbour contributes nothing.
    '''
    pool = IntervalTree()
    for item in Interval_list:
        pool.addi(item.begin, item.end, item.data)

    accepted = []
    while pool:
        best = max(pool, key=lambda iv: iv.data)
        best_support = best.data
        if best_support < min_primary:  # nothing left above the lower bound
            return accepted

        pool.remove(best)

        # Candidates lie inside the widened range; the explicit checks below
        # additionally bound how far each end may sit from the best interval.
        group = []
        for other in pool.envelop(best.begin - window, best.end + window):
            close_enough = (other.begin <= best.begin + window
                            and other.end >= best.end - window)
            if not close_enough:
                continue
            if other.data > secondary_thres * best_support:
                group.append((best, other))
            # Consumed either way, so it cannot seed a later group.
            pool.remove(other)

        if not group:
            continue

        group.append((best, best))
        total = sum(member.data for _, member in group)
        if total >= min_support and best_support / total <= primary_thres:
            accepted += group
    return accepted

Example #4

Show file

def original_print():
    """Print two kinds of lookups for every interval of a small demo tree."""
    tree = IntervalTree()
    for begin, end, label in ((1, 3, "dude"), (2, 4, "sweet"), (6, 9, "rad")):
        tree.addi(begin, end, label)

    # Tuple indexing, kept exactly as in the original demonstration.
    for entry in tree:
        print(tree[entry.begin, entry.end])  # set(), should be using :

    # Same bounds, this time through envelop().
    for entry in tree:
        print(tree.envelop(entry.begin, entry.end))

Example #5

Show file

class SimpleMedium(Medium):
    """Medium that keeps pending downlink messages in an interval tree."""

    def __init__(self, put_up: Optional[Callable[[LoraMsg], None]]) -> None:
        self._put_up = put_up
        self.msgs = IntervalTree()

    def reset_medium(self) -> None:
        """Forget every stored message."""
        self.msgs.clear()

    def add_dn(self, msg: LoraMsg) -> None:
        """Store ``msg`` under [xbeg, xbeg + tpreamble) expressed in ticks."""
        begin = Simulation.time2ticks(msg.xbeg)
        end = begin + Simulation.time2ticks(msg.tpreamble())
        self.msgs[begin:end] = msg

    @staticmethod
    def overlap(i1: Interval, i2: Interval) -> int:
        """Return the length shared by two intervals (negative if disjoint)."""
        shared_end = min(i1.end, i2.end)
        shared_begin = max(i1.begin, i2.begin)
        return shared_end - shared_begin  # type: ignore

    def get_dn(self,
               rxon: int,
               rxtout: int,
               freq: int,
               rps: int,
               nsym: int = 4,
               peek=False) -> Optional[LoraMsg]:
        """Find a stored message receivable in the given RX window.

        A message qualifies when ``match(freq, rps)`` is true and, unless
        ``peek`` is set, its interval shares at least ``nsym`` symbol times
        (in ticks) with ``[rxon, rxon + rxtout)``. The first qualifying
        message is returned; it is removed from the tree only when ``peek``
        is false. Returns ``None`` when nothing qualifies.
        """
        window = Interval(rxon, rxon + rxtout)
        min_ticks = Simulation.time2ticks(LoraMsg.symtime(rps, nsym))
        for stored in self.msgs.overlap(window[0], window[1]):
            candidate = stored.data  # type: LoraMsg
            if not candidate.match(freq, rps):
                continue
            if peek or SimpleMedium.overlap(stored, window) >= min_ticks:
                if not peek:
                    self.msgs.remove(stored)
                return candidate
        return None

    def prune(self, ticks: int) -> List[LoraMsg]:
        """Remove intervals lying inside ``[0, ticks]``; return their payloads."""
        expired = cast(List[Interval], self.msgs.envelop(0, ticks))
        if not expired:
            return []
        self.msgs.remove_envelop(0, ticks)
        # Index 2 is the payload slot of each stored interval.
        return [entry[2] for entry in expired]

Example #6

Show file

File: query_test.py Project: alex-r-bigelow/intervaltree

def test_empty_queries():
    """Every query on a freshly created tree yields an empty result."""
    tree = IntervalTree()
    nothing = set()

    # Size and emptiness.
    assert len(tree) == 0
    assert tree.is_empty()

    # Point and slice lookups.
    assert tree[3] == nothing
    assert tree[4:6] == nothing

    # Bounds of an empty tree and range queries built from them.
    assert tree.begin() == 0
    assert tree.end() == 0
    assert tree[tree.begin():tree.end()] == nothing
    assert tree.overlap(tree.begin(), tree.end()) == nothing
    assert tree.envelop(tree.begin(), tree.end()) == nothing

    # Whole-tree views.
    assert tree.items() == nothing
    assert set(tree) == nothing
    assert set(tree.copy()) == nothing
    assert tree.find_nested() == {}

    # The overall range is the null interval.
    assert tree.range().is_null()
    assert tree.range().length() == 0

    # Internal consistency check provided by the tree itself.
    tree.verify()

Example #7

Show file

File: query_test.py Project: chaimleib/intervaltree

def test_empty_queries():
    """Check that an IntervalTree with no intervals behaves as empty."""
    empty_tree = IntervalTree()
    no_hits = set()

    assert len(empty_tree) == 0
    assert empty_tree.is_empty()
    # Indexing by point and by slice both return an empty set.
    assert empty_tree[3] == no_hits
    assert empty_tree[4:6] == no_hits
    # begin() and end() of an empty tree are both zero ...
    assert empty_tree.begin() == 0
    assert empty_tree.end() == 0
    # ... and querying between them finds nothing.
    assert empty_tree[empty_tree.begin():empty_tree.end()] == no_hits
    assert empty_tree.overlap(empty_tree.begin(), empty_tree.end()) == no_hits
    assert empty_tree.envelop(empty_tree.begin(), empty_tree.end()) == no_hits
    # Content views are empty too.
    assert empty_tree.items() == no_hits
    assert set(empty_tree) == no_hits
    assert set(empty_tree.copy()) == no_hits
    assert empty_tree.find_nested() == {}
    # range() is a null, zero-length interval.
    assert empty_tree.range().is_null()
    assert empty_tree.range().length() == 0
    empty_tree.verify()

Example #8

Show file

class AmpliconSet:
    """A named set of amplicons searchable by interval and by primer sequence.

    Attributes assigned by the constructor: ``name``, ``shortname``,
    ``amplicons``, ``amplicon_ids``, ``tree`` (IntervalTree of amplicons),
    ``seqs`` (primer-prefix -> amplicon) and ``min_primer_length``.
    """

    def __init__(
        self,
        name,
        amplicons,
        tolerance=5,
        shortname=None,
    ):
        """Index ``amplicons`` (a mapping name -> amplicon).

        ``tolerance`` widens each amplicon's interval on both sides before
        it is inserted into the tree. ``shortname`` is used as given when
        truthy; otherwise a one-character tag is derived from ``name``.

        Raises ValueError (from ``min``) when no primer is present at all.
        """
        # Always assign the attribute: previously a caller-supplied shortname
        # was dropped and self.shortname stayed undefined.
        # The fallback maps the sum of the name's code points onto one of 54
        # consecutive characters starting at chr(65).
        self.shortname = shortname or chr(
            ((sum(map(ord, name)) - ord("A")) % 54) + 65)
        self.tree = IntervalTree()
        self.name = name
        self.seqs = {}
        self.amplicons = amplicons
        self.amplicon_ids = {}

        primer_lengths = set()
        sequences = {}
        for amplicon in amplicons.values():
            self.amplicon_ids[amplicon.shortname] = amplicon

            for primer in amplicon.left:
                sequences[primer.seq] = amplicon
                primer_lengths.add(len(primer.seq))
            for primer in amplicon.right:
                # note: you may want to reverse complement this
                sequences[primer.seq] = amplicon
                primer_lengths.add(len(primer.seq))

            # interval containment tolerance
            start = amplicon.start - tolerance
            end = amplicon.end + tolerance
            self.tree[start:end] = amplicon

        self.min_primer_length = min(primer_lengths)
        # the internal sequences table allows lookup by primer sequence;
        # keys are truncated to the shortest primer length
        for seq, amplicon in sequences.items():
            self.seqs[seq[:self.min_primer_length]] = amplicon

    def __eq__(self, other):
        """Two sets are equal when they share the exact type and all attributes."""
        return type(other) is type(self) and self.__dict__ == other.__dict__

    @classmethod
    def from_json(cls, fn, tolerance=5):
        """Not implemented."""
        raise NotImplementedError

    @classmethod
    def from_tsv(cls, fn, name=None, **kwargs):
        """Build an AmpliconSet from a tab-separated primer table.

        The file must have the columns Amplicon_name, Primer_name,
        Left_or_right, Sequence and Position. ``name`` defaults to ``fn``;
        remaining keyword arguments are forwarded to the constructor.

        Raises Exception when required columns are missing, including the
        case of an empty file (no header line).
        """
        amplicons = {}
        required_cols = {
            "Amplicon_name",
            "Primer_name",
            "Left_or_right",
            "Sequence",
            "Position",
        }
        n = 0
        # newline="" is what the csv module asks for on files it reads.
        with open(fn, newline="") as f:
            reader = csv.DictReader(f, delimiter="\t")
            # fieldnames is None for an empty file; treat that as "no columns"
            # so the informative error below is raised instead of a TypeError.
            missing_cols = required_cols.difference(reader.fieldnames or ())
            if missing_cols:
                missing_cols = ",".join(sorted(missing_cols))
                raise Exception(
                    f"Amplicon scheme TSV missing these columns: {missing_cols}. Got these columns: {reader.fieldnames}"
                )

            for d in reader:
                if d["Amplicon_name"] not in amplicons:
                    # amplicons are numbered in order of first appearance
                    amplicons[d["Amplicon_name"]] = Amplicon(
                        d["Amplicon_name"], shortname=n)
                    n += 1

                left = d["Left_or_right"].lower() == "left"
                # We assume that primer is always left+forward, or right+reverse
                forward = left
                pos = int(d["Position"])
                primer = Primer(d["Primer_name"], d["Sequence"], left, forward,
                                pos)
                amplicons[d["Amplicon_name"]].add(primer)

        name = fn if not name else name
        return cls(name, amplicons, **kwargs)

    def match(self, start, end):
        """Identify a template's amplicon(s) from its start and end positions.

        Returns a list with the one or two amplicons whose (tolerance-widened)
        interval contains both ``start`` and ``end``. Returns None when no
        amplicon contains both, or when some amplicon lies entirely inside
        ``start``..``end``.

        Raises Exception when more than two amplicons match.
        """

        # amplicons which contain the start and end
        hits = self.tree[start].intersection(self.tree[end])

        # amplicons contained by the start and end
        # this should never happen in tiled amplicons
        enveloped = self.tree.envelop(start, end)

        if enveloped:
            return None

        if not hits:
            return None
        if len(hits) > 2:
            # there should not be any more than 2 ambiguous matches under any
            # known primer set. The interval tree can confirm this at the time
            # the primer set is parsed
            raise Exception(
                f"{len(hits)} amplicons match interval {start}-{end}; "
                "at most 2 are expected")
        return [hit.data for hit in hits]

    def get_tags(self, read):
        """Placeholder; does nothing."""
        pass

    def set_tags(self, read):
        """Placeholder; does nothing."""
        pass

Example #9

Show file

File: Protein.py Project: leiqichn/dom2vec

class Protein:
    """
	We will represent a protein as its domains
	3 ways:
	1) protein can have overlapping domains
	2) protein has only no-overlapping domains
	3) protein has known length so gap domain can be added
	"""
    def __init__(self,
                 with_overlap,
                 with_redundant,
                 with_gap,
                 hit_line="",
                 proteins_id_len="",
                 interpro_local_format=False):
        """
        Protein Class init

        Parameters
        ----------
        with_overlap : bool
            keep overlapping domain annotations (True); otherwise hits are
            collected in the interval tree for later resolution (False)
        with_redundant : bool
            only consulted when with_overlap is False; selects which of the
            two interval-tree based outputs to_tabs() produces
        with_gap : bool
            add GAP domains (True) or not (False); when True the protein
            length must be available and positive
        hit_line : str
            first domain hit line of the protein; "" creates an empty protein
        proteins_id_len : file
            protein id / length file handle, read only for the protein2ipr
            format when with_gap is True
        interpro_local_format : bool
            hit_line comes from a local interproscan run (True), in which
            case the length is taken from the third tab-separated field;
            otherwise hit_line is in protein2ipr format (False)

        Returns
        -------
        None
        """
        self.with_overlap = with_overlap
        self.with_redundant = with_redundant
        self.with_gap = with_gap
        self.domain_interval_tree = IntervalTree()
        self.domains_with_gaps = []
        self.gap_min_size = 30
        self.length = 0
        self.interpro_exist_all_domains = []
        # Defined unconditionally so that a protein created without a hit
        # line can still receive domains through add_domain() later on.
        self.domains = {}
        self.uniprot_id = ""
        if hit_line == "":
            return

        if interpro_local_format:  # interpro local run format
            # get the interpro annotation of protein line based on:
            # https://github.com/ebi-pf-team/interproscan/wiki/OutputFormats
            fields = hit_line.split("\t")
            assert len(
                fields
            ) >= 11, "AssertionError: line: {} has less than 11 tabs.".format(
                hit_line)
            self.uniprot_id = self.get_uniprot_id(hit_line)
            self.add_domain(hit_line)
            if with_gap:
                self.length = int(fields[2])
        else:  # prot2ipr format
            assert isinstance(
                hit_line, str
            ), "AssertionError: Input of protein should be a String line."
            hit_line = hit_line.strip()
            self.uniprot_id = self.get_uniprot_id(hit_line)
            self.add_domain(hit_line)
            if with_gap:
                self.length = self.get_prot_length(proteins_id_len)

        if with_gap:
            # Report the protein id; the message used to print the length.
            assert self.length > 0, "AssertionError: protein with id {} has length <= 0.".format(
                self.uniprot_id)

    def get_prot_length(self, proteins_id_len):
        """
		Get protein length

		Parameters
		----------
		proteins_id_len : file
			protein id length file handle

		Returns
		-------
		prot_len : int
			protein length
		"""
        prot_len = -1
        prot_found = False
        try:
            while prot_found == False:
                prot_id_len = next(proteins_id_len)
                # print("current len:{}".format(prot_id_len))
                if prot_id_len.strip().split("\t")[0] == self.uniprot_id:
                    prot_len = int(prot_id_len.strip().split("\t")[1])
                    prot_found = True

        except (StopIteration):
            print("EOF")
        return prot_len

    @staticmethod
    def get_prot_id(hit_line):
        """
		Get protein id

		Parameters
		----------
		hit_line : str
			domain hit line

		Returns
		-------
		str
			protein id
		"""
        return hit_line.split("\t")[0]

    def get_uniprot_id(self, hit_line):
        """
		Get uniprot id

		Parameters
		----------
		hit_line : str
			domain hit line

		Returns
		-------
		str
			protein id
		"""
        return hit_line.split("\t")[0]

    def add_domain(self, hit_line):
        """
        Add domain in Protein object

        The hit goes to the overlapping store when self.with_overlap is
        truthy and to the interval tree otherwise, mirroring the choice
        to_tabs() makes when producing the output. (Previously a falsy but
        non-False with_overlap combined with a falsy with_redundant dropped
        the hit silently.)

        Parameters
        ----------
        hit_line : str
            domain hit line

        Returns
        -------
        None
        """
        if self.with_overlap:
            self.add_overlap(hit_line)
        else:
            self.add_no_overlap(hit_line)

    def add_overlap(self, hit_line):
        """
        Add domain hit in overlapping fashion

        The hit is stored in self.domains under a float key built from the
        concatenated start and end positions. Hits whose end position is not
        greater than their start position are ignored (their interpro-id
        flag is still recorded).

        Parameters
        ----------
        hit_line : str
            domain hit line
        Returns
        -------
        None
        """
        domain = Domain(hit_line)
        self.interpro_exist_all_domains.append(domain.interpro_id_exists)
        if not domain.end_pos > domain.start_pos:
            return

        # construct start_stop index
        # NOTE(review): concatenating the digits neither orders by start
        # position nor distinguishes e.g. (1, 234) from (12, 34) - confirm
        # whether callers rely on the float keys before changing this.
        base_key = float(str(domain.start_pos) + str(domain.end_pos))

        # allow for 100 domain annotations to have the same start and end:
        # try base, base + 0.01, ..., base + 0.99 and take the first free
        # key (the code used to try only base + 0.01, so a third identical
        # hit overwrote the second). If all 100 are taken, the last key is
        # overwritten.
        start_stop = base_key
        duplicate_no = 0
        while start_stop in self.domains and duplicate_no < 99:
            duplicate_no += 1
            start_stop = base_key + duplicate_no * 0.01
        self.domains[start_stop] = domain

    def add_no_overlap(self, hit_line):
        """
        Register a domain hit in the interval tree

        The hit's interpro-id flag is always recorded; the hit itself is
        inserted only when its end position is greater than its start.

        Parameters
        ----------
        hit_line : str
            domain hit

        Returns
        -------
        None
        """
        hit = Domain(hit_line)
        self.interpro_exist_all_domains.append(hit.interpro_id_exists)
        if not hit.end_pos > hit.start_pos:
            return
        self.domain_interval_tree.addi(hit.start_pos, hit.end_pos, hit)

    def to_tabs(self):
        """
		Convert saved domain hits for a protein to output tabular line

		Parameters
		----------

		Returns
		-------
		str
		"""
        if self.with_overlap:
            # print("Overlap")
            return self.to_tabs_overlap()
        elif self.with_redundant is False:
            # print("No overlap")
            return self.to_tabs_no_overlap()
        else:
            # print("No redundant")
            return self.to_tabs_no_redundant()

    def find_strong_no_overlap_domains(self, parent_domain, already_resolved):
        """
        Split the domains overlapping a parent into strong / not strong

        1) Domains enveloped by the parent always count as strong overlap.
        2) Every other overlapping, not yet resolved domain is strong when
           the overlap covers at least 0.8 of its own length, or when it
           carries the same interpro id as the parent.
           No strong overlap: |-----"--|-----"
           Strong overlap: |----"--"--|
        3) The rest is returned as not strongly overlapping.

        Parameters
        ----------
        parent_domain : IntervalTree node
            anchor domain to start overlapping search
        already_resolved : set of IntervalTree node
            domains that must not be considered again

        Returns
        -------
        strong_overlap_domains, no_strong_overlap_domains
        sets of strongly overlapping domains (resolved) and not strongly
        overlapping domains (not (yet) resolved)

        Raises
        ------
        AssertionError
            if a domain classified as strong is longer than the parent
        """
        tree = self.domain_interval_tree
        inside_parent = tree.envelop(parent_domain.begin, parent_domain.end)
        touching_parent = tree.overlap(parent_domain.begin, parent_domain.end)
        to_classify = touching_parent - inside_parent - already_resolved

        strong = set()
        not_strong = set()
        for other in list(to_classify):
            other_len = other.end - other.begin + 1
            if other.begin >= parent_domain.begin:
                # |---parent---|
                #          |---child---|
                shared = parent_domain.end - other.begin + 1
            else:
                #    |---parent---|
                # |---child---|
                shared = other.end - parent_domain.begin + 1

            # Strong by coverage, or (failing that) by sharing the parent's
            # interpro id, in which case the longer parent wins.
            if (float(shared) / other_len >= 0.8
                    or other.data.interpro_id == parent_domain.data.interpro_id):
                assert other.data.length <= parent_domain.data.length, "AssertionError: prot:{} candidate domain {} is longer than parent domain {}".format(
                    self.uniprot_id, other.data.evidence_db_id,
                    parent_domain.data.evidence_db_id)
                strong.add(other)
            else:
                not_strong.add(other)

        # enveloped domains are resolved together with the strong ones
        strong.update(inside_parent)
        return strong, not_strong

    def find_no_redundant_domains(self, parent_domain, already_resolved):
        """
        Split the domains overlapping a parent by interpro id

        Parameters
        ----------
        parent_domain : IntervalTree node
            anchor domain to start overlapping search
        already_resolved : set of IntervalTree node
            domains already resolved for redundancy; they are skipped
        Returns
        -------
        redundant_domains, no_redundant_domains
        sets of overlapping domains that share the parent's interpro id
        (resolved) and of those that do not (not resolved)
        """
        overlapping = self.domain_interval_tree.overlap(
            parent_domain.begin, parent_domain.end)
        pending = overlapping - already_resolved

        same_id = {
            other
            for other in pending
            if other.data.interpro_id == parent_domain.data.interpro_id
        }
        different_id = pending - same_id
        return same_id, different_id

    def find_no_redundant_max_len(self):
        """
		Find all domains that are not redundant (having unique interpro id) and are maximally long

		Parameters
		----------

		Returns
		-------
		list of IntervalTree.node
			list of IntervalTree nodes as the no redundant maximum length domains
		"""
        resolved = set()
        domains_no_redundant_max = []

        domains_len_srt = [domain for domain in self.domain_interval_tree]
        domains_len_srt.sort(key=lambda dom_node: dom_node.data.length,
                             reverse=True)

        for domain_node in domains_len_srt:
            if domain_node not in resolved:
                redundant_domains, no_redundant_domains = self.find_no_redundant_domains(
                    domain_node, resolved)
                domains_no_redundant_max.append(domain_node)
                resolved.update(redundant_domains)
        return domains_no_redundant_max

    def find_no_overlap_max_len(self):
        """
		Find all domains that are not overlapping and are maximally long

		Parameters
		----------

		Returns
		-------
		list of IntervalTree.node
			list of not overlapping maximum length domains
		"""
        resolved = set()
        domains_no_overlap_max = []

        domains_len_srt = [domain for domain in self.domain_interval_tree]
        domains_len_srt.sort(key=lambda dom_node: dom_node.data.length,
                             reverse=True)
        """
		Idea: After sorting the domains by length in descending order, then
		pick each domain and check for 
		envelopped domains -> resolved
		strong overlap domains -> resolved
		no strong overlap domains -> not resolved, the for loop will either add it as max no overlap or as resolved
		"""
        for domain_node in domains_len_srt:
            if domain_node not in resolved:
                strong_overlap_domains, strong_no_overlap_domains = self.find_strong_no_overlap_domains(
                    domain_node, resolved)
                domains_no_overlap_max.append(domain_node.data)

                resolved.update(strong_overlap_domains)
        return domains_no_overlap_max

    def construct_gap_hitline(self, gap_start, gap_stop):
        """
		Construct GAP domain tabular line

		Parameters
		----------
		gap_start : int
			GAP start position in protein amino sequence
		gap_stop : int
			GAP end position in protein amino sequence

		Returns
		-------
		str
			GAP domain tabular line
		"""
        return "\t".join([
            self.uniprot_id, "GAP", "gap", "gap_no_evid",
            str(gap_start),
            str(gap_stop)
        ])

    def add_gaps_no_redundant(self, domains_srt):
        """
		Add GAP domains in no redundant domain annotations

		Parameters
		----------
		domains_srt : list of Domain
			domains sorted per start/end position

		Returns
		-------
		None
		"""
        start_gap = 1
        previous_domain = None  # interval tree node
        is_first_domain = True
        for domain_interval in domains_srt:
            if is_first_domain:  # first domain
                if domain_interval.begin - start_gap + 1 > self.gap_min_size:  # add start GAP
                    assert domain_interval.begin > 1, "AssertionError: Start gap can be added if the very first domain is not starting at 1."
                    self.domains_with_gaps.append(
                        Domain(
                            self.construct_gap_hitline(
                                start_gap, domain_interval.begin - 1)))
                    start_gap = domain_interval.end + 1
                is_first_domain = False
            else:
                # check if the current domain and the previous are overlapping if yes then you can't add a gap
                # if no check the space between them
                overlap_domains = self.domain_interval_tree.overlap(
                    domain_interval.begin, domain_interval.end)
                no_redundant_overlap_domains = overlap_domains.intersection(
                    set(domains_srt))

                if previous_domain not in no_redundant_overlap_domains:  # not overlapping domains => check for space to add a GAP
                    if domain_interval.begin - start_gap + 1 > self.gap_min_size:  # add middle GAP
                        self.domains_with_gaps.append(
                            Domain(
                                self.construct_gap_hitline(
                                    start_gap, domain_interval.begin - 1)))
            # adding gap or no append current domain interval and update start_gap
            self.domains_with_gaps.append(domain_interval.data)
            start_gap = domain_interval.end + 1
            previous_domain = domain_interval

        # To check for end GAP, you should get the maximum end_pos of non redundant domain
        max_end_pos = max([dom.end for dom in domains_srt])
        max_end_pos = max_end_pos + 1
        if self.length - max_end_pos + 1 > self.gap_min_size:
            self.domains_with_gaps.append(
                Domain(self.construct_gap_hitline(start_gap, self.length)))

    def add_gaps(self, domains_srt):
        """
		Add gaps in domain annotations

		Parameters
		----------
		domains_srt : list of Domain
			domain sorted per start/end position

		Returns
		-------
		None
		"""
        start_gap = 1
        for domain in domains_srt:  # check for GAP in the start and middle of the protein
            # |--- --- protein --- ---|
            #     |--dom1--| |--dom2--|
            # |GAP|
            if domain.start_pos - start_gap + 1 > self.gap_min_size:
                self.domains_with_gaps.append(
                    Domain(
                        self.construct_gap_hitline(start_gap,
                                                   domain.start_pos)))
            start_gap = domain.end_pos + 1
            self.domains_with_gaps.append(domain)

        # check for gap in the end of the protein seq
        # |--- --- protein --- ---|
        # |--dom1--| |--dom2--|
        #                     |GAP|
        if self.length - domain.end_pos + 1 > self.gap_min_size:
            self.domains_with_gaps.append(
                Domain(
                    self.construct_gap_hitline(domain.end_pos + 1,
                                               self.length)))

    def to_tabs_no_redundant(self):
        """
		Convert tabular info for protein in no redundant domain annotations (tabular output as well)

		Parameters
		----------

		Returns
		-------
		str
			no redundant domain tabular output line
		"""
        # find no redundant domains with maximum length
        domains_no_redundant_max_len = self.find_no_redundant_max_len()
        # sort by start position
        domains_no_redundant_max_len.sort(key=lambda domain: domain.begin,
                                          reverse=False)
        if self.with_gap:
            self.add_gaps_no_redundant(domains_no_redundant_max_len)
            self.domains_with_gaps.sort(
                key=lambda domain: domain.start_pos,
                reverse=False)  # sort by start position
            domains_no_redundant = " ".join(
                [domain.interpro_id for domain in self.domains_with_gaps])
            domains_evidence_db_ids = " ".join(
                [domain.evidence_db_id for domain in self.domains_with_gaps])
        else:
            domains_no_redundant = " ".join([
                domain.data.interpro_id
                for domain in domains_no_redundant_max_len
            ])
            domains_evidence_db_ids = " ".join([
                domain.data.evidence_db_id
                for domain in domains_no_redundant_max_len
            ])

        return self.uniprot_id + "\t" + domains_no_redundant + "\t" + domains_evidence_db_ids + "\n"

    def to_tabs_no_overlap(self):
        """
		Convert tabular info for protein in no overlapping domain annotations (tabular output as well)

		Parameters
		----------

		Returns
		-------
		str
			no overlapping domain tabular output line
		"""
        # find non overlaping domains with maximum length
        domains_no_overlap_max_len = self.find_no_overlap_max_len()
        # sort by start position
        domains_no_overlap_max_len.sort(key=lambda domain: domain.start_pos,
                                        reverse=False)
        if self.with_gap:
            self.add_gaps(domains_no_overlap_max_len)
            domains_no_overlap = " ".join(
                [domain.interpro_id for domain in self.domains_with_gaps])
            domains_evidence_db_ids = " ".join(
                [domain.evidence_db_id for domain in self.domains_with_gaps])
        else:
            domains_no_overlap = " ".join(
                [domain.interpro_id for domain in domains_no_overlap_max_len])
            domains_evidence_db_ids = " ".join([
                domain.evidence_db_id for domain in domains_no_overlap_max_len
            ])
        return self.uniprot_id + "\t" + domains_no_overlap + "\t" + domains_evidence_db_ids + "\n"

    def to_tabs_overlap(self):
        """
		Convert tabular info for protein in overlapping domain annotations (tabular output as well)

		Parameters
		----------

		Returns
		-------
		str
			overlapping domain tabular output line
		"""
        # for gaps you shall give a list out of the sorted dictionary sorted(self.domains)
        if self.with_gap:
            self.add_gaps([
                self.domains[start_stop]
                for start_stop in sorted(self.domains.keys())
            ])
            domains_overlap = " ".join(
                [domain.interpro_id for domain in self.domains_with_gaps])
            domains_evid_db_ids = " ".join(
                [domain.evidence_db_id for domain in self.domains_with_gaps])
        else:
            domains_overlap = " ".join([
                self.domains[start_stop].interpro_id
                for start_stop in self.domains
            ])
            domains_evid_db_ids = " ".join([
                self.domains[start_stop].evidence_db_id
                for start_stop in self.domains
            ])
        return self.uniprot_id + "\t" + domains_overlap + "\t" + domains_evid_db_ids + "\n"