Example No. 1
 def _candidates(self, mz_list, query_mz, ms1_tol):
     # mz_list must already be sorted by precursor_mz for the slice below
     # to line up with the SortedList indices.
     pmz_list = SortedList([m.precursor_mz for m in mz_list])
     lower = query_mz - ms1_tol
     upper = query_mz + ms1_tol
     start = pmz_list.bisect_left(lower)  # keep exact matches at the lower bound
     end = pmz_list.bisect(upper)
     return mz_list[start:end]
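A minimal standalone sketch of the same windowed lookup, assuming toy records with a precursor_mz attribute (the Record type below is a hypothetical stand-in, not part of the original code):

from collections import namedtuple
from sortedcontainers import SortedList

Record = namedtuple("Record", "precursor_mz")  # hypothetical stand-in
records = sorted([Record(100.0), Record(100.5), Record(101.0), Record(102.0)],
                 key=lambda r: r.precursor_mz)
pmz = SortedList(r.precursor_mz for r in records)
lower, upper = 100.5 - 0.6, 100.5 + 0.6
# keeps every record with precursor_mz in [99.9, 101.1]: the first three
print(records[pmz.bisect_left(lower):pmz.bisect_right(upper)])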
Example No. 2
def test_bisect():
    slt = SortedList()
    assert slt.bisect(10) == 0
    # load= is the older sortedcontainers v1 constructor API; v2 uses
    # slt._reset(17) instead (compare Example No. 5 below).
    slt = SortedList(range(100), load=17)
    slt.update(range(100))
    slt._check()
    # bisect acts as bisect_right here: values 0..10 each appear twice -> 22
    assert slt.bisect(10) == 22
    assert slt.bisect(200) == 200
Example No. 3
def test_bisect():
    slt = SortedList()
    assert slt.bisect(0) == 0
    slt = SortedList(range(100), load=17)
    slt.update(range(100))
    slt._check()
    # this variant is evidently from a release where bisect behaved like
    # bisect_left: values 0..49 each appear twice -> 100
    assert slt.bisect(50) == 100
    assert slt.bisect(200) == 200
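The two tests above expect different indices for the same doubled data because bisect has aliased different cut points across sortedcontainers releases. A quick sketch of the difference on duplicated values:

from sortedcontainers import SortedList

slt = SortedList(range(100))
slt.update(range(100))       # every value 0..99 now appears twice
print(slt.bisect_left(50))   # 100: entries strictly below 50
print(slt.bisect_right(50))  # 102: entries at or below 50
print(slt.bisect(50))        # alias of bisect_right in current releases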
Example No. 4
 def _candidates(self, query_mz, ms1_tol):
     from sortedcontainers import SortedList
     # self.sorted_record_list must already be sorted by precursor_mz for
     # the slice below to line up with the SortedList indices.
     pmz_list = SortedList([m.precursor_mz for m in self.sorted_record_list])
     lower = query_mz - ms1_tol
     upper = query_mz + ms1_tol
     start = pmz_list.bisect_left(lower)  # keep exact matches at the lower bound
     end = pmz_list.bisect(upper)
     return self.sorted_record_list[start:end]
Example No. 5
def test_bisect():
    slt = SortedList()
    assert slt.bisect(10) == 0
    slt = SortedList(range(100))
    slt._reset(17)  # v2 replacement for the load= constructor argument
    slt.update(range(100))
    slt._check()
    assert slt.bisect(10) == 22
    assert slt.bisect(200) == 200
Example No. 6
    def goodTriplets(self, nums1: List[int], nums2: List[int]) -> int:
        A, B = nums1, nums2
        # Index of a (from A) in B.
        pos = [0] * len(A)
        for idx, b in enumerate(B):
            pos[b] = idx

        # Build pre_a[i]: number of elements on a[i]'s left in both A and B.
        # pos_in_b: sorted indexes (in B) of all the visited elements in A.
        pos_in_b, pre_a = SortedList([pos[A[0]]]), [0]
        for a in A[1:]:
            pos_in_b.add(pos[a])
            pre_a.append(pos_in_b.bisect_left(pos[a]))

        # Build suf_a[i]: number of elements on a[i]'s right in both A and B.
        pos_in_b, suf_a = SortedList([pos[A[-1]]]), [0]
        for a in reversed(A[:-1]):
            idx = pos_in_b.bisect(pos[a])
            suf_a.append(len(pos_in_b) - idx)
            pos_in_b.add(pos[a])
        suf_a.reverse()

        # Sum up all unique triplets centered on A[i].
        ans = 0
        for x, y in zip(pre_a, suf_a):
            ans += x * y
        return ans
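A quick sanity check, assuming the method above sits on a LeetCode-style Solution class (problem 2179):

print(Solution().goodTriplets([2, 0, 1, 3], [0, 1, 2, 3]))  # 1: only (0, 1, 3) keeps its order in both arrays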
Example No. 7
 def findFarmland(self, land: List[List[int]]) -> List[List[int]]:
     M, N = len(land), len(land[0])
     # sentinel "farm" at infinity; pending farms are kept sorted by start column
     pending = SortedList([[float("inf")] * 4], key=lambda farm: farm[1])
     res = []
     for i in range(M + 1):
         j = 0
         next_farm = pending[0]
         while j < N:
             if j == next_farm[1]:
                 if i != M and land[i][j] == 1:
                     j = next_farm[3] + 1
                     next_farm = pending[pending.bisect_left([None, j])]
                 else:
                     pending.remove(next_farm)
                     next_farm[2] = i - 1
                     res.append(next_farm)
                     next_farm = pending[pending.bisect(next_farm)]
             else:
                 if i != M and land[i][j] == 1:
                     new_farm = [i, j, None, None]
                     while j < N and land[i][j] == 1:
                         j += 1
                     j -= 1
                     new_farm[3] = j
                     pending.add(new_farm)
                 else:
                     pass
             j += 1
     return res
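A sanity check against LeetCode 1992's first sample, again assuming a Solution wrapper around the method above:

land = [[1, 0, 0],
        [0, 1, 1],
        [0, 1, 1]]
print(Solution().findFarmland(land))  # expected [[0, 0, 0, 0], [1, 1, 2, 2]]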
Example No. 8
def findValue(index1, index2):
    if index1 == index2:
        return 0.0
    # `dict` is a module-level mapping (the name shadows the builtin) from an
    # index to an iterable of (index, value) pairs.
    lst = SortedList(dict[index1])
    # (index2,) sorts before any (index2, value) pair, so bisect (bisect_right)
    # lands on the first entry for index2.
    index = lst.bisect((index2, ))
    return float(lst[index][1])
Example No. 9
 def createSortedArray(self, instructions: List[int]) -> int:
     """O(NlogN) / O(N)"""
     ans = 0
     sorted_insts = SortedList()
     for inst in instructions:  # O(N)
         l = sorted_insts.bisect_left(inst)  # O(logN)
         r = len(sorted_insts) - sorted_insts.bisect(inst)  # O(logN)
         ans += min(l, r)
         sorted_insts.add(inst)  # O(logN)
     return ans % (10**9 + 7)
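For reference, LeetCode 1649's first sample, assuming a Solution wrapper around the method above:

print(Solution().createSortedArray([1, 5, 6, 2]))  # 1: only inserting 2 costs min(1, 2) = 1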
Example No. 10
 def createSortedArray(self, instructions: List[int]) -> int:
     answer = 0
     from sortedcontainers import SortedList
     instructionsSorted = SortedList()
     for instruction in instructions:
         left = instructionsSorted.bisect_left(instruction)
         right = len(instructionsSorted) - instructionsSorted.bisect(instruction)
         answer += left if left < right else right
         instructionsSorted.add(instruction)
     return answer % (10**9 + 7)
Example No. 11
 def createSortedArray(self, instructions: List[int]) -> int:
     answer = 0
     instructionsSorted = SortedList()
     for instruction in instructions:
         left = instructionsSorted.bisect_left(instruction)
         right = len(instructionsSorted) - instructionsSorted.bisect(
             instruction)
         print(instructionsSorted, instruction, left, right)
         answer += left if left < right else right
         instructionsSorted.add(instruction)
     return answer % (10**9 + 7)
Example No. 12
    def countSmaller(self, nums: List[int]) -> List[int]:
        n = len(nums)
        res = [0] * n
        sl = SortedList([nums[-1]])

        for i in range(n - 2, -1, -1):
            # for integers, bisect_right(nums[i] - 1) == bisect_left(nums[i]):
            # the count of elements strictly smaller than nums[i]
            index = sl.bisect(nums[i] - 1)
            res[i] = index
            sl.add(nums[i])

        return res
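A quick check with LeetCode 315's sample, assuming a Solution wrapper around the method above:

print(Solution().countSmaller([5, 2, 6, 1]))  # expected [2, 1, 1, 0]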
Example No. 13
class MyCalendar:
    def __init__(self):
        # add -INF/INF sentinels at the head and tail so the leftmost and
        # rightmost boundaries need no special-casing
        self.cal = SortedList([(float('-inf'), float('-inf')),
                               (float('inf'), float('inf'))])

    def book(self, start: int, end: int) -> bool:
        interval = (start, end)
        # either bisect (i.e. bisect_right) or bisect_left works here
        i = self.cal.bisect(interval)

        if self.cal[i - 1][1] <= start and end <= self.cal[i][0]:
            self.cal.add(interval)
            return True

        return False
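A quick check against LeetCode 729's sample:

cal = MyCalendar()
print(cal.book(10, 20))  # True
print(cal.book(15, 25))  # False: overlaps [10, 20)
print(cal.book(20, 30))  # True: [20, 30) only touches [10, 20)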
Example No. 14
 def minInteger(self, num: str, k: int) -> str:
     # requires defaultdict and deque from collections, SortedList from
     # sortedcontainers
     digits = '0123456789'  # candidate digits, smallest first
     N = len(num)
     q = defaultdict(deque)

     for i in range(N):
         q[num[i]].append(i)
     
     res = list(num)
     seen = SortedList()
     for i in range(N):
         res[i] = num[i]
         for n in digits:
             if not q[n]:
                 continue
             
             # exact index considering all past swaps
             j = q[n][0] + len(seen) - seen.bisect(q[n][0])
             used = j - i  # how many of the k swaps this move will consume
             if 0 <= used <= k:
                 k -= used
                 seen.add(q[n][0])
                 res[i] = num[q[n].popleft()]
                 break
     return ''.join(res)
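A sanity check with LeetCode 1505's first sample, assuming a Solution wrapper and the collections/sortedcontainers imports noted above:

print(Solution().minInteger("4321", 4))  # expected "1342"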
Example No. 15
class DataPack(BasePack[Entry, Link, Group]):
    # pylint: disable=too-many-public-methods
    """
    A :class:`DataPack` contains a piece of natural language text and a
    collection of NLP entries (annotations, links, and groups). The natural
    language text could be a document, a paragraph, or any other granularity.

    Args:
        doc_id (str, optional): A universal id of this data pack.
    """
    def __init__(self, doc_id: Optional[str] = None):
        super().__init__()
        self._text = ""

        self.annotations: SortedList[Annotation] = SortedList()
        self.links: List[Link] = []
        self.groups: List[Group] = []
        self.generics: List[Generic] = []

        self.replace_back_operations: ReplaceOperationsType = []
        self.processed_original_spans: List[Tuple[Span, Span]] = []
        self.orig_text_len: int = 0

        self.index: DataIndex = DataIndex()
        self.meta: Meta = Meta(doc_id)

    def __getstate__(self):
        """
        In serialization, we:
        1) serialize the annotation sorted list as a normal list;
        2) do not serialize the indexes
        """
        state = super(DataPack, self).__getstate__()
        state['annotations'] = list(state['annotations'])
        state.pop('index')

        return state

    def __setstate__(self, state):
        """
        In deserialization, we
        1) transform the annotation list back to a sorted list;
        2) initialize the indexes.
        """
        super(DataPack, self).__setstate__(state)
        self.annotations = SortedList(self.annotations)
        self.index = DataIndex()
        self.index.update_basic_index(list(self.annotations))
        self.index.update_basic_index(self.links)
        self.index.update_basic_index(self.groups)

        for a in self.annotations:
            a.set_pack(self)

        for a in self.links:
            a.set_pack(self)

        for a in self.groups:
            a.set_pack(self)

        for a in self.generics:
            a.set_pack(self)

    # pylint: disable=no-self-use
    def validate(self, entry: EntryType) -> bool:
        return isinstance(entry, SinglePackEntries)

    @property
    def text(self):
        return self._text

    def get_span_text(self, span: Span):
        return self._text[span.begin:span.end]

    def set_text(
            self,
            text: str,
            replace_func: Optional[Callable[[str],
                                            ReplaceOperationsType]] = None):

        if len(self._text) > 0:
            logger.warning("The new text is overwriting the original one, "
                           "which might cause unexpected behavior.")

        span_ops = [] if replace_func is None else replace_func(text)

        # The spans should be mutually exclusive
        (self._text, self.replace_back_operations,
         self.processed_original_spans,
         self.orig_text_len) = io_utils.modify_text_and_track_ops(
             text, span_ops)

    def get_original_text(self):
        """Get original unmodified text from the :class:`DataPack` object.
        :return: Original text after applying the `replace_back_operations`
        of :class:`DataPack` object to the modified text
        """
        original_text, _, _, _ = io_utils.modify_text_and_track_ops(
            self._text, self.replace_back_operations)
        return original_text

    def get_original_span(self,
                          input_processed_span: Span,
                          align_mode: str = "relaxed"):
        """
        Function to obtain span of the original text that aligns with the
        given span of the processed text.

        Args:
            input_processed_span: Span of the processed text for which the
            corresponding span of the original text is desired
            align_mode: The strictness criteria for alignment in the ambiguous
            cases, that is, if a part of input_processed_span spans a part
            of the inserted span, then align_mode controls whether to use the
            span fully or ignore it completely according to the following
            possible values

            - "strict" - do not allow ambiguous input, give ValueError
            - "relaxed" - consider spans on both sides
            - "forward" - align looking forward, that is, ignore the span
            towards the left, but consider the span towards the right
            - "backward" - align looking backwards, that is, ignore the span
            towards the right, but consider the span towards the left

        Returns:
            Span of the original text that aligns with input_processed_span

        Example:
            * Let o-up1, o-up2, ... and m-up1, m-up2, ... denote the unprocessed
              spans of the original and modified string respectively. Note that
              each o-up would have a corresponding m-up of the same size.
            * Let o-pr1, o-pr2, ... and m-pr1, m-pr2, ... denote the processed
              spans of the original and modified string respectively. Note that
              each o-pr is modified to a corresponding m-pr that may be of a
              different size than o-pr.
            * Original string:
              <--o-up1--> <-o-pr1-> <----o-up2----> <----o-pr2----> <-o-up3->
            * Modified string:
              <--m-up1--> <----m-pr1----> <----m-up2----> <-m-pr2-> <-m-up3->
            * Note that `self.inverse_original_spans` that contains modified
              processed spans and their corresponding original spans, would look
              like - [(o-pr1, m-pr1), (o-pr2, m-pr2)]

            >>> data_pack = DataPack()
            >>> original_text = "He plays in the park"
            >>> data_pack.set_text(original_text,
            ...                    lambda _: [(Span(0, 2), "She")])
            >>> data_pack.text
            "She plays in the park"
            >>> input_processed_span = Span(0, len("She plays"))
            >>> orig_span = data_pack.get_original_span(input_processed_span)
            >>> data_pack.get_original_text()[orig_span.begin: orig_span.end]
            "He plays"
        """
        assert align_mode in ["relaxed", "strict", "backward", "forward"]

        req_begin = input_processed_span.begin
        req_end = input_processed_span.end

        def get_original_index(input_index: int, is_begin_index: bool,
                               mode: str) -> int:
            """
            Args:
                input_index: begin or end index of the input span
                is_begin_index: if the index is the begin index of the input
                span or the end index of the input span
                mode: alignment mode
            Returns:
                Original index that aligns with input_index
            """
            if len(self.processed_original_spans) == 0:
                return input_index

            len_processed_text = len(self._text)
            orig_index = None
            prev_end = 0
            for (inverse_span, original_span) in self.processed_original_spans:
                # check if the input_index lies between one of the unprocessed
                # spans
                if prev_end <= input_index < inverse_span.begin:
                    increment = original_span.begin - inverse_span.begin
                    orig_index = input_index + increment
                # check if the input_index lies between one of the processed
                # spans
                elif inverse_span.begin <= input_index < inverse_span.end:
                    # look backward - backward shift of input_index
                    if is_begin_index and mode in ["backward", "relaxed"]:
                        orig_index = original_span.begin
                    if not is_begin_index and mode == "backward":
                        orig_index = original_span.begin - 1

                    # look forward - forward shift of input_index
                    if is_begin_index and mode == "forward":
                        orig_index = original_span.end
                    if not is_begin_index and mode in ["forward", "relaxed"]:
                        orig_index = original_span.end - 1

                # break if the original index is populated
                if orig_index is not None:
                    break
                prev_end = inverse_span.end

            if orig_index is None:
                # check if the input_index lies between the last unprocessed
                # span
                inverse_span, original_span = self.processed_original_spans[-1]
                if inverse_span.end <= input_index < len_processed_text:
                    increment = original_span.end - inverse_span.end
                    orig_index = input_index + increment
                else:
                    # the input_index is invalid given the alignment mode,
                    # or it lies outside the processed string
                    raise ValueError(f"The input span either does not adhere "
                                     f"to the {align_mode} alignment mode or "
                                     f"lies outside the processed string.")
            return orig_index

        orig_begin = get_original_index(req_begin, True, align_mode)
        orig_end = get_original_index(req_end - 1, False, align_mode) + 1

        return Span(orig_begin, orig_end)

    def add_entry(self, entry: EntryType) -> EntryType:
        """
        Force add an :class:`~forte.data.ontology.top.Entry` object to
        the :class:`DataPack` object.
        Allow duplicate entries in a pack.

        Args:
            entry (Entry): An :class:`~forte.data.ontology.top.Entry`
                object to be added to the pack.

        Returns:
            The input entry itself
        """
        return self.__add_entry_with_check(entry, True)

    def add_or_get_entry(self, entry: EntryType) -> EntryType:
        """
        Try to add an :class:`~forte.data.ontology.top.Entry` object to
        the :class:`DataPack` object.
        If the same entry already exists, the existing entry will be returned
        instead of adding a new one. Note that two entries are regarded as the
        same if their :meth:`~forte.data.ontology.top.Entry.eq` have
        the same return value, and users could
        override :meth:`~forte.data.ontology.top.Entry.eq` in their
        custom entry classes.

        Args:
            entry (Entry): An :class:`~forte.data.ontology.top.Entry`
                object to be added to the pack.

        Returns:
            If the same entry already exists, returns the existing
            entry. Otherwise, returns the (input) entry just added.
        """
        return self.__add_entry_with_check(entry, False)

    def __add_entry_with_check(self,
                               entry: EntryType,
                               allow_duplicate: bool = True) -> EntryType:
        """
        Internal method to add an :class:`Entry` object to the
        :class:`DataPack` object.

        Args:
            entry (Entry): An :class:`Entry` object to be added to the datapack.
            allow_duplicate (bool): Whether we allow duplicate in the datapack.

        Returns:
            The input entry itself
        """
        if isinstance(entry, Annotation):
            target = self.annotations
        elif isinstance(entry, Link):
            target = self.links
        elif isinstance(entry, Group):
            target = self.groups
        else:
            target = self.generics
            # raise ValueError(
            #     f"Invalid entry type {type(entry)}. A valid entry "
            #     f"should be an instance of Annotation, Link, or Group."
            # )

        add_new = allow_duplicate or (entry not in target)

        if add_new:
            self.record_entry(entry)

            if isinstance(target, list):
                target.append(entry)
            else:
                # For the sorted list case.
                target.add(entry)

            # update the data pack index if needed
            self.index.update_basic_index([entry])
            if self.index.link_index_on and isinstance(entry, Link):
                self.index.update_link_index([entry])
            if self.index.group_index_on and isinstance(entry, Group):
                self.index.update_group_index([entry])
            self.index.deactivate_coverage_index()
            return entry
        else:
            return target[target.index(entry)]

    def delete_entry(self, entry: EntryType):
        """
        Delete an :class:`~forte.data.ontology.top.Entry` object from
        the :class:`DataPack`.

        Args:
            entry (Entry): An :class:`~forte.data.ontology.top.Entry`
                object to be deleted from the pack.

        """
        begin = 0

        if isinstance(entry, Annotation):
            target = self.annotations
            begin = target.bisect_left(entry)
        elif isinstance(entry, Link):
            target = self.links
        elif isinstance(entry, Group):
            target = self.groups
        else:
            raise ValueError(
                f"Invalid entry type {type(entry)}. A valid entry "
                f"should be an instance of Annotation, Link, or Group.")

        for i, e in enumerate(target[begin:]):
            if e.tid == entry.tid:
                target.pop(i + begin)
                break

        # update basic index
        self.index.remove_entry(entry)

        # set other index invalid
        self.index.turn_link_index_switch(on=False)
        self.index.turn_group_index_switch(on=False)
        self.index.deactivate_coverage_index()

    @classmethod
    def validate_link(cls, entry: EntryType) -> bool:
        return isinstance(entry, Link)

    @classmethod
    def validate_group(cls, entry: EntryType) -> bool:
        return isinstance(entry, Group)

    def get_data(self,
                 context_type: Type[Annotation],
                 request: Optional[DataRequest] = None,
                 skip_k: int = 0) -> Iterator[Dict[str, Any]]:
        """
        Example:

            .. code-block:: python

                requests = {
                    base_ontology.Sentence:
                        {
                            "component": ["dummy"],
                            "fields": ["speaker"],
                        },
                    base_ontology.Token: ["pos", "sense"],
                    base_ontology.EntityMention: {
                        "unit": "Token",
                    },
                }
                pack.get_data(base_ontology.Sentence, requests)

        Args:
            context_type (type): The granularity of the data context, which
                could be any ``Annotation`` type.
            request (dict): The entry types and fields required.
                The keys of the requests dict are the required entry types
                and the value should be either:

                - a list of field names or
                - a dict which accepts three keys: `"fields"`, `"component"`,
                  and `"unit"`.

                    - By setting `"fields"` (list), users
                      specify the requested fields of the entry. If "fields"
                      is not specified, only the default fields will be
                      returned.
                    - By setting `"component"` (list), users
                      can specify the components by which the entries are
                      generated. If `"component"` is not specified, will return
                      entries generated by all components.
                    - By setting `"unit"` (string), users can
                      specify a unit by which the annotations are indexed.

                Note that for all annotation types, `"text"` and `"span"`
                fields are returned by default; for all link types, `"child"`
                and `"parent"` fields are returned by default.
            skip_k (int): Will skip the first `skip_k` instances and generate
                data from the (`skip_k` + 1)-th instance.

        Returns:
            A data generator, which generates one piece of data (a dict
            containing the required entries, fields, and context).
        """
        annotation_types: Dict[Type[Annotation], Union[Dict, List]] = dict()
        link_types: Dict[Type[Link], Union[Dict, List]] = dict()
        group_types: Dict[Type[Group], Union[Dict, List]] = dict()
        if request is not None:
            for key, value in request.items():
                if issubclass(key, Annotation):
                    annotation_types[key] = value
                elif issubclass(key, Link):
                    link_types[key] = value
                elif issubclass(key, Group):
                    group_types[key] = value

        context_args = annotation_types.get(context_type)

        context_components, _, context_fields = self._parse_request_args(
            context_type, context_args)

        valid_context_ids: Set[int] = self.get_ids_by_type(context_type)
        if context_components:
            valid_component_id: Set[int] = set()
            for component in context_components:
                valid_component_id |= self.get_ids_by_component(component)
            valid_context_ids &= valid_component_id

        skipped = 0
        # must iterate through a copy here because self.annotations is changing
        for context in list(self.annotations):
            if (context.tid not in valid_context_ids
                    or not isinstance(context, context_type)):
                continue
            if skipped < skip_k:
                skipped += 1
                continue

            data = dict()
            data["context"] = self.text[context.span.begin:context.span.end]
            data["offset"] = context.span.begin

            for field in context_fields:
                data[field] = getattr(context, field)

            if annotation_types:
                for a_type, a_args in annotation_types.items():
                    if issubclass(a_type, context_type):
                        continue
                    if a_type.__name__ in data.keys():
                        raise KeyError(
                            f"Requesting two types of entries with the "
                            f"same class name {a_type.__name__} at the "
                            f"same time is not allowed")
                    data[a_type.__name__] = \
                        self._generate_annotation_entry_data(
                            a_type, a_args, data, context
                        )
            if link_types:
                for l_type, l_args in link_types.items():
                    if l_type.__name__ in data.keys():
                        raise KeyError(
                            f"Requesting two types of entries with the "
                            f"same class name {l_type.__name__} at the "
                            f"same time is not allowed")
                    data[l_type.__name__] = self._generate_link_entry_data(
                        l_type, l_args, data, context)

            if group_types:
                # pylint: disable=unused-variable
                for g_type, g_args in group_types.items():
                    pass

            yield data

    def _parse_request_args(self, a_type, a_args):
        # request which fields generated by which component
        components = None
        unit = None
        fields = set()
        if isinstance(a_args, dict):
            components = a_args.get("component")
            if components is not None and not isinstance(components, Iterable):
                raise TypeError(
                    f"Invalid request format for 'components'. "
                    f"The value of 'components' should be of an iterable type."
                )
            unit = a_args.get("unit")
            if unit is not None and not isinstance(unit, str):
                raise TypeError(f"Invalid request format for 'unit'. "
                                f"The value of 'unit' should be a string.")
            a_args = a_args.get("fields", set())

        if isinstance(a_args, Iterable):
            fields = set(a_args)
        elif a_args is not None:
            raise TypeError(
                f"Invalid request format for '{a_type}'. "
                f"The request should be of an iterable type or a dict.")

        # # check the existence of fields
        #
        # self.field_records
        #
        # for meta_key, meta_val in self.internal_metas.items():
        #     if issubclass(meta_key, a_type):
        #         for meta_c, meta_f in meta_val.fields_created.items():
        #             if components is None or meta_c in components:
        #                 if not fields.issubset(meta_f):
        #                     raise KeyError(
        #                         f"The {a_type} generated by {meta_c} doesn't "
        #                         f"have the fields requested.")

        fields.add("tid")
        return components, unit, fields

    def _generate_annotation_entry_data(self, a_type: Type[Annotation],
                                        a_args: Union[Dict,
                                                      Iterable], data: Dict,
                                        cont: Optional[Annotation]) -> Dict:

        components, unit, fields = self._parse_request_args(a_type, a_args)

        a_dict: Dict[str, Any] = dict()

        a_dict["span"] = []
        a_dict["text"] = []
        for field in fields:
            a_dict[field] = []

        unit_begin = 0
        if unit is not None:
            if unit not in data.keys():
                raise KeyError(f"{unit} is missing in data. You need to "
                               f"request {unit} before {a_type}.")
            a_dict["unit_span"] = []

        cont_begin = cont.span.begin if cont else 0
        annotations: List[Annotation] = self.get_entries(  # type: ignore
            a_type, cont, components)

        for annotation in annotations:
            # we provide span, text (and also tid) by default
            a_dict["span"].append((annotation.span.begin, annotation.span.end))
            a_dict["text"].append(annotation.text)

            for field in fields:
                if field in ("span", "text"):
                    continue
                if field == "context_span":
                    a_dict[field].append((annotation.span.begin - cont_begin,
                                          annotation.span.end - cont_begin))
                    continue

                a_dict[field].append(getattr(annotation, field))

            if unit is not None:
                while not self.index.in_span(data[unit]["tid"][unit_begin],
                                             annotation.span):
                    unit_begin += 1

                unit_span_begin = unit_begin
                unit_span_end = unit_span_begin + 1

                while self.index.in_span(data[unit]["tid"][unit_span_end],
                                         annotation.span):
                    unit_span_end += 1

                a_dict["unit_span"].append((unit_span_begin, unit_span_end))

        for key, value in a_dict.items():
            a_dict[key] = np.array(value)

        return a_dict

    def _generate_link_entry_data(self, a_type: Type[Link],
                                  a_args: Union[Dict, Iterable], data: Dict,
                                  cont: Optional[Annotation]) -> Dict:

        components, unit, fields = self._parse_request_args(a_type, a_args)

        if unit is not None:
            raise ValueError(f"Link entries cannot be indexed by {unit}.")

        a_dict: Dict[str, Any] = dict()
        for field in fields:
            a_dict[field] = []
        a_dict["parent"] = []
        a_dict["child"] = []

        links: List[Link] = self.get(a_type, cont, components)  # type: ignore

        for link in links:
            parent_type = link.ParentType.__name__
            child_type = link.ChildType.__name__

            if parent_type not in data.keys():
                raise KeyError(
                    f"The Parent entry of {a_type} is not requested."
                    f" You should also request {parent_type} with "
                    f"{a_type}")
            if child_type not in data.keys():
                raise KeyError(f"The child entry of {a_type} is not requested."
                               f" You should also request {child_type} with "
                               f"{a_type}")

            a_dict["parent"].append(
                np.where(data[parent_type]["tid"] == link.parent)[0][0])
            a_dict["child"].append(
                np.where(data[child_type]["tid"] == link.child)[0][0])

            for field in fields:
                if field in ("parent", "child"):
                    continue

                a_dict[field].append(getattr(link, field))

        for key, value in a_dict.items():
            a_dict[key] = np.array(value)
        return a_dict

    def get_entries(
        self,
        entry_type: Type[EntryType],
        range_annotation: Optional[Annotation] = None,
        components: Optional[Union[str,
                                   List[str]]] = None) -> Iterable[EntryType]:
        """
        Get ``entry_type`` entries from the span of ``range_annotation`` in a
        DataPack.

        Args:
            entry_type (type): The type of entries requested.
            range_annotation (Annotation, optional): The range of entries
                requested. If `None`, will return valid entries in the range of
                whole data_pack.
            components (str or list, optional): The component generating the
                entries requested. If `None`, will return valid entries
                generated by any component.
        """

        range_begin = range_annotation.span.begin if range_annotation else 0
        range_end = (range_annotation.span.end
                     if range_annotation else self.annotations[-1].span.end)

        # valid type
        valid_id = self.get_ids_by_type(entry_type)
        # valid component
        if components is not None:
            if isinstance(components, str):
                components = [components]
            valid_component_id: Set[int] = set()
            for component in components:
                valid_component_id |= self.get_ids_by_component(component)
            valid_id &= valid_component_id
        # valid span
        if range_annotation is not None:
            coverage_index = self.index.coverage_index(type(range_annotation),
                                                       entry_type)
            if coverage_index is not None:
                valid_id &= coverage_index[range_annotation.tid]

        if issubclass(entry_type, Annotation):
            begin_index = self.annotations.bisect(
                Annotation(self, range_begin, range_begin))
            end_index = self.annotations.bisect(
                Annotation(self, range_end, range_end))
            for annotation in self.annotations[begin_index:end_index]:
                if annotation.tid not in valid_id:
                    continue
                if (range_annotation is None or self.index.in_span(
                        annotation, range_annotation.span)):
                    yield annotation

        elif issubclass(entry_type, (Link, Group)):
            for entry_id in valid_id:
                entry: EntryType = self.get_entry(entry_id)  # type: ignore
                if (range_annotation is None
                        or self.index.in_span(entry, range_annotation.span)):
                    yield entry

    def get(self,
            entry_type: Type[EntryType],
            range_annotation: Optional[Annotation] = None,
            component: Optional[str] = None) -> Iterable[EntryType]:
        return self.get_entries(entry_type, range_annotation, component)
Example No. 16
class BarGridKernel(Kernel):
    '''
    Store a kernel of n dimensions as a list of bars in the space of dimension
    (n-1). A bar is given by its start and end coordinates, and corresponds
    to the hull of the viable points in the last dimension for each coordinate
    in the space of dimension (n-1). The order of the dimensions may have been
    changed, and the last dimension of these data may not correspond to the
    last dimension of the viability problem. Therefore the attribute
    ``permutation`` gives a matrix describing the permutation of the
    dimensions.

    def __init__(self, originCoords, oppositeCoords, intervalNumberperaxis,
                 permutation=None, kernelMinPoint=None, kernelMaxPoint=None,
                 data=None, metadata=None):
        # avoid the mutable-default-argument pitfall for data/metadata
        super(BarGridKernel, self).__init__({} if metadata is None else metadata)
        self.originCoords = np.array(originCoords, float)
        self.oppositeCoords = np.array(oppositeCoords, float)
        self.intervalNumberperaxis = np.array(intervalNumberperaxis, int)
        self.bars = SortedList([] if data is None else data)
        if permutation is None:
            self.permutation = np.eye(len(originCoords),dtype = int)
        else:
            self.permutation = permutation
        if kernelMinPoint is None:
            self.kernelMinPoint = list(intervalNumberperaxis)
        else:
            self.kernelMinPoint = kernelMinPoint
        if kernelMaxPoint is None:
            self.kernelMaxPoint = [-1] * len(originCoords)
        else:
            self.kernelMaxPoint = kernelMaxPoint
        self.metadata.update(self.getDataAttributes())

    @staticmethod
    def getFormatCode():
        return "bars"

    def getDataAttributes(self):
        da = super(BarGridKernel, self).getDataAttributes()
        da['origin'] = self.originCoords
        da['opposite'] = self.oppositeCoords
        da['intervals'] = self.intervalNumberperaxis
        da['permutation']= self.permutation
        da['maxPoint']= self.kernelMaxPoint
        da['minPoint']= self.kernelMinPoint

        return da

    @classmethod
    def initFromHDF5(cls, metadata, attrs, data):
        '''
        Create an object of class BarGridKernel from attributes and data loaded
        from an HDF5 file. This method is intended to be used by the method
        hdf5common.readKernel
        '''
        return cls(
            originCoords=attrs['origin'],
            oppositeCoords=attrs['opposite'],
            intervalNumberperaxis=attrs['intervals'],
            permutation=attrs['permutation'],
            kernelMinPoint=attrs['minPoint'],
            kernelMaxPoint=attrs['maxPoint'],
            data=data.tolist(),
            metadata=metadata
        )

    def getData(self):
        return np.array(list(self.bars), dtype='int64')

    def getIntervalSizes(self):
        '''
        Give the size of a grid cell (the step) along each axis
        '''
        return list((self.oppositeCoords - self.originCoords)
                    / self.intervalNumberperaxis)

    def getMinFrameworkBounds(self):
        return list(self.originCoords-np.array(self.getIntervalSizes())/2)

    def getMaxFrameworkBounds(self):
        return list(self.oppositeCoords+np.array(self.getIntervalSizes())/2)

    def getMinBounds(self):
        '''
        Give the coordinates of the point of the vino with minimal coordinates
        '''
        intervalSizes = np.array(self.getIntervalSizes())
        permutOriginCoords = np.dot(self.permutation, self.originCoords)
        permutOppositeCoords = np.dot(self.permutation, self.oppositeCoords)
        permutIntervalNumberperaxis = np.dot(self.permutation, self.intervalNumberperaxis)
        minbounds = list(np.dot(np.transpose(self.permutation),permutOriginCoords+(permutOppositeCoords-permutOriginCoords)*self.kernelMinPoint/permutIntervalNumberperaxis))
        minbounds = minbounds - intervalSizes/2

        return minbounds

    def getMaxBounds(self):
        '''
        Give the coordinates of the point of the vino with maximal coordinates
        '''
        intervalSizes = np.array(self.getIntervalSizes())
        permutOriginCoords = np.dot(self.permutation, self.originCoords)
        permutOppositeCoords = np.dot(self.permutation, self.oppositeCoords)
        permutIntervalNumberperaxis = np.dot(self.permutation, self.intervalNumberperaxis)
        maxbounds = list(np.dot(np.transpose(self.permutation),permutOriginCoords+(permutOppositeCoords-permutOriginCoords)*self.kernelMaxPoint/permutIntervalNumberperaxis))
        maxbounds = maxbounds + intervalSizes/2
        return maxbounds

    def getDataToPlot(self):
        data = []

        permutOriginCoords = np.dot(self.permutation, self.originCoords)
        permutOppositeCoords = np.dot(self.permutation, self.oppositeCoords)
        permutIntervalNumberperaxis = np.dot(self.permutation, self.intervalNumberperaxis)

        for i in range(len(self.bars)):
           data.append([
               permutOriginCoords+(permutOppositeCoords-permutOriginCoords)*np.array(self.bars[i][:-1])/permutIntervalNumberperaxis,
               permutOriginCoords[-1]+(permutOppositeCoords[-1]-permutOriginCoords[-1])*self.bars[i][-1]/permutIntervalNumberperaxis[-1]
           ])

        perm = np.dot(self.permutation,np.arange(len(self.originCoords)))
        data = [self.getMinFrameworkBounds()+self.getMaxFrameworkBounds()+self.getIntervalSizes()+[perm], data]

        return data

    def getTotalPointNumber(self):
        return sum([elt[-1] - elt[-2] + 1 for elt in self.bars])

    def toRegularGridKernel(self):
        '''
        Convert the kernel to the regular grid representation.
        Returns an instance of RegularGridKernel.
        The returned grid is trimmed so as not to include empty portions of
        the grid.
        '''
        minPoint = np.array(self.kernelMinPoint)
        maxPoint = np.array(self.kernelMaxPoint)
        dimensionsExtents = maxPoint - minPoint + 1
        grid = RegularGridKernel(self.originCoords, self.intervalNumberperaxis,
                               dimensionsExtents, metadata=self.metadata)

        for bar in self.bars:
            barPosition = (bar[:-2]-minPoint[:-1]).tolist()
            grid.grid[tuple(barPosition)].put(list(range(bar[-2], bar[-1] + 1)), True)

        return grid

    def intersectionwithBarGridKernel(self,othergrid):
        '''
        Returns an instance of BarGridKernel which is the intersection of two BarGridKernels
        with the same underlying grid characteristics
        '''
        data = []
        grid = BarGridKernel(self.originCoords,self.oppositeCoords,self.intervalNumberperaxis,self.permutation,None,None,data,self.metadata)
        barsindex = 0
        otherbarsindex = 0
        while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)):
            actualbarposition = self.bars[barsindex][:-2]
#            print("actualbarposition[0] ::%d " %actualbarposition[0])

            while (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] < self.bars[barsindex][:-2]):
                otherbarsindex = otherbarsindex + 1
            while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] == self.bars[barsindex][:-2]):
                if othergrid.bars[otherbarsindex][-1] < self.bars[barsindex][-2]:
                    otherbarsindex = otherbarsindex + 1
                elif othergrid.bars[otherbarsindex][-2] > self.bars[barsindex][-1]:
                    barsindex = barsindex + 1
                elif othergrid.bars[otherbarsindex][-1] > self.bars[barsindex][-1]:
                    grid.addBar(self.bars[barsindex][:-2], max(othergrid.bars[otherbarsindex][-2], self.bars[barsindex][-2]), self.bars[barsindex][-1])
                    barsindex = barsindex + 1
                elif othergrid.bars[otherbarsindex][-1] < self.bars[barsindex][-1]:
                    grid.addBar(self.bars[barsindex][:-2], max(othergrid.bars[otherbarsindex][-2], self.bars[barsindex][-2]), othergrid.bars[otherbarsindex][-1])
                    otherbarsindex = otherbarsindex + 1
                else:
                    grid.addBar(self.bars[barsindex][:-2], max(othergrid.bars[otherbarsindex][-2], self.bars[barsindex][-2]), othergrid.bars[otherbarsindex][-1])
                    otherbarsindex = otherbarsindex + 1
                    barsindex = barsindex + 1

            while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] > self.bars[barsindex][:-2]):
                barsindex = barsindex + 1

        return grid

    def MinusBarGridKernel(self, othergrid):
        '''
        Returns an instance of BarGridKernel containing the elements of this
        BarGridKernel that are not in the other one. Both BarGridKernels must
        have the same underlying grid characteristics.
        '''
        data = []
        grid = BarGridKernel(self.originCoords,self.oppositeCoords,self.intervalNumberperaxis,self.permutation,None,None,data,self.metadata)
        barsindex = 0
        otherbarsindex = 0
        while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)):
            while (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] < self.bars[barsindex][:-2]):
                otherbarsindex = otherbarsindex + 1
            alreadycut = False
            while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] == self.bars[barsindex][:-2]):
                if othergrid.bars[otherbarsindex][-1] < self.bars[barsindex][-2]:
                    otherbarsindex = otherbarsindex + 1
                elif othergrid.bars[otherbarsindex][-1] >= self.bars[barsindex][-1]:
                    if othergrid.bars[otherbarsindex][-2] > self.bars[barsindex][-2]:
                        if alreadycut:
                            grid.addBar(self.bars[barsindex][:-2], remember, othergrid.bars[otherbarsindex][-2]-1)
                        else :
                            grid.addBar(self.bars[barsindex][:-2], self.bars[barsindex][-2], othergrid.bars[otherbarsindex][-2]-1)
                    barsindex = barsindex + 1
                    alreadycut = False
                else :
                    if othergrid.bars[otherbarsindex][-2] > self.bars[barsindex][-2]:
                        if alreadycut:
                            grid.addBar(self.bars[barsindex][:-2], remember, othergrid.bars[otherbarsindex][-2]-1)
                        else :
                            grid.addBar(self.bars[barsindex][:-2], self.bars[barsindex][-2], othergrid.bars[otherbarsindex][-2]-1)
                    alreadycut = True
                    remember = othergrid.bars[otherbarsindex][-1]+1
                    otherbarsindex = otherbarsindex + 1

            while (barsindex < len(self.bars)) and (otherbarsindex < len(othergrid.bars)) and (othergrid.bars[otherbarsindex][:-2] > self.bars[barsindex][:-2]):
                    if alreadycut:
                        grid.addBar(self.bars[barsindex][:-2], remember, self.bars[barsindex][-1])
                        barsindex = barsindex + 1
                        alreadycut = False
                    else :
                        grid.addBar(self.bars[barsindex][:-2], self.bars[barsindex][-2], self.bars[barsindex][-1])
                        barsindex = barsindex + 1
        if (otherbarsindex >= len(othergrid.bars)):
            while (barsindex < len(self.bars)):
                    if alreadycut:
                        grid.addBar(self.bars[barsindex][:-2], remember, self.bars[barsindex][-1])
                        barsindex = barsindex + 1
                        alreadycut = False
                    else :
                        grid.addBar(self.bars[barsindex][:-2], self.bars[barsindex][-2], self.bars[barsindex][-1])
                        barsindex = barsindex + 1

        return grid


    def toBarGridKernel(self, newOriginCoords, newOppositeCoords, newIntervalNumberperaxis):
        '''
        Convert a BarGridKernel to another BarGridKernel with another underlying grid.
        Returns an instance of BarGridKernel.
        '''
        dimension = len(self.originCoords)
        actualbarposition = np.zeros(dimension-1,int)
        barsindex = 0
        # converting to numpy arrays
        newOriginCoords = np.array(newOriginCoords,float)
        newIntervalNumberperaxis = np.array(newIntervalNumberperaxis,float)
        permutnewOriginCoords = np.dot(self.permutation, newOriginCoords)
        # permuting coordinates
        permutnewIntervalNumberperaxis = np.dot(self.permutation, newIntervalNumberperaxis)
        permutnewpas = np.dot(self.permutation,(np.array(newOppositeCoords,float) - newOriginCoords) / newIntervalNumberperaxis)
        permutOriginCoords = np.dot(self.permutation, self.originCoords)
        permutinversepas = np.dot(self.permutation, self.intervalNumberperaxis / (self.oppositeCoords - self.originCoords))
        data = []
        grid = BarGridKernel(newOriginCoords,newOppositeCoords,newIntervalNumberperaxis,self.permutation,None,None,data,self.metadata)
#        oups = 0
#        while (oups < 1) :
#            oups = 1
        while(actualbarposition[0]<permutnewIntervalNumberperaxis[0]+1):
            realpoint = permutnewOriginCoords[:-1] + actualbarposition * permutnewpas[:-1]
            intpoint = (realpoint-permutOriginCoords[:-1]) * permutinversepas[:-1]
            intpoint = [int(e+0.5) for e in intpoint]
            while (barsindex < len(self.bars)) and (self.bars[barsindex][:-2] < intpoint):
                barsindex = barsindex+1
            barinprocess = False
#            print intpoint
            while (barsindex < len(self.bars)) and (self.bars[barsindex][:-2] == intpoint):
                inf = self.bars[barsindex][-2]
                realinf = inf/permutinversepas[-1] +permutOriginCoords[-1]
                intinf = int((realinf-permutnewOriginCoords[-1])/permutnewpas[-1]+0.5)
                sup = self.bars[barsindex][-1]
                realsup = sup/permutinversepas[-1] +permutOriginCoords[-1]
                intsup = int((realsup-permutnewOriginCoords[-1])/permutnewpas[-1]+0.5)
#                print realinf
#                print realsup
#                print intinf
#                print intsup

                if (intinf<permutnewIntervalNumberperaxis[-1]+1) or (intsup >=0):
                    if barinprocess == True :
                        if intinf == grid.bars[-1][-1]:
                            grid.bars[-1][-1] = min(intsup,permutnewIntervalNumberperaxis[-1]+1)
                        else :
                            grid.addBar(actualbarposition.tolist(), max(intinf,0), min(intsup,permutnewIntervalNumberperaxis[-1]+1))
                    else :
                        grid.addBar(actualbarposition.tolist(), max(intinf,0), min(intsup,permutnewIntervalNumberperaxis[-1]+1))
                        barinprocess = True
                barsindex = barsindex+1
            for i in range(dimension-1):
                if ((i == dimension - 2) or (actualbarposition[dimension-2-i]<permutnewIntervalNumberperaxis[dimension-2-i])):
                    actualbarposition[dimension-2-i] = actualbarposition[dimension-2-i]+1
                    break
                else :
                    actualbarposition[dimension-2-i] = 0
        return grid


    def addBar(self, coords, inf, sup):
        # First, we collect the bars already present at the position 'coords'
        # and merge the bar to add with the existing ones.
        # Two bars are merged if they at least touch; "touch" means that the
        # lower bound of one bar equals the (upper bound of the other one) + 1.
        insertion_point = self.bars.bisect(coords)
        merged = False
        mergedBarsToRemove = []
        rightExpanded = False
        while insertion_point < len(self.bars) and self.bars[insertion_point][:-2] == coords:
            if inf > self.bars[insertion_point][-1] + 1:
                # the new bar doesn't touch the right side of the current one
                # (upper bound + 1 would still count as touching)
                insertion_point += 1
                continue
            if rightExpanded:
                # a previous bar has been modified, we check that it doesn't cross or touch the current bar
                if self.bars[insertion_point][-2] <= self.bars[insertion_point-1][-1] + 1:
                    # the previous bar now intersects the lower bound of the current one
                    # so let's merge the two bars
                    self.bars[insertion_point][-2] = self.bars[insertion_point-1][-2]
                    if self.bars[insertion_point][-2] <= self.bars[insertion_point-1][-2]:
                        # the previous bar completly overlaps the current one
                        self.bars[insertion_point][-2] = self.bars[insertion_point-1][-2]
                        mergedBarsToRemove.append(insertion_point-1)
                        rightExpanded = True
            elif inf >= self.bars[insertion_point][-2] and inf <= self.bars[insertion_point][-1] + 1:
                # the lower bound of the inserted bar is inside the current one
                merged = True
                if sup > self.bars[insertion_point][-1]:
                    # the upper bound is outside the current bar, so we update the upper bound
                    self.bars[insertion_point][-1] = sup
                    rightExpanded = True
            elif inf < self.bars[insertion_point][-2]:
                # the lower bound of the inserted bar is before the current one
                if sup >= self.bars[insertion_point][-2] - 1:
                    # the inserted bar crosses or touches the current bar, so we update the lower bound
                    self.bars[insertion_point][-2] = inf
                    merged = True
                    if sup > self.bars[insertion_point][-1]:
                        # the inserted bound is globally bigger than the current one, so we update also the upper bound
                        self.bars[insertion_point][-1] = sup
                        rightExpanded = True
            insertion_point += 1
        for index in reversed(mergedBarsToRemove):
            del self.bars[index]
        if not merged:
            self.bars.add(coords[:] + [inf,sup])
        self.kernelMinPoint[:-1] = [min(x) for x in zip(self.kernelMinPoint[:-1],coords)]
        self.kernelMinPoint[-1] = min(self.kernelMinPoint[-1], inf)
        self.kernelMaxPoint[:-1] = [max(x) for x in zip(self.kernelMaxPoint[:-1],coords)]
        self.kernelMaxPoint[-1] = max(self.kernelMaxPoint[-1], sup)

    def getBars(self):
        return self.bars

    def isInSet(self, point):
        '''
        Returns whether the point belongs to the BarGridKernel.
        This method finds the cell in which to look up a bar containing the
        point; if no such bar exists, the point is not considered in the set.
        Technical details:
        In a BarGrid, each cell of the (n-1)-dimensional space is stored by its
        index inside a matrix between two opposite points and the number of
        cells in each dimension.
        First, this method computes the index(es) of the cell where to look up
        bars, then it checks the last dimension to see whether the point lies
        in a cell covered by a bar at the selected cell of the
        (n-1)-dimensional space.
        Note that if the point is exactly between several cells (on a vertex or
        an edge), the method checks the bars of all the touching cells.
        Let s(i) be the size of a cell (the step size) on dimension i,
        p(i) the coordinates of the requested point on dimension i, and
        c(i) the coordinates of the center of a cell of the BarGrid on
        dimension i; then the point p is considered inside the cell c if:
         - p(i) >= c(i) - s(i)/2 (after the left side of the cell)
         - p(i) <= c(i) + s(i)/2 (before the right side of the cell)
         - for all i in [0; n-1]
        '''
        result = False
        point = np.array(point,float)
        # first we need to project the point into the cells coordinate system
        point_int = self.intervalNumberperaxis * (point - self.originCoords)/(self.oppositeCoords - self.originCoords)
        point_int = np.dot(self.permutation, np.transpose(point_int))
        points = [point_int]
        for i,coord in enumerate(point_int):
            new_points = []
            for p in points:
                if (coord%1)==0.5: # the point is exactly between two cells on the current dimension
                    left = [pp for pp in p]
                    left[i] = int(math.floor(left[i]))
                    right = [pp for pp in p]
                    right[i] = int(math.ceil(right[i]))
                    new_points.extend([left, right])
                else:
                    # we just need to round to the nearest cell center
                    new_point = [pp for pp in p]
                    new_point[i] = int(round(new_point[i]))
                    new_points.append(new_point)
            points=new_points
        l = len(point)
        # we look at each bar to check whether it sits at the same coordinates
        # in the (n-1)-dimensional space as our point
        for point in points:
            candidateBar = False
            for bar in self.bars:
                if point[:-1] == bar[:-2]:
                    # we have reached the interesting zone
                    candidateBar = True
                    # is our point in the bar?
                    if (point[l-1] >= bar[l-1]-0.5) and (point[l-1] <= bar[l] + 0.5):
                        result = True
                        break
                elif candidateBar:
                    # we have passed the position in the (n-1)-dimensional space, so there are no more candidates
                    break
        return result

    def permute(self,permutation):
      '''
      Create a BarGrid holding the same data as the initial one, but with a different
      permutation of the variables: np.dot(np.transpose(permutation), self.permutation)
      instead of self.permutation.
      '''

      griddata = []
      unitbars = []
      dimension = len(self.originCoords)
      matid = np.identity(dimension,dtype = int)
      b = False
      for i in range(dimension):
        for j in range(dimension):
            if permutation[i][j] != matid[i][j]:
                b = True
      if b:
          permutegrid = BarGridKernel(self.originCoords,self.oppositeCoords,self.intervalNumberperaxis,np.dot(np.transpose(permutation),self.permutation),np.dot(np.transpose(permutation),self.kernelMinPoint),np.dot(np.transpose(permutation),self.kernelMaxPoint),griddata,self.metadata)

          if permutation[dimension-1][dimension-1] == 0:
                barposition = [0]*(dimension-1)
                increment = [0]*len(barposition)
                increment.append(1)
                oldincrement = list(np.dot(permutation,np.array(increment,int)))[:-1]
                oldindex = oldincrement.index(1)
                newincrement = list(np.dot(np.transpose(permutation),np.array(increment,int)))[:-1]
                newindex = newincrement.index(1)
                NmaxUsefullBars = list(np.dot(self.permutation, self.intervalNumberperaxis))[oldindex]
                barposition.append(0)

                permutnewIntervalNumberperaxis = np.dot(permutegrid.permutation, permutegrid.intervalNumberperaxis)
                if (newindex == 0):
                    indexbar = 1
                else :
                    indexbar = 0

                if dimension == 2:
        #                   print "barposition"
        #                   print barposition
                            usefuloldbars = []
                            newbars = []
                            oldbarposition = list(np.dot(permutation,np.array(barposition,int)))[:-1]
                            for i in range(NmaxUsefullBars+1):
                                oldbarposition[oldindex] = i
        #                       print oldbarposition
                                insertion_point = self.bars.bisect(oldbarposition)
                                while insertion_point<len(self.bars) and self.bars[insertion_point][:-2]==oldbarposition:
                                    usefuloldbars.append(self.bars[insertion_point])
                                    insertion_point = insertion_point+1
        #                   print usefuloldbars

                            for oldbar in usefuloldbars:
        #                       print "newoldbar"
        #                       print oldbar
                                level = oldbar[oldindex]
                                unitbar = barposition[:-1] + [level,level]
                                mini = oldbar[-2]
                                maxi = oldbar[-1]
                                newbartoupdateindex = mini
                                if newbars:
                                    k = 0
                                    while k <len(newbars):
                                        newbar = newbars[k]
        #                               print "ole"
        #                               print newbars
        #                               print newbar
        #                               print k
                                        if (newbar[newindex] > maxi):
                                            break
                                        elif (newbar[newindex] >= mini):
        #                                   print newbar[newindex]
                                            if (newbar[newindex] > newbartoupdateindex):
                                                for l in range(newbartoupdateindex,newbar[newindex]):
                                                    unitbar[newindex] = l
                                                    newbars.insert(k,copy.copy(unitbar))
                                                    k=k+1
                                                newbartoupdateindex = newbar[newindex]
                                            if (newbar[-1] == level-1):
                                                newbar[-1] = level
                                                newbartoupdateindex = newbar[newindex]+1
                                        k = k+1
                                    for l in range(newbartoupdateindex,maxi+1):
                                        unitbar[newindex] = l
                                        newbars.insert(k,copy.copy(unitbar))
                                        k = k+1
                                else :
                                    for l in range(mini,maxi+1):
                                        unitbar[newindex] = l
                                        newbars.append(copy.copy(unitbar))
        #                   print "newbars"
        #                   print newbars
                            permutegrid.bars.update(newbars)

                else:
                        while(barposition[indexbar]<permutnewIntervalNumberperaxis[indexbar]+1):
        #                   print "barposition"
        #                   print barposition
                            usefuloldbars = []
                            newbars = []
                            oldbarposition = list(np.dot(permutation,np.array(barposition,int)))[:-1]
                            for i in range(NmaxUsefullBars+1):
                                oldbarposition[oldindex] = i
        #                       print oldbarposition
                                insertion_point = self.bars.bisect(oldbarposition)
                                while insertion_point<len(self.bars) and self.bars[insertion_point][:-2]==oldbarposition:
                                    usefuloldbars.append(self.bars[insertion_point])
                                    insertion_point = insertion_point+1
        #                   print usefuloldbars

                            for oldbar in usefuloldbars:
        #                       print "newoldbar"
        #                       print oldbar
                                level = oldbar[oldindex]
                                unitbar = barposition[:-1] + [level,level]
                                mini = oldbar[-2]
                                maxi = oldbar[-1]
                                newbartoupdateindex = mini
                                if newbars:
                                    k = 0
                                    while k <len(newbars):
                                        newbar = newbars[k]
        #                               print "ole"
        #                               print newbars
        #                               print newbar
        #                               print k
                                        if (newbar[newindex] > maxi):
                                            break
                                        elif (newbar[newindex] >= mini):
        #                                   print newbar[newindex]
                                            if (newbar[newindex] > newbartoupdateindex):
                                                for l in range(newbartoupdateindex,newbar[newindex]):
                                                    unitbar[newindex] = l
                                                    newbars.insert(k,copy.copy(unitbar))
                                                    k=k+1
                                                newbartoupdateindex = newbar[newindex]
                                            if (newbar[-1] == level-1):
                                                newbar[-1] = level
                                                newbartoupdateindex = newbar[newindex]+1
                                        k = k+1
                                    for l in range(newbartoupdateindex,maxi+1):
                                        unitbar[newindex] = l
                                        newbars.insert(k,copy.copy(unitbar))
                                        k = k+1
                                else :
                                    for l in range(mini,maxi+1):
                                        unitbar[newindex] = l
                                        newbars.append(copy.copy(unitbar))
        #                   print "newbars"
        #                   print newbars
                            permutegrid.bars.update(newbars)

                            for i in range(dimension-1):
                                if ((dimension-2-i) != newindex):
                                    if ((i == dimension - 2- indexbar) or (barposition[dimension-2-i]<permutnewIntervalNumberperaxis[dimension-2-i])):
                                        barposition[dimension-2-i] = barposition[dimension-2-i]+1
                                        break
                                    else :
                                        barposition[dimension-2-i] = 0
          else:
              for bar in self.bars :
                  tpermutation = np.transpose(permutation)
                  permutegrid.bars.add(list(np.dot(tpermutation,bar[:-1]))+[bar[-1]])
      else:
          permutegrid = BarGridKernel(self.originCoords,self.oppositeCoords,self.intervalNumberperaxis,np.dot(np.transpose(permutation),self.permutation),np.dot(np.transpose(permutation),self.kernelMinPoint),np.dot(np.transpose(permutation),self.kernelMaxPoint),list(self.bars),self.metadata)

      return permutegrid

    def buildNewBars(self,barposition,permutation,data):
        newdata = []
        unitbar = []
        increment = [0]*len(barposition)
        increment.append(1)
        newincrement = list(np.dot(np.transpose(permutation),np.array(increment,int)))[:-1]
        newindex = newincrement.index(1)
        oldincrement = list(np.dot(permutation,np.array(increment,int)))[:-1]
        oldindex = oldincrement.index(1)

        NmaxNewBars = list(np.dot(self.permutation, self.intervalNumberperaxis))[-1]
#        print newincrement
#        print newindex
#        print oldincrement
#        print oldindex

#        print NmaxNewBars

        for oldbar in data:
            level = oldbar[oldindex]
            unitbar = barposition + [level,level]
            mini = oldbar[-2]
            maxi = oldbar[-1]
            newbartoupdateindex = mini
            if newdata:
                # use a while loop so that the manual increments of k after an
                # insert actually advance the cursor (the original for-range
                # loop silently ignored the k = k + 1 bumps)
                k = 0
                while k < len(newdata):
                    newbar = newdata[k]
                    if (newbar[newindex] >= mini) and (newbar[newindex] <= maxi):
                        if (newbar[newindex] > newbartoupdateindex):
                            for l in range(newbartoupdateindex, newbar[newindex]):
                                unitbar[newindex] = l
                                newdata.insert(k, copy.copy(unitbar))
                                k = k + 1
                            newbartoupdateindex = newbar[newindex]
                        if (newbar[-1] == level - 1):
                            newbar[-1] = level
                            newbartoupdateindex = newbar[newindex] + 1
                    k = k + 1
                for l in range(newbartoupdateindex, maxi + 1):
                    unitbar[newindex] = l
                    newdata.append(copy.copy(unitbar))
            else :
                for l in range(mini,maxi+1):
                    unitbar[newindex] = l
                    newdata.append(copy.copy(unitbar))

        return newdata

    def findUsefullBars(self,barposition,permutation):
        data = []
        increment = [0]*len(barposition)
        barposition.append(0)
        increment.append(1)
        oldincrement = list(np.dot(permutation,np.array(increment,int)))[:-1]
        index = oldincrement.index(1)
        oldbarposition = list(np.dot(permutation,np.array(barposition,int)))[:-1]
        NmaxUsefullBars = list(np.dot(self.permutation, self.intervalNumberperaxis))[index]
#        print oldbarposition
#        print oldincrement
#        print index
#        print NmaxUsefullBars

        for i in range(NmaxUsefullBars+1):
            oldbarposition[index] = i
            insertion_point = self.bars.bisect(oldbarposition)
#            print oldbarposition
#            print insertion_point
            while insertion_point<len(self.bars) and self.bars[insertion_point][:-2]==oldbarposition:
                data.append(self.bars[insertion_point])
                insertion_point = insertion_point+1

        return data
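# A minimal standalone sketch (not part of the original class) of the
# bisect-and-scan idiom used by findUsefullBars above: because bars sort
# lexicographically, all bars sharing a position prefix are contiguous.
from sortedcontainers import SortedList

# each bar is [position..., inf, sup]; here one position coordinate per bar
bars = SortedList([[0, 2, 4], [1, 0, 1], [1, 5, 9], [2, 3, 3]])

def bars_at(position):
    found = []
    i = bars.bisect(position)  # first bar sorting after the bare prefix
    while i < len(bars) and bars[i][:-2] == position:
        found.append(bars[i])
        i += 1
    return found

print(bars_at([1]))  # -> [[1, 0, 1], [1, 5, 9]]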
Ejemplo n.º 17
0
# Compare three ways to recover the index of a value in a list
# (IPython session: %time is an IPython magic).
from random import uniform
from sortedcontainers import SortedList

x = [uniform(0, 1) for i in range(20000)]  # range, not xrange, on Python 3
val = x[4500]
%time x.index(val)            # O(n) linear scan

rev_dict = dict(zip(x, range(20000)))
%time rev_dict[val]           # O(1) hash lookup

rev_bis = SortedList((v, i) for (i, v) in enumerate(x))
%time rev_bis.bisect((val,))  # O(log n) binary search
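
# The same comparison as a standalone script (a sketch: timings are
# machine-dependent and the constant 20000 is kept from the snippet above).
from random import uniform
from timeit import timeit
from sortedcontainers import SortedList

x = [uniform(0, 1) for _ in range(20000)]
val = x[4500]
rev_dict = dict(zip(x, range(20000)))
rev_bis = SortedList((v, i) for (i, v) in enumerate(x))

print(timeit(lambda: x.index(val), number=100))            # linear scan
print(timeit(lambda: rev_dict[val], number=100))           # hash lookup
print(timeit(lambda: rev_bis.bisect((val,)), number=100))  # binary search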




Ejemplo n.º 18
0
# 315. Count of Smaller Numbers After Self
# https://leetcode.com/problems/count-of-smaller-numbers-after-self

from typing import List
from sortedcontainers import SortedList

class Solution:
    def countSmaller(self, nums: List[int]) -> List[int]:
        n = len(nums)
        res = [0] * n
        sl = SortedList([nums[-1]])
        
        for i in range(n - 2, -1, -1):
            # count previously-seen elements strictly smaller than nums[i];
            # for integers, bisect_left(nums[i]) equals bisect(nums[i] - 1)
            index = sl.bisect_left(nums[i])
            
            res[i] = index
            
            sl.add(nums[i])
        
        return res
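# Quick sanity check of the solution above, using the canonical LeetCode
# example: for [5, 2, 6, 1] the expected answer is [2, 1, 1, 0].
if __name__ == "__main__":
    print(Solution().countSmaller([5, 2, 6, 1]))  # -> [2, 1, 1, 0]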
Ejemplo n.º 19
0
class PortfolioHistory(SortedDict):
    """Represents the historical holdings of a portfolio.

    Usually this class should only be instantiated by GetPortfolio.
    """
    def __init__(self, user_id):
        super(PortfolioHistory, self).__init__()
        self._user_id = user_id
        with sql.GetCursor() as cursor:
            cursor.execute(
                'SELECT type, timestamp, in_symbol, in_amount, out_symbol, out_amount '
                'FROM transactions where user_id = %s' % user_id)
            self._transactions = SortedList([
                Transaction(type=t[0],
                            timestamp=t[1],
                            in_symbol=t[2],
                            in_amount=t[3],
                            out_symbol=t[4],
                            out_amount=t[5]) for t in cursor.fetchall()
            ])
        self.InitFromTransactions()

    def InitFromTransactions(self):
        # TODO(brandonsalmon): If it becomes necessary, we can greatly improve
        # the performance of !buy, !sell, !trade, by adding a transaction cursor
        # and not reinitializing all transactions every time.
        self.clear()
        for t in self._transactions:
            if t.type == "INIT":
                self[t.timestamp] = {}
                continue
            if t.timestamp not in self:
                bisect_point = self.bisect(t.timestamp)
                if bisect_point == 0:
                    copy = {}
                else:
                    copy = self[self._list[bisect_point - 1]].copy()
                self[t.timestamp] = copy
            if t.in_symbol:
                if t.in_symbol not in self[t.timestamp]:
                    self[t.timestamp][t.in_symbol] = 0
                self[t.timestamp][t.in_symbol] += t.in_amount
            if t.out_symbol:
                if t.out_symbol not in self[t.timestamp]:
                    raise Exception(
                        '%s tried to remove coin %s they didn\'t own' %
                        (self._user_id, t.out_symbol))
                self[t.timestamp][t.out_symbol] -= t.out_amount
                if self[t.timestamp][t.out_symbol] < 1e-10:
                    del self[t.timestamp][t.out_symbol]

    def CreationDate(self):
        return self._transactions[0].timestamp

    def GetValueList(self, t_list):
        return [self.Value(t) for t in t_list]

    def GetChange(self, timestamp=None, timedelta='24h'):
        dt = datetime.fromtimestamp(timestamp) if timestamp else datetime.now()
        old_timestamp = (dt - util.GetTimeDelta(timedelta)).timestamp()
        old_value = self.Value(old_timestamp)
        new_value = self.Value(timestamp)
        if old_value != 0:
            return '%.2f%s' % (100 * (new_value - old_value) / old_value, '%')
        elif new_value == 0:
            return "No change"
        elif new_value > 0:
            return "+Inf%"
        else:
            return "-Inf%"

    def ClearRemote(self):
        with sql.GetCursor() as cursor:
            cursor.execute('DELETE FROM transactions where user_id = %s' %
                           self._user_id)
        self.clear()

    def Init(self, tuples, timestamp=None):
        """Takes a list of tuples of (symbol, amount)."""
        timestamp = int(timestamp if timestamp else time.time())
        with sql.GetCursor() as cursor:
            cursor.execute(
                'INSERT INTO transactions (user_id, type, timestamp) '
                'values (%s, "%s", %s)' % (self._user_id, "INIT", timestamp))

        transaction = Transaction(type="INIT", timestamp=timestamp)
        # SortedList.add keeps the list ordered (SortedList has no insert())
        self._transactions.add(transaction)
        for t in tuples:
            self.Buy(t[0], t[1], timestamp, init=False)
        self.InitFromTransactions()

    def Buy(self, symbol, amount, timestamp=None, init=True):
        timestamp = int(timestamp if timestamp else time.time())
        with sql.GetCursor() as cursor:
            cursor.execute(
                'INSERT INTO transactions (user_id, type, timestamp, in_symbol, in_amount) '
                'values (%s, "%s", %s, "%s", %s)' %
                (self._user_id, "BUY", timestamp, symbol.upper(), amount))
        transaction = Transaction(type="BUY",
                                  timestamp=timestamp,
                                  in_symbol=symbol.upper(),
                                  in_amount=amount)
        self._transactions.add(transaction)
        if init:
            self.InitFromTransactions()

    def Sell(self, symbol, amount, timestamp=None):
        timestamp = int(timestamp if timestamp else time.time())
        with sql.GetCursor() as cursor:
            cursor.execute(
                'INSERT INTO transactions (user_id, type, timestamp, out_symbol, out_amount) '
                'values (%s, "%s", %s, "%s", %s)' %
                (self._user_id, "SELL", timestamp, symbol.upper(), amount))
        transaction = Transaction(type="SELL",
                                  timestamp=timestamp,
                                  out_symbol=symbol.upper(),
                                  out_amount=amount)
        self._transactions.add(transaction)
        self.InitFromTransactions()

    def Trade(self,
              in_symbol,
              in_amount,
              out_symbol,
              out_amount,
              timestamp=None):
        timestamp = int(timestamp if timestamp else time.time())
        with sql.GetCursor() as cursor:
            cursor.execute(
                'INSERT INTO transactions (user_id, type, timestamp, in_symbol, in_amount, '
                'out_symbol, out_amount) values (%s, "%s", %s, "%s", %s, "%s", %s)'
                % (self._user_id, "SELL", timestamp, in_symbol.upper(),
                   in_amount, out_symbol.upper(), out_amount))
        transaction = Transaction(type="TRADE",
                                  timestamp=timestamp,
                                  out_symbol=out_symbol.upper(),
                                  out_amount=out_amount,
                                  in_symbol=in_symbol.upper(),
                                  in_amount=in_amount)
        self._transactions.add(transaction)
        self.InitFromTransactions()

    def Value(self, timestamp=None):
        try:
            if timestamp:
                bisect_point = self.bisect(timestamp)
                if bisect_point == 0:
                    return 0.0
                data = self[self._list[bisect_point - 1]]
            else:
                data = self[self._list[-1]]
        except (IndexError, KeyError):
            return 0.0
        value = 0.0
        for symbol, amount in data.items():
            price = coin_data.GetHistory(symbol).GetValue(timestamp)
            value += amount * price
        return value

    def GetOwnedCurrency(self, timestamp=None):
        try:
            if timestamp:
                bisect_point = self.bisect(timestamp)
                if bisect_point == 0:
                    return {}
                return self[self._list[bisect_point - 1]]
            else:
                return self[self._list[-1]]
        except (IndexError, KeyError):
            return {}

    def AsTable(self, timestamp=None):
        tuples = []
        for symbol, amount in self.GetOwnedCurrency(timestamp).items():
            history = coin_data.GetHistory(symbol)
            price = history.GetValue(timestamp)
            curr_value = amount * price
            change_day = history.GetDayChange(timestamp)
            tuples.append([
                symbol, amount,
                '$%.2f (%.2f%s)' % (curr_value, change_day, "%"), curr_value
            ])
        tuples = sorted(tuples, key=lambda x: x[3], reverse=True)
        for t in tuples:
            t.pop()
        return tabulate(tuples, tablefmt='fancy_grid', floatfmt='.4f')

    def BreakTable(self, timestamp=None):
        tuples = []
        for symbol, amount in self.GetOwnedCurrency(timestamp).items():
            price = coin_data.GetHistory(symbol).GetValue(timestamp)
            value_at_t = amount * price
            tuples.append([
                symbol, amount,
                '%.2f%s' % ((value_at_t / self.Value(timestamp)) * 100, "%"),
                (value_at_t / self.Value(timestamp)) * 100
            ])
        tuples = sorted(tuples, key=lambda x: x[3], reverse=True)
        for t in tuples:
            t.pop()
        return tabulate(tuples, tablefmt='fancy_grid', floatfmt='.4f')
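# Value() and GetOwnedCurrency() above share one SortedDict pattern: bisect the
# timestamp, then take the snapshot just before it. A self-contained sketch of
# that lookup with made-up holdings (not the bot's real schema):
from sortedcontainers import SortedDict

snapshots = SortedDict({100: {'BTC': 1.0}, 200: {'BTC': 1.0, 'ETH': 5.0}})

def holdings_at(ts):
    i = snapshots.bisect(ts)  # index of the first snapshot strictly after ts
    return {} if i == 0 else snapshots.peekitem(i - 1)[1]

print(holdings_at(150))  # -> {'BTC': 1.0}
print(holdings_at(50))   # -> {}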
Ejemplo n.º 20
0
class PriorityDict(MutableMapping):
    """
    A PriorityDict provides the same methods as a dict. Additionally, a
    PriorityDict efficiently maintains its keys in value sorted order.
    Consequently, the keys method will return the keys in value sorted order,
    the popitem method will remove the item with the highest value, etc.
    """
    def __init__(self, *args, **kwargs):
        """
        A PriorityDict provides the same methods as a dict. Additionally, a
        PriorityDict efficiently maintains its keys in value sorted order.
        Consequently, the keys method will return the keys in value sorted
        order, the popitem method will remove the item with the highest value,
        etc.
        If the first argument is the boolean value False, then it indicates
        that keys are not comparable. By default this setting is True and
        ties between duplicate values are broken on the key. Using comparable
        keys improves the performance of the PriorityDict.
        An optional *iterable* argument provides an initial series of items to
        populate the PriorityDict.  Each item in the sequence must itself
        contain two items. The first is used as a key in the new dictionary,
        and the second as the key's value. If a given key is seen more than
        once, the last value associated with it is retained in the new
        dictionary.
        If keyword arguments are given, the keywords themselves with their
        associated values are added as items to the dictionary. If a key is
        specified both in the positional argument and as a keyword argument, the
        value associated with the keyword is retained in the dictionary. For
        example, these all return a dictionary equal to ``{"one": 2, "two":
        3}``:
        * ``PriorityDict(one=2, two=3)``
        * ``PriorityDict({'one': 2, 'two': 3})``
        * ``PriorityDict(zip(('one', 'two'), (2, 3)))``
        * ``PriorityDict([['two', 3], ['one', 2]])``
        The first example only works for keys that are valid Python
        identifiers; the others work with any valid keys.
        Note that this constructor mimics the Python dict constructor. If
        you're looking for a constructor like collections.Counter(...), see
        PriorityDict.count(...).
        """
        self._dict = dict()

        if len(args) > 0 and isinstance(args[0], bool):
            if args[0]:
                self._list = SortedList()
            else:
                self._list = SortedListWithKey(key=lambda tup: tup[0])
        else:
            self._list = SortedList()

        self.iloc = _IlocWrapper(self)
        self.update(*args, **kwargs)

    def clear(self):
        """Remove all elements from the dictionary."""
        self._dict.clear()
        self._list.clear()

    def clean(self, value=0):
        """
        Remove all items with value less than or equal to `value`.
        Default `value` is 0.
        """
        _list, _dict = self._list, self._dict
        pos = self.bisect_right(value)
        for key in (key for value, key in _list[:pos]):
            del _dict[key]
        del _list[:pos]

    def __contains__(self, key):
        """Return True if and only if *key* is in the dictionary."""
        return key in self._dict

    def __delitem__(self, key):
        """
        Remove ``d[key]`` from *d*.  Raises a KeyError if *key* is not in the
        dictionary.
        """
        value = self._dict[key]
        self._list.remove((value, key))
        del self._dict[key]

    def __getitem__(self, key):
        """
        Return the priority of *key* in *d*.  Raises a KeyError if *key* is not
        in the dictionary.
        """
        return self._dict[key]

    def __iter__(self):
        """
        Create an iterator over the keys of the dictionary ordered by the value
        sort order.
        """
        return iter(key for value, key in self._list)

    def __reversed__(self):
        """
        Create an iterator over the keys of the dictionary ordered by the
        reversed value sort order.
        """
        return iter(key for value, key in reversed(self._list))

    def __len__(self):
        """Return the number of (key, value) pairs in the dictionary."""
        return len(self._dict)

    def __setitem__(self, key, value):
        """Set `d[key]` to *value*."""
        if key in self._dict:
            old_value = self._dict[key]
            self._list.remove((old_value, key))
        self._list.add((value, key))
        self._dict[key] = value

    def copy(self):
        """Create a shallow copy of the dictionary."""
        result = PriorityDict()
        result._dict = self._dict.copy()
        result._list = self._list.copy()
        result.iloc = _IlocWrapper(result)
        return result

    def __copy__(self):
        """Create a shallow copy of the dictionary."""
        return self.copy()

    @classmethod
    def fromkeys(cls, iterable, value=0):
        """
        Create a new dictionary with keys from `iterable` and values set to
        `value`. The default *value* is 0.
        """
        return PriorityDict((key, value) for key in iterable)

    def get(self, key, default=None):
        """
        Return the value for *key* if *key* is in the dictionary, else
        *default*.  If *default* is not given, it defaults to ``None``,
        so that this method never raises a KeyError.
        """
        return self._dict.get(key, default)

    def has_key(self, key):
        """Return True if and only in *key* is in the dictionary."""
        return key in self._dict

    def pop(self, key, default=_NotGiven):
        """
        If *key* is in the dictionary, remove it and return its value,
        else return *default*. If *default* is not given and *key* is not in
        the dictionary, a KeyError is raised.
        """
        if key in self._dict:
            value = self._dict[key]
            self._list.remove((value, key))
            return self._dict.pop(key)
        else:
            if default is _NotGiven:
                raise KeyError
            else:
                return default

    def popitem(self, index=-1):
        """
        Remove and return item at *index* (default: -1). Raises IndexError if
        dict is empty or index is out of range. Negative indices are supported
        as for slice indices.
        """
        value, key = self._list.pop(index)
        del self._dict[key]
        return key, value

    def setdefault(self, key, default=0):
        """
        If *key* is in the dictionary, return its value.  If not, insert *key*
        with a value of *default* and return *default*.  *default* defaults to
        ``0``.
        """
        if key in self._dict:
            return self._dict[key]
        else:
            self._dict[key] = default
            self._list.add((default, key))
            return default

    def elements(self):
        """
        Return an iterator over elements repeating each as many times as its
        count. Elements are returned in value sort-order. If an element’s count
        is less than one, elements() will ignore it.
        """
        values = (repeat(key, value) for value, key in self._list)
        return chain.from_iterable(values)

    def most_common(self, count=None):
        """
        Return a list of the `count` highest priority elements with their
        priority. If `count` is not specified, `most_common` returns *all*
        elements in the dict. Elements with equal counts are ordered by key.
        """
        _list, _dict = self._list, self._dict

        if count is None:
            return [(key, value) for value, key in reversed(_list)]

        end = len(_dict)
        start = end - count

        return [(key, value) for value, key in reversed(_list[start:end])]

    def subtract(self, elements):
        """
        Elements are subtracted from an iterable or from another mapping (or
        counter). Like dict.update() but subtracts counts instead of replacing
        them. Both inputs and outputs may be zero or negative.
        """
        self -= Counter(elements)

    def tally(self, *args, **kwargs):
        """
        Elements are counted from an iterable or added-in from another mapping
        (or counter). Like dict.update() but adds counts instead of replacing
        them. Also, the iterable is expected to be a sequence of elements, not a
        sequence of (key, value) pairs.
        """
        self += Counter(*args, **kwargs)

    @classmethod
    def count(cls, *args, **kwargs):
        """
        Consume `args` and `kwargs` with a Counter and use that mapping to
        initialize a PriorityDict.
        """
        return PriorityDict(Counter(*args, **kwargs))

    def update(self, *args, **kwargs):
        """
        Update the dictionary with the key/value pairs from *other*, overwriting
        existing keys.
        *update* accepts either another dictionary object or an iterable of
        key/value pairs (as a tuple or other iterable of length two).  If
        keyword arguments are specified, the dictionary is then updated with
        those key/value pairs: ``d.update(red=1, blue=2)``.
        """
        _list, _dict = self._list, self._dict

        if len(args) == 1 and len(kwargs) == 0 and isinstance(args[0], Mapping):
            items = args[0]
        else:
            items = dict(*args, **kwargs)

        if (10 * len(items)) > len(_dict):
            _dict.update(items)
            _list.clear()
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(items):
                # only discard a stale pair when the key already exists
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                _dict[key] = value
                _list.add((value, key))

    def index(self, key):
        """
        Return the smallest *i* such that `d.iloc[i] == key`.  Raises KeyError
        if *key* is not present.
        """
        value = self._dict[key]
        return self._list.index((value, key))

    def bisect_left(self, value):
        """
        Similar to the ``bisect`` module in the standard library, this returns
        an appropriate index to insert *value* in PriorityDict. If *value* is
        already present in PriorityDict, the insertion point will be before (to
        the left of) any existing entries.
        """
        return self._list.bisect_left((value,))

    def bisect(self, value):
        """Same as bisect_left."""
        return self._list.bisect((value,))

    def bisect_right(self, value):
        """
        Same as `bisect_left`, but if *value* is already present in
        PriorityDict, the insertion point will be after (to the right
        of) any existing entries.
        """
        return self._list.bisect_right((value, _Biggest))

    def __iadd__(self, that):
        """Add values from `that` mapping."""
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.update(that)
            _list.update((value, key) for key, value in iteritems(_dict))
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    _dict[key] += value
                else:
                    _dict[key] = value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value + value
                _dict[key] = value
                _list.add((value, key))
        return self

    def __isub__(self, that):
        """Subtract values from `that` mapping."""
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.clear()
            _list.clear()
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    _dict[key] -= value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value - value
                    _dict[key] = value
                    _list.add((value, key))
        return self

    def __ior__(self, that):
        """Or values from `that` mapping (max(v1, v2))."""
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.update(that)
            _list.update((value, key) for key, value in iteritems(_dict))
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _dict[key] = old_value if old_value > value else value
                else:
                    _dict[key] = value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value if old_value > value else value
                _dict[key] = value
                _list.add((value, key))
        return self

    def __iand__(self, that):
        """And values from `that` mapping (min(v1, v2))."""
        _list, _dict = self._list, self._dict
        if len(_dict) == 0:
            _dict.clear()
            _list.clear()
        elif len(that) * 3 > len(_dict):
            _list.clear()
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _dict[key] = old_value if old_value < value else value
            _list.update((value, key) for key, value in iteritems(_dict))
        else:
            for key, value in iteritems(that):
                if key in _dict:
                    old_value = _dict[key]
                    _list.remove((old_value, key))
                    value = old_value if old_value < value else value
                    _dict[key] = value
                    _list.add((value, key))
        return self

    def __add__(self, that):
        """Add values from this and `that` mapping."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                _dict[key] += value
            else:
                _dict[key] = value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __sub__(self, that):
        """Subtract values in `that` mapping from this."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                _dict[key] -= value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __or__(self, that):
        """Or values from this and `that` mapping."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                old_value = _dict[key]
                _dict[key] = old_value if old_value > value else value
            else:
                _dict[key] = value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __and__(self, that):
        """And values from this and `that` mapping."""
        result = PriorityDict()
        _list, _dict = result._list, result._dict
        _dict.update(self._dict)
        for key, value in iteritems(that):
            if key in _dict:
                old_value = _dict[key]
                _dict[key] = old_value if old_value < value else value
        _list.update((value, key) for key, value in iteritems(_dict))
        return result

    def __eq__(self, that):
        """Compare two mappings for equality."""
        if isinstance(that, PriorityDict):
            that = that._dict
        return self._dict == that

    def __ne__(self, that):
        """Compare two mappings for inequality."""
        if isinstance(that, PriorityDict):
            that = that._dict
        return self._dict != that

    def __lt__(self, that):
        """Compare two mappings for less than."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        return (_dict != that and self <= that)

    def __le__(self, that):
        """Compare two mappings for less than equal."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        return (len(_dict) <= len(that) and
                all(_dict[key] <= that[key] if key in that else False
                    for key in _dict))

    def __gt__(self, that):
        """Compare two mappings for greater than."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        return (_dict != that and self >= that)

    def __ge__(self, that):
        """Compare two mappings for greater than equal."""
        if isinstance(that, PriorityDict):
            that = that._dict
        _dict = self._dict
        return (len(_dict) >= len(that) and
                all(_dict[key] >= that[key] if key in _dict else False
                    for key in that))

    def isdisjoint(self, that):
        """
        Return True if no key in `self` is also in `that`.
        This doesn't check that the value is greater than zero.
        To remove keys with value less than or equal to zero see *clean*.
        """
        return not any(key in self for key in that)

    def items(self):
        """
        Return a list of the dictionary's items (``(key, value)``
        pairs). Items are ordered by their value from least to greatest.
        """
        return list((key, value) for value, key in self._list)

    def iteritems(self):
        """
        Return an iterable over the items (``(key, value)`` pairs) of the
        dictionary. Items are ordered by their value from least to greatest.
        """
        return iter((key, value) for value, key in self._list)

    @not26
    def viewitems(self):
        """
        In Python 2.7 and later, return a new `ItemsView` of the dictionary's
        items. Beware iterating the `ItemsView` as items are unordered.
        In Python 2.6, raise a NotImplementedError.
        """
        if hexversion < 0x03000000:
            return self._dict.viewitems()
        else:
            return self._dict.items()

    def keys(self):
        """
        Return a list of the dictionary's keys. Keys are ordered
        by their corresponding value from least to greatest.
        """
        return list(key for value, key in self._list)

    def iterkeys(self):
        """
        Return an iterable over the keys of the dictionary. Keys are ordered
        by their corresponding value from least to greatest.
        """
        return iter(key for value, key in self._list)

    @not26
    def viewkeys(self):
        """
        In Python 2.7 and later, return a new `KeysView` of the dictionary's
        keys. Beware iterating the `KeysView` as keys are unordered.
        In Python 2.6, raise a NotImplementedError.
        """
        if hexversion < 0x03000000:
            return self._dict.viewkeys()
        else:
            return self._dict.keys()

    def values(self):
        """
        Return a list of the dictionary's values. Values are
        ordered from least to greatest.
        """
        return list(value for value, key in self._list)

    def itervalues(self):
        """
        Return an iterable over the values of the dictionary. Values are
        iterated from least to greatest.
        """
        return iter(value for value, key in self._list)

    @not26
    def viewvalues(self):
        """
        In Python 2.7 and later, return a `ValuesView` of the dictionary's
        values. Beware iterating the `ValuesView` as values are unordered.
        In Python 2.6, raise a NotImplementedError.
        """
        if hexversion < 0x03000000:
            return self._dict.viewvalues()
        else:
            return self._dict.values()

    def __repr__(self):
        """Return a string representation of PriorityDict."""
        return 'PriorityDict({0})'.format(repr(dict(self)))

    def _check(self):
        self._list._check()
        assert len(self._dict) == len(self._list)
        assert all(key in self._dict and self._dict[key] == value
                   for value, key in self._list)
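# The core invariant of PriorityDict above: a plain dict plus a SortedList of
# (value, key) pairs, updated together. A minimal sketch of just that idea:
from sortedcontainers import SortedList

_dict = {}
_list = SortedList()  # (value, key) pairs kept in value order

def set_item(key, value):
    if key in _dict:
        _list.remove((_dict[key], key))  # drop the stale pair first
    _dict[key] = value
    _list.add((value, key))

set_item('a', 3); set_item('b', 1); set_item('a', 0)
print([k for v, k in _list])  # keys in value-sorted order -> ['a', 'b']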
Ejemplo n.º 21
0
class FileLabels:
    def __init__(self, filename, path):
        self.filename = filename
        self.path = path
        self.current_labels = SortedList(key=self._labelsortkey)
        self.updated_labels = SortedList(key=self._labelsortkey)
        self.get_labels_from_file()

        for l in self.current_labels:
            if not ((l.filename == self.filename) and (l.path == self.path)):
                raise Exception("Found label with incorrect filename/path")

    def get_labels_from_file(self):
        self.updated_labels = SortedList(key=self._labelsortkey)
        self.current_labels = SortedList(key=self._labelsortkey)
        self.current_labels.update(
            SrcLabelTool._get_labels(self.filename, self.path, "", "", True,
                                     None))
        self.updated_labels.update(self.current_labels)

    def insert_label(self, label):
        if not ((label.filename == self.filename) and
                (label.path == self.path)):
            raise Exception(
                "Trying to insert label with incorrect filename/path")

        if label in self.current_labels:
            return
        # update the lineno in the following labels
        if label in self.updated_labels:
            self.updated_labels.remove(label)
            self.updated_labels.add(label)
        i = self.updated_labels.bisect(label)
        for l in self.updated_labels[i:]:
            l.lineno += 1
        self.updated_labels.add(label)

    def remove_label(self, label):
        i = self.updated_labels.bisect(label)
        # update the lineno in the following labels
        for l in self.updated_labels[i:]:
            l.lineno -= 1
        self.updated_labels.remove(label)

    def insert_label_list(self, labels):
        sortedlabels = SortedList(key=self._revlabelsortkey)
        sortedlabels.update(labels)
        for i in sortedlabels:
            self.insert_label(i)

    def _labelsortkey(self, l):
        return l.lineno

    def _revlabelsortkey(self, l):
        return -l.lineno

    def update_file(self):
        fullpath = os.path.join(self.path, self.filename)
        with open(fullpath, "r") as f:
            lines = f.readlines()
        nolabels = [l for l in lines if not SrcLabelTool.is_any_label(l)]
        for l in self.updated_labels:
            nolabels.insert(l.lineno - 1, l.filerepr())

        # rewrite file
        with open(fullpath, "w") as f:
            for line in nolabels:
                f.write(line)
        self.get_labels_from_file()
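# insert_label above shifts the line numbers of every label at or after the
# insertion point. A simplified sketch of that shift with plain integers
# instead of label objects:
from sortedcontainers import SortedList

labels = SortedList([10, 20, 30])   # line numbers of existing labels

def insert_at(lineno):
    i = labels.bisect_left(lineno)  # labels at or after the new line
    shifted = [l + 1 for l in labels[i:]]
    del labels[i:]
    labels.update(shifted)
    labels.add(lineno)

insert_at(20)
print(list(labels))  # -> [10, 20, 21, 31]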
Ejemplo n.º 22
0
class InclusionTreeBuilder:
    """
    This class builds a tree of polygons included in one another.
    It works through a sweep line algorithm,
    and also identifies each polygon as a hole or a filled polygon.
    """
    def __init__(self, polygons):
        # the algorithm works in O(n) (times sorted container's costs) in this way:
        # we have a SortedList of all currently crossed paths
        self.crossed_paths = SortedList()
        # for each polygon, a SortedList of all of its currently crossed paths
        self.polygons = defaultdict(SortedList)
        # when meeting a new polygon for the first time,
        # we insert it in the crossed_paths list; we get its top neighbour (smaller)
        # and the corresponding polygon.
        # if we are contained inside it, we are its child;
        # if we are not contained inside it, we are its sibling.
        # to figure out whether we are inside or not, we count the paths smaller
        # than us in the neighbour polygon's SortedList

        set_comparer(self)
        # we store all keys used for comparing paths
        # this speeds up keys computations and more importantly removes
        # rounding errors
        self.sweeping_keys = dict()

        polygons_number = self._create_events(polygons)
        self.current_point = None
        self.tree = InclusionTree()
        self.nodes = dict()  # store for each poly its node and father node

        for event in self.events:
            self.execute_event(event)
            if len(self.nodes) == polygons_number:
                return  # no need to finish the sweep once everyone is identified

    def _create_events(self, polygons):
        """
        create all start/end events for each path.
        each event is : a comparison key ; the path.
        """
        self.events = []
        polygons_number = 0
        for height, polys in polygons.items():  # avoid shadowing the parameter
            for polygon in polys:
                polygons_number += 1
                for segment in polygon_segments(height, polygon):
                    angle = segment.key_angle()
                    print("angle for", segment, "is", angle)
                    for point, event_type in zip(sorted(segment.endpoints),
                                                 (START_EVENT, END_EVENT)):
                        key = (point, event_type, -height)
                        raise Exception("we lack an angle here")
                        self.events.append((key, segment))
                        self.sweeping_keys[(id(segment), point)] =\
                            (point.coordinates[1], angle, -height)

        self.events.sort(key=lambda e: e[0])
        return polygons_number

    def key(self, path):
        """
        returns key at current point for given path.
        """
        key_id = (id(path), self.current_point)
        if key_id in self.sweeping_keys:
            return self.sweeping_keys[key_id]
        else:
            current_x = self.current_point.coordinates[0]
            return (path.vertical_intersection_at(current_x), path.key_angle(),
                    -path.height)

    def execute_event(self, event):
        """
        execute start path or end path event
        """
        event_key, event_path = event
        event_point, event_type = event_key[0:2]

        if event_type == START_EVENT:
            self.current_point = event_point
            self.start_path(event_path)
        else:
            self.end_path(event_path)
            self.current_point = event_point

        if __debug__:
            # very slow
            paths = iter(self.crossed_paths)
            previous_path = next(paths, None)
            for path in paths:
                if self.key(previous_path) >= self.key(path):
                    paths = list(self.crossed_paths)
                    print(paths)
                    print("previous", previous_path, self.key(previous_path))
                    print("current", path, self.key(path))
                    tycat(self.current_point, paths, previous_path, path)
                    raise Exception("pb ordre")
                previous_path = path

    def start_path(self, path):
        """
        handles incoming path
        """
        index = self.crossed_paths.bisect(path)
        self.crossed_paths.add(path)  # same position as insert(index, path)
        polygon = path.polygon_id()
        self.polygons[polygon].add(path)

        if polygon not in self.nodes:
            father_node = self.identify_father_node(path, index)
            new_node = father_node.add_child(path)
            self.nodes[polygon] = (new_node, father_node)
            print("adding", polygon, "as child of", id(father_node.content))

    def identify_father_node(self, path, index):
        """
        identify where polygon is in tree.
        we need the path and its position in crossed paths
        """
        if index == 0:
            # no one above us, we are below root
            return self.tree
        else:
            neighbour_polygon = self.crossed_paths[index - 1].polygon_id()
            above_paths = self.polygons[neighbour_polygon].bisect(path)
            if above_paths % 2:
                # an odd number of the neighbour's paths lie above us:
                # we are inside it
                return self.nodes[neighbour_polygon][0]
            else:
                # an even number of the neighbour's paths lie above us:
                # we are beside it
                return self.nodes[neighbour_polygon][1]

    def end_path(self, path):
        """
        handles ending path
        """
        print("removing", path, "from", self.crossed_paths)
        self.crossed_paths.remove(path)
        self.polygons[path.polygon_id()].remove(path)
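# identify_father_node above uses a parity test: count how many of the
# neighbour polygon's paths sort above the new path; odd means inside,
# even means beside. An illustrative sketch, assuming paths are keyed by
# the y at which they cross the sweep line:
from sortedcontainers import SortedList

neighbour_crossings = SortedList([1.0, 3.0, 6.0])

def inside_neighbour(y):
    return neighbour_crossings.bisect(y) % 2 == 1

print(inside_neighbour(2.0))  # True: one crossing above
print(inside_neighbour(4.0))  # False: two crossings above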
Ejemplo n.º 23
0
class ParetoFrontier:
    def __init__(self, vertex, lista=None):
        # The last element of the successors map has infinity as its successor.

        self.sucesores_map = {0: np.inf}
        self.predecesores_map = {np.inf: 0, 0: None}
        self.contenedor = []
        self.sorted_list = SortedList(self.contenedor)
        self.pareto_map = {0: np.inf}
        ##############################################################################
        self.vertex = vertex  # WATCH OUT: this attribute was just added
        # maps each label to a pair (previous vertex, previous x); does this need initializing?
        self.info_label = {}
        self.lista_labels = []

        if lista is not None:
            for label in lista:
                if label == (0, 0):
                    trazador = (None, None)
                else:
                    trazador = None

                self.add(label, trazador)

    def show_pareto(self):
        return self.pareto_map

    def show_pareto2(self):
        return self.pareto_map, self.sucesores_map, self.predecesores_map

    #def to_list(self):
    #   return list(self.sorted_list)

    def list_frontlabels(self):
        ## WATCH OUT: this is very inefficient and is only kept for printing;
        ## note that it always rebuilds the list from scratch.
        self.lista_labels = []
        #print('in list_frontlabels sorted_list:', self.sorted_list)
        #print('in list_frontlabels lista_labels', self.lista_labels)

        for x in self.sorted_list:
            #print('in list_frontlabels: x in sorted_list', x)
            self.lista_labels.append((x, self.pareto_map[x]))
        print('lista_labels', self.lista_labels)
        return self.lista_labels

    def x_in_pareto(self, x):
        return x in self.pareto_map

    def _xleft(self, x):

        if self.x_in_pareto(x):
            return x
        else:
            i = self.sorted_list.bisect(x)
            if (i == 0):
                return 0
            else:
                return self.sorted_list[i - 1]

    def _yleft(self, x):
        return self.pareto_map[self._xleft(x)]

    def check_dominance(self, label):
        x = label[0]
        y = label[1]
        return self._yleft(x) <= y

    # The following function implicitly assumes that labels are added to the
    # Pareto frontier one at a time.
    #def add(self, label, pure_pareto=True):
    def add(self, label, trazadorx=None, pure_pareto=True):
        indi_pareto_modified = False
        discard_set = set()
        x = label[0]
        y = label[1]
        x_left = self._xleft(x)

        # IF THE LABEL IS DOMINATED BY SOMETHING ALREADY IN THE PARETO FRONT
        if self.check_dominance(label):
            #if self.pareto_map[x_left] <= y:
            return self, indi_pareto_modified, discard_set

        if x_left != x:

            next_x = self.sucesores_map[x_left]
            self.sucesores_map[x] = next_x
            self.predecesores_map[next_x] = x

            self.sucesores_map[x_left] = x
            self.predecesores_map[x] = x_left

            self.contenedor.append(x)
            self.sorted_list.add(x)
        elif x not in self.sorted_list:
            self.contenedor.append(x)
            self.sorted_list.add(x)

        self.pareto_map[x] = y
        if trazadorx is not None:
            self.info_label[x] = trazadorx

        #print('WATCH OUT', (x, y))
        indi_pareto_modified = True

        sucesor = self.sucesores_map[x]
        #print('when x is:', x, 'successor is:', sucesor)

        if not pure_pareto:

            while sucesor < np.inf:
                if self.pareto_map[sucesor] > y:
                    self.pareto_map[sucesor] = y
                else:
                    break
                sucesor = self.sucesores_map[sucesor]

        else:
            while sucesor < np.inf:
                #print('ENTERING THE WHILE when x is:', x, 'successor is:', sucesor)
                # Careful: when a label is removed from the front, we must also
                # delete the information stored in info_label
                if self.pareto_map[sucesor] > y:
                    discard_set.add((sucesor, self.pareto_map[sucesor]))
                    del self.pareto_map[sucesor]
                    ########################################
                    del self.info_label[sucesor]
                    ########################################
                    nextt_x = self.sucesores_map[sucesor]
                    self.sucesores_map[x] = nextt_x
                    self.predecesores_map[nextt_x] = x

                    del self.sucesores_map[sucesor]
                    del self.predecesores_map[sucesor]

                    self.contenedor.remove(sucesor)
                    self.sorted_list.remove(sucesor)

                else:
                    break
                sucesor = self.sucesores_map[x]
        return self, indi_pareto_modified, discard_set

    def Delete_label(self, label):
        print('Entering Delete_label')
        x = label[0]
        y = label[1]
        if self.x_in_pareto(x):
            print(x, 'is on the Pareto front')
            del self.pareto_map[x]
            print('pareto_map after deleting', x, ':', self.pareto_map)
            ################################################################################
            # Careful: this can fail if x is not in info_label.
            del self.info_label[x]
            print('info_label after deleting', x, ':', self.info_label)
            ################################################################################
            predx = self.predecesores_map[x]
            sucx = self.sucesores_map[x]
            self.sucesores_map[predx] = sucx
            self.predecesores_map[sucx] = predx
            del self.sucesores_map[x]
            del self.predecesores_map[x]
            self.contenedor.remove(x)
            self.sorted_list.remove(x)
            print('sorted_list after deleting', x, ':', self.sorted_list)
            return self
        else:
            #raise ValueError()
            print('x is not on the Pareto front; returning the same ParetoFrontier')
            return self

    def label_track(self, x):
        return self.info_label[x]
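
The class above keeps the front in a SortedList keyed by x, together with an
x -> y map and predecessor/successor maps. A minimal self-contained sketch of
the same bisect-based pattern (all names here are hypothetical and assume y is
minimized; this is not the class above, only the idea it relies on):

from sortedcontainers import SortedList

class MiniParetoFront:
    def __init__(self):
        self.xs = SortedList()   # x values on the front, sorted
        self.y_at = {}           # x -> y for points on the front

    def _y_left(self, x):
        # y of the front point at x or immediately to its left.
        i = self.xs.bisect_right(x)
        return self.y_at[self.xs[i - 1]] if i else float("inf")

    def dominated(self, x, y):
        return self._y_left(x) <= y

    def add(self, x, y):
        if self.dominated(x, y):
            return False
        if x not in self.y_at:
            self.xs.add(x)
        self.y_at[x] = y
        # Evict every successor whose y the new point improves on.
        i = self.xs.index(x) + 1
        while i < len(self.xs) and self.y_at[self.xs[i]] > y:
            del self.y_at[self.xs.pop(i)]
        return True

For instance, after add(1, 5) and add(3, 2), the label (2, 6) is reported as
dominated, while add(2, 1) succeeds and evicts (3, 2) from the front.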
Ejemplo n.º 24
0
class DataPack(BasePack[Entry, Link, Group]):
    # pylint: disable=too-many-public-methods
    r"""A :class:`DataPack` contains a piece of natural language text and a
    collection of NLP entries (annotations, links, and groups). The natural
    language text could be a document, paragraph or in any other granularity.

    Args:
        pack_name (str, optional): A name for this data pack.
    """
    def __init__(self, pack_name: Optional[str] = None):
        super().__init__(pack_name)
        self._text = ""

        self.annotations: SortedList[Annotation] = SortedList()
        self.links: SortedList[Link] = SortedList()
        self.groups: SortedList[Group] = SortedList()
        self.generics: SortedList[Generics] = SortedList()

        self.__replace_back_operations: ReplaceOperationsType = []
        self.__processed_original_spans: List[Tuple[Span, Span]] = []
        self.__orig_text_len: int = 0

        self._index: DataIndex = DataIndex()

    def __getstate__(self):
        r"""
        In serialization,
            1) will serialize the annotation sorted list as a normal list;
            2) will not serialize the indices
        """
        state = super().__getstate__()
        state['annotations'] = list(state['annotations'])
        state['links'] = list(state['links'])
        state['groups'] = list(state['groups'])
        state['generics'] = list(state['generics'])
        return state

    def __setstate__(self, state):
        r"""
        In deserialization, we
            1) transform the annotation list back to a sorted list;
            2) initialize the indexes;
            3) obtain the pack ids.
        """
        super().__setstate__(state)

        # For backward compatibility.
        if 'replace_back_operations' in self.__dict__:
            self.__replace_back_operations = self.__dict__.pop(
                'replace_back_operations')
        if 'processed_original_spans' in self.__dict__:
            self.__processed_original_spans = self.__dict__.pop(
                'processed_original_spans')
        if 'orig_text_len' in self.__dict__:
            self.__orig_text_len = self.__dict__.pop('orig_text_len')

        self.annotations = SortedList(self.annotations)
        self.links = SortedList(self.links)
        self.groups = SortedList(self.groups)
        self.generics = SortedList(self.generics)

        self._index = DataIndex()
        self._index.update_basic_index(list(self.annotations))
        self._index.update_basic_index(list(self.links))
        self._index.update_basic_index(list(self.groups))
        self._index.update_basic_index(list(self.generics))

        for a in self.annotations:
            a.set_pack(self)

        for a in self.links:
            a.set_pack(self)

        for a in self.groups:
            a.set_pack(self)

        for a in self.generics:
            a.set_pack(self)

    def __iter__(self):
        yield from self.annotations
        yield from self.links
        yield from self.groups
        yield from self.generics

    def _init_meta(self, pack_name: Optional[str] = None) -> Meta:
        return Meta(pack_name)

    def _validate(self, entry: EntryType) -> bool:
        return isinstance(entry, SinglePackEntries)

    @property
    def text(self) -> str:
        r"""Return the text of the data pack"""
        return self._text

    @property
    def all_annotations(self) -> Iterator[Annotation]:
        """
        An iterator of all annotations in this data pack.

        Returns: Iterator of all annotations, of
          type :class:`~forte.data.ontology.top.Annotation`.

        """
        yield from self.annotations

    @property
    def num_annotations(self) -> int:
        """
        Number of annotations in this data pack.

        Returns: (int) Number of annotations.

        """
        return len(self.annotations)

    @property
    def all_links(self) -> Iterator[Link]:
        """
        An iterator of all links in this data pack.

        Returns: Iterator of all links, of
          type :class:`~forte.data.ontology.top.Link`.

        """
        yield from self.links

    @property
    def num_links(self) -> int:
        """
        Number of links in this data pack.

        Returns: Number of links.

        """
        return len(self.links)

    @property
    def all_groups(self) -> Iterator[Group]:
        """
        An iterator of all groups in this data pack.

        Returns: Iterator of all groups, of
          type :class:`~forte.data.ontology.top.Group`.

        """
        yield from self.groups

    @property
    def num_groups(self):
        """
        Number of groups in this data pack.

        Returns: Number of groups.

        """
        return len(self.groups)

    @property
    def all_generic_entries(self) -> Iterator[Generics]:
        """
        An iterator of all generic entries in this data pack.

        Returns: Iterator of all generic entries.

        """
        yield from self.generics

    @property
    def num_generics_entries(self):
        """
        Number of generics entries in this data pack.

        Returns: Number of generics entries.

        """
        return len(self.generics)

    def get_span_text(self, span: Span) -> str:
        r"""Get the text in the data pack contained in the span

        Args:
            span (Span): Span object which contains a `begin` and an `end` index

        Returns:
            The text within this span
        """
        return self._text[span.begin:span.end]

    def set_text(
            self,
            text: str,
            replace_func: Optional[Callable[[str],
                                            ReplaceOperationsType]] = None):

        if len(text) < len(self._text):
            raise ProcessExecutionException(
                "The new text is overwriting the original one with shorter "
                "length, which might cause unexpected behavior.")

        if len(self._text):
            logging.warning("Need to be cautious when changing the text of a "
                            "data pack, existing entries may get affected. ")

        span_ops = [] if replace_func is None else replace_func(text)

        # The spans should be mutually exclusive
        (self._text, self.__replace_back_operations,
         self.__processed_original_spans,
         self.__orig_text_len) = data_utils_io.modify_text_and_track_ops(
             text, span_ops)

    def get_original_text(self):
        r"""Get original unmodified text from the :class:`DataPack` object.

        Returns:
            Original text after applying the `replace_back_operations` of
            :class:`DataPack` object to the modified text
        """
        original_text, _, _, _ = data_utils_io.modify_text_and_track_ops(
            self._text, self.__replace_back_operations)
        return original_text

    def get_original_span(self,
                          input_processed_span: Span,
                          align_mode: str = "relaxed"):
        r"""Function to obtain span of the original text that aligns with the
        given span of the processed text.

        Args:
            input_processed_span: Span of the processed text for which the
            corresponding span of the original text is desired
            align_mode: The strictness criteria for alignment in the ambiguous
            cases, that is, if a part of input_processed_span spans a part
            of the inserted span, then align_mode controls whether to use the
            span fully or ignore it completely according to the following
            possible values

            - "strict" - do not allow ambiguous input, give ValueError
            - "relaxed" - consider spans on both sides
            - "forward" - align looking forward, that is, ignore the span
            towards the left, but consider the span towards the right
            - "backward" - align looking backwards, that is, ignore the span
            towards the right, but consider the span towards the left

        Returns:
            Span of the original text that aligns with input_processed_span

        Example:
            * Let o-up1, o-up2, ... and m-up1, m-up2, ... denote the unprocessed
              spans of the original and modified string respectively. Note that
              each o-up would have a corresponding m-up of the same size.
            * Let o-pr1, o-pr2, ... and m-pr1, m-pr2, ... denote the processed
              spans of the original and modified string respectively. Note that
              each o-pr is modified to a corresponding m-pr that may be of a
              different size than o-pr.
            * Original string:
              <--o-up1--> <-o-pr1-> <----o-up2----> <----o-pr2----> <-o-up3->
            * Modified string:
              <--m-up1--> <----m-pr1----> <----m-up2----> <-m-pr2-> <-m-up3->
            * Note that `self.inverse_original_spans` that contains modified
              processed spans and their corresponding original spans, would look
              like - [(o-pr1, m-pr1), (o-pr2, m-pr2)]

            >> data_pack = DataPack()
            >> original_text = "He plays in the park"
            >> data_pack.set_text(original_text,\
            >>                    lambda _: [(Span(0, 2), "She")])
            >> data_pack.text
            "She plays in the park"
            >> input_processed_span = Span(0, len("She plays"))
            >> orig_span = data_pack.get_original_span(input_processed_span)
            >> data_pack.get_original_text()[orig_span.begin: orig_span.end]
            "He plays"
        """
        assert align_mode in ["relaxed", "strict", "backward", "forward"]

        req_begin = input_processed_span.begin
        req_end = input_processed_span.end

        def get_original_index(input_index: int, is_begin_index: bool,
                               mode: str) -> int:
            r"""
            Args:
                input_index: begin or end index of the input span
                is_begin_index: if the index is the begin index of the input
                span or the end index of the input span
                mode: alignment mode
            Returns:
                Original index that aligns with input_index
            """
            if len(self.__processed_original_spans) == 0:
                return input_index

            len_processed_text = len(self._text)
            orig_index = None
            prev_end = 0
            for (inverse_span,
                 original_span) in self.__processed_original_spans:
                # check if the input_index lies between one of the unprocessed
                # spans
                if prev_end <= input_index < inverse_span.begin:
                    increment = original_span.begin - inverse_span.begin
                    orig_index = input_index + increment
                # check if the input_index lies between one of the processed
                # spans
                elif inverse_span.begin <= input_index < inverse_span.end:
                    # look backward - backward shift of input_index
                    if is_begin_index and mode in ["backward", "relaxed"]:
                        orig_index = original_span.begin
                    if not is_begin_index and mode == "backward":
                        orig_index = original_span.begin - 1

                    # look forward - forward shift of input_index
                    if is_begin_index and mode == "forward":
                        orig_index = original_span.end
                    if not is_begin_index and mode in ["forward", "relaxed"]:
                        orig_index = original_span.end - 1

                # break if the original index is populated
                if orig_index is not None:
                    break
                prev_end = inverse_span.end

            if orig_index is None:
                # check if the input_index lies between the last unprocessed
                # span
                inverse_span, original_span = self.__processed_original_spans[
                    -1]
                if inverse_span.end <= input_index < len_processed_text:
                    increment = original_span.end - inverse_span.end
                    orig_index = input_index + increment
                else:
                    # check if the input_index is not valid given the
                    # alignment mode or lies outside the processed string
                    raise ValueError(f"The input span either does not adhere "
                                     f"to the {align_mode} alignment mode or "
                                     f"lies outside to the processed string.")
            return orig_index

        orig_begin = get_original_index(req_begin, True, align_mode)
        orig_end = get_original_index(req_end - 1, False, align_mode) + 1

        return Span(orig_begin, orig_end)

    @classmethod
    def deserialize(cls, data_pack_string: str) -> "DataPack":
        """
        Deserialize a Data Pack from a string. This internally calls the
        internal :meth:`~forte.data.base_pack.BasePack._deserialize` function
        from :class:`~forte.data.base_pack.BasePack`.

        Args:
            data_pack_string: The serialized string of a data pack to be
              deserialized.

        Returns:
            A data pack object deserialized from the string.
        """
        return cls._deserialize(data_pack_string)

    def _add_entry(self, entry: EntryType) -> EntryType:
        r"""Force add an :class:`~forte.data.ontology.core.Entry` object to the
        :class:`DataPack` object. Allow duplicate entries in a pack.

        Args:
            entry (Entry): An :class:`~forte.data.ontology.core.Entry`
                object to be added to the pack.

        Returns:
            The input entry itself
        """
        return self.__add_entry_with_check(entry, True)

    def __add_entry_with_check(self,
                               entry: EntryType,
                               allow_duplicate: bool = True) -> EntryType:
        r"""Internal method to add an :class:`Entry` object to the
        :class:`DataPack` object.

        Args:
            entry (Entry): An :class:`Entry` object to be added to the datapack.
            allow_duplicate (bool): Whether we allow duplicate in the datapack.

        Returns:
            The input entry itself
        """
        if isinstance(entry, Annotation):
            target = self.annotations

            begin, end = entry.span.begin, entry.span.end

            if begin < 0:
                raise ValueError(f'The begin {begin} is smaller than 0, this '
                                 f'is not a valid begin.')

            if end > len(self.text):
                if len(self.text) == 0:
                    raise ValueError(
                        f"The end {end} of span is greater than the text "
                        f"length {len(self.text)}, which is invalid. The text "
                        f"length is 0, so it may be the case the you haven't "
                        f"set text for the data pack. Please set the text "
                        f"before calling `add_entry` on the annotations.")
                else:
                    raise ValueError(
                        f"The end {end} of span is greater than the text "
                        f"length {len(self.text)}, which is invalid. The "
                        f"problematic entry is of type {entry.__class__} "
                        f"at [{begin}:{end}]")

        elif isinstance(entry, Link):
            target = self.links
        elif isinstance(entry, Group):
            target = self.groups
        elif isinstance(entry, Generics):
            target = self.generics
        else:
            raise ValueError(
                f"Invalid entry type {type(entry)}. A valid entry "
                f"should be an instance of Annotation, Link, Group of Generics."
            )

        # TODO: duplicate is ill-defined.
        add_new = allow_duplicate or (entry not in target)

        if add_new:
            target.add(entry)

            # update the data pack index if needed
            self._index.update_basic_index([entry])
            if self._index.link_index_on and isinstance(entry, Link):
                self._index.update_link_index([entry])
            if self._index.group_index_on and isinstance(entry, Group):
                self._index.update_group_index([entry])
            self._index.deactivate_coverage_index()

            self._pending_entries.pop(entry.tid)

            return entry
        else:
            return target[target.index(entry)]

    def delete_entry(self, entry: EntryType):
        r"""Delete an :class:`~forte.data.ontology.core.Entry` object from the
        :class:`DataPack`. This finds the entry in the index and removes it
        from the index. Note that entries will only appear in the index if
        `add_entry` (or `_add_entry_with_check`) is called.

        Please note that deleting an entry does not guarantee the deletion of
        the related entries.

        Args:
            entry (Entry): An :class:`~forte.data.ontology.core.Entry`
                object to be deleted from the pack.

        """
        if isinstance(entry, Annotation):
            target = self.annotations
        elif isinstance(entry, Link):
            target = self.links
        elif isinstance(entry, Group):
            target = self.groups
        elif isinstance(entry, Generics):
            target = self.generics
        else:
            raise ValueError(
                f"Invalid entry type {type(entry)}. A valid entry "
                f"should be an instance of Annotation, Link, or Group.")

        begin: int = target.bisect_left(entry)

        index_to_remove = -1
        for i, e in enumerate(target[begin:]):
            if e.tid == entry.tid:
                index_to_remove = begin + i
                break

        if index_to_remove < 0:
            logger.warning(
                "The entry with id %d that you are trying to remove "
                "does not exist in the data pack's index. It was probably "
                "created but never added in the first place.", entry.tid)
        else:
            target.pop(index_to_remove)

        # update basic index
        self._index.remove_entry(entry)

        # set other index invalid
        self._index.turn_link_index_switch(on=False)
        self._index.turn_group_index_switch(on=False)
        self._index.deactivate_coverage_index()

    @classmethod
    def validate_link(cls, entry: EntryType) -> bool:
        return isinstance(entry, Link)

    @classmethod
    def validate_group(cls, entry: EntryType) -> bool:
        return isinstance(entry, Group)

    def get_data(self,
                 context_type: Type[Annotation],
                 request: Optional[DataRequest] = None,
                 skip_k: int = 0) -> Iterator[Dict[str, Any]]:
        r"""Fetch entries from the data_pack of type `context_type`.

        Currently, we do not support Groups and Generics in the request.

        Example:

            .. code-block:: python

                requests = {
                    base_ontology.Sentence:
                        {
                            "component": ["dummy"],
                            "fields": ["speaker"],
                        },
                    base_ontology.Token: ["pos", "sense"],
                    base_ontology.EntityMention: {
                        "unit": "Token",
                    },
                }
                pack.get_data(base_ontology.Sentence, requests)

        Args:
            context_type (str): The granularity of the data context, which
                could be any ``Annotation`` type.
            request (dict): The entry types and fields required.
                The keys of the requests dict are the required entry types
                and the value should be either:

                - a list of field names or
                - a dict which accepts three keys: `"fields"`, `"component"`,
                  and `"unit"`.

                    - By setting `"fields"` (list), users
                      specify the requested fields of the entry. If "fields"
                      is not specified, only the default fields will be
                      returned.
                    - By setting `"component"` (list), users
                      can specify the components by which the entries are
                      generated. If `"component"` is not specified, will return
                      entries generated by all components.
                    - By setting `"unit"` (string), users can
                      specify a unit by which the annotations are indexed.

                Note that for all annotation types, `"text"` and `"span"`
                fields are returned by default; for all link types, `"child"`
                and `"parent"` fields are returned by default.
            skip_k (int): Will skip the first `skip_k` instances and generate
                data from the (`skip_k` + 1)th instance.

        Returns:
            A data generator, which generates one piece of data (a dict
            containing the required entries, fields, and context).
        """
        annotation_types: Dict[Type[Annotation], Union[Dict, List]] = dict()
        link_types: Dict[Type[Link], Union[Dict, List]] = dict()
        group_types: Dict[Type[Group], Union[Dict, List]] = dict()
        generics_types: Dict[Type[Generics], Union[Dict, List]] = dict()

        if request is not None:
            for key, value in request.items():
                if issubclass(key, Annotation):
                    annotation_types[key] = value
                elif issubclass(key, Link):
                    link_types[key] = value
                elif issubclass(key, Group):
                    group_types[key] = value
                elif issubclass(key, Generics):
                    generics_types[key] = value

        context_args = annotation_types.get(context_type)

        context_components, _, context_fields = self._parse_request_args(
            context_type, context_args)

        valid_context_ids: Set[int] = self.get_ids_by_type_subtype(
            context_type)
        if context_components:
            valid_component_id: Set[int] = set()
            for component in context_components:
                valid_component_id |= self.get_ids_by_creator(component)
            valid_context_ids &= valid_component_id

        skipped = 0
        # must iterate through a copy here because self.annotations is changing
        for context in list(self.annotations):
            if (context.tid not in valid_context_ids
                    or not isinstance(context, context_type)):
                continue
            if skipped < skip_k:
                skipped += 1
                continue

            data: Dict[str, Any] = dict()
            data["context"] = self.text[context.span.begin:context.span.end]
            data["offset"] = context.span.begin

            for field in context_fields:
                data[field] = getattr(context, field)

            if annotation_types:
                for a_type, a_args in annotation_types.items():
                    if issubclass(a_type, context_type):
                        continue
                    if a_type.__name__ in data.keys():
                        raise KeyError(
                            f"Requesting two types of entries with the "
                            f"same class name {a_type.__name__} at the "
                            f"same time is not allowed")
                    data[a_type.__name__] = \
                        self._generate_annotation_entry_data(
                            a_type, a_args, data, context)

            if link_types:
                for l_type, l_args in link_types.items():
                    if l_type.__name__ in data.keys():
                        raise KeyError(
                            f"Requesting two types of entries with the "
                            f"same class name {l_type.__name__} at the "
                            f"same time is not allowed")
                    data[l_type.__name__] = self._generate_link_entry_data(
                        l_type, l_args, data, context)

            # TODO: Getting Group based on range is not done yet.
            if group_types:
                raise NotImplementedError("Querying groups based on ranges is "
                                          "currently not supported.")

            if generics_types:
                raise NotImplementedError(
                    "Querying generic types based on ranges is "
                    "currently not supported.")

            yield data

    def _parse_request_args(self, a_type, a_args):
        # request which fields generated by which component
        components = None
        unit = None
        fields = set()
        if isinstance(a_args, dict):
            components = a_args.get("component")
            # pylint: disable=isinstance-second-argument-not-valid-type
            # TODO: until fix: https://github.com/PyCQA/pylint/issues/3507
            if components is not None and not isinstance(components, Iterable):
                raise TypeError(
                    "Invalid request format for 'components'. "
                    "The value of 'components' should be of an iterable type.")
            unit = a_args.get("unit")
            if unit is not None and not isinstance(unit, str):
                raise TypeError("Invalid request format for 'unit'. "
                                "The value of 'unit' should be a string.")
            a_args = a_args.get("fields", set())

        # pylint: disable=isinstance-second-argument-not-valid-type
        # TODO: disable until fix: https://github.com/PyCQA/pylint/issues/3507
        if isinstance(a_args, Iterable):
            fields = set(a_args)
        elif a_args is not None:
            raise TypeError(
                f"Invalid request format for '{a_type}'. "
                f"The request should be of an iterable type or a dict.")

        fields.add("tid")
        return components, unit, fields
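
    # For example, _parse_request_args(Token, {"fields": ["pos"], "unit": "Token"})
    # returns (None, "Token", {"pos", "tid"}), while a plain list request such
    # as ["pos"] yields (None, None, {"pos", "tid"}).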

    def _generate_annotation_entry_data(self, a_type: Type[Annotation],
                                        a_args: Union[Dict,
                                                      Iterable], data: Dict,
                                        cont: Optional[Annotation]) -> Dict:

        components, unit, fields = self._parse_request_args(a_type, a_args)

        a_dict: Dict[str, Any] = dict()

        a_dict["span"] = []
        a_dict["text"] = []
        for field in fields:
            a_dict[field] = []

        unit_begin = 0
        if unit is not None:
            if unit not in data.keys():
                raise KeyError(f"{unit} is missing in data. You need to "
                               f"request {unit} before {a_type}.")
            a_dict["unit_span"] = []

        cont_begin = cont.span.begin if cont else 0

        annotation: Annotation
        for annotation in self.get(a_type, cont, components):
            # we provide span, text (and also tid) by default
            a_dict["span"].append((annotation.span.begin, annotation.span.end))
            a_dict["text"].append(annotation.text)

            for field in fields:
                if field in ("span", "text"):
                    continue
                if field == "context_span":
                    a_dict[field].append((annotation.span.begin - cont_begin,
                                          annotation.span.end - cont_begin))
                    continue

                a_dict[field].append(getattr(annotation, field))

            if unit is not None:
                while not self._index.in_span(data[unit]["tid"][unit_begin],
                                              annotation.span):
                    unit_begin += 1

                unit_span_begin = unit_begin
                unit_span_end = unit_span_begin + 1

                while self._index.in_span(data[unit]["tid"][unit_span_end],
                                          annotation.span):
                    unit_span_end += 1

                a_dict["unit_span"].append((unit_span_begin, unit_span_end))

        for key, value in a_dict.items():
            a_dict[key] = np.array(value)

        return a_dict

    def _generate_link_entry_data(self, a_type: Type[Link],
                                  a_args: Union[Dict, Iterable], data: Dict,
                                  cont: Optional[Annotation]) -> Dict:

        components, unit, fields = self._parse_request_args(a_type, a_args)

        if unit is not None:
            raise ValueError(f"Link entries cannot be indexed by {unit}.")

        a_dict: Dict[str, Any] = dict()
        for field in fields:
            a_dict[field] = []
        a_dict["parent"] = []
        a_dict["child"] = []

        link: Link
        for link in self.get(a_type, cont, components):
            parent_type = link.ParentType.__name__
            child_type = link.ChildType.__name__

            if parent_type not in data.keys():
                raise KeyError(
                    f"The Parent entry of {a_type} is not requested."
                    f" You should also request {parent_type} with "
                    f"{a_type}")
            if child_type not in data.keys():
                raise KeyError(f"The child entry of {a_type} is not requested."
                               f" You should also request {child_type} with "
                               f"{a_type}")

            a_dict["parent"].append(
                np.where(data[parent_type]["tid"] == link.parent)[0][0])
            a_dict["child"].append(
                np.where(data[child_type]["tid"] == link.child)[0][0])

            for field in fields:
                if field in ("parent", "child"):
                    continue

                a_dict[field].append(getattr(link, field))

        for key, value in a_dict.items():
            a_dict[key] = np.array(value)
        return a_dict

    def build_coverage_for(self, context_type: Type[Annotation],
                           covered_type: Type[EntryType]):
        """
        Users can call this function to build a coverage index for specific types.
          The index provides an in-memory mapping from entries of `context_type`
          to the entries "covered" by it.
          See :class:`forte.data.data_pack.DataIndex` for more details.

        Args:
            context_type: The context/covering type.
            covered_type: The entry to find under the context type.

        """
        if self._index.coverage_index(context_type, covered_type) is None:
            self._index.build_coverage_index(self, context_type, covered_type)

    def iter_in_range(self, entry_type: Type[EntryType],
                      range_annotation: Annotation) -> Iterator[EntryType]:
        """
        Iterate over the entries of the provided type that are within, or
        fulfill the constraint of, the `range_annotation`. The constraint is
        True if an entry is `in_span` of the provided `range_annotation`.

        Internally, if the coverage index between the entry type and the
        type of the `range_annotation` is built, then this will create the
        iterator from the index. Otherwise, the function will iterate them
        from scratch (which is slower). If this function is used frequently,
        it is suggested to build the coverage index.

        Args:
            entry_type: The type of entry to iterate over.
            range_annotation: The range annotation that serves as the constraint.

        Returns:
            An iterator of the entries within the `range_annotation`.

        """
        use_coverage = self._index.coverage_index_is_valid
        coverage_index: Optional[Dict[int, Set[int]]] = {}

        if use_coverage:
            coverage_index = self._index.coverage_index(
                type(range_annotation), entry_type)
            if coverage_index is None:
                use_coverage = False

        if use_coverage and coverage_index is not None:
            for tid in coverage_index[range_annotation.tid]:
                yield self.get_entry(tid)  # type: ignore
        else:
            if issubclass(entry_type, Annotation):
                range_begin = (range_annotation.span.begin
                               if range_annotation else 0)
                range_end = (range_annotation.span.end if range_annotation else
                             self.annotations[-1].span.end)

                temp_begin = Annotation(self, range_begin, range_begin)
                begin_index = self.annotations.bisect(temp_begin)

                temp_end = Annotation(self, range_end, range_end)
                end_index = self.annotations.bisect(temp_end)

                # Make sure these temporary annotations are not part of the
                # actual data.
                temp_begin.regret_creation()
                temp_end.regret_creation()
                yield from self.annotations[begin_index:end_index]
            elif issubclass(entry_type, Link):
                for link in self.links:
                    if self._index.in_span(link, range_annotation.span):
                        yield link
            elif issubclass(entry_type, Group):
                for group in self.groups:
                    if self._index.in_span(group, range_annotation.span):
                        yield group

    def get(
            self,
            entry_type: Type[EntryType],  # type: ignore
            range_annotation: Optional[Annotation] = None,
            components: Optional[Union[str, Iterable[str]]] = None,
            include_sub_type=True) -> Iterable[EntryType]:
        r"""This function is used to get data from a data pack with various
        methods.

        Depending on the provided arguments, the function will perform several
        different filtering of the returned data.

        The `entry_type` is mandatory, where all the entries matching this type
        will be returned. The sub-types of the provided entry type will be
        also returned if `include_sub_type` is set to True (which is the
        default behavior).

        The `range_annotation` controls the search area of the sub-types. An
        entry `E` will be returned if ``in_span(E, range_annotation)``
        returns True. If this function is called frequently with queries
        related to the `range_annotation`, please consider to build the coverage
        index regarding the related entry types.

        The `components` list will filter the results by the `component` (i.e.
        the creator of the entry). If `components` is provided, only the entries
        created by one of the `components` will be returned.

        Example:

            .. code-block:: python

                # Iterate through all the sentences in the pack.
                for sentence in input_pack.get(Sentence):
                    # Take all tokens from a sentence created by NLTKTokenizer.
                    token_entries = input_pack.get(
                        entry_type=Token,
                        range_annotation=sentence,
                        component='NLTKTokenizer')
                    ...

            In the above code snippet, we get entries of type ``Token`` within
            each ``sentence`` which were generated by ``NLTKTokenizer``. You
            can consider building a coverage index between `Token` and
            `Sentence` if this snippet is frequently used.

        Args:
            entry_type (type): The type of entries requested.
            range_annotation (Annotation, optional): The range of entries
                requested. If `None`, will return valid entries in the range of
                whole data_pack.
            components (str or list, optional): The component (creator)
                generating the entries requested. If `None`, will return valid
                entries generated by any component.
            include_sub_type (bool): whether to consider the sub types of
                the provided entry type. Default `True`.
        """
        def require_annotations() -> bool:
            if issubclass(entry_type, Annotation):
                return True
            if issubclass(entry_type, Link):
                return (issubclass(entry_type.ParentType, Annotation)
                        and issubclass(entry_type.ChildType, Annotation))
            if issubclass(entry_type, Group):
                return issubclass(entry_type.MemberType, Annotation)
            return False

        # If we don't have any annotations but the items to check require them,
        # then we simply yield from an empty list.
        if len(self.annotations) == 0 and range_annotation is not None:
            if require_annotations():
                yield from []
                return

        # Valid entry ids based on type.
        all_types: Set[Type]
        if include_sub_type:
            all_types = self._expand_to_sub_types(entry_type)
        else:
            all_types = {entry_type}

        entry_iter: Iterator[Entry]
        if issubclass(entry_type, Generics):
            entry_iter = self.generics
        elif range_annotation is not None:
            if (issubclass(entry_type, Annotation)
                    or issubclass(entry_type, Link)
                    or issubclass(entry_type, Group)):
                entry_iter = self.iter_in_range(entry_type, range_annotation)
        elif issubclass(entry_type, Annotation):
            entry_iter = self.annotations
        elif issubclass(entry_type, Link):
            entry_iter = self.links
        elif issubclass(entry_type, Group):
            entry_iter = self.groups
        else:
            raise ValueError(
                f"The requested type {str(entry_type)} is not supported.")

        for entry in entry_iter:
            # Filter by type and components.
            if type(entry) not in all_types:
                continue
            if components is not None:
                if not self.is_created_by(entry, components):
                    continue
            yield entry  # type: ignore
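
As a minimal sketch of the bisect-based range lookup that iter_in_range
performs on the annotations SortedList (using hypothetical (begin, end) tuples
instead of the Forte Annotation type, and assuming annotations sort by span):

from sortedcontainers import SortedKeyList

spans = SortedKeyList([(0, 2), (3, 8), (4, 6), (9, 12)],
                      key=lambda s: (s[0], s[1]))

def spans_in_range(begin, end):
    # Like the temporary (begin, begin) / (end, end) annotations above:
    # two bisects delimit the slice of spans starting inside the window.
    lo = spans.bisect_key_left((begin, begin))
    hi = spans.bisect_key_right((end, end))
    return list(spans[lo:hi])

print(spans_in_range(3, 9))  # [(3, 8), (4, 6)]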
Ejemplo n.º 25
0
# 2012. Sum of Beauty in the Array
# https://leetcode.com/problems/sum-of-beauty-in-the-array/

from typing import List
from sortedcontainers import SortedList

class Solution:
    def sumOfBeauties(self, nums: List[int]) -> int:
        n = len(nums)
        left = SortedList()
        left.add(nums[0])
        
        right = SortedList()
        for i in range(2, n):
            right.add(nums[i])
        res = 0
        
        for i in range(1, n - 1):
            left_index = left.bisect(nums[i])
            right_index = right.bisect_left(nums[i])

            if left_index == len(left) and left[left_index - 1] < nums[i] and right_index == 0 and right[0] > nums[i]:
                res += 2
            elif nums[i - 1] < nums[i] < nums[i + 1]:
                res += 1

            left.add(nums[i])
            right.remove(nums[i + 1])

        return res
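
A quick check of the two-window logic above, using the examples from the
problem statement:

print(Solution().sumOfBeauties([1, 2, 3]))     # 2: nums[1] beats everything on both sides
print(Solution().sumOfBeauties([2, 4, 6, 4]))  # 1: only the local check passes at i = 1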