def process_entry(self, entry: str):
    """
    Parse a whole entry and analyse the noun phrases that contain triggers.

    The entry is run through ``self.nlp``. For every sentence of the result,
    ``self.count_tokens`` is called when ``self.do_general_analysis`` is
    truthy. Each noun chunk of the sentence is then matched
    (case-insensitively, by substring) against every trigger in
    ``self.all_triggers``; accepted matches are forwarded to
    ``self.__process_np`` together with the parsed document and the shared
    ``Analysis`` object.

    :param entry: raw text of the entry to process
    :return: the ``Analysis`` instance handed to every ``__process_np`` call
    """
    parsed = self.nlp(entry)
    analysis = Analysis()
    for sentence in parsed.sents:
        if self.do_general_analysis:
            self.count_tokens(sentence)
        for chunk in sentence.noun_chunks:
            # Coverage bookkeeping starts from scratch for every noun chunk.
            seen = RangeSet()
            for trigger in self.all_triggers:
                # A single ``find`` both tests for presence and yields the
                # position of the first occurrence.
                offset = chunk.text.lower().find(trigger)
                if offset < 0:
                    continue
                # NOTE(review): this candidate range is built from character
                # offsets within the chunk text, while the range recorded in
                # ``seen`` below uses ``chunk.start``/``chunk.end`` -- confirm
                # that both are meant to live in the same coordinate space.
                candidate = Range(offset, len(chunk.text) + offset)
                if candidate.intersection(seen):
                    continue
                seen.add(Range(chunk.start, chunk.end))
                self.__process_np(trigger, chunk, parsed, analysis)
    return analysis
def expand_star(self, max_line: int, other_split_files):
    """
    Extend this split's lines using the lines of the other split files.

    Builds the inclusive range ``1..max_line`` for the source file, unions
    the ``_lines`` of every entry in ``other_split_files``, and extends
    ``self._lines`` with the symmetric difference of the two.

    :param max_line: last line number of the source file (inclusive)
    :param other_split_files: iterable of objects exposing a ``_lines``
        attribute that ``RangeSet.union`` accepts
    """
    whole_file = Range(1, max_line, include_end=True)
    claimed = RangeSet()
    for other in other_split_files:
        claimed = claimed.union(other._lines)  # pylint: disable=protected-access
    # NOTE(review): presumably every other split's lines already lie within
    # 1..max_line, in which case this is exactly the set of unclaimed lines.
    self._lines.extend(whole_file.symmetric_difference(claimed))
def test_rangeset_docstring():
    """Exercise several ways of constructing a RangeSet and check their str()."""
    empty = RangeSet()
    from_list = RangeSet([Range(0, 1), Range(2, 3), Range(4, 5)])
    from_args = RangeSet(Range(0, 1), Range(2, 3), Range(4, 5))
    from_strings = RangeSet("[0, 1)", ["[1.5, 2)", "[2.5, 3)"], "[4, 5]")

    assert str(empty) == "{}"
    assert str(from_list) == "{[0, 1), [2, 3), [4, 5)}"
    assert str(from_args) == "{[0, 1), [2, 3), [4, 5)}"
    assert from_list == from_args
    assert str(from_strings) == "{[0, 1), [1.5, 2), [2.5, 3), [4, 5]}"

    # A list of lists of ranges is expected to be rejected with ValueError.
    nested = [[Range(0, 1), Range(2, 3)], [Range(4, 5), Range(6, 7)]]
    asserterror(ValueError, RangeSet, (nested, ))

    # Overlapping inputs are expected to collapse into a single range.
    merged = RangeSet("[0, 3]", "[2, 4)", "[5, 6]")
    assert str(merged) == "{[0, 4), [5, 6]}"
def _create_line_ranges(self, split_data: collections.abc.Mapping,
                        max_line: int):
    """
    Parse the ``"lines"`` entry of a split definition into a ``RangeSet``.

    The value is a comma-separated list whose items are either a single line
    number (``"7"``), an inclusive span (``"3-9"``) or the marker ``"*"``.
    A ``"*"`` item contributes no range; it only sets ``self._has_star``.

    :param split_data: mapping describing the split; its ``"lines"`` value
        is read
    :param max_line: highest valid line number; every start and end must
        satisfy ``0 < n <= max_line``
    :return: a ``RangeSet`` holding one end-inclusive ``Range`` per numeric
        item
    :raises ConfigError: if ``"lines"`` is missing, empty or blank, or if an
        item is not numeric, is out of range, or is rejected by ``Range``
        with a ``ValueError``
    """
    lines = split_data.get("lines")
    if not lines or not lines.strip():
        raise ConfigError(
            f'No lines specified for split file "{self._path}".')

    range_set = RangeSet()
    for line_range in lines.split(","):
        # "a-b" -> ("a", "-", "b"); a bare "a" leaves ``end`` empty.
        start, _, end = line_range.partition("-")
        if start.strip() == "*":
            self._has_star = True
            continue
        try:
            start = int(start)
            # A missing end means the item names a single line.
            end = int(end) if end else start
            if not 0 < start <= max_line or not 0 < end <= max_line:
                raise ValueError(f"Out of range (1-{max_line})")
            range_set.add(Range(start, end, include_end=True))
        except ValueError as ex:
            # Chain explicitly so the traceback shows the parsing error as
            # the direct cause of the configuration error.
            raise ConfigError(
                f'Invalid lines for split file "{self._path}": {ex}') from ex
    return range_set
def test_rangeset_constructor_valid(args, ranges, strr, reprr, isempty):
    """
    Check that the RangeSet constructor yields the expected contents.

    As a byproduct this also covers .ranges(), .__str__(), .__repr__(),
    .isempty(), truthiness, hashing of a copy, and .clear().
    """
    built = RangeSet(*args)

    # State right after construction.
    assert ranges == built.ranges()
    assert strr == str(built)
    assert reprr == repr(built)
    assert isempty == built.isempty()
    assert isempty != bool(built)
    assert hash(built) == hash(built.copy())

    # State after clearing: must look like an empty RangeSet.
    built.clear()
    assert "{}" == str(built)
    assert "RangeSet{}" == repr(built)
    assert built.isempty()
    assert not built
def test_issue8():
    """
    Regression test for issue 8.

    Issue: adding a Range to a RangeSet containing two non-overlapping
    ranges, such that the new range overlaps with one but not the other,
    led to a TypeError being raised.
    Cause: code was passing a linked-list node instead of the node's value
    (a range).
    """
    try:
        # Grow the second range upwards.
        ascending = RangeSet()
        for rng in (Range(100, 300), Range(400, 500), Range(500, 600)):
            ascending.add(rng)
        assert str(ascending) == "{[100, 300), [400, 600)}"

        # Grow the first range downwards.
        descending = RangeSet()
        for rng in (Range(400, 600), Range(200, 300), Range(100, 200)):
            descending.add(rng)
        assert str(descending) == "{[100, 300), [400, 600)}"
    except TypeError:
        fail(
            "RangeSet should not have an issue concatenating to the second range of two in a RangeSet"
        )
2018, 12, 1), False, "[2017-05-27, 2018-02-02]", "Range[datetime.date(2017, 5, 27), datetime.date(2018, 2, 2)]"), (Range(datetime.timedelta(0, 3600), datetime.timedelta( 0, 7200)), datetime.timedelta(0, 6000), True, "[1:00:00, 2:00:00)", "Range[datetime.timedelta(seconds=3600), datetime.timedelta(seconds=7200))" ), (Range(datetime.timedelta(1, 1804), datetime.timedelta(3)), datetime.timedelta(1), False, "[1 day, 0:30:04, 3 days, 0:00:00)", "Range[datetime.timedelta(days=1, seconds=1804), datetime.timedelta(days=3))" ), (Range("begin", "end"), "middle", False, "[begin, end)", "Range['begin', 'end')"), (Range("begin", "end"), "cows", True, "[begin, end)", "Range['begin', 'end')"), # RangeSets (Range(1, 10), RangeSet(Range(2, 9)), True, "[1, 10)", "Range[1, 10)"), (Range(1, 10), RangeSet(Range(2, 3), Range(4, 5), Range(6, 7), Range( 8, 9)), True, "[1, 10)", "Range[1, 10)"), (Range(1, 10), RangeSet(Range(0, 11)), False, "[1, 10)", "Range[1, 10)"), (Range(1, 4), RangeSet(Range(2, 3), Range( 5, 6)), False, "[1, 4)", "Range[1, 4)"), ]) def test_range_contains(rng, item, contains, strr, reprr): """ Tests the __contains__, __str__, __repr__, and __hash__ methods of the range. """ assert (contains == (item in rng)) assert (strr == str(rng)) assert (reprr == repr(rng))
def test_rangeset_isdisjoint(rng1, rng2, isdisjoint, error_type):
    """
    Check ``rng1.isdisjoint(rng2)`` against the expected outcome.

    When ``error_type`` is given, the call must raise it. Otherwise the
    result must equal ``isdisjoint`` and must agree with the reversed call
    made on ``RangeSet(rng2)``.
    """
    if error_type is None:
        assert rng1.isdisjoint(rng2) == RangeSet(rng2).isdisjoint(rng1)
        assert isdisjoint == rng1.isdisjoint(rng2)
    else:
        asserterror(error_type, rng1.isdisjoint, (rng2, ))
[Range(0, 1), Range(1.5, 2), Range(2.5, 3), Range(4, 5)], "{[0, 1), [1.5, 2), [2.5, 3), [4, 5)}", "RangeSet{Range[0, 1), Range[1.5, 2), Range[2.5, 3), Range[4, 5)}", False), # mix Rangelike, iterable args (["[0, 3]", "[2, 4)", "[5, 6]"], [Range(0, 4), Range("[5, 6]")], "{[0, 4), [5, 6]}", "RangeSet{Range[0, 4), Range[5, 6]}", False), # overlapping (["[0, 4)", "(1, 3)"], [Range(0, 4)], "{[0, 4)}", "RangeSet{Range[0, 4)}", False), # overlapping 2 ([Range(1, 3), Range(2, 4)], [Range( 1, 4)], "{[1, 4)}", "RangeSet{Range[1, 4)}", False), ([Range('apple', 'carrot'), Range('banana', 'durian')], [Range('apple', 'durian')], "{[apple, durian)}", "RangeSet{Range['apple', 'durian')}", False), ([RangeSet("(0, 1)", "(1, 2)", "(2, 3)")], [Range("(0, 1)"), Range("(1, 2)"), Range("(2, 3)")], "{(0, 1), (1, 2), (2, 3)}", "RangeSet{Range(0, 1), Range(1, 2), Range(2, 3)}", False) ]) def test_rangeset_constructor_valid(args, ranges, strr, reprr, isempty): """ Tests that the constructor of rngset works as intended. Also, as a byproduct, tests the .ranges(), .__str__(), .__repr__(), .clear(), and .isempty() """ rangeset = RangeSet(*args) assert (ranges == rangeset.ranges()) assert (strr == str(rangeset)) assert (reprr == repr(rangeset)) assert (isempty == rangeset.isempty()) assert (isempty != bool(rangeset))