コード例 #1
0
 def test_error(self):
     """Build a tree from four overlapping intervals and print a wide search.

     There are no assertions: the test fails only if building, printing or
     searching the tree raises.
     """
     tree = IntervalTree()
     for start, end in ((10, 17), (25, 32), (15, 22), (20, 27)):
         tree.add(start, end)
     print(tree.pretty_print())
     # The query range 0-220 spans every interval added above.
     print(tree.search(0, 220))
コード例 #2
0
 def setUp(self) -> None:
     """Create six intervals ``(i, i + 7)`` for ``i = 0, 5, ..., 25`` and index them.

     The intervals are stored on ``self.intervals`` and the tree they were
     inserted into on ``self.tree``.
     """
     spans = [Interval(start, start + 7) for start in range(0, 30, 5)]
     tree = IntervalTree()
     for span in spans:
         tree.insert(span)
     self.intervals = spans
     self.tree = tree
コード例 #3
0
 def test_1(self):
     """Check search results over three intervals laid end to end.

     The intervals (1, 3), (3, 5) and (5, 7) share their boundary points.
     The expected strings below pin which of them each query returns.
     """
     tree = IntervalTree()
     tree.add(1, 3, 1.0)
     tree.add(3, 5, 2.0)
     tree.add(5, 7, 3.0)
     self.assertEqual(str(tree.search(3, 4)), '[Inv(3, 5, d=2.0)]')

     # Run the (2, 4) query once and reuse the result for every check.
     hits = tree.search(2, 4)
     print(hits)
     hits_repr = str(hits)
     # Membership checks rather than an exact string: the order of the two
     # hits is not part of what this test pins down. assertIn reports both
     # operands on failure, unlike assertTrue(x in y).
     self.assertIn('Inv(1, 3, d=1.0)', hits_repr)
     self.assertIn('Inv(3, 5, d=2.0)', hits_repr)
     self.assertEqual(len(hits), 2)

     self.assertEqual(str(tree.search(5, 7)), '[Inv(5, 7, d=3.0)]')
コード例 #4
0
    def test_tree_pickle(self):
        """Round-trip a tree through ``dump``/``load`` and compare lookups.

        A tree ``a`` is filled, dumped to a pickle file, and loaded into a
        second tree ``b``. For every inserted interval, ``find`` on both
        trees must return the same number of hits with the same first and
        last ``start`` once the hits are sorted by ``start``.
        """
        import os
        import tempfile

        a = IntervalTree()
        # Each interval is inserted five times, so the tree holds duplicates.
        for _ in range(5):
            for i in range(10, 100, 6):
                a.insert(Interval(i - 4, i + 4))

        # Use a throwaway directory so the test does not leave 'a.pkl'
        # behind in the current working directory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            pkl_path = os.path.join(tmp_dir, 'a.pkl')
            a.dump(pkl_path)

            b = IntervalTree()
            b.load(pkl_path)

        by_start = operator.attrgetter('start')
        # One verification pass is enough: repeating it would issue the
        # exact same queries against the exact same trees.
        for i in range(10, 100, 6):
            f = Interval(i - 4, i + 4)
            af = sorted(a.find(f), key=by_start)
            bf = sorted(b.find(f), key=by_start)

            # unittest assertion instead of a bare assert, which is
            # stripped when Python runs with -O.
            self.assertGreater(len(bf), 0)
            self.assertEqual(len(af), len(bf))
            self.assertEqual(af[0].start, bf[0].start)
            self.assertEqual(af[-1].start, bf[-1].start)
コード例 #5
0
ファイル: Test_quicksect.py プロジェクト: jianlins/quicksectx
 def test_1(self):
     """Print the tree and a lookup before and after a ``remove`` call.

     The single assertion is a placeholder that always passes, so the test
     fails only if one of the tree operations raises.
     """
     tree = IntervalTree()
     entries = ((1, 3, 100), (3, 7, 110), (2, 5, 120), (4, 6, 130))
     for start, end, data in entries:
         tree.add(start, end, data)

     # Snapshot before the removal.
     print(tree.pretty_print())
     print(tree.find(Interval(2, 5)))

     # Ask the tree to remove Interval(2, 5), then repeat the same lookup
     # and dump so the two outputs can be compared by eye.
     tree.remove(Interval(2, 5))
     print(tree.find(Interval(2, 5)))
     print(tree.pretty_print())

     self.assertEqual(True, True)
コード例 #6
0
 def test_2(self):
     """Check hit counts for point and range searches over seven intervals."""
     tree = IntervalTree()
     entries = (
         (1, 3, 1.0),
         (2, 3, 2.0),
         (3, 4, 3.0),
         (3, 5, 4.0),
         (4, 5, 5.0),
         (5, 6, 5.0),
         (2, 6, 6.0),
     )
     for start, end, data in entries:
         tree.add(start, end, data)
     print(tree.pretty_print())

     # Each row: (query start, query end, expected number of hits).
     expectations = ((4, 4, 3), (3, 3, 3), (4, 6, 4))
     for query_start, query_end, expected_hits in expectations:
         self.assertEqual(len(tree.search(query_start, query_end)),
                          expected_hits)
コード例 #7
0
ファイル: FastCNER.py プロジェクト: jianlins/PyFastNER
    def addDeterminants(self, text, deter_rule, matches, match_begin,
                        match_end, current_position):
        """Record the span matched by a rule under each of its result keys.

        For every ``key -> rule_id`` entry in ``deter_rule[FastCNER.END]`` a
        ``Span`` covering the match is added to ``matches[key]``. A per-key
        ``IntervalTree`` in ``self.overlap_checkers`` maps stored intervals
        to positions in ``matches[key]``. When the new span overlaps a stored
        one, ``self.compareSpan(new, old)`` decides: a falsy result keeps the
        old span, otherwise the new span takes over its position in the list.

        @param text: the text being matched; ``text[match_begin:end]``
            becomes the span's covered text
        @param deter_rule: rule node; only its ``FastCNER.END`` entry is read
        @param matches: dict of key -> list of spans, updated in place
        @param match_begin: begin offset of the match within ``text``
        @param match_end: end offset of the match; ``0`` means "use
            ``current_position`` instead"
        @param current_position: fallback end offset when ``match_end`` is 0
        @return: None
        """
        deter_rule = deter_rule[FastCNER.END]
        end = current_position if match_end == 0 else match_end
        # in case the rules were not configured properly, this can ensure they won't break the execution.
        if match_begin > end:
            match_begin, end = end, match_begin
        span_begin = match_begin + self.offset
        span_end = end + self.offset
        covered_text = text[match_begin:end]

        overlap_checkers = self.overlap_checkers
        for key, rule_id in deter_rule.items():
            if self.logger is not None:
                self.logger.debug('try add matched rule ({}-{})\t{}'.format(
                    match_begin, match_end, str(self.rule_store[rule_id])))
            # Build a separate Span for every key. Sharing one object would
            # let a later key overwrite the rule_id already stored under an
            # earlier key.
            current_span = Span(span_begin, span_end, covered_text)
            current_span.rule_id = rule_id
            if key in overlap_checkers:
                current_spans_list = matches[key]
                overlap_checker = overlap_checkers[key]
                overlapped_pos = overlap_checker.search(
                    current_span.begin, current_span.end)
                if overlapped_pos:
                    # The interval's data is the index of the overlapped
                    # span inside matches[key].
                    pos = overlapped_pos.pop().data
                    overlapped_span = current_spans_list[pos]
                    if not self.compareSpan(current_span, overlapped_span):
                        continue
                    current_spans_list[pos] = current_span
                    # NOTE(review): remove() is given the new span's bounds,
                    # not those of the span being replaced, and intervals
                    # here are stored with the unadjusted end while the
                    # first interval of a key is stored with end - 1.
                    # Confirm both against the IntervalTree semantics.
                    overlap_checker.remove(
                        Interval(current_span.begin, current_span.end))
                    overlap_checker.add(current_span.begin, current_span.end,
                                        pos)
                else:
                    overlap_checker.add(current_span.begin, current_span.end,
                                        len(current_spans_list))
                    current_spans_list.append(current_span)
            else:
                # First span for this key: start a list of its own so that
                # different keys never end up sharing one list object.
                current_spans_list = []
                matches[key] = current_spans_list
                overlap_checker = IntervalTree()
                # quickset's search will include both lower and upper bounds, so minus one from the end.
                overlap_checker.add(current_span.begin, current_span.end - 1,
                                    len(current_spans_list))
                current_spans_list.append(current_span)
                overlap_checkers[key] = overlap_checker
コード例 #8
0
ファイル: Vectorizer.py プロジェクト: ryannetwork/medspacy_io
    def to_data_dict(
            doc: Doc,
            sent_window: int = 1,
            type_filter: Union[Set[str], Dict, None] = None,
            default_label: str = "NEG",
            data_dict: Union[dict, None] = None) -> Dict:
        """
        Convert a SpaCy doc into a labeled data dictionary.

        The doc is expected to carry labeled concepts (snippets). Every run of
        ``sent_window`` consecutive sentences is registered as one context
        window, and the concepts are then labeled against those windows by
        ``to_data_dict_on_types`` or ``to_data_dict_on_type_attr_values``.
        With the default ``data_dict`` the result has three keys: 'X' (text of
        the context sentences), 'concept' (text of the labeled concepts) and
        'y' (label).

        Concepts are read from ``doc._.concepts`` when that extension exists,
        otherwise from ``doc.ents``.

        @param doc: a SpaCy Doc
        @param sent_window: The window size (in sentences) around the target concept that needs to be pulled
        @param type_filter: Specifies whether and which types of annotation are used to generate the output. It can
        be a set (only concept names are included) or a dictionary (where attributes and values can be included),
        which maps a matched concept (string and its context string) to a new value in the "y" column of the output.
        The expected dictionary structure is:
        concept_type->attr1->value1->...(other attr->value pairs if needed)->mapped key name
        None (the default) or an empty set/dict means "no filtering".
        @param default_label: If there is no labeled concept in the context sentences, label it with this default_label
        @param data_dict: a dictionary to hold the output and pass on across documents, so that a corpus can be
        aggregated. When omitted, a fresh ``{'X': [], 'concept': [], 'y': []}`` is created for this call. If it
        contains the key 'doc_name', ``doc._.doc_name`` is passed on to the helpers as well.
        @return: the populated data dictionary
        @raise TypeError: if type_filter is neither a set nor a dictionary
        """
        # Mutable defaults are created per call. A default dict in the
        # signature would be shared and keep growing across calls.
        if type_filter is None:
            type_filter = set()
        if data_dict is None:
            data_dict = {'X': [], 'concept': [], 'y': []}

        # Index every window of `sent_window` consecutive sentences by the
        # first sentence's start and the last sentence's end. The stored
        # data is the window's position in context_sents.
        sent_idx = IntervalTree()
        sents = list(doc.sents)
        context_sents = []
        for i in range(0, len(sents) - sent_window + 1):
            begin_sent = sents[i]
            end_sent = sents[i + sent_window - 1]
            sent_idx.add(begin_sent.start, end_sent.end, len(context_sents))
            context_sents.append(sents[i:i + sent_window])

        concepts = []
        if hasattr(doc._, "concepts"):
            # The loop variable must not be called `type`: that would make
            # `type` a local name for the whole function and break the
            # type(...) call in the error branch below.
            for concept_type in doc._.concepts:
                if len(type_filter) == 0 or concept_type in type_filter:
                    concepts.extend(doc._.concepts[concept_type])
        else:
            # spaCy exposes the label string as `label_`; `label` is the
            # integer id and cannot match a filter of strings. The old
            # `label` check is kept so no existing match is lost.
            concepts = [
                ent for ent in doc.ents
                if (len(type_filter) == 0 or ent.label_ in type_filter
                    or ent.label in type_filter)
            ]

        get_doc_name = 'doc_name' in data_dict
        doc_name = doc._.doc_name if get_doc_name else ''

        if isinstance(type_filter, Set):
            data_dict = Vectorizer.to_data_dict_on_types(
                concepts=concepts,
                type_filter=type_filter,
                default_label=default_label,
                data_dict=data_dict,
                sent_idx=sent_idx,
                context_sents=context_sents,
                doc_name=doc_name)
        elif isinstance(type_filter, Dict):
            if len(type_filter) == 0:
                # An empty dict carries no attribute rules, so fall back to
                # the type-based helper with its own default filter.
                data_dict = Vectorizer.to_data_dict_on_types(
                    concepts=concepts,
                    default_label=default_label,
                    data_dict=data_dict,
                    sent_idx=sent_idx,
                    context_sents=context_sents,
                    doc_name=doc_name)
            else:
                data_dict = Vectorizer.to_data_dict_on_type_attr_values(
                    concepts=concepts,
                    type_filter=type_filter,
                    default_label=default_label,
                    data_dict=data_dict,
                    sent_idx=sent_idx,
                    context_sents=context_sents,
                    doc_name=doc_name)
        else:
            raise TypeError(
                'The arg: "type_filter" needs to be either a set of concept names or a dictionary. Not a {}:\n\t{}'
                .format(type(type_filter), str(type_filter)))
        return data_dict
コード例 #9
0
 def test_duplicates(self):
     """Adding the same interval twice must produce two hits on search."""
     tree = IntervalTree()
     for _ in range(2):
         tree.add(1, 3, 1.0)
     hits = tree.search(1, 1.5)
     self.assertEqual(len(hits), 2)
コード例 #10
0
 def test_3(self):
     """Print a tree holding one zero-length interval and a search on it.

     There are no assertions; the test fails only if a tree call raises.
     """
     tree = IntervalTree()
     tree.add(1, 1, 1.0)
     rendered = tree.pretty_print()
     print(rendered)
     hits = tree.search(1, 3)
     print(hits)
コード例 #11
0
 def setUp(self):
     """Create ``self.tree4`` holding two intervals with identical bounds.

     Both intervals cover 22-33 and differ only in their ``data`` payload.
     """
     self.tree4 = tree = IntervalTree()
     for label in ('example1', 'example2'):
         tree.insert(Interval(22, 33, data=label))
コード例 #12
0
 def setUp(self) -> None:
     """Create an empty ``IntervalTree`` and store it on ``self.tree``."""
     self.tree = IntervalTree()
コード例 #13
0
ファイル: Test_quicksect.py プロジェクト: jianlins/quicksectx
from quicksectx import IntervalTree, Interval
import unittest
# First demo: fill a tree, query the range 3-15, then dump its structure.
tree = IntervalTree()
for start, end, data in (
        (0, 3, 100),
        (5, 8, 110),
        (6, 10, 120),
        (8, 9, 130),
        (15, 23, 140),
        (19, 20, 150),
        (17, 19, 160),
        (26, 26, 160),
        (25, 30, 160),
        (16, 21, 160),
):
    tree.add(start, end, data)
print(tree.search(3, 15))
print(tree.pretty_print())

# Visual separator between the two dumps.
print('\n\n---\n\n\n')

# Second demo: a similar set of intervals added in a different order, plus
# (27, 28) added three times, then dumped.
tree = IntervalTree()
for start, end, data in (
        (0, 3, 100),
        (5, 8, 110),
        (6, 10, 120),
        (8, 9, 130),
        (15, 23, 140),
        (16, 21, 160),
        (17, 19, 160),
        (19, 20, 150),
        (25, 30, 160),
        (26, 26, 160),
        (27, 28, 160),
        (27, 28, 160),
        (27, 28, 160),
):
    tree.add(start, end, data)
print(tree.pretty_print())