Example #1
0
    def test_contained_entities(self):
        """Several adjacent entities inside a sentence run merge exactly the sentences they span."""
        bounds = [(0, 2), (2, 3), (3, 5), (5, 8), (8, 10), (10, 15)]
        sentences = [Sentence(lo, hi) for lo, hi in bounds]
        entities = [Entity('', lo, hi, 'test') for lo, hi in ((2, 6), (6, 7), (7, 8))]

        expected = [Sentence(0, 2), Sentence(2, 8), Sentence(8, 10), Sentence(10, 15)]
        self.assertEqual(expected, adjust_sentences(sentences, entities))
Example #2
0
    def test_two_entities_separated(self):
        """Two non-adjacent entities each trigger an independent merge of the sentences they cross."""
        bounds = [(0, 2), (2, 3), (3, 5), (5, 8), (8, 10), (10, 15)]
        sentences = [Sentence(lo, hi) for lo, hi in bounds]
        entities = [Entity('', 2, 4, 'test'), Entity('', 9, 11, 'test')]

        expected = [Sentence(0, 2), Sentence(2, 5), Sentence(5, 8), Sentence(8, 15)]
        self.assertEqual(expected, adjust_sentences(sentences, entities))
Example #3
0
    def test_multi_sentence(self):
        """A single entity spanning several sentences collapses all of them into one."""
        bounds = [(0, 2), (2, 3), (3, 5), (5, 8), (8, 10), (10, 15)]
        sentences = [Sentence(lo, hi) for lo, hi in bounds]
        entities = [Entity('', 2, 9, 'test')]

        expected = [Sentence(0, 2), Sentence(2, 10), Sentence(10, 15)]
        self.assertEqual(expected, adjust_sentences(sentences, entities))
Example #4
0
    def test_one_entity(self):
        """One entity crossing a sentence boundary merges the two sentences it touches."""
        sentences = [Sentence(lo, hi) for lo, hi in ((0, 2), (2, 3), (3, 5))]
        entities = [Entity('', 2, 4, 'test')]

        expected = [Sentence(0, 2), Sentence(2, 5)]
        self.assertEqual(expected, adjust_sentences(sentences, entities))
Example #5
0
def _merge(raw_tokens: list, sentences: list, raw_paragraphs: list, raw_entities: list, raw_relations: list, *,
           symmetric_types: set = None) -> Tuple[List[Sentence], List[Paragraph], List[Entity], Set[Relation]]:
    """
    Align raw annotations onto token indices and derive paragraph boundaries.

    :param raw_tokens: list of tuples: (start, end, text)
    :param sentences: list of Sentence objects
    :param raw_paragraphs: list of tuples: (start, end)
    :param raw_entities: list of dicts: {'id', 'type', 'start', 'end'}
    :param raw_relations: list of dicts: {'type', 'first', 'second'}
    :param symmetric_types: relation types forwarded to _get_relations —
        presumably treated as undirected; verify in _get_relations
    :return: tuple of (sentences, paragraphs, entities, relations)
    """
    paragraphs = []

    # Index into raw_paragraphs of the paragraph span currently being scanned,
    # and the sentence index where the current output paragraph started.
    cur_par_idx = 0
    par_start = 0

    # Align entities to token positions first, because adjust_sentences merges
    # any sentences that were split in the middle of an entity.
    entities = sorted(align_raw_entities(raw_entities, raw_tokens))
    entities_dict = {ent.id: ent for ent in entities}
    sentences = adjust_sentences(sentences, entities)

    for i, sentence in enumerate(sentences):
        for token in raw_tokens[sentence.start_token: sentence.end_token]:
            # Close the current paragraph after sentence i when this token ends
            # the text or ends the current raw paragraph span — unless a
            # boundary was already placed here (par_start == i + 1).
            if par_start != i + 1 and (_end_of_text(sentences, raw_tokens, sentence, token, i)
                                       or _end_of_paragraph(raw_paragraphs, cur_par_idx, token)):
                paragraphs.append(Paragraph(par_start, i + 1))
                par_start = i + 1
                cur_par_idx += 1

    return sentences, paragraphs, entities, _get_relations(raw_relations, entities_dict, symmetric_types)
Example #6
0
    def _get_docs(self, raw_docs: Dict[str, List[dict]],
                  groups: Dict[str, list]) -> Dict[str, Document]:
        """Build one Document per raw document: collect tokens and per-token
        features, split into sentences at 'SENT' grams, and attach entities.

        :param raw_docs: doc id -> list of raw token dicts with keys 'token',
            'lemma', 'gram', 'shift' and optionally the speech-feature keys
            'speech', 'said', 'author_comment', 'speech_verb'
        :param groups: raw entity groups, forwarded to self._get_entities
        :return: dict mapping doc id to the constructed Document
        """
        docs = {}
        for doc_id, raw_tokens in raw_docs.items():
            tokens = []
            token_features = {}
            sentences = []
            sent_start = 0
            shift2idx = {}  # character shift of a token -> its index in `tokens`

            for i, raw_token in enumerate(raw_tokens):
                tokens.append(raw_token['token'])
                token_features.setdefault('lemma',
                                          []).append(raw_token['lemma'])
                token_features.setdefault('gram', []).append(raw_token['gram'])
                if "speech" in raw_token:
                    # Speech features come as a group: when 'speech' is present
                    # the code assumes the other three keys are present too.
                    token_features.setdefault("speech",
                                              []).append(raw_token['speech'])
                    token_features.setdefault("said",
                                              []).append(raw_token['said'])
                    token_features.setdefault("author_comment", []).append(
                        raw_token['author_comment'])
                    token_features.setdefault("speech_verb", []).append(
                        raw_token['speech_verb'])
                shift2idx[raw_token['shift']] = i

                if raw_token['gram'] == 'SENT':
                    sentences.append(Sentence(sent_start, i + 1))
                    sent_start = i + 1

            # Close a trailing sentence not terminated by a 'SENT' gram.
            # sent_start equals the end of the last closed sentence (or 0), so
            # this condition is equivalent to the old sentences[-1].end_token
            # check but also covers docs with no 'SENT' at all, where the
            # original sentences[-1] lookup raised IndexError on an empty list.
            if sent_start != len(tokens):
                sentences.append(Sentence(sent_start, len(tokens)))

            entities = self._get_entities(groups, shift2idx, doc_id)
            sentences = adjust_sentences(sentences, entities)

            doc = Document(doc_id,
                           tokens,
                           sentences, [Paragraph(0, len(sentences))],
                           entities,
                           token_features=token_features)
            docs[doc_id] = doc

        return docs
Example #7
0
 def test_no_entities(self):
     """With an empty entity list, adjust_sentences leaves the sentences unchanged."""
     sents = [Sentence(0, 2), Sentence(2, 3)]
     result = adjust_sentences(sents, [])
     self.assertEqual(sents, result)