def mask_examples(nlp: Pipeline, domain: str, is_test: bool):
    """Yield each example unmasked, then once per token with that single token masked."""
    dataset = absa.load_examples('semeval', domain, test=is_test)
    for i, example in enumerate(dataset):
        yield i, -1, example  # Index -1 means: predict without a mask.
        [tokenized_example] = nlp.tokenize([example])
        n = len(tokenized_example.text_tokens)
        for index in range(n):
            new_example = mask_tokens(nlp, tokenized_example, indices=[index])
            yield i, index, new_example
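
# A hedged usage sketch (not part of the original experiments): one way to drive
# the generator above is to push every yielded example through the same
# tokenize -> encode -> predict path used in the tests below and to keep the
# class scores per (example index, masked token index). It assumes the examples
# returned by mask_tokens can be re-tokenized just like the originals.
def collect_masked_scores(nlp: Pipeline, domain: str, is_test: bool):
    results = []  # Tuples of (example index, masked token index, class scores).
    for i, index, example in mask_examples(nlp, domain, is_test):
        tokenized_examples = nlp.tokenize([example])
        input_batch = nlp.encode(tokenized_examples)
        output_batch = nlp.predict(input_batch)
        scores = output_batch.scores.numpy()[0].tolist()
        results.append((i, index, scores))
    return results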
def test_predict(nlp: Pipeline):
    text_1 = ("We are great fans of Slack, but we wish the subscriptions "
              "were more accessible to small startups.")
    text_2 = "We are great fans of Slack"
    aspect = "Slack"
    examples = [Example(text_1, aspect), Example(text_2, aspect)]
    tokenized_examples = nlp.tokenize(examples)
    input_batch = nlp.encode(tokenized_examples)
    output_batch = nlp.predict(input_batch)
    assert output_batch.scores.shape == [2, 3]  # (batch, classes)
    # 13 hidden states = embedding output + 12 transformer layers; 25 tokens; 768 dims.
    assert output_batch.hidden_states.shape == [2, 13, 25, 768]
    # Attentions and their gradients: (batch, layers, heads, tokens, tokens).
    assert output_batch.attentions.shape == [2, 12, 12, 25, 25]
    assert output_batch.attention_grads.shape == [2, 12, 12, 25, 25]
    scores = output_batch.scores.numpy()
    assert np.argmax(scores, axis=-1).tolist() == [2, 2]
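
# A hedged note on the final assertion above: assuming the class order of the
# scores follows the Sentiment enum used elsewhere in this code (so that index 2
# corresponds to Sentiment.positive), the raw argmax indices can be mapped back
# to labels. The helper below is illustrative only, not part of the library API.
def to_sentiments(scores: np.ndarray):
    return [Sentiment(int(index)) for index in np.argmax(scores, axis=-1)]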
def mask_examples(nlp: Pipeline, domain: str, part_parts: Tuple[int, int]):
    """Yield each example unmasked, then once per token pair with both tokens masked."""
    dataset = absa.load_examples('semeval', domain, test=True)
    # Split the dataset into chunks so that partial results can be cached more often.
    part, parts = part_parts
    chunks = utils.split(dataset, n=parts)
    dataset_chunk = chunks[part]

    for i, example in enumerate(dataset_chunk):
        yield i, -1, -1, example  # Indices (-1, -1) mean: predict without a mask.

        [tokenized_example] = nlp.tokenize([example])
        n = len(tokenized_example.text_tokens)

        x, y = np.triu_indices(n, k=1)  # Exclude the diagonal.
        for ij in zip(x, y):
            new_example = mask_tokens(nlp, tokenized_example, indices=ij)
            yield i, *ij, new_example
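
# A small illustration (plain numpy, no assumptions about the pipeline) of the
# pair enumeration used above: np.triu_indices(n, k=1) yields every index pair
# (i, j) with i < j exactly once and never touches the diagonal.
def pair_indices_demo():
    x, y = np.triu_indices(4, k=1)
    pairs = list(zip(x.tolist(), y.tolist()))
    assert pairs == [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
    return pairs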
def test_review(nlp: Pipeline):
    text_1 = ("We are great fans of Slack, but we wish the subscriptions "
              "were more accessible to small startups.")
    text_2 = "The Slack often has bugs."
    aspect = "slack"
    examples = [Example(text_1, aspect), Example(text_2, aspect)]

    tokenized_examples = nlp.tokenize(examples)
    input_batch = nlp.encode(tokenized_examples)
    output_batch = nlp.predict(input_batch)
    predictions = nlp.review(tokenized_examples, output_batch)
    predictions = list(predictions)

    labeled_1, labeled_2 = predictions
    assert labeled_1.sentiment == Sentiment.positive
    assert labeled_2.sentiment == Sentiment.negative
    assert isinstance(labeled_1, PredictedExample)
    assert isinstance(labeled_1.scores, list)
    assert isinstance(labeled_1.review, Review)
    assert not labeled_1.review.is_reference
    assert not labeled_1.review.patterns
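
# A hedged usage note: the only PredictedExample fields relied on above are
# sentiment, scores and review, so a minimal summary of a batch of predictions
# (any further attributes would be assumptions about the interface) could be:
def summarize_predictions(predictions):
    return [(labeled.sentiment, max(labeled.scores)) for labeled in predictions]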
def test_encode(nlp: Pipeline):
    text_1 = ("We are great fans of Slack, but we wish the subscriptions "
              "were more accessible to small startups.")
    text_2 = "We are great fans of Slack"
    aspect = "Slack"

    examples = [Example(text_1, aspect), Example(text_2, aspect)]
    tokenized_examples = nlp.tokenize(examples)
    input_batch = nlp.encode(tokenized_examples)
    assert isinstance(input_batch.token_ids, tf.Tensor)
    # 101 is the [CLS] token id, 102 is the [SEP] token id.
    token_ids = input_batch.token_ids.numpy()
    values = [101, 2057, 2024, 2307, 4599, 1997, 19840, 102, 19840, 102]
    assert token_ids[1, :10].tolist() == values
    assert token_ids[0, :7].tolist() == values[:7]
    # The second sequence is shorter, so it is zero-padded
    # and its attention mask is zero over the padding.
    assert np.allclose(token_ids[1, 10:], 0)
    attention_mask = input_batch.attention_mask.numpy()
    assert np.allclose(attention_mask[1, 10:], 0)
    # Check the segment ids: the aspect token and the final [SEP] form segment 1.
    token_type_ids = input_batch.token_type_ids.numpy()
    assert token_type_ids[0, -2:].tolist() == [1, 1]
    assert np.allclose(token_type_ids[0, :-2], 0)
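
# A hedged cross-check of the id assertions above: assuming the pipeline exposes
# its HuggingFace tokenizer as `nlp.tokenizer` (the attribute name is an
# assumption), the expected ids decode back to word pieces, which makes the
# [CLS] text [SEP] aspect [SEP] layout explicit.
def decode_expected_ids(nlp: Pipeline):
    values = [101, 2057, 2024, 2307, 4599, 1997, 19840, 102, 19840, 102]
    tokens = nlp.tokenizer.convert_ids_to_tokens(values)
    assert tokens[0] == '[CLS]'
    assert tokens[7] == tokens[-1] == '[SEP]'
    return tokens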
def mask_examples(
        nlp: Pipeline,
        domain: str,
        part_parts: Tuple[int, int]
):
    """Yield each example unmasked, then once per token triple with all three masked."""
    dataset = absa.load_examples('semeval', domain, test=True)
    # Keep only the negative examples that are not already explained by a single
    # key token or by a pair of key tokens.
    y_ref, _, mask_1 = key_token_labels(nlp, domain, is_test=True)
    y_ref, _, mask_2 = key_token_pair_labels(nlp, domain, parts=10)
    # Parentheses matter here: `&` binds tighter than `==` in Python.
    mask = ~(mask_1 | mask_2) & (y_ref == Sentiment.negative.value)
    dataset = [e for e, keep in zip(dataset, mask) if keep]

    # Split the dataset into chunks so that partial results can be cached more often.
    part, parts = part_parts
    chunks = utils.split(dataset, n=parts)
    dataset_chunk = chunks[part]

    for i, example in enumerate(dataset_chunk):
        yield i, -1, -1, -1, example  # Indices (-1, -1, -1) mean: predict without a mask.

        [tokenized_example] = nlp.tokenize([example])
        n = len(tokenized_example.text_tokens)

        ij = np.zeros(shape=[n, n])
        ij[np.triu_indices(n, k=1)] = 1  # Pairs (i, j) with j > i.
        ij = ij.reshape([n, n, 1]).astype(bool)

        jk = np.zeros(shape=[n, n])
        jk[np.triu_indices(n, k=1)] = 1  # Pairs (j, k) with k > j.
        jk = jk.reshape([1, n, n]).astype(bool)

        # Broadcasting (n, n, 1) & (1, n, n) keeps strictly increasing triples i < j < k.
        matrix_ijk = np.where(ij & jk)
        for ijk in zip(*matrix_ijk):
            new_example = mask_tokens(nlp, tokenized_example, indices=ijk)
            yield i, *ijk, new_example
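
# A hedged sanity check of the broadcasting trick above: the (ij & jk) mask
# selects exactly the strictly increasing triples i < j < k, the same set that
# itertools.combinations(range(n), 3) enumerates. Pure numpy / standard library,
# no assumptions about the pipeline.
def triples_match_combinations(n: int = 5) -> bool:
    import itertools
    upper = np.zeros([n, n])
    upper[np.triu_indices(n, k=1)] = 1
    ij = upper.reshape([n, n, 1]).astype(bool)
    jk = upper.reshape([1, n, n]).astype(bool)
    triples = [tuple(map(int, t)) for t in zip(*np.where(ij & jk))]
    return triples == list(itertools.combinations(range(n), 3))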