Python parse_posts Examples

Programming Language: Python

Namespace/Package Name: nlp.transform

Method/Function: parse_posts

Examples at hotexamples.com: 9

Python parse_posts – 9 examples found. These are the top-rated real-world Python examples of nlp.transform.parse_posts, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

def test_split_data(sample_posts):
    """Check that splitting the sample docs at a 0.5 ratio gives one doc per split."""
    docs = transform.make_docs(transform.parse_posts(sample_posts))
    splits = transform.split_data(docs, train_ratio=0.5)

    # Each side of the split is expected to hold exactly one document.
    train_size = len(splits.train)
    assert train_size == 1
    valid_size = len(splits.valid)
    assert valid_size == 1

Example #2

Show file

File: cli.py Project: logan-connolly/aita

def preprocess(run_id: str, labels: str) -> Path:
    """Turn the raw posts of a run into train/valid doc binaries on disk.

    The raw posts for ``run_id`` are parsed, filtered by ``labels``, converted
    to docs and split with a 0.6 train ratio. Each split is converted to a doc
    binary and written out under ``run_id``.

    Returns:
        The processed data directory (``paths.DATA_DIRS.processed``).
    """
    posts = transform.filter_posts(
        transform.parse_posts(io.read_raw_posts(run_id)),
        labels,
    )
    splits = transform.split_data(transform.make_docs(posts), train_ratio=0.6)

    # Convert both splits first so nothing is written if a conversion fails.
    train_bin = transform.convert_to_doc_binary(splits.train)
    valid_bin = transform.convert_to_doc_binary(splits.valid)

    outputs = (
        (train_bin, transform.Split.TRAIN),
        (valid_bin, transform.Split.VALID),
    )
    for doc_bin, split in outputs:
        io.write_docs(doc_bin, split.value, run_id)

    return paths.DATA_DIRS.processed

Example #3

Show file

def sample_doc_bin(sample_posts: list[transform.RawPost]) -> DocBin:
    """Build a doc binary from the sample posts.

    The posts are parsed, turned into docs, and the docs are then packed by
    ``transform.convert_to_doc_binary``.
    """
    parsed = transform.parse_posts(sample_posts)
    return transform.convert_to_doc_binary(transform.make_docs(parsed))

Example #4

Show file

def test_parse_posts(sample_posts):
    """Check that parsing the sample posts yields the expected (text, label) tuples."""
    expected = [
        ("AITA sample text", "NTA"),
        ("AITA sample text", "YTA"),
    ]
    assert transform.parse_posts(sample_posts) == expected

Example #5

Show file

def test_convert_to_doc_binary(sample_posts):
    """Check that converting the sample docs produces a ``DocBin`` instance."""
    docs = transform.make_docs(transform.parse_posts(sample_posts))
    result = transform.convert_to_doc_binary(docs)
    assert isinstance(result, DocBin)

Example #6

Show file

def test_make_docs(sample_posts):
    """Check that every item produced by ``make_docs`` is a ``Doc``."""
    docs = transform.make_docs(transform.parse_posts(sample_posts))
    for item in docs:
        assert isinstance(item, Doc)

Example #7

Show file

def test_filter_posts_no_filter(sample_posts):
    """Check that filtering without labels keeps the same number of posts."""
    parsed = transform.parse_posts(sample_posts)
    # No ``labels`` argument: the result should be the same size as the input.
    unfiltered = transform.filter_posts(parsed)
    assert len(unfiltered) == len(parsed)

Example #8

Show file

def test_filter_posts(sample_posts):
    """Test that filtering by label keeps only posts carrying that label.

    ``all()`` over an empty iterable is ``True``, so the label check alone
    would also pass if the filter dropped every post. The non-empty assertion
    guards against that vacuous pass.
    """
    parsed_posts = transform.parse_posts(sample_posts)
    filtered_posts = transform.filter_posts(parsed_posts, labels="NTA")
    # NOTE(review): assumes the sample_posts fixture contains at least one
    # "NTA" post — confirm against the fixture definition.
    assert len(filtered_posts) > 0
    assert all(label == "NTA" for _, label in filtered_posts)

Example #9

Show file

def test_parse_posts_invalid():
    """Check that a malformed post dictionary makes ``parse_posts`` raise ``ValueError``."""
    malformed_post = {"not_valid_text": "value", "label": "NTA"}
    with pytest.raises(ValueError):
        transform.parse_posts([malformed_post])