Exemple #1
0
def test_lexicon():
    '''
    Testing lexical tracking feature that can be enabled when querying.

        speech = Report()
        speech.query('subject=125 and session=10', lexicon=True)

    This should enable the tracking of word types used by a speaker ('C' for
    child or 'P' for parent) at a given visit (subj, sess):

        word_types = speech.lexicon[(125, 10, 'P')]

    This returns the **set** of word types used by the parent speaker in
    the transcript of the recorded visit for subject 125 at session 10.

    '''
    speech = Report()
    speech.query('subject=125 and session=10', lexicon=True)
    speech.query('subject=125 and session=11', lexicon=True)
    A = speech.lexicon[(125, 10, 'P')]
    B = speech.lexicon[(125, 11, 'P')]
    assert len(A) == 326
    assert len(B) == 271
    # Build the union as a new set instead of calling B.update(A): update()
    # changes B in place, and B is the object handed out by speech.lexicon.
    assert len(A | B) == 457
Exemple #2
0
def test_subject_report():
    '''
    Check that SubjectReport agrees with Report for one transcript.

    The 'C' result for subject 125, session 11 is fetched once through a
    plain Report query and once through a SubjectReport constructed with
    min_session=10 and max_session=11.  The 'word_tokens', 'word_types'
    and 'utterances' entries of both results must be equal.

    '''
    report = Report()
    report.query('subject=125 and session=11')
    from_report = report.result(125, 11, "C")

    by_subject = SubjectReport(125, min_session=10, max_session=11)
    by_subject.query()
    all_results = by_subject.results()
    from_subject = all_results[(125, 11, 'C')]

    for field in ('word_tokens', 'word_types', 'utterances'):
        assert from_report[field] == from_subject[field]
Exemple #3
0
from ldp.speech import Report

# Run the query 'session in (8, 9)' with project=2 on a fresh Report.
speech = Report()
speech.query('session in (8, 9)', project=2)
columns = ['subject', 'session', 'mlu']         # columns to print

def pprint(args):                               # pretty-print
    '''
    Print the items of ``args`` on a single line, separated by tabs.

    Each item is converted with ``str`` before joining.  ``args`` may be any
    iterable, including a generator expression.

    '''
    # A single parenthesised argument behaves the same under the Python 2
    # print statement and the Python 3 print() function.
    print("\t".join(str(i) for i in args))

# Header row: the column names in upper case.
pprint(name.upper() for name in columns)

# One row per (subject, session) transcript ID, visited in sorted order,
# using the child ('C') result and only the selected columns.
for subject, session in sorted(speech.trans):
    child_row = speech.result(subject, session, 'C')
    pprint(child_row[name] for name in columns)

Exemple #4
0
def test_report():
    '''
    Testing the Report class

    Checks the 'C' and 'P' results for subject 22, session 6, then the 'C'
    result for subject 125, session 11, first with a default Report and
    then with Report(lemmatize=False).

    '''
    report = Report()                   # general speech report
    report.query('subject=22 and session=6')

    child = report.result(22, 6, 'C')
    for key, expected in (('subject', 22),
                          ('session', 6),
                          ('utterances', 1065),
                          ('word_tokens', 3283),
                          ('word_types', 397)):
        assert child[key] == expected
    assert round(child['mlu'], 3) == 3.083

    parent = report.result(22, 6, 'P')
    for key, expected in (('utterances', 1252),
                          ('word_tokens', 5604),
                          ('word_types', 650)):
        assert parent[key] == expected
    assert round(parent['mlu'], 3) == 4.476

    # The same Report object is queried again for another transcript.
    report.query('subject=125 and session=11')
    child = report.result(125, 11, "C")
    for key, expected in (('word_tokens', 1933),
                          ('word_types', 353),
                          ('utterances', 499)):
        assert child[key] == expected

    # With lemmatize=False the expected 'word_types' value is 400 instead
    # of 353; the expected tokens and utterances are the same as above.
    unlemmatized = Report(lemmatize=False)
    unlemmatized.query('subject=125 and session=11')
    child = unlemmatized.result(125, 11, "C")
    for key, expected in (('word_tokens', 1933),
                          ('word_types', 400),
                          ('utterances', 499)):
        assert child[key] == expected
Exemple #5
0
from ldp.speech import Report

# Query a fresh Report with the session filter below and project=2, then
# call report(header=True) on it.
session_filter = 'session in ("11", "12")'
speech = Report()
speech.query(session_filter, project=2)
speech.report(header=True)