def test_lexicon():
    '''
    Testing lexical tracking feature that can be enabled when querying.

        speech = Report()
        speech.query('subject=125 and session=10', lexicon=True)

    This should enable the tracking of word types used by a speaker ('C' for
    child or 'P' for parent) at a given visit (subj, sess):

        word_types = speech.lexicon[(125, 10, 'P')]

    This returns the **set** of word types used by the parent speaker in the
    transcript of the recorded visit for subject 125 at session 10.
    '''
    speech = Report()
    speech.query('subject=125 and session=10', lexicon=True)
    speech.query('subject=125 and session=11', lexicon=True)

    A = speech.lexicon[(125, 10, 'P')]
    B = speech.lexicon[(125, 11, 'P')]
    assert len(A) == 326
    assert len(B) == 271

    # Build the union of A and B as a new set. Calling B.update(A) would
    # modify, in place, the set object obtained from speech.lexicon.
    assert len(set(A).union(B)) == 457
def test_subject_report():
    '''
    Check that SubjectReport agrees with a plain Report.

    The child ('C') result for subject 125 at session 11 is fetched once
    through Report and once through SubjectReport (queried for sessions
    10 to 11); both must give the same word tokens, word types and
    utterances.
    '''
    report = Report()
    report.query('subject=125 and session=11')
    expected = report.result(125, 11, "C")

    subject_report = SubjectReport(125, min_session=10, max_session=11)
    subject_report.query()
    actual = subject_report.results()[(125, 11, 'C')]

    for key in ('word_tokens', 'word_types', 'utterances'):
        assert expected[key] == actual[key]
from ldp.speech import Report

speech = Report()
speech.query('session in (8, 9)', project=2)

columns = 'subject session mlu'.split(' ')  # columns to print


def pprint(args):
    '''Print the items of `args` on a single line, separated by tabs.'''
    # The parenthesized single-argument form gives the same output with the
    # Python 2 print statement and the Python 3 print function.
    print("\t".join(str(i) for i in args))


pprint(c.upper() for c in columns)  # header
for subj, sess in sorted(speech.trans):  # iterate over transcript IDs
    row = speech.result(subj, sess, 'C')  # get child result
    pprint(row[i] for i in columns)  # pretty-print specified cols
def test_report():
    '''
    Testing the Report class.

    Compares child ('C') and parent ('P') results against known counts for
    subject 22 at session 6, then the child result for subject 125 at
    session 11, both with a default Report and with Report(lemmatize=False).
    '''
    report = Report()  # general speech report
    report.query('subject=22 and session=6')

    # Child speaker, subject 22, session 6.
    child = report.result(22, 6, 'C')
    child_expected = (
        ('subject', 22),
        ('session', 6),
        ('utterances', 1065),
        ('word_tokens', 3283),
        ('word_types', 397),
    )
    for key, value in child_expected:
        assert child[key] == value
    assert round(child['mlu'], 3) == 3.083

    # Parent speaker, same visit.
    parent = report.result(22, 6, 'P')
    parent_expected = (
        ('utterances', 1252),
        ('word_tokens', 5604),
        ('word_types', 650),
    )
    for key, value in parent_expected:
        assert parent[key] == value
    assert round(parent['mlu'], 3) == 4.476

    # Child speaker, subject 125, session 11, default Report.
    report.query('subject=125 and session=11')
    child = report.result(125, 11, "C")
    for key, value in (
        ('word_tokens', 1933),
        ('word_types', 353),
        ('utterances', 499),
    ):
        assert child[key] == value

    # Same visit with lemmatize=False: only the expected number of word
    # types differs (400 instead of 353).
    raw_report = Report(lemmatize=False)
    raw_report.query('subject=125 and session=11')
    child = raw_report.result(125, 11, "C")
    for key, value in (
        ('word_tokens', 1933),
        ('word_types', 400),
        ('utterances', 499),
    ):
        assert child[key] == value
# Usage example: query project 2 for sessions "11" and "12", then emit the
# report with header=True.
from ldp.speech import Report

speech = Report()
speech.query(
    'session in ("11", "12")',
    project=2,
)
speech.report(header=True)