Ejemplo n.º 1
0
from ldp.gesture import LRB, GestureType
from ldp.data import Utterances, Subjects
from util.count import FeatureCounter

# Script setup: coders, data accessors and the tally used by the counting
# loop further down.

# Coder whose valid_values() is applied to the upper-cased c_lrb column.
lrb = LRB()
# Coder whose valid_values(..., subcodes=False) is applied to c_g_type.
gtype = GestureType()
subjects = Subjects()
# Called below as utterances(columns, filter, limit='') to iterate rows.
utterances = Utterances()
# Tally called with (subject, session, project, code); the final report is
# printed for 'Gesture'.
count = FeatureCounter('Subject', 'Session', 'Project', 'Gesture')

# Set built from subjects.project(2); membership decides whether a row is
# labelled project 2 (otherwise the loop labels it project 3).
P2 = set(subjects.project(2))

# First and second positional arguments of the utterances() call below --
# presumably the column list and the row condition; confirm against
# Utterances.
columns = 'subject, session, c_lrb, c_g_type'
# NOTE(review): this name shadows the builtin filter() for the rest of the
# script.
filter  = 'session in (1,2,3,4,5,8) and c_lrb != ""'

def pprint(args):
    """Print the items of `args` on one line, separated by tabs.

    Items that are already strings are passed through unchanged; anything
    else is converted with str() first, so numeric values no longer make
    the join raise TypeError.

    args -- iterable of values to print; an empty iterable prints an
            empty line.
    """
    # str and unicode on Python 2, just str on Python 3.
    text_types = (str, type(u""))
    # A single parenthesised argument prints identically on Python 2 and is
    # also valid Python 3.
    print("\t".join(a if isinstance(a, text_types) else str(a) for a in args))

# Walk every row returned by utterances() and tally one "<lrb>+<gesture>"
# code for each positional pair of values from the two coders.
for subject, session, lrb_raw, gtype_raw in utterances(columns, filter,
                                                       limit=''):
    # Subjects found in P2 are labelled project 2, everyone else project 3.
    if subject in P2:
        project = 2
    else:
        project = 3
    lrb_values = lrb.valid_values(lrb_raw.upper())
    gtype_values = gtype.valid_values(gtype_raw, subcodes=False)
    # zip() pairs the values by position and stops at the shorter sequence.
    for lrb_value, gtype_value in zip(lrb_values, gtype_values):
        combined = "{0}+{1}".format(lrb_value, gtype_value)
        count(subject, session, project, combined)

count.print_report('Gesture')
Ejemplo n.º 2
0
import re
from nlp import Tokenizer
from ldp.data import Utterances
from util.count import FeatureCounter

# Script setup: tally, data accessor, tokenizer and the word-list regex.
count = FeatureCounter('Subject', 'Session', 'Speaker', 'Word')
utterances = Utterances()
parse = Tokenizer()

# One search term per line of words.txt.  The file is closed as soon as it
# has been read.  Blank lines are dropped: an empty alternative would make
# the pattern match the empty string at every word boundary.
with open('words.txt') as word_file:
    words = [line.rstrip() for line in word_file if line.strip()]

# Terms are inserted into the pattern verbatim, so any regex syntax in
# words.txt stays active.  With no terms at all, use a pattern that can
# never match instead of the degenerate \b(?:)\b.
if words:
    rgx = re.compile(r'\b(?:' + '|'.join(words) + r')\b')
else:
    rgx = re.compile(r'(?!)')

# First and second positional arguments of the utterances() call below.
columns = 'subject, session, row, p_utts, c_utts'
where = 'session < 8'

# pretty-print with tab delims
def pprint(*args):
    """Print all positional arguments on one tab-separated line.

    Every argument is converted with str() before joining; calling with no
    arguments prints an empty line.
    """
    # A single parenthesised argument prints identically on Python 2 and is
    # also valid Python 3, unlike the bare print statement.
    print('\t'.join(str(x) for x in args))

# Column headings for the per-match listing printed below.
header = 'SUBJ SESS SPKR ROW UTT MATCH'.split(' ')
pprint(*header)

# List every utterance in which the word-list regex finds something, and
# tally each individual hit per subject, session and speaker.
for subject, session, row_id, p_utt, c_utt in utterances(columns, where,
                                                         project=2):
    for speaker, text in (('P', p_utt), ('C', c_utt)):
        hits = rgx.findall(text)
        if not hits:
            continue
        for hit in hits:
            count(subject, session, speaker, hit)
        pprint(subject, session, speaker, row_id, text, ', '.join(hits))

# Blank line between the listing and the summary report.
print
count.print_report('Word')
Ejemplo n.º 3
0
import re
from ldp.data import Utterances
from nlp.lex import Tokenizer
from util.count import FeatureCounter

# Script setup: data accessor, tokenizer, tally and the patterns used by
# the counting loop below.
utts = Utterances()
# Called on each utterance in the loop; yields the tokens to be tested.
tokenize = Tokenizer()
# Tally called once per counted token with (subject, session, speaker).
count = FeatureCounter('Subject', 'Session', 'Speaker')
columns = 'subject, session, key, c_utts, p_utts'

# A token is counted only if it contains at least one \w character.
wordchar = re.compile(r'\w')
# Searched against each row's `key` to relabel the 'MOTHER' speaker:
# a key containing 'F' or '@' gives 'FATHER' (checked first), otherwise a
# key containing 'G' gives 'GRANDMOTHER'.
grandmother = re.compile(r'G')
father = re.compile(r'F|@')


# Count, per subject / session / speaker, the tokens that contain at least
# one word character.
rows = utts(columns,
            where='session in ("11", "12")',
            limit='',
            project=2)
for subject, session, key, c_utt, p_utt in rows:
    for role, text in (('CHILD', c_utt), ('MOTHER', p_utt)):
        speaker = role
        # The p_utts column defaults to 'MOTHER'; the row key can relabel
        # it, with the father pattern taking precedence.
        if role == 'MOTHER':
            if father.search(key):
                speaker = 'FATHER'
            elif grandmother.search(key):
                speaker = 'GRANDMOTHER'
        for token in tokenize(text):
            if wordchar.search(token):
                count(subject, session, speaker)

count.print_report('Speaker')
Ejemplo n.º 4
0
from ldp.gesture import LRB
from ldp.data import Utterances, Subjects
from util.count import FeatureCounter

# Script setup: coder, data accessors and the tally used by the counting
# loop further down.

# Coder whose valid_values() is applied to the upper-cased c_lrb column.
lrb = LRB()
subjects = Subjects()
# Called below as utterances(columns, filter, limit='') to iterate rows.
utterances = Utterances()
# Tally called with (subject, session, project, lrb value); the final
# report is printed for 'LRB'.
count = FeatureCounter('Subject', 'Session', 'Project', 'LRB')

# Set built from subjects.project(2); membership decides whether a row is
# labelled project 2 (otherwise the loop labels it project 3).
P2 = set(subjects.project(2))

# First and second positional arguments of the utterances() call below --
# presumably the column list and the row condition; confirm against
# Utterances.  c_g_type is selected but the loop below never uses it.
columns = 'subject, session, c_lrb, c_g_type'
# NOTE(review): this name shadows the builtin filter() for the rest of the
# script.
filter  = 'session in (1,2,3,4,5,8) and c_lrb != ""'

def pprint(args):
    """Print the items of `args` on one line, separated by tabs.

    Items that are already strings are passed through unchanged; anything
    else is converted with str() first, so numeric values no longer make
    the join raise TypeError.

    args -- iterable of values to print; an empty iterable prints an
            empty line.
    """
    # str and unicode on Python 2, just str on Python 3.
    text_types = (str, type(u""))
    # A single parenthesised argument prints identically on Python 2 and is
    # also valid Python 3.
    print("\t".join(a if isinstance(a, text_types) else str(a) for a in args))

# Walk every row returned by utterances() and tally each value that
# lrb.valid_values() yields for the upper-cased c_lrb column.
for subject, session, lrb_raw, _unused_gtype in utterances(columns, filter,
                                                           limit=''):
    # Subjects found in P2 are labelled project 2, everyone else project 3.
    if subject in P2:
        project = 2
    else:
        project = 3
    for lrb_value in lrb.valid_values(lrb_raw.upper()):
        count(subject, session, project, lrb_value)

count.print_report('LRB')