예제 #1
0
 def test_metadata(self):
     # Parse the metadata fixture and compare the parser's output against
     # the complete expected structure: three top-level fields plus one
     # dictionary per participant.
     tree = ElementTree(file='fixtures/metadata.xml')
     actual = MorParser().parse_metadata(tree)

     investigator = {
         'id': 'INV',
         'name': 'Caroline',
         'role': 'Investigator',
         'language': 'eng',
     }
     target_child = {
         'id': 'CHI',
         'name': 'Nicole',
         'role': 'Target_Child',
         'language': 'eng',
         'age': 'P2Y10M8D',
         'sex': 'female',
         'group': 'typical',
         'SES': 'MC',
     }
     # The expected MOT entry carries no 'role' key.
     mother = {
         'id': 'MOT',
         'name': 'Mother',
         'language': 'eng',
     }
     expected = {
         'lang': 'eng',
         'corpus': 'manchester',
         'date': '1984-01-01',
         'participants': [investigator, target_child, mother],
     }

     self.assertEqual(actual, expected)
예제 #2
0
    def test_missing_pos(self):
        # No token parsed from the missing_pos fixture may carry the
        # part-of-speech value 'unk'.
        morph_parser = MorParser()
        utterances = morph_parser.parse("fixtures/missing_pos.xml")
        for _uid, _speaker, utterance_tokens in utterances:
            for tok in utterance_tokens:
                self.assertNotEqual(tok.pos, 'unk', 'failed to parse known tag')
예제 #3
0
 def test_compound(self):
     # self.compounds must contain exactly five direct <w> children, and the
     # first parsed part of each must have at least one '_' in its stem.
     word_elems = self.compounds.findall("w")
     self.assertEqual(5, len(word_elems))
     for w_elem in word_elems:
         # A fresh parser with an empty namespace is built for every word.
         mor_parser = MorParser()
         mor_parser.namespace = ""
         mor_elem = w_elem.find('mor')
         parsed = mor_parser.parse_mor_element(w_elem, mor_elem)
         first_stem = parsed[0].stem
         self.assertGreaterEqual(first_stem.count("_"), 1)
예제 #4
0
 def test_clitics(self):
     clitic_parser = MorParser()
     utterances = clitic_parser.parse("fixtures/clitics.xml")
     for uid, speaker, tokens in utterances:
         # Each utterance must yield more than one token, and no token's
         # word may be "?".
         failure_note = "failed splitting {0} into clitics".format(tokens)
         self.assertGreater(len(tokens), 1, failure_note)
         words = [tok.word for tok in tokens]
         self.assertNotIn("?", words)

     # `tokens` still refers to the last utterance yielded by the loop above;
     # its string rendering is checked against the expected tagged sentence.
     rendered = ' '.join(str(tok) for tok in tokens)
     expected = ("hidden/part|hide&PERF away/adv|away where/adv:wh|where "
                 "nobody/pro:indef|nobody 'd/mod|genmod be/v:cop|be ./.|.")
     self.assertEqual(rendered, expected)

     # Splitting a single word form returns the head and a list of clitics.
     stem_part, clitic_parts = clitic_parser.split_clitic_wordform("that's")
     self.assertEqual(stem_part, "that")
     self.assertEqual(clitic_parts, ["'s"])
예제 #5
0
 def test_commas(self):
     # Every utterance in the commas fixture must contain at least one token
     # whose stem is a literal comma.
     comma_parser = MorParser()
     utterances = comma_parser.parse("fixtures/commas.xml")
     for _uid, _speaker, utterance_tokens in utterances:
         stems = [tok.stem for tok in utterance_tokens]
         self.assertIn(',', stems)
예제 #6
0
 def test_document(self):
     # Smoke test: consume everything parse() yields for the test document
     # and ensure that no exception is thrown along the way.
     doc_parser = MorParser()
     parsed_items = doc_parser.parse("fixtures/test_doc.xml")
     for _item in parsed_items:
         continue
예제 #7
0
from talkbank_parser import MorParser
import glob
import illegal_utterance_filter

# Module-level MorParser instance. age_transform below does not use it;
# presumably it is shared by code outside this excerpt -- TODO confirm.
parser = MorParser()


def age_transform(metadata):
    '''Convert an ISO-8601-style age string into a whole number of months.

    The input is expected to look like ``'P2Y10M8D'``: an optional leading
    ``P`` followed by any of a year (``Y``), month (``M``) and day (``D``)
    component, in that order. Components that are absent count as zero, and
    each number may have any number of digits (``'P10Y2M3D'`` is 122 months).
    Anything after a ``T`` (an ISO 8601 time part) is ignored.

    Args:
        metadata: the age string, e.g. ``'P2Y10M8D'``, ``'P1Y08M'``, ``'P2Y'``.

    Returns:
        ``years * 12 + months``, plus one extra month when the day
        component is 15 or more.

    Raises:
        ValueError: if the string contains no year, month or day component
            in the expected layout.
    '''
    import re  # function-scope import keeps this block self-contained

    match = re.match(
        r'P?(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)D)?(?:T.*)?\Z', metadata)
    # Every component is optional in the pattern, so also reject strings
    # such as '' or 'P' that match without supplying any number at all.
    if match is None or all(part is None for part in match.groups()):
        raise ValueError('unrecognised age string: {0!r}'.format(metadata))

    years, months, days = (int(part) if part is not None else 0
                           for part in match.groups())
    # Round to the nearest month: 15 or more days count as one extra month.
    extra_month = 1 if days >= 15 else 0
    return years * 12 + months + extra_month
예제 #8
0
 def test_compound(self):
     # For every <mor> element found under a <w> child of self.compounds,
     # the first parsed part must have at least one '_' in its stem.
     mor_elems = self.compounds.findall("w/mor")
     for mor_elem in mor_elems:
         # A fresh parser with an empty namespace is built for every element.
         mor_parser = MorParser()
         mor_parser.namespace = ""
         # No word element is supplied here, only the <mor> element.
         parsed = mor_parser.parse_mor_element(None, mor_elem)
         first_stem = parsed[0].stem
         self.assertGreaterEqual(first_stem.count("_"), 1)