Example #1
def getNbVowels(_utt):
    # Count the vowel phones in an utterance: phones whose isVowel code
    # contains '-1' or '0', and the utterance-final phone, are skipped.
    i = 0
    for phone in _utt.phoneList:
        if utils.isVowel(phone).find('-1') != -1 or \
           utils.isVowel(phone).find('0') != -1 or \
           phone.end == _utt.end:
            continue
        i += 1
    return i
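For context, here is a minimal, self-contained sketch of the same counting pattern. It assumes, as the snippet suggests, that utils.isVowel returns a string code in which '-1' or '0' marks a phone that should not be counted; the Phone and Utterance stubs and the sample values are hypothetical stand-ins for the project's own objects.

from collections import namedtuple

# Hypothetical stand-ins for the project's phone and utterance objects.
Phone = namedtuple('Phone', ['label', 'code', 'start', 'end'])
Utterance = namedtuple('Utterance', ['phoneList', 'end'])

def is_vowel_code(phone):
    # Stand-in for utils.isVowel: return the precomputed code string.
    return phone.code

def count_vowels(utt):
    # Mirror of getNbVowels: count phones whose code contains neither
    # '-1' nor '0' and which are not the utterance-final phone.
    count = 0
    for phone in utt.phoneList:
        code = is_vowel_code(phone)
        if '-1' in code or '0' in code or phone.end == utt.end:
            continue
        count += 1
    return count

utt = Utterance(phoneList=[Phone('k', '-1', 0.00, 0.05),
                           Phone('a', '1', 0.05, 0.20),
                           Phone('t', '-1', 0.20, 0.30)],
                end=0.30)
print(count_vowels(utt))  # -> 1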
Example #2
def extractVowelDur(_textgridFilename):
    """
	This function extracts the durations of the vowels in a word
	Input
		_textgridFilename 	filename of the textgrid to read
	Output
		vowelDur 	list of vowel durations in a word
	"""
    currUtterance = utils.parseTextgrid(_textgridFilename)
    currPhoneList = currUtterance.phoneList
    vowelDur = []
    for phonei in currPhoneList:
        # Skip non-vowels (isVowel code contains '-1' or '0') and the
        # utterance-final phone.
        if utils.isVowel(phonei).find('-1') != -1 or \
           utils.isVowel(phonei).find('0') != -1 or \
           phonei.end == currUtterance.end:
            continue
        dur = float(phonei.end) - float(phonei.start)
        vowelDur.append(dur)
    return vowelDur
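A hedged usage sketch: assuming the project's utils module is importable and that 'word.TextGrid' (a hypothetical path) points to a valid TextGrid, the returned list can be summarized directly.

# Hypothetical call; requires the project's utils module and a real TextGrid.
durations = extractVowelDur('word.TextGrid')
if durations:
    print('%d vowels, mean duration %.3f s'
          % (len(durations), sum(durations) / len(durations)))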
Example #3
def getConsecutiveVowelRatio(_textgridFilename):
    """
	This function extracts the ratio between each consecutive pair of vowel
	read from the textGrid file given in input
	"""
    vowelRatio = []
    vowelDur = []
    currUtterance = utils.parseTextgrid(_textgridFilename)
    currPhoneList = currUtterance.phoneList
    for phonei in currPhoneList:
        # Collect the durations of vowel phones, skipping non-vowels and the
        # utterance-final phone (same filter as above).
        if utils.isVowel(phonei).find('-1') != -1 or \
           utils.isVowel(phonei).find('0') != -1 or \
           phonei.end == currUtterance.end:
            continue
        dur = float(phonei.end) - float(phonei.start)
        vowelDur.append(dur)

    for i in range(len(vowelDur) - 1):
        ratio = vowelDur[i + 1] / vowelDur[i]
        vowelRatio.append(ratio)

    return vowelRatio
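To make the ratio step concrete, here is a standalone version of the second loop over made-up durations (the values are illustrative only):

# Consecutive-pair ratios over a hypothetical list of vowel durations.
vowelDur = [0.12, 0.18, 0.09]
vowelRatio = [vowelDur[i + 1] / vowelDur[i] for i in range(len(vowelDur) - 1)]
print(vowelRatio)  # [1.5, 0.5]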
Example #4
def getDurationVec(_textgridFilename):
    """
	This function returns the list of 25 durations according to the
	phone nature of the current sample point (vowel, consonant, insertion)
	Input
		_textgridFilename 	filename of the textgrid file to parse
	Output
		durations 			list of 25 durations 
	"""
    listOfNatures = []  # Size should be 25
    durations = []  # Size should be 25
    currentUtterance = utils.parseTextgrid(_textgridFilename)
    # Time step
    nbPoints = 25
    timeStep = (float(currentUtterance.end) -
                float(currentUtterance.start)) / nbPoints
    currentPhoneList = currentUtterance.phoneList
    phoneIndex = 0
    for i in range(1, nbPoints + 1):
        mid = (timeStep * i - timeStep * (i - 1)) / 2  # half of one time step
        # Advance to the next phone once the current phone (measured from the
        # start of the first phone) no longer extends past the midpoint of
        # this time step.
        if not (float(currentPhoneList[phoneIndex].end) -
                float(currentPhoneList[0].start) > (timeStep * (i - 1) + mid)):
            phoneIndex += 1
        listOfNatures.append(utils.isVowel(currentPhoneList[phoneIndex]))
        # Non-vowels and the last phone get a duration of 0.
        if utils.isVowel(currentPhoneList[phoneIndex]).find('0') != -1 or \
           utils.isVowel(currentPhoneList[phoneIndex]).find('-1') != -1 or \
           phoneIndex == (len(currentPhoneList) - 1):
            durations.append(0)
        else:
            dur = float(currentPhoneList[phoneIndex].end) - float(
                currentPhoneList[phoneIndex].start)
            durations.append(dur)
    return durations
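As a rough, standalone illustration of the sampling grid only (not the project's code): the loop walks 25 equal time steps across the utterance and compares each phone's end, measured from the start of the first phone, against the midpoint of the current step. The 2.5-second utterance below is hypothetical.

# Hypothetical utterance boundaries; the real ones come from the TextGrid.
utt_start, utt_end = 0.0, 2.5
nbPoints = 25
timeStep = (utt_end - utt_start) / nbPoints  # 0.1 s per step
# Midpoint offsets of the 25 steps, i.e. the thresholds the loop uses to
# decide when to advance to the next phone.
midpoints = [timeStep * (i - 1) + timeStep / 2 for i in range(1, nbPoints + 1)]
print(len(midpoints))                                   # 25
print(round(midpoints[0], 3), round(midpoints[-1], 3))  # 0.05 2.45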
Example #5
def getArticleFeatures(parsedSents, sentences):
    errors = []
    features = []
    headList = []
    for i in range(len(parsedSents)):
        if parsedSents[i] is None:
            continue
        s = parsedSents[i]
        sent = sentences[i]
        sHeads = set()
        for chunk in s.chunks:
            if chunk.type == 'NP':
                # skip NP chunks longer than 4 words
                if len(chunk.words) > 4:
                    continue
                head = utils.getHeadFeatures(chunk)
                if head is None or head.type == 'PRP':
                    continue  # ignore missing heads and pronoun (PRP) heads

                # get the NP's article
                article, s_article = utils.getArticle(chunk)
                if s_article == 'O':
                    continue  # ignore determiners other than a, an, or the

                # pre-check a/an use and singular/plural agreement for this NP
                c_list = utils.getCountable(head)
                isUncount = (c_list[0] == 'N' and c_list[1] == 'Y')
                error = utils.precheckArticle(article, sent, head, chunk,
                                              isUncount)
                if error is not None:
                    errors.append({
                        'start': error.start,
                        'end': error.end,
                        'output': error.output,
                        'desc': error.description,
                        'type': error.type_,
                        'original': error.original,
                        'newSent': error.newSent,
                    })
                    continue

                # for the remaining NP chunks, extract features for the classifier
                bef3pos, aft3pos = utils.getAround(head.index, s, article)
                bnp_word, bnp_pos = utils.getBeforeNP(chunk, s)
                anp_word, anp_pos = utils.getAfterNP(chunk, s)
                wordnet = utils.getWordNet(head)
                isPlural = utils.getCount(head)
                pp, pps = utils.prepModify(chunk)
                adj, adj_grade, pdt, prp, relation = utils.getChunkFeatures(
                    chunk)
                ref = utils.getRef(headList, head, sHeads)
                vowel = utils.isVowel(chunk, article)

                # assemble the feature vector for the classifier
                feature = [
                    s_article, head.string, head.lemma, head.type, isPlural,
                    wordnet, pp, pps, adj, adj_grade, pdt, prp, relation, ref,
                    bnp_word, bnp_pos, anp_word, anp_pos,
                ]
                feature.extend(c_list)
                feature.extend(bef3pos)
                feature.extend(aft3pos)

                # indices of the NP chunk, its head, and its article
                index = [i, chunk.start, chunk.stop, head.index]
                if article is None:
                    # no article present; -1 is a placeholder for the unknown
                    # insertion point
                    index.append(-1)
                else:
                    index.append(article.index)
                index.append(vowel)
                features.append({"feature": feature, "index": index})

                sHeads.add(head.string.lower())
        headList.append(sHeads)
        # keep only the head sets of the five most recent sentences
        if len(headList) > 5:
            del headList[0]
    return errors, features
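The headList bookkeeping at the end keeps only the head nouns seen in the five most recent sentences, which utils.getRef presumably consults to decide whether a head was mentioned recently. Here is a standalone sketch of that rolling window, with an equivalent based on collections.deque (the head sets below are made up):

from collections import deque

# Equivalent to the append / del headList[0] bookkeeping above: keep at
# most the five most recent sets of sentence heads.
recent_heads = deque(maxlen=5)
for sent_heads in [{'cat'}, {'dog'}, {'house'}, {'tree'},
                   {'car'}, {'river'}, {'boat'}]:
    recent_heads.append(sent_heads)
print(list(recent_heads))
# [{'house'}, {'tree'}, {'car'}, {'river'}, {'boat'}] - the two oldest
# sentences have dropped out of the window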