def getNbVowels(_utt):
    """
    Count the phones of an utterance that are kept as vowels.

    A phone is left out of the count when the label returned by
    utils.isVowel contains '-1' or '0', or when the phone ends exactly
    where the utterance ends.

    Input
        _utt        utterance object exposing phoneList and end
    Output
        nbVowels    number of phones that passed all three filters
    """
    nbVowels = 0
    for phone in _utt.phoneList:
        # Guard clauses: each one discards a phone that must not be counted.
        if utils.isVowel(phone).find('-1') != -1:
            continue
        if utils.isVowel(phone).find('0') != -1:
            continue
        if phone.end == _utt.end:
            continue
        nbVowels += 1
    return nbVowels
def extractVowelDur(_textgridFilename):
    """
    Extract the durations of the vowels read from a textgrid file.

    A phone is skipped when the label returned by utils.isVowel contains
    '-1' or '0', or when the phone ends exactly where the utterance ends.

    Input
        _textgridFilename   filename of the textgrid to read
    Output
        durations           list of vowel durations (end - start, as
                            floats), in the order of the phone list
    """
    utterance = utils.parseTextgrid(_textgridFilename)
    durations = []
    for phone in utterance.phoneList:
        if utils.isVowel(phone).find('-1') != -1:
            continue
        if utils.isVowel(phone).find('0') != -1:
            continue
        if phone.end == utterance.end:
            continue
        durations.append(float(phone.end) - float(phone.start))
    return durations
def getConsecutiveVowelRatio(_textgridFilename):
    """
    Compute the duration ratio of each consecutive pair of vowels read
    from a textgrid file.

    Vowels are selected with the same filter as the duration extraction:
    a phone is skipped when the label returned by utils.isVowel contains
    '-1' or '0', or when it ends exactly where the utterance ends.

    Input
        _textgridFilename   filename of the textgrid to read
    Output
        list of ratios next_duration / current_duration, one per pair of
        neighbouring vowels (empty when fewer than two vowels are kept)

    A kept vowel whose duration is zero makes the division raise
    ZeroDivisionError.
    """
    utterance = utils.parseTextgrid(_textgridFilename)

    durations = []
    for phone in utterance.phoneList:
        if utils.isVowel(phone).find('-1') != -1:
            continue
        if utils.isVowel(phone).find('0') != -1:
            continue
        if phone.end == utterance.end:
            continue
        durations.append(float(phone.end) - float(phone.start))

    # Pair every duration with its successor: (d0, d1), (d1, d2), ...
    return [following / current
            for current, following in zip(durations, durations[1:])]
def getDurationVec(_textgridFilename, nbPoints=25):
    """
    Sample an utterance at evenly spaced points and return, for each
    point, the duration of the phone lying under it.

    The utterance span (end - start) is divided into nbPoints equal steps
    and each step is sampled at its midpoint. The sample time is compared
    with the phone end times measured from the start of the first phone.
    The reported duration is 0 when the label returned by utils.isVowel
    for that phone contains '0' or '-1', or when the phone is the last one
    of the list; otherwise it is the phone's own duration (end - start).

    Input
        _textgridFilename   filename of the textgrid file to parse
        nbPoints            number of sample points (default 25)
    Output
        durations           list of nbPoints durations

    Raises ValueError when nbPoints is not strictly positive.
    """
    if nbPoints <= 0:
        raise ValueError("nbPoints must be a positive integer")

    currentUtterance = utils.parseTextgrid(_textgridFilename)
    currentPhoneList = currentUtterance.phoneList
    if not currentPhoneList:
        # No phone at all: nothing can be a vowel, so every point is 0.
        return [0] * nbPoints

    timeStep = (float(currentUtterance.end) -
                float(currentUtterance.start)) / nbPoints
    halfStep = timeStep / 2
    origin = float(currentPhoneList[0].start)
    lastIndex = len(currentPhoneList) - 1

    durations = []
    phoneIndex = 0
    for point in range(nbPoints):
        sampleTime = timeStep * point + halfStep
        # Move forward until the current phone really covers the sample
        # point. Several phones may have to be skipped when they are
        # shorter than the time step, and the cursor must never run past
        # the last phone.
        while (phoneIndex < lastIndex and
               float(currentPhoneList[phoneIndex].end) - origin
               <= sampleTime):
            phoneIndex += 1

        phone = currentPhoneList[phoneIndex]
        nature = utils.isVowel(phone)
        if (nature.find('0') != -1 or
                nature.find('-1') != -1 or
                phoneIndex == lastIndex):
            durations.append(0)
        else:
            durations.append(float(phone.end) - float(phone.start))
    return durations
def _articleErrorToDict(error):
    """Copy the fields of a pre-check error object into a plain dict."""
    return {
        'start': error.start,
        'end': error.end,
        'output': error.output,
        'desc': error.description,
        'type': error.type_,
        'original': error.original,
        'newSent': error.newSent,
    }


def getArticleFeatures(parsedSents, sentences):
    """
    Collect article pre-check errors and classifier features for the NP
    chunks of a list of parsed sentences.

    Input
        parsedSents     parsed sentences (entries that are None are
                        skipped); each one exposes a chunks sequence
        sentences       sentences aligned by index with parsedSents; the
                        entry is passed to utils.precheckArticle
    Output
        errors          list of dicts with the keys 'start', 'end',
                        'output', 'desc', 'type', 'original', 'newSent',
                        one per NP chunk rejected by the pre-check
        features        list of dicts, one per remaining NP chunk:
                        "feature" is the flat feature list and "index" is
                        [sentence index, chunk.start, chunk.stop,
                         head.index, article index or -1, vowel]

    NP chunks are ignored when they have more than 4 words, when their
    head is None or of type 'PRP', or when utils.getArticle labels them
    'O'.
    """
    errors = []
    features = []
    headList = []  # lower-cased head sets of the most recent sentences
    for i, s in enumerate(parsedSents):
        if s is None:
            continue
        sent = sentences[i]
        sHeads = set()
        for chunk in s.chunks:
            if chunk.type != 'NP':
                continue
            # filter NP chunk size bigger than 4
            if len(chunk.words) > 4:
                continue
            head = utils.getHeadFeatures(chunk)
            if head is None or head.type == 'PRP':
                continue  # ignore PRP and None

            # get NP article
            article, s_article = utils.getArticle(chunk)
            if s_article == 'O':
                continue  # ignore DT which is not a, an or the

            # precheck a/an singular and plural of this NP
            c_list = utils.getCountable(head)
            isUncount = c_list[0] == 'N' and c_list[1] == 'Y'
            error = utils.precheckArticle(article, sent, head, chunk,
                                          isUncount)
            if error is not None:
                errors.append(_articleErrorToDict(error))
                continue

            # for the rest of the NP chunk, extract features for classifier
            bef3pos, aft3pos = utils.getAround(head.index, s, article)
            bnp_word, bnp_pos = utils.getBeforeNP(chunk, s)
            anp_word, anp_pos = utils.getAfterNP(chunk, s)
            wordnet = utils.getWordNet(head)
            isPlural = utils.getCount(head)
            pp, pps = utils.prepModify(chunk)
            adj, adj_grade, pdt, prp, relation = utils.getChunkFeatures(
                chunk)
            # The reference lookup must see the heads collected so far,
            # so it runs before this head is added to sHeads below.
            ref = utils.getRef(headList, head, sHeads)
            vowel = utils.isVowel(chunk, article)

            # features
            feature = [
                s_article, head.string, head.lemma, head.type,
                isPlural, wordnet, pp, pps,
                adj, adj_grade, pdt, prp, relation, ref,
                bnp_word, bnp_pos, anp_word, anp_pos,
            ]
            feature.extend(c_list)
            feature.extend(bef3pos)
            feature.extend(aft3pos)

            # index of NP chunk, head and article
            # (-1 marks a chunk that has no article)
            index = [
                i,
                chunk.start,
                chunk.stop,
                head.index,
                -1 if article is None else article.index,
                vowel,
            ]

            features.append({"feature": feature, "index": index})
            sHeads.add(head.string.lower())

        # Keep only the head sets of the five most recent sentences.
        headList.append(sHeads)
        if len(headList) > 5:
            del headList[0]
    return errors, features