def main():
    """Print one sample result from each distance/similarity helper.

    The first six calls are fed fresh pairs produced by ``rd.random(10)``;
    the remaining ones use fixed literal tuples.  Nothing is returned.
    """
    # Chebyshev distance on a fresh random pair.
    left, right = rd.random(10), rd.random(10)
    print(ch.chebyshev(left, right))

    # Hamming distance on a fresh random pair.
    left, right = rd.random(10), rd.random(10)
    print(hm.hamming(left, right))

    # L_p with the third argument set to 0, 1 and 2, each time on a new
    # random pair (drawn in the same order as separate calls would).
    for order in (0, 1, 2):
        left, right = rd.random(10), rd.random(10)
        print(mk.L_p(left, right, order))

    # Mahalanobis helper called with two random vectors only.
    left, right = rd.random(10), rd.random(10)
    print(ma.mahalanobis(left, right))

    # Helpers exercised with fixed inputs.
    symbols = (1, 2, 3, 4)
    weights = (0.1, 0.2, 0.5, 0.7)
    print(ie.information_entropy(symbols, weights))
    print(jc.jaccard_distance((1, 3, 4), (2, 4, 6)))
    print(jc.jaccard_similarity_coefficient((1, 3, 4), (1, 4, 6)))
    print(cs.cosine((1, 3, 4, 5), (2, 3, 4, 5)))
def pruneVowels(vowels, vowel, vowelMeans, vowelCovs, outlie):
    """
    Tries to prune outlier vowels, making sure enough tokens are left to
    calculate mahalanobis distance.

    A token is kept when its squared distance (as returned by the
    ``mahalanobis`` function in scope, called with the token, the vowel's
    mean and the vowel's covariance entry) is at most ``outlie``.  If fewer
    than 10 tokens survive, the threshold is relaxed in steps of 0.5 and the
    filter is applied again.

    Parameters:
        vowels: mapping from vowel label to a list of tokens.
        vowel: label of the vowel category to prune.
        vowelMeans: mapping from vowel label to that category's mean.
        vowelCovs: mapping from vowel label to that category's covariance.
        outlie: initial threshold on the squared distance.

    Returns:
        The list of retained tokens, in their original order.  When fewer
        than 10 tokens have a finite distance at all, no threshold can ever
        retain 10 of them, so all finite-distance tokens are returned
        instead of relaxing the threshold forever.
    """
    min_tokens = 10
    mean = vowelMeans[vowel]
    cov = vowelCovs[vowel]

    # The distance of a token does not depend on the threshold, so compute
    # it once instead of once per relaxation step.
    scored = [(mahalanobis(np.array(token), mean, cov) ** 2, token)
              for token in vowels[vowel]]

    # Tokens whose squared distance is NaN or infinite can never satisfy
    # "dist ** 2 <= outlie"; everything else passes once the threshold has
    # grown far enough.
    reachable = [token for sq_dist, token in scored if np.isfinite(sq_dist)]
    if len(reachable) < min_tokens:
        # Relaxing the threshold would loop forever; hand back what the
        # loop would converge to.
        return reachable

    while True:
        outtokens = [token for sq_dist, token in scored if sq_dist <= outlie]
        if len(outtokens) >= min_tokens:
            return outtokens
        outlie = outlie + 0.5
def _can_remeasure(vowel, vowelCovs, vowels):
    """Tell whether a real re-measurement is possible for this vowel class."""
    if vowel not in vowelCovs:
        return False
    # If there is only one member of a vowel category, the covariance
    # matrix will be filled with NAs.
    if np.isnan(vowelCovs[vowel][0, 0]):
        return False
    return len(vowels[vowel]) >= 7


def _original_candidate(vm, lDur):
    """Return the measurement's current values in candidate-list layout."""
    return [float(vm.f1), float(vm.f2), vm.f3,
            math.log(float(vm.b1)), math.log(float(vm.b2)), vm.b3, lDur]


def _is_missing_value(value):
    """Tell whether an optional F3/B3 value is one of the 'absent' markers."""
    return value is None or (isinstance(value, str) and value in ("NA", ""))


def repredictF1F2(measurements, vowelMeans, vowelCovs, vowels):
    """
    Predicts F1 and F2 from the speaker's own vowel distributions based on
    the mahalanobis distance.

    For every measurement, each entry of ``vm.poles`` with at least two
    formants is a candidate.  When the vowel class (``vm.cd``) has a usable
    covariance matrix and at least 7 entries in ``vowels``, each candidate
    is scored with ``mahalanobis`` on
    ``[F1, F2, log(B1), log(B2), log(dur)]`` and the closest one wins.
    Otherwise every candidate is a copy of the current measurement with
    distance zero, so the current values (and ``vm.nFormants``) are kept.

    Parameters:
        measurements: iterable of measurement objects; the attributes read
            are ``cd``, ``dur``, ``poles``, ``bandwidths``, ``f1``, ``f2``,
            ``f3``, ``b1``, ``b2``, ``b3``, ``nFormants`` and, after a real
            re-measurement, ``all_tracks``, ``all_poles``,
            ``all_bandwidths``.
        vowelMeans: mapping from vowel class to its mean vector.
        vowelCovs: mapping from vowel class to its covariance matrix.
        vowels: mapping from vowel class to its list of tokens.

    Returns:
        A list with the same measurement objects, updated in place:
        ``f1``/``f2``/``b1``/``b2`` rounded to one decimal, ``f3``/``b3``
        rounded or set to ``''`` when absent, ``nFormants`` set to the
        winning setting, and (after a real re-measurement) ``tracks``,
        ``winner_poles`` and ``winner_bandwidths`` replaced.  A measurement
        without any usable pole entry is returned unchanged.
    """
    remeasurements = []
    for vm in measurements:
        vowel = vm.cd
        # One entry per usable pole list:
        # (distance, candidate values, nFormants setting, index into vm.poles)
        candidates = []
        # If no remeasurement takes place we want to keep the old values
        # for the formant tracks.
        keepOldTracks = True

        for i, poles in enumerate(vm.poles):
            if len(poles) < 2:
                continue
            bandwidths = vm.bandwidths[i]
            F1 = poles[0]
            F2 = poles[1]
            # The third pole could be "None".
            has_f3 = len(poles) >= 3 and bool(poles[2])
            F3 = poles[2] if has_f3 else "NA"
            B1 = math.log(bandwidths[0])
            B2 = math.log(bandwidths[1])
            # B3 is only reported together with F3.  Deciding via has_f3
            # avoids indexing a third pole that does not exist.
            B3 = bandwidths[2] if has_f3 and len(bandwidths) >= 3 else "NA"
            lDur = math.log(vm.dur)

            if _can_remeasure(vowel, vowelCovs, vowels):
                # "real" re-measurement
                x = np.array([F1, F2, B1, B2, lDur])
                dist = mahalanobis(x, vowelMeans[vowel], vowelCovs[vowel])
                # i + 3 is the formant setting used, not the actual number
                # of formants returned.
                candidates.append(
                    (dist, [F1, F2, F3, B1, B2, B3, lDur], i + 3, i))
                keepOldTracks = False
            else:
                # No re-measurement: fill the candidate list with copies of
                # the original measurement (distance zero, original
                # nFormants) so that it is guaranteed to be re-selected.
                candidates.append(
                    (0, _original_candidate(vm, lDur), vm.nFormants, i))

        if not candidates:
            # Nothing to choose from; leave the measurement as it is.
            remeasurements.append(vm)
            continue

        distances = [candidate[0] for candidate in candidates]
        winner = candidates[distances.index(min(distances))]
        bestValues, bestnFormants, poleIndex = winner[1], winner[2], winner[3]

        # change formants and bandwidths to new values
        vm.f1 = round(bestValues[0], 1)
        vm.f2 = round(bestValues[1], 1)
        if _is_missing_value(bestValues[2]):
            vm.f3 = ''
        else:
            vm.f3 = round(bestValues[2], 1)
        vm.b1 = round(math.exp(bestValues[3]), 1)
        vm.b2 = round(math.exp(bestValues[4]), 1)
        if _is_missing_value(bestValues[5]):
            vm.b3 = ''
        else:
            try:
                vm.b3 = round(bestValues[5], 1)
            except OverflowError:
                vm.b3 = ''
        vm.nFormants = bestnFormants

        # change formant tracks to new values as well
        if not keepOldTracks:
            # Index with the position in vm.poles, not the position in the
            # (possibly shorter) candidate list.
            # NOTE(review): assumes all_tracks / all_poles / all_bandwidths
            # are parallel to vm.poles -- confirm against the producer.
            vm.tracks = vm.all_tracks[poleIndex]
            vm.winner_poles = vm.all_poles[poleIndex]
            vm.winner_bandwidths = vm.all_bandwidths[poleIndex]

        remeasurements.append(vm)
    return remeasurements