def __init__(self):
    """Create the helper objects and scan for the audio file descriptors.

    The training and testing descriptor collections are whatever
    ``scanForAudioFiles`` returns, in that order.
    """
    # Helpers are created in the same order as before; ``scanForAudioFiles``
    # is called last, after every helper attribute exists.
    self.p = Printer(1)
    self.param = Params()
    self.m = Math()
    self.am = AudioManager()
    self.paths = Paths()

    scanned = self.scanForAudioFiles()
    self.trainingDesc, self.testingDesc = scanned
def __init__(self):
    """Create the helper objects and load previously pickled data.

    The pickle helper is configured with ``paths.pickle`` and uses
    ``paths.tag1`` as its load tag and ``paths.tag2`` as its save tag.
    """
    paths = Paths()
    self.paths = paths
    self.param = Params()
    self.pc = PrintConfig()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()

    store = Pickle(paths.pickle, lTag=paths.tag1, sTag=paths.tag2)
    self.pickle = store
    self.data = store.LoadData()
def __init__(self):
    """Create the helper objects and read the raw audio signal.

    The signal returned by ``AudioManager.readAudio(paths.file)`` is stored
    in ``self.data.raw``; the pickle helper saves under ``paths.tag1``.
    """
    paths = Paths()
    self.paths = paths
    self.param = Params()
    self.pc = PrintConfig()
    self.data = SpeachData()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()
    self.pickle = Pickle(paths.pickle, sTag=paths.tag1)

    # Read last: needs both the audio manager and the paths object.
    signal = self.am.readAudio(paths.file)
    self.data.raw = signal
def squareLength(self):
    """
    Returns the square length of this `Vector`.

    Applies the ``^`` operator with ``[2.0, 2.0, 2.0]`` to this instance and
    passes the first three components of the result to ``Math.sum1D``.

    :rtype: :class:`nodex.datatypes.Float`
    """
    # NOTE(review): relies on ``^`` being overloaded as a per-component
    # power on this type -- confirm against the class definition.
    squared = self ^ [2.0, 2.0, 2.0]
    x = squared[0]
    y = squared[1]
    z = squared[2]
    return Math.sum1D(x, y, z)
class PreProcessing:
    """First processing pass: per-frame pitch and power of the raw signal.

    ``run`` walks the signal in hops of ``param.step`` samples, estimates a
    pitch lag from the autocorrelation of each frame and the mean power of
    each hop, and optionally pickles the collected data.
    """

    def __init__(self):
        """Create the helper objects and read the raw audio signal."""
        paths = Paths()
        self.paths = paths
        self.param = Params()
        self.pc = PrintConfig()
        self.data = SpeachData()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(paths.pickle, sTag=paths.tag1)
        self.data.raw = self.am.readAudio(paths.file)

    def filterPitch(self, p):
        """Remove single-frame glitches from a pitch track, in place.

        An isolated non-zero value between two zeros is cleared; an isolated
        zero between two non-zero values becomes the mean of its neighbours.
        The first and last entries are never modified.

        :param p: mutable sequence of pitch values (modified in place).
        :return: the same sequence ``p``.
        """
        # Left-to-right pass: the left neighbour may already have been
        # corrected by the previous iteration, exactly as before.
        for pos in range(1, len(p) - 1):
            left = p[pos - 1]
            here = p[pos]
            right = p[pos + 1]
            if left == 0 and here != 0 and right == 0:
                p[pos] = 0
            elif left != 0 and here == 0 and right != 0:
                p[pos] = np.mean([left, right])
        return p

    def run(self, save=1):
        """Compute pitch and power for every frame and optionally save them.

        :param save: when truthy, ``self.data`` is written via
            ``self.pickle.SaveData`` after the pitch track is filtered.
        """
        stp = self.param.step
        raw = self.data.raw
        for step, idx in enumerate(range(0, len(raw), stp)):
            trama = raw[idx:idx + self.param.pf]
            tramaAC = self.m.autocorrelation(trama)
            power = np.sum(raw[idx:idx + stp]**2) / self.param.step

            # Pitch is the lag of the autocorrelation peak inside
            # [param.pi, param.pfN); 0 when the peak is not above threshold.
            lagLow = self.param.pi
            lagHigh = self.param.pfN
            if max(tramaAC[lagLow:lagHigh]) > self.param.threshold:
                pitch = np.argmax(tramaAC[lagLow:lagHigh]) + self.param.pi
            else:
                pitch = 0

            self.data.pitch.append(pitch)
            self.data.power.append(power)

            inspect = (step in self.pc.stepInto1
                       or step in self.pc.stepIntoAll)
            if inspect:
                self.p.prnt(2, str(step) + "------------------ start", 1)
                self.p.prnt(4, "In First Cycle", 1)
                self.p.plot([(raw, 'speech', 'c', 0),
                             (trama, 'trama', 'r', idx)])
                self.p.plot([(trama, 'trama', 'c', 0)])
                self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])
                self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])
                self.p.prnt(2, str(step) + "------------------ end", 1)
                # NOTE(review): the pause is assumed to belong to the
                # inspection branch -- confirm the intended nesting.
                if self.pc.stop1:
                    input("   ...")

        self.data.pitch = self.filterPitch(self.data.pitch)
        if save:
            self.pickle.SaveData(self.data)
def __init__(self):
    """Create the helper objects and load the previously encoded data.

    The pickle helper is configured with ``paths.pickle`` and uses
    ``paths.tag4`` as its load tag and ``paths.tag5`` as its save tag.
    """
    paths = Paths()
    self.paths = paths
    self.param = Params()
    self.pc = PrintConfig()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()

    store = Pickle(paths.pickle, lTag=paths.tag4, sTag=paths.tag5)
    self.pickle = store
    self.cc = CodeConfig()
    self.cu = CodingUtils()
    self.encoded = store.LoadEncoded()
def __ge__(self, other):
    """Implement ``self >= other`` via ``Math.bimath`` with ``Math.greaterOrEqual``."""
    operation = Math.greaterOrEqual
    return Math.bimath(self, other, func=operation)
def __div__(self, other):
    """
    Divide all individual components of this array by the other Nodex.

    Delegates to ``Math.bimath`` with ``Math.divide`` as the operation.
    """
    return Math.bimath(self, other, func=Math.divide)


# Python 3 (and Python 2 with ``from __future__ import division``) dispatches
# the ``/`` operator to ``__truediv__``; ``__div__`` alone is never called
# there. The alias keeps ``/`` working on every interpreter.
__truediv__ = __div__
def __ne__(self, other):
    """Implement ``self != other`` via ``Math.bimath`` with ``Math.notEqual``."""
    operation = Math.notEqual
    return Math.bimath(self, other, func=operation)
def __add__(self, other):
    """
    Add the other Nodex to this array, component by component.

    Delegates to ``Math.bimath`` with ``Math.sum`` as the operation.
    """
    operation = Math.sum
    return Math.bimath(self, other, func=operation)
def __mul__(self, other):
    """
    Multiply this array with the other Nodex, component by component.

    Delegates to ``Math.bimath`` with ``Math.multiply`` as the operation.
    """
    operation = Math.multiply
    return Math.bimath(self, other, func=operation)
def __sub__(self, other):
    """
    (- operator) Subtract the other `Nodex` from this instance.

    Delegates to ``Math.bimath`` with ``Math.subtract`` as the operation.
    """
    operation = Math.subtract
    return Math.bimath(self, other, func=operation)
def sign(self, **kwargs):
    """
    Returns whether this value is greater or equal to zero.

    The result is whatever ``Math.greaterOrEqual(self, 0.0)`` produces.
    ``kwargs`` is accepted but not forwarded.

    :rtype: :class:`nodex.datatypes.Float`
    """
    zero = 0.0
    return Math.greaterOrEqual(self, zero)
def __eq__(self, other):
    """Implement ``self == other`` via ``Math.bimath`` with ``Math.equal``."""
    operation = Math.equal
    return Math.bimath(self, other, func=operation)
def __pow__(self, other):
    """
    Raise the individual components of this array to the power given by
    the other Nodex.

    Delegates to ``Math.bimath`` with ``Math.power`` as the operation.
    """
    operation = Math.power
    return Math.bimath(self, other, func=operation)
def __sub__(self, other):
    """
    Subtract the other Nodex from this array, component by component.

    Delegates to ``Math.bimath`` with ``Math.subtract`` as the operation.
    """
    operation = Math.subtract
    return Math.bimath(self, other, func=operation)
def __le__(self, other):
    """Implement ``self <= other`` via ``Math.bimath`` with ``Math.lessOrEqual``."""
    operation = Math.lessOrEqual
    return Math.bimath(self, other, func=operation)
def __gt__(self, other):
    """Implement ``self > other`` via ``Math.bimath`` with ``Math.greaterThan``."""
    operation = Math.greaterThan
    return Math.bimath(self, other, func=operation)
def __mul__(self, other):
    """
    (* operator) Multiply this instance with the other `Nodex`.

    Delegates to ``Math.bimath`` with ``Math.multiply`` as the operation.
    """
    operation = Math.multiply
    return Math.bimath(self, other, func=operation)
def __add__(self, other):
    """
    (+ operator) Add the other `Nodex` to this instance.

    Delegates to ``Math.bimath`` with ``Math.sum`` as the operation.
    """
    operation = Math.sum
    return Math.bimath(self, other, func=operation)
def __lt__(self, other):
    """Implement ``self < other`` via ``Math.bimath`` with ``Math.lessThan``."""
    operation = Math.lessThan
    return Math.bimath(self, other, func=operation)
def abs(self, **kwargs):
    """Return ``Math.abs`` applied to this instance, forwarding ``kwargs``."""
    result = Math.abs(self, **kwargs)
    return result
def __div__(self, other):
    """
    (/ operator) Divide this instance by the other `Nodex`.

    Delegates to ``Math.bimath`` with ``Math.divide`` as the operation.
    """
    return Math.bimath(self, other, func=Math.divide)


# Python 3 (and Python 2 with ``from __future__ import division``) dispatches
# the ``/`` operator to ``__truediv__``; ``__div__`` alone is never called
# there. The alias keeps ``/`` working on every interpreter.
__truediv__ = __div__
def __pow__(self, other):
    """
    (** operator / pow()) Raise this instance to the power given by the
    other `Nodex`.

    Delegates to ``Math.bimath`` with ``Math.power`` as the operation.
    """
    operation = Math.power
    return Math.bimath(self, other, func=operation)
class LpcProcessing:
    """Second processing pass: per-frame LPC coefficients and gain.

    Works on data loaded from the pickle store (raw signal plus the pitch
    and power lists indexed by frame) and adds ``lpc`` and ``gain`` to it.
    """

    def __init__(self):
        """Create the helper objects and load previously pickled data."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag1,
                             sTag=self.paths.tag2)
        self.data = self.pickle.LoadData()

    def calculateLpcCoefs(self, data):
        """Solve the Toeplitz system built from autocorrelation values.

        :param data: 1-D numpy array ``[r0, r1, ..., rN]``.
        :return: vector ``a`` such that ``toeplitz(data[:-1]) @ a == -data[1:]``.
        :raises numpy.linalg.LinAlgError: if the Toeplitz matrix is singular.
        """
        toep = scipy.linalg.toeplitz(data[0:-1])
        # Solve the system directly instead of forming the explicit inverse:
        # same solution, better conditioned and cheaper.
        return np.linalg.solve(toep, -data[1:])

    def calculateGain(self, s, Rs0, lpcCoef):
        """Return ``sqrt((1 + lpcCoef . Rs) * Rs0)``.

        ``Rs`` is ``self.m.autocorrelation(s)[1:param.p]``.

        :param s: frame passed to the autocorrelation helper.
        :param Rs0: scalar scale factor (the caller passes frame energy).
        :param lpcCoef: coefficient vector from ``calculateLpcCoefs``.
        """
        Rs = self.m.autocorrelation(s)[1:self.param.p]
        G = np.sqrt((1 + lpcCoef.dot(Rs)) * Rs0)
        return G

    def voicedPreprocesing(self, data):
        """Apply the first-order difference ``x[n] - u * x[n-1]``.

        ``u`` is ``param.u``. The first output sample is duplicated so the
        result has the same length as ``data``.
        """
        # High pass filter
        dLen = len(data)
        spp = data[1:dLen] - self.param.u * data[0:dLen - 1]
        spp = np.insert(spp, 0, spp[0])
        return spp

    def run(self, save=1):
        """Compute LPC coefficients and gain for every frame.

        Results are stored in ``self.data.lpc`` (one row per frame, or a 1-D
        vector when there is a single frame) and ``self.data.gain``. Pitch
        and gain are converted to column matrices at the end.

        :param save: when truthy, ``self.data`` is written via
            ``self.pickle.SaveData``.
        """
        stp = self.param.step
        order = self.param.p
        lpcRows = []
        for step, idx in enumerate(range(0, len(self.data.raw), stp)):
            pitch = self.data.pitch[step]
            trama = self.data.raw[idx:idx + self.param.pf]
            h = np.zeros(self.param.pf)

            # Only frames with a non-zero pitch get the high-pass step.
            tramaHp = trama
            if pitch:
                tramaHp = self.voicedPreprocesing(trama)

            ham = np.hamming(len(tramaHp))
            tramaHpHam = tramaHp * ham
            tramaHpHamAc = self.m.autocorrelation(tramaHpHam)
            tramaHpHamAcP = tramaHpHamAc[0:order]
            lpcCoefs = self.calculateLpcCoefs(tramaHpHamAcP)
            energy = self.data.power[step] * self.param.step
            G = self.calculateGain(trama, energy, lpcCoefs)

            # Impulse response of the all-pole filter, used only for the
            # diagnostic plots below.
            # NOTE(review): the impulse is placed at index p while the
            # shifted copy starts at p - 1, so hShift begins with a zero
            # sample -- confirm whether ``it == p - 1`` was intended.
            for it, t in enumerate(trama):
                if it < order - 1:
                    h[it] = 0
                elif it == order:
                    h[it] = G
                else:
                    for ic, c in enumerate(lpcCoefs):
                        h[it] -= c * h[it - ic - 1]

            hShift = np.append(h[order - 1:], np.zeros(order - 1))
            hShiftFft = np.fft.fft(hShift)
            trFft = np.fft.fft(tramaHp)
            hShiftAc = self.m.autocorrelation(hShift)

            self.data.gain.append(G)
            lpcRows.append(lpcCoefs)

            if (step in self.pc.stepInto2 or step in self.pc.stepIntoAll):
                self.p.prnt(2, str(step) + "------------------ start", 1)
                self.p.prnt(4, str("In Second Cycle"), 1)
                self.p.prnt(6, "Current voice pitch: "
                            + str(self.data.pitch[step]), 1)
                self.p.plot([(self.data.raw, 'speech', 'y', 0),
                             (trama, 'trama', 'r', idx)])
                # dB of the spectrum magnitude: the FFT output is complex,
                # so take the modulus before the logarithm.
                ptrFft = 20 * np.log10(np.abs(trFft[0:int(len(trFft) / 2)]))
                phShiftFft = 20 * np.log10(
                    np.abs(hShiftFft[0:int(len(hShiftFft) / 2)]))
                self.p.plot([(tramaHp, 'trama - high pass', 'b', 0)])
                self.p.plot([(tramaHpHam, 'trama - hamming', 'b', 0)])
                self.p.plot([(tramaHpHamAc, 'trama - auto correlation',
                              'b', 0),
                             (tramaHpHamAc[0:self.param.p],
                              'trama - auto correlation (p='
                              + str(self.param.p) + ")", 'r', 0)])
                self.p.plot([(h, 'h', 'm', 0)])
                self.p.plot([(hShift, 'hShift', 'm', 0)])
                self.p.plot([(ptrFft, 'trFft dB', 'b', 0),
                             (phShiftFft, 'phShiftFft dB', 'r', 0)], 0)
                self.p.plot([(hShiftAc[:30], 'hShiftAc 30', 'r', 0),
                             (tramaHpHamAc[:30], 'tramaHpHamAc 30', 'b', 0)],
                            0)
                self.p.plot([(hShiftAc[:self.param.p], 'hShiftAc p', 'r', 0),
                             (tramaHpHamAc[:self.param.p], 'tramaHpHamAc p',
                              'b', 0)], 0)
                self.p.prnt(2, str(step) + "------------------ end", 1)
                # NOTE(review): the pause is assumed to belong to the
                # inspection branch -- confirm the intended nesting.
                if self.pc.stop2:
                    input("   ...")

        # Stack once after the loop instead of re-allocating every frame.
        # A single frame stays a 1-D vector, as before.
        if len(lpcRows) == 1:
            self.data.lpc = lpcRows[0]
        elif lpcRows:
            self.data.lpc = np.vstack(lpcRows)

        # ``np.mat`` was removed in NumPy 2.0; ``np.asmatrix`` is the same
        # function and exists in every NumPy version.
        self.data.pitch = np.asmatrix(self.data.pitch).T
        self.data.gain = np.asmatrix(self.data.gain).T
        if save:
            self.pickle.SaveData(self.data)
class Preprocess:
    """Compare recorded utterances by their frame-wise autocorrelation.

    Each recording is cut down to its most prominent speech span, turned
    into a matrix of per-frame autocorrelation vectors, and two such
    matrices are compared by a greedy walk through a banded distance map.
    """

    def __init__(self):
        """Create the helper objects and scan for audio file descriptors."""
        self.p = Printer(1)
        self.param = Params()
        self.m = Math()
        self.am = AudioManager()
        self.paths = Paths()
        self.trainingDesc, self.testingDesc = self.scanForAudioFiles()

    def scanForAudioFiles(self):
        """Return ``(trainPaths, testPaths)`` from the configured folders."""
        trainPaths = self.am.scanDirectory(self.paths.folderTrain)
        testPaths = self.am.scanDirectory(self.paths.folderTest)
        return trainPaths, testPaths

    def readAudioFile(self, desc):
        """Read the audio file whose path is stored at ``desc[2]``."""
        return self.am.readAudio(desc[2])

    def getSignalEnergy(self, raw):
        """Return the sum of squared samples of each ``param.step`` hop.

        :param raw: numpy array of samples.
        :return: list with one energy value per hop.
        """
        stp = self.param.step
        return [np.sum(raw[idx:idx + stp]**2)
                for idx in range(0, len(raw), stp)]

    def getSpeech(self, raw, energy):
        """Pick the most prominent speech span of a recording.

        Frames are switched on and off by fixed energy thresholds, nearby
        spans are merged, short ones dropped, and the span with the largest
        reference amplitude is returned.

        :param raw: numpy array of samples.
        :param energy: per-hop energies as produced by ``getSignalEnergy``.
        :return: ``(speech, speechIdx)`` -- a one-element list holding the
            selected slice of ``raw`` and a one-element list holding its
            start sample index.
        :raises ValueError: if no span passes the thresholds.
        """
        rawAbs = abs(raw)
        stp = self.param.step
        whiteNoiseRef = 100
        activationScale = [50, 1000, 100]
        dectivationScale = [100]
        activated = [0, 0, 0]
        lastActivated = 0
        spans = []
        spanMaxRef = []
        maxRef = 0
        wait = 0

        # Peak absolute amplitude of every hop.
        maxRaw = [max(rawAbs[i * stp:(i + 1) * stp])
                  for i in range(len(energy))]

        # NOTE(review): the nesting of the reset statements and of the
        # ``maxRef`` update is a best reading of the logic -- confirm.
        for i, e in enumerate(energy):
            # passed the minimum activation
            if e >= whiteNoiseRef * activationScale[0] and activated[0] == 0:
                activated[0] = 1
                lastActivated = i
            # below the deactivation value
            elif (e < whiteNoiseRef * dectivationScale[0]
                  and i - lastActivated > wait and activated[0] == 1):
                # Only spans that also reached the second level are kept.
                if activated[0] == 1 and activated[1] == 1:
                    spans.append([lastActivated * stp, i * stp])
                    spanMaxRef.append(maxRef)
                activated = [0, 0, 0]
                maxRef = maxRaw[i]
            # passed the second activation
            if activated[0] == 1 and e >= whiteNoiseRef * activationScale[1]:
                activated[1] = 1
                maxRef = max([maxRef, maxRaw[i]])

        # join spans, which are close
        joinedSpans = []
        joinedSpanMaxRef = []
        join = []
        jmaxRef = 0
        maxG = 1000
        for i, s in enumerate(spans):
            if i == 0:
                join = [s[0], s[1]]
                jmaxRef = spanMaxRef[i]
            elif s[0] - join[1] < maxG:
                join[1] = s[1]
                jmaxRef = max([jmaxRef, spanMaxRef[i]])
            else:
                joinedSpans.append(join)
                joinedSpanMaxRef.append(jmaxRef)
                jmaxRef = spanMaxRef[i]
                join = [s[0], s[1]]
        if spans:
            # Flush the span that was still open when the loop ended.
            joinedSpans.append(join)
            joinedSpanMaxRef.append(jmaxRef)

        # remove short spans
        minL = 1500
        longEnoughSpans = []
        longEnoughMaxRef = []
        for s, ref in zip(joinedSpans, joinedSpanMaxRef):
            if s[1] - s[0] > minL:
                longEnoughSpans.append(s)
                longEnoughMaxRef.append(ref)

        if not longEnoughSpans:
            # np.argmax on an empty list raises an opaque ValueError; keep
            # the exception type but say what actually went wrong.
            raise ValueError("no speech span detected in the signal")

        # the most probable span
        best = longEnoughSpans[np.argmax(longEnoughMaxRef)]
        speech = [raw[best[0]:best[1]]]
        speechIdx = [best[0]]
        return speech, speechIdx

    def extractSpeech(self, desc, visu=False):
        """Read a recording and return its detected speech segments.

        :param desc: file descriptor; ``desc[2]`` is the path and
            ``desc[1]`` holds the labels shown in the plot title.
        :param visu: when true, plot the raw signal with the detected span.
        :return: list of speech segments (see ``getSpeech``).
        """
        raw = self.readAudioFile(desc)
        energy = self.getSignalEnergy(raw)
        speech, speechIdx = self.getSpeech(raw, energy)
        if visu:
            title = (str(" content: " + desc[1][0])
                     + " | orator: " + str(desc[1][2])
                     + " | version: " + str(desc[1][1]))
            self.p.plotSpeech(raw, speech, speechIdx, title)
        return speech

    def getDistanceMap(self, sR, sT):
        """Return the banded absolute-difference map of two sequences.

        Cell ``[r, t]`` is ``|sR[r] - sT[t]|`` when ``t`` lies inside the
        band ``[tMin, tMax]`` computed for row ``r`` and ``inf`` otherwise.
        """
        R = len(sR)
        T = len(sT)
        D = np.zeros([R, T])
        for r in range(R):
            # The band depends on the row only.
            tMin = max(r * (T / (R * 2)), (r - R * 0.5) * (2 * T / R))
            tMax = min(r * (2 * T / R), (r + R) * (T / 2 / R))
            for t in range(T):
                if tMin <= t <= tMax:
                    D[r, t] = np.sqrt((sR[r] - sT[t])**2)
                else:
                    D[r, t] = np.inf
        return D

    def processSpeech(self, raw):
        """Return the per-frame autocorrelation matrix of a signal.

        Frames of ``param.window`` samples are taken every ``param.step``
        samples; a frame shorter than the window is filled by repeating
        itself. Each row holds the first ``param.p`` autocorrelation values
        of one frame.

        :return: 2-D array with one row per frame, or ``[]`` when ``raw``
            is empty.
        """
        stp = self.param.step
        wndw = self.param.window
        p = self.param.p
        rows = []
        for idx in range(0, len(raw), stp):
            trama = raw[idx:idx + wndw]
            if len(trama) < wndw:
                # Repeat the short frame until it covers the window.
                reps = int(np.ceil(wndw / len(trama)))
                trama = np.tile(trama, reps)[:wndw]
            tAC = self.m.autocorrelation(trama)
            rows.append(tAC[:p])
        # Stack once instead of re-allocating the matrix for every frame.
        return np.vstack(rows) if rows else []

    def getExpandedDistanceMap(self, D):
        """Pad ``D`` with a leading row and column of ``inf``.

        The new corner cell ``[0, 0]`` is set to 0 as the start of the walk.
        """
        eD = np.zeros(np.array(D.shape) + 1) + np.inf
        eD[1:, 1:] = D
        eD[0, 0] = 0
        return eD

    def getDistanceMapOfAc(self, sR, sT):
        """Return the banded Euclidean distance map of two vector sequences.

        Cell ``[r, t]`` is the Euclidean distance between ``sT[t]`` and
        ``sR[r]`` when ``t`` is within one column of the band
        ``[tMin, tMax]`` computed for row ``r`` and ``inf`` otherwise.
        """
        R = len(sR)
        T = len(sT)
        D = np.zeros([R, T])
        for r in range(R):
            # The band depends on the row only.
            tMin = max(r * (T / (R * 2)), (r - R * 0.5) * (2 * T / R))
            tMax = min(r * (2 * T / R), (r + R) * (T / 2 / R))
            for t in range(T):
                if tMin <= t + 1 and t - 1 <= tMax:
                    D[r, t] = np.sqrt(np.sum((sT[t] - sR[r])**2))
                else:
                    D[r, t] = np.inf
        return D

    def stepOne(self, dist, position, arround):
        """Take one greedy step towards the cheapest neighbouring cell.

        :param dist: distance accumulated so far.
        :param position: current ``[row, col]`` as a numpy array.
        :param arround: up-to-2x2 window of the map whose ``[0, 0]`` cell is
            the current position.
        :return: ``(position, dist, minVal)`` after the step. When the
            window is a single cell no move is possible; the inputs are
            returned unchanged with ``minVal`` equal to 0.
        """
        dim = arround.shape
        minVal = 0
        if 2 < sum(dim):
            if 1 < dim[0] and 1 < dim[1]:
                dirs = np.array([[1, 0], [0, 1], [1, 1]])
            elif 1 < dim[0]:
                dirs = np.array([[1, 0]])
            else:
                dirs = np.array([[0, 1]])
            minDir = dirs[0]
            minVal = arround[minDir[0], minDir[1]]
            # ``<=`` makes later directions win ties (diagonal last).
            for d in dirs:
                thisVal = arround[d[0], d[1]]
                if thisVal <= minVal:
                    minDir = d
                    minVal = thisVal
            dist = dist + minVal
            position = position + minDir
        return position, dist, minVal

    def getDistanceRoute(self, expD):
        """Walk greedily from the top-left to the bottom-right corner.

        :param expD: expanded distance map (see ``getExpandedDistanceMap``).
        :return: ``(globalDist, Route, expDRoute)`` -- accumulated cost
            divided by the number of steps (``inf`` when no step is taken),
            a map holding ``0.5 + step cost`` on visited cells, and a copy
            of ``expD`` with 3 added on visited cells.
        """
        target = expD.shape
        Route = np.zeros(expD.shape)
        expDRoute = np.array(expD)
        baseline = 0.5
        pos = np.array([0, 0])
        dist = 0
        Route[pos[0], pos[1]] = baseline
        step = 0
        # Continue until BOTH the last row and the last column are reached.
        # The previous condition used the row index twice and therefore
        # stopped as soon as the last row was hit.
        while ((target[0] - 1) - pos[0] + (target[1] - 1) - pos[1]) != 0:
            around = expD[pos[0]:pos[0] + 2, pos[1]:pos[1] + 2]
            pos, dist, delta = self.stepOne(dist, pos, around)
            step = step + 1
            Route[pos[0], pos[1]] = baseline + delta
            expDRoute[pos[0], pos[1]] = expDRoute[pos[0], pos[1]] + 3
        globalDist = np.inf
        if 0 < step:
            globalDist = dist / step
        return globalDist, Route, expDRoute

    def getDistance(self, sR, sT):
        """Return ``(globalDist, expD, route, expdRoute)`` for two sequences."""
        D = self.getDistanceMapOfAc(sR, sT)
        expD = self.getExpandedDistanceMap(D)
        globalDist, route, expdRoute = self.getDistanceRoute(expD)
        return globalDist, expD, route, expdRoute

    def processAll(self, descs):
        """Return the autocorrelation matrix of every descriptor's speech."""
        ACs = []
        for d in descs:
            speech = self.extractSpeech(d, False)[0]
            ACs.append(self.processSpeech(speech))
        return ACs

    def compareAC(self, speechA_AC, speechB_AC):
        """Compare two autocorrelation matrices (see ``getDistance``)."""
        return self.getDistance(speechA_AC, speechB_AC)

    def compare(self, descA, descB, visu=False, speechAlreadyProcessed=False):
        """Compare two recordings given by their descriptors.

        ``speechAlreadyProcessed`` is accepted but not used.

        :return: ``(globalDistance, expD, route, expdRoute)``.
        """
        speechA = self.extractSpeech(descA, visu)[0]
        speechB = self.extractSpeech(descB, visu)[0]
        speechA_AC = self.processSpeech(speechA)
        speechB_AC = self.processSpeech(speechB)
        return self.compareAC(speechA_AC, speechB_AC)

    def compare1toN(self, one, many, visu=False):
        """Compare one descriptor against several, optionally showing maps."""
        dA = one
        for dK in many:
            globalDistance, expD, route, expdRoute = self.compare(
                dA, dK, visu)
            if visu:
                suffix = (str(dA[1]) + " v " + str(dK[1]) + " dist="
                          + str(round(globalDistance, 3)))
                self.p.imShow(expD, "expD of " + suffix)
                self.p.imShow(expdRoute, "expdRoute of " + suffix)
                self.p.imShow(route, "route of " + suffix)

    def compareTestToTrain(self, test, train, visu=False):
        """Match every test recording to its closest training recording.

        Prints one line per test item plus the overall match ratio and shows
        the score and match maps. A match means the label ``desc[1][0]`` of
        the test item equals that of the closest training item.
        """
        testACs = self.processAll(test)
        trainACs = self.processAll(train)
        rows = len(test)
        cols = len(train)
        scoreMap = np.zeros([rows, cols])
        matchMap = np.zeros([rows, cols])
        matchScoreMap = np.zeros([rows * 3, cols]) - np.inf
        matchCount = 0
        testCount = 0
        print("compareTestToTrain - ", len(test[0]), len(train[0]))
        for i, iTest in enumerate(test):
            iexp = i * 3
            iTestAC = testACs[i]
            scores = []
            for j, jTrain in enumerate(train):
                jTrainAC = trainACs[j]
                globalDistance, expD, route, expdRoute = self.compareAC(
                    iTestAC, jTrainAC)
                scoreMap[i, j] = globalDistance
                matchScoreMap[iexp, j] = globalDistance
                scores.append(globalDistance)
                isSame = iTest[1][0] == jTrain[1][0]
                matchScoreMap[iexp + 1, j] = 1 if isSame else np.inf
            lowestScoreIdx = np.argmin(scores)
            isMatch = iTest[1][0] == train[lowestScoreIdx][1][0]
            matchCount = matchCount + (1 if isMatch else 0)
            testCount = testCount + 1
            print(" -> ", i, lowestScoreIdx, " - ", iTest[1][0], " v ",
                  train[lowestScoreIdx][1][0], " \t",
                  round(scores[lowestScoreIdx], 3), " \t", isMatch)
            matchMap[i, lowestScoreIdx] = 1 if isMatch else -1
            matchScoreMap[iexp + 1, lowestScoreIdx] = 2 if isMatch else -0.5
        matchRatio = (matchCount / testCount) if testCount != 0 else 0
        self.p.imShow(scoreMap, "scoreMap ")
        self.p.imShow(matchMap, "matchMap ")
        self.p.imShow(matchScoreMap, "matchScoreMap ")
        print(" matchRatio ", matchRatio, " ", matchCount, testCount)

    def compareAll(self, data):
        """Compare every pair of recordings and show the resulting maps.

        Only pairs with ``i >= j`` are computed; the confusion map is filled
        symmetrically, the match and x maps only at ``[i, j]``.
        """
        numOfSamples = len(data)
        confusionMap = np.ones([numOfSamples, numOfSamples])
        matchMap = np.zeros([numOfSamples, numOfSamples])
        xMap = np.zeros([numOfSamples, numOfSamples])
        ACs = self.processAll(data)
        for i, di in enumerate(data):
            iAC = ACs[i]
            iValue = di[1][0]
            for j, dj in enumerate(data):
                if i > j - 1:
                    jValue = dj[1][0]
                    jAC = ACs[j]
                    globalDistance, expD, route, expdRoute = self.compareAC(
                        iAC, jAC)
                    confusionMap[i, j] = globalDistance
                    confusionMap[j, i] = globalDistance
                    matchMap[i, j] = ((jValue == iValue)
                                      and (globalDistance < 0.8))
                    xMap[i, j] = (globalDistance < 0.5)
                    print(" -> ", i, j, " - ", di[1][0], " v ", dj[1][0],
                          " \t", round(globalDistance, 3))
        self.p.imShow(confusionMap, "confusionMap ")
        self.p.imShow(matchMap, "matchMap ")
        self.p.imShow(xMap, "xMap ")

    def run(self):
        """Match all testing recordings against all training recordings."""
        # Alternative experiments:
        # self.compare1toN(self.testingDesc[21], [self.trainingDesc[30]], True)
        # self.compare1toN(self.trainingDesc[1], [self.trainingDesc[0]], True)
        # self.compareAll(self.trainingDesc[:40])
        self.compareTestToTrain(self.testingDesc[:], self.trainingDesc[:])
def abs(self, **kwargs):
    """
    Returns the absolute value of this value.

    The result is whatever ``Math.abs`` produces for this instance; any
    keyword arguments are forwarded unchanged.

    :rtype: :class:`nodex.datatypes.Float`
    """
    result = Math.abs(self, **kwargs)
    return result