def __init__(self):
    """Create the helper objects, then index the training/testing audio."""
    self.p = Printer(1)
    self.param = Params()
    self.m = Math()
    self.am = AudioManager()
    self.paths = Paths()
    # The scan runs last, after every helper attribute has been assigned.
    descriptors = self.scanForAudioFiles()
    self.trainingDesc, self.testingDesc = descriptors
def __init__(self):
    """Create the helper objects and load the pickled data set."""
    paths = Paths()
    self.paths = paths
    self.param = Params()
    self.pc = PrintConfig()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()
    # Pickle is configured with a load tag (tag1) and a save tag (tag2).
    self.pickle = Pickle(paths.pickle, lTag=paths.tag1, sTag=paths.tag2)
    self.data = self.pickle.LoadData()
def __init__(self):
    """Create the helper objects and read the raw audio into a new data set."""
    paths = Paths()
    self.paths = paths
    self.param = Params()
    self.pc = PrintConfig()
    self.data = SpeachData()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()
    # Only a save tag is given: this stage starts from the audio file.
    self.pickle = Pickle(paths.pickle, sTag=paths.tag1)
    self.data.raw = self.am.readAudio(paths.file)
def __init__(self):
    """Create the helpers, load every stage's output and synthesize both."""
    self.paths = Paths()
    self.param = Params()
    self.pc = PrintConfig()
    self.am = AudioManager()
    self.p = Printer(1)
    self.S = Synthesizer()
    self.pickle = Pickle(self.paths.pickle)
    # Each step below consumes the attributes assigned by the previous one.
    loaded = self.loadAll()
    self.decoded, self.original, self.coded = loaded
    organized = self.organize()
    self.cP, self.cG, self.cLpc = organized
    self.cSn = self.SynthAll()
def __init__(self):
    """Create the helper objects and load the encoded bit stream."""
    paths = Paths()
    self.paths = paths
    self.param = Params()
    self.pc = PrintConfig()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()
    # Pickle is configured with a load tag (tag4) and a save tag (tag5).
    self.pickle = Pickle(paths.pickle, lTag=paths.tag4, sTag=paths.tag5)
    self.cc = CodeConfig()
    self.cu = CodingUtils()
    self.encoded = self.pickle.LoadEncoded()
class Decoder:
    """Unpack the encoded bit string into per-frame gain, pitch and LPC values.

    Every frame of ``self.encoded.binaries`` is split into a gain field, a
    pitch field and an LSP field; each field is de-quantised and the LSP
    values are converted to LPC coefficients through ``CodingUtils``.
    """

    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag4,
                             sTag=self.paths.tag5)
        self.cc = CodeConfig()
        self.cu = CodingUtils()
        self.encoded = self.pickle.LoadEncoded()

    def separateBins(self):
        """Split the bit string into per-frame gain, pitch and LSP fields.

        Returns:
            Three lists of bit strings ``(gain, pitch, lsp)``, one entry per
            frame. A trailing partial frame yields shortened or empty fields.
        """
        binary = self.encoded.binaries
        innerL = [5, 7, 34]  # field widths in bits: gain, pitch, LSP
        frameBits = sum(innerL)
        gain = []
        pitch = []
        lsp = []
        for start in range(0, len(binary), frameBits):
            fields = []
            cursor = start
            for width in innerL:
                fields.append(binary[cursor:cursor + width])
                cursor += width
            gain.append(fields[0])
            pitch.append(fields[1])
            lsp.append(fields[2])
        return gain, pitch, lsp

    def debinariseLsp(self, bLsp):
        """De-quantise the LSP field of every frame.

        Args:
            bLsp: list of LSP bit strings, one per frame.

        Returns:
            ``(qLsp, bcLsp)``: per frame, the decoded LSP values and the bit
            string that each value was decoded from.
        """
        frames = self.setupLspFrames()
        qLsp = []
        bcLsp = []
        for bits in bLsp:
            idx = 0
            qlsp = []
            bclsp = []
            for frame in frames:
                bc = bits[idx:idx + frame.bits]
                # Undo the ``coef * 0.5 / pi`` scaling applied by Coder.
                qb = self.cu.debinarise([bc], frame)[0] / 0.5 * np.pi
                idx += frame.bits
                qlsp.append(qb)
                bclsp.append(bc)
            qLsp.append(qlsp)
            bcLsp.append(bclsp)
        return qLsp, bcLsp

    def debinariseGain(self, bGain, maxGain):
        """De-quantise the gain fields with a frame scaled to ``maxGain``."""
        gainFrame = CodeFrame(self.cc.gainSegments)
        gainFrame.scale(maxGain)
        return self.cu.debinarise(bGain, gainFrame)

    def debinarisePitch(self, bPitch):
        """Convert pitch bit strings back to pitch values.

        Code 0 stays 0. Any other code ``q`` is shifted back by
        ``param.pi - 1``, the inverse of the ``p - param.pi + 1`` shift that
        ``Coder.binarisePitch`` applies.
        """
        offset = self.param.pi - 1
        qPitch = []
        for bits in bPitch:
            q = int(bits, 2)
            qPitch.append(q if q == 0 else q + offset)
        return qPitch

    def lspToLpc(self, lsp):
        """Convert LSP values to LPC coefficients via ``CodingUtils``."""
        return self.cu.lsf_to_lpc(np.array(lsp))

    def setupLspFrames(self):
        """Build one ``CodeFrame`` per configured LSP segment."""
        return [CodeFrame(s) for s in self.cc.lspSegments]

    def _bundle(self, qlpc, qpitch, qgain):
        """Pack the decoded values into a ``DecodedData`` container."""
        decoded = DecodedData()
        decoded.gain = qgain
        decoded.pitch = qpitch
        decoded.lpc = qlpc
        return decoded

    def Save(self, qlpc, qpitch, qgain):
        """Store the decoded values on ``self.decoded`` and pickle them."""
        self.decoded = self._bundle(qlpc, qpitch, qgain)
        self.pickle.SaveDecoded(self.decoded)

    def run(self, save=1):
        """Decode the whole stream and print the configured frames.

        Args:
            save: when truthy (default) the result is pickled; otherwise it is
                only kept on ``self.decoded``.
        """
        maxGain = self.encoded.maxGain
        bGain, bPitch, bLsp = self.separateBins()
        qGain = self.debinariseGain(bGain, maxGain)
        qPitch = self.debinarisePitch(bPitch)
        qLsp, bcLsp = self.debinariseLsp(bLsp)
        qLpc = self.lspToLpc(qLsp)
        qLpc = self.cu.removeLpcPrefix(qLpc)
        if save:
            self.Save(qLpc, qPitch, qGain)
        else:
            self.decoded = self._bundle(qLpc, qPitch, qGain)

        for step in range(len(bGain)):
            if not (step in self.pc.stepInto5
                    or step in self.pc.stepIntoAll):
                continue
            self.p.prnt(2, ' ', 1)
            self.p.prnt(
                2, ' bitcount -> ' + str(len(self.encoded.binaries)), 1)
            self.p.prnt(
                2, ' gain -> ' + str(bGain[step]) + " - " + str(qGain[step]),
                1)
            self.p.prnt(
                2,
                ' pitch -> ' + str(bPitch[step]) + " - " + str(qPitch[step]),
                1)
            self.p.prnt(2, ' lsp -> ' + str(bLsp[step]), 1)
            self.p.prnt(2, ' ', 1)
            for i, lspCoef in enumerate(qLsp[step]):
                self.p.prnt(
                    2, ' lsp ' + str(i) + ' -> ' + str(bcLsp[step][i]) +
                    " - " + str(lspCoef), 1)
            self.p.prnt(2, ' ', 1)
            for i, lpcCoef in enumerate(qLpc[step]):
                self.p.prnt(2, ' lpc ' + str(i) + ' -> ' + str(lpcCoef), 1)


# Decoder().run()
class Coder:
    """Quantise gain, pitch and LSP values into one packed bit string."""

    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag3,
                             sTag=self.paths.tag4)
        self.cc = CodeConfig()
        self.cu = CodingUtils()
        self.data = self.pickle.LoadData()

    def binariseGain(self):
        """Quantise the gains with a frame scaled to the largest gain.

        Returns:
            ``(binary, maxGain)``: the gain bit strings and the scaling value
            that a decoder needs to undo the quantisation.
        """
        gainFrame = CodeFrame(self.cc.gainSegments)
        maxGain = np.max(self.data.gain)
        gainFrame.scale(maxGain)
        binary, _quanta, _indice = self.cu.binarise(self.data.gain, gainFrame)
        return binary, maxGain

    def binarisePitch(self):
        """Encode every pitch value as a 7-bit string.

        A pitch of 0 is kept as code 0; any other pitch ``p`` is stored as
        ``p - param.pi + 1``. Each element of ``self.data.pitch`` is indexed
        as ``[0, 0]``, i.e. it is expected to be a 1x1 matrix.
        """
        bits = 7
        binary = []
        for p in self.data.pitch:
            shifted = (p - self.param.pi + 1 if p != 0 else p).astype(
                np.uint8)
            binary.append(np.binary_repr(shifted[0, 0], width=bits))
        return binary

    def lpcToLsp(self, lpc):
        """Convert LPC coefficients to LSP values via ``CodingUtils``."""
        return self.cu.lpc_to_lsf(lpc)

    def setupLspFrames(self):
        """Build one ``CodeFrame`` per configured LSP segment."""
        return [CodeFrame(s) for s in self.cc.lspSegments]

    def binariseLsp(self, LSP):
        """Quantise every LSP coefficient with the frame of its position.

        Returns:
            A list with, per input frame, the list of coefficient bit strings.
        """
        frames = self.setupLspFrames()
        lspBinaries = []
        for lsp in LSP:
            lspBin = []
            for i, coef in enumerate(lsp):
                # Scale by 0.5 / pi before quantising.
                scaled = [coef * 0.5 / np.pi]
                binary, _quanta, _indice = self.cu.binarise(scaled, frames[i])
                lspBin.append(binary[0])
            lspBinaries.append(lspBin)
        return lspBinaries

    def comoposeBinaries(self, gain, pitch, lsp):
        """Concatenate gain, pitch and LSP bits into one string per frame."""
        return [
            gain[i] + pitch[i] + ''.join(lsp[i]) for i in range(len(gain))
        ]

    def zipBinaries(self, coded, maxGain):
        """Join all frame strings into one ``EncodedData`` bit stream."""
        encoded = EncodedData()
        encoded.maxGain = maxGain
        encoded.binaries = ''.join(coded)
        return encoded

    def run(self, save=1):
        """Encode the loaded data and print the configured frames.

        Args:
            save: when truthy (default) the encoded stream is pickled.
        """
        bGain, maxGain = self.binariseGain()
        bPitch = self.binarisePitch()
        LPC1 = self.cu.prefixLpcWith1(self.data.lpc)
        LSP = self.lpcToLsp(LPC1)
        bLSP = self.binariseLsp(LSP)
        coded = self.comoposeBinaries(bGain, bPitch, bLSP)
        encoded = self.zipBinaries(coded, maxGain)
        if save:
            self.pickle.SaveEncoded(encoded)

        for step in range(len(bGain)):
            if not (step in self.pc.stepInto4
                    or step in self.pc.stepIntoAll):
                continue
            self.p.prnt(2, str(step) + "------------------ start", 1)
            self.p.prnt(4, str("In Forth Cycle"), 1)
            self.p.prnt(2, ' ', 1)
            self.p.prnt(2, ' gain max -> ' + str(maxGain), 1)
            self.p.prnt(
                2, ' gain -> ' + str(self.data.gain[step, 0]) + " == " +
                str(bGain[step]), 1)
            self.p.prnt(
                2, ' pitch -> ' + str(self.data.pitch[step, 0]) + " == " +
                str(bPitch[step]), 1)
            self.p.prnt(2, ' ', 1)
            for i, c in enumerate(self.data.lpc[step]):
                self.p.prnt(
                    2, ' lpc ' + str(i) + ' -> ' + str(round(c, 4)) + "\t" +
                    bLSP[step][i], 1)
            self.p.prnt(2, ' ', 1)
            for i, c in enumerate(LSP[step]):
                self.p.prnt(
                    2, ' lsp ' + str(i) + ' -> ' + str(round(c, 4)) + "\t" +
                    bLSP[step][i], 1)
            self.p.prnt(2, ' ', 1)
            self.p.prnt(2, ' lsp -> ' + coded[step], 1)
            self.p.prnt(2, ' ', 1)
class Preprocess:
    """Compare recorded words through their short-time autocorrelation.

    The class locates the speech span of a recording, turns it into a matrix
    of per-frame autocorrelation vectors, builds a band-limited distance map
    between two such matrices and follows a greedy path through that map to
    obtain one distance figure.
    """

    def __init__(self):
        self.p = Printer(1)
        self.param = Params()
        self.m = Math()
        self.am = AudioManager()
        self.paths = Paths()
        self.trainingDesc, self.testingDesc = self.scanForAudioFiles()

    def scanForAudioFiles(self):
        """Return the descriptors of the training and the testing folder."""
        trainPaths = self.am.scanDirectory(self.paths.folderTrain)
        testPaths = self.am.scanDirectory(self.paths.folderTest)
        return trainPaths, testPaths

    def readAudioFile(self, desc):
        """Read the audio whose path is stored in ``desc[2]``."""
        return self.am.readAudio(desc[2])

    def getSignalEnergy(self, raw):
        """Return the sum of squared samples of each ``param.step`` block."""
        stp = self.param.step
        return [
            np.sum(raw[idx:idx + stp]**2) for idx in range(0, len(raw), stp)
        ]

    def getSpeech(self, raw, energy):
        """Pick the span of ``raw`` that most likely contains speech.

        Spans are opened and closed by energy thresholds, spans closer than
        1000 samples are joined, spans of 1500 samples or less are dropped
        and the span with the largest absolute sample value wins.

        Returns:
            ``(speech, speechIdx)``: one-element lists with the selected
            samples and their start index in ``raw``.

        Raises:
            ValueError: if no span survives the filtering.
        """
        rawAbs = abs(raw)
        stp = self.param.step
        whiteNoiseRef = 100
        activationScale = [50, 1000, 100]
        dectivationScale = [100]
        wait = 0

        # Largest absolute sample of every energy block.
        maxRaw = [
            max(rawAbs[i * stp:(i + 1) * stp]) for i in range(len(energy))
        ]

        # NOTE(review): the source file lost its indentation; the nesting of
        # the reset and of the running maximum below is the most plausible
        # reading of the statement order - confirm against the original.
        activated = [0, 0, 0]
        lastActivated = 0
        spans = []
        spanMaxRef = []
        maxRef = 0
        for i, e in enumerate(energy):
            if e >= whiteNoiseRef * activationScale[0] and activated[0] == 0:
                # Passed the minimum activation.
                activated[0] = 1
                lastActivated = i
            elif (e < whiteNoiseRef * dectivationScale[0]
                  and i - lastActivated > wait and activated[0] == 1):
                # Below the deactivation value: keep the span only if the
                # second activation level was reached meanwhile.
                if activated[0] == 1 and activated[1] == 1:
                    spans.append([lastActivated * stp, i * stp])
                    spanMaxRef.append(maxRef)
                activated = [0, 0, 0]
                maxRef = maxRaw[i]
            if activated[0] == 1 and e >= whiteNoiseRef * activationScale[1]:
                # Passed the second activation.
                activated[1] = 1
            maxRef = max([maxRef, maxRaw[i]])

        # Join spans which are close to each other.
        joinedSpans = []
        joinedSpanMaxRef = []
        join = []
        jmaxRef = 0
        maxG = 1000
        for i, s in enumerate(spans):
            if i == 0:
                join = [s[0], s[1]]
                jmaxRef = spanMaxRef[i]
            elif s[0] - join[1] < maxG:
                join[1] = s[1]
                jmaxRef = max([jmaxRef, spanMaxRef[i]])
            else:
                joinedSpans.append(join)
                joinedSpanMaxRef.append(jmaxRef)
                jmaxRef = spanMaxRef[i]
                join = s
            if i == len(spans) - 1 and len(join) == 2:
                joinedSpans.append(join)
                joinedSpanMaxRef.append(jmaxRef)

        # Remove short spans.
        minL = 1500
        longEnoughSpans = []
        longEnoughMaxRef = []
        for i, s in enumerate(joinedSpans):
            if s[1] - s[0] > minL:
                longEnoughSpans.append(s)
                longEnoughMaxRef.append(joinedSpanMaxRef[i])

        if not longEnoughSpans:
            # np.argmax would raise an unspecific ValueError on empty input.
            raise ValueError("no speech span found in the signal")

        # The most probable span.
        best = longEnoughSpans[np.argmax(longEnoughMaxRef)]
        return [raw[best[0]:best[1]]], [best[0]]

    def extractSpeech(self, desc, visu=False):
        """Read the file of ``desc`` and return its detected speech list."""
        raw = self.readAudioFile(desc)
        energy = self.getSignalEnergy(raw)
        speech, speechIdx = self.getSpeech(raw, energy)
        if visu:
            title = str(" content: " + desc[1][0]) + " | orator: " + str(
                desc[1][2]) + " | version: " + str(desc[1][1])
            self.p.plotSpeech(raw, speech, speechIdx, title)
        return speech

    @staticmethod
    def _bandLimits(r, R, T):
        """Return ``(tMin, tMax)``, the column band allowed for row ``r``."""
        tMin = max(r * (T / (R * 2)), (r - R * 0.5) * (2 * T / R))
        tMax = min(r * (2 * T / R), (r + R) * (T / 2 / R))
        return tMin, tMax

    def getDistanceMap(self, sR, sT):
        """Return the band-limited distance map of two scalar sequences.

        Cells outside the band of their row are set to infinity.
        """
        R = len(sR)
        T = len(sT)
        D = np.zeros([R, T])
        for r in range(R):
            tMin, tMax = self._bandLimits(r, R, T)
            for t in range(T):
                if tMin <= t <= tMax:
                    D[r, t] = np.sqrt((sR[r] - sT[t])**2)
                else:
                    D[r, t] = np.inf
        return D

    def processSpeech(self, raw):
        """Return one row of ``param.p`` autocorrelation values per frame.

        Frames start every ``param.step`` samples and are ``param.window``
        long; a shorter trailing frame is repeated cyclically to full length.
        """
        stp = self.param.step
        wndw = self.param.window
        p = self.param.p
        rows = []
        for idx in range(0, len(raw), stp):
            trama = raw[idx:idx + wndw]
            if len(trama) < wndw:
                repeats = np.ceil((wndw / len(trama)) - 1).astype(int)
                expTrama = trama
                for _ in range(repeats):
                    expTrama = np.hstack([expTrama, trama])
                trama = expTrama[0:wndw]
            rows.append(self.m.autocorrelation(trama)[:p])
        # Stack once at the end; an empty input keeps returning an empty list.
        return np.vstack(rows) if rows else []

    def getExpandedDistanceMap(self, D):
        """Pad ``D`` with a leading row/column of infinity and a 0 corner."""
        eD = np.zeros(np.array(D.shape) + 1) + np.inf
        eD[1:, 1:] = D
        eD[0, 0] = 0
        return eD

    def getDistanceMapOfAc(self, sR, sT):
        """Return the band-limited Euclidean distance map of two AC matrices.

        The band of each row is widened by one column on either side; cells
        outside are set to infinity.
        """
        R = len(sR)
        T = len(sT)
        D = np.zeros([R, T])
        for r in range(R):
            tMin, tMax = self._bandLimits(r, R, T)
            for t in range(T):
                if tMin <= t + 1 and t - 1 <= tMax:
                    D[r, t] = sum((sT[t] - sR[r])**2)**0.5
                else:
                    D[r, t] = np.inf
        return D

    def stepOne(self, dist, position, arround):
        """Advance one cell towards the cheapest neighbour.

        Args:
            dist: distance accumulated so far.
            position: current ``[row, col]`` array.
            arround: slice of the map starting at ``position`` (up to 2x2).

        Returns:
            ``(position, dist, minVal)`` after the move; for a 1x1
            neighbourhood nothing moves and ``minVal`` is 0.
        """
        dim = arround.shape
        minVal = 0
        if 2 < sum(dim):
            if 1 < dim[0] and 1 < dim[1]:
                dirs = np.array([[1, 0], [0, 1], [1, 1]])
            elif 1 < dim[0]:
                dirs = np.array([[1, 0]])
            else:
                dirs = np.array([[0, 1]])
            minDir = dirs[0]
            minVal = arround[minDir[0], minDir[1]]
            # "<=" lets later directions (the diagonal) win ties.
            for d in dirs:
                thisVal = arround[d[0], d[1]]
                if thisVal <= minVal:
                    minDir = d
                    minVal = thisVal
            dist = dist + minVal
            position = position + minDir
        return position, dist, minVal

    def getDistanceRoute(self, expD):
        """Follow the greedy path from the first to the last cell of ``expD``.

        Returns:
            ``(globalDist, Route, expDRoute)``: the accumulated distance
            divided by the number of steps (infinity when no step was taken),
            a map holding ``0.5 + cost`` on visited cells, and a copy of
            ``expD`` with 3 added on visited cells.
        """
        target = expD.shape
        Route = np.zeros(expD.shape)
        expDRoute = np.array(expD)
        baseline = 0.5
        pos = np.array([0, 0])
        dist = 0
        Route[pos[0], pos[1]] = baseline
        step = 0
        # Walk until BOTH the last row and the last column are reached.
        while ((target[0] - 1) - pos[0] + (target[1] - 1) - pos[1]) != 0:
            around = expD[pos[0]:pos[0] + 2, pos[1]:pos[1] + 2]
            pos, dist, delta = self.stepOne(dist, pos, around)
            step = step + 1
            Route[pos[0], pos[1]] = baseline + delta
            expDRoute[pos[0], pos[1]] = expDRoute[pos[0], pos[1]] + 3
        globalDist = np.inf
        if 0 < step:
            globalDist = dist / step
        return globalDist, Route, expDRoute

    def getDistance(self, sR, sT):
        """Return ``(globalDist, expD, route, expdRoute)`` for two matrices."""
        D = self.getDistanceMapOfAc(sR, sT)
        expD = self.getExpandedDistanceMap(D)
        globalDist, route, expdRoute = self.getDistanceRoute(expD)
        return globalDist, expD, route, expdRoute

    def processAll(self, descs):
        """Return the autocorrelation matrix of every descriptor's speech."""
        return [
            self.processSpeech(self.extractSpeech(d, False)[0]) for d in descs
        ]

    def compareAC(self, speechA_AC, speechB_AC):
        """Compare two autocorrelation matrices; see ``getDistance``."""
        return self.getDistance(speechA_AC, speechB_AC)

    def compare(self, descA, descB, visu=False, speechAlreadyProcessed=False):
        """Compare two recordings given by their descriptors.

        ``speechAlreadyProcessed`` is accepted for compatibility and unused.
        """
        speechA = self.extractSpeech(descA, visu)[0]
        speechB = self.extractSpeech(descB, visu)[0]
        speechA_AC = self.processSpeech(speechA)
        speechB_AC = self.processSpeech(speechB)
        return self.compareAC(speechA_AC, speechB_AC)

    def compare1toN(self, one, many, visu=False):
        """Compare ``one`` with every descriptor of ``many``; show the maps."""
        dA = one
        for dK in many:
            globalDistance, expD, route, expdRoute = self.compare(
                dA, dK, visu)
            if visu:
                suffix = (str(dA[1]) + " v " + str(dK[1]) + " dist=" +
                          str(round(globalDistance, 3)))
                self.p.imShow(expD, "expD of " + suffix)
                self.p.imShow(expdRoute, "expdRoute of " + suffix)
                self.p.imShow(route, "route of " + suffix)

    def compareTestToTrain(self, test, train, visu=False):
        """Match every test recording with its closest training recording.

        Prints one line per test recording plus the match ratio and shows
        the score, match and combined maps.
        """
        testACs = self.processAll(test)
        trainACs = self.processAll(train)
        rows = len(test)
        cols = len(train)
        scoreMap = np.zeros([rows, cols])
        matchMap = np.zeros([rows, cols])
        matchScoreMap = np.zeros([rows * 3, cols]) - np.inf
        matchCount = 0
        testCount = 0
        print("compareTestToTrain - ", len(test[0]), len(train[0]))
        for i, iTest in enumerate(test):
            iexp = i * 3
            iTestAC = testACs[i]
            scores = []
            for j, jTrain in enumerate(train):
                globalDistance, expD, route, expdRoute = self.compareAC(
                    iTestAC, trainACs[j])
                scoreMap[i, j] = globalDistance
                matchScoreMap[iexp, j] = globalDistance
                scores.append(globalDistance)
                isSame = iTest[1][0] == jTrain[1][0]
                matchScoreMap[iexp + 1, j] = 1 if isSame else np.inf
            lowestScoreIdx = np.argmin(scores)
            isMatch = iTest[1][0] == train[lowestScoreIdx][1][0]
            matchCount = matchCount + (1 if isMatch else 0)
            testCount = testCount + 1
            print(" -> ", i, lowestScoreIdx, " - ", iTest[1][0], " v ",
                  train[lowestScoreIdx][1][0], " \t",
                  round(scores[lowestScoreIdx], 3), " \t", isMatch)
            matchMap[i, lowestScoreIdx] = 1 * (1 if isMatch else -1)
            matchScoreMap[iexp + 1,
                          lowestScoreIdx] = 1 * (2 if isMatch else -0.5)
        matchRatio = (matchCount / testCount) if testCount != 0 else 0
        self.p.imShow(scoreMap, "scoreMap ")
        self.p.imShow(matchMap, "matchMap ")
        self.p.imShow(matchScoreMap, "matchScoreMap ")
        print(" matchRatio ", matchRatio, " ", matchCount, testCount)

    def compareAll(self, data):
        """Compare every pair of ``data`` and show the resulting maps."""
        numOfSamples = len(data)
        confusionMap = np.ones([numOfSamples, numOfSamples])
        matchMap = np.zeros([numOfSamples, numOfSamples])
        xMap = np.zeros([numOfSamples, numOfSamples])
        ACs = self.processAll(data)
        for i, di in enumerate(data):
            iAC = ACs[i]
            iValue = di[1][0]
            for j, dj in enumerate(data):
                # Only the pairs with j <= i are computed.
                if i > j - 1:
                    jValue = dj[1][0]
                    jAC = ACs[j]
                    globalDistance, expD, route, expdRoute = self.compareAC(
                        iAC, jAC)
                    confusionMap[i, j] = globalDistance
                    confusionMap[j, i] = globalDistance
                    matchMap[i, j] = (jValue == iValue) and (globalDistance <
                                                            0.8)
                    xMap[i, j] = (globalDistance < 0.5)
                    print(" -> ", i, j, " - ", di[1][0], " v ", dj[1][0],
                          " \t", round(globalDistance, 3))
        self.p.imShow(confusionMap, "confusionMap ")
        self.p.imShow(matchMap, "matchMap ")
        self.p.imShow(xMap, "xMap ")

    def run(self):
        """Match all testing recordings against all training recordings."""
        # Other entry points that were used during development:
        # self.compare1toN(self.testingDesc[21], [self.trainingDesc[30]], True)
        # self.compare1toN(self.trainingDesc[1], [self.trainingDesc[0]], True)
        # self.compareAll(self.trainingDesc[:40])
        self.compareTestToTrain(self.testingDesc[:], self.trainingDesc[:])
class LpcProcessing:
    """Compute LPC coefficients and a gain for every frame of the data set."""

    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag1,
                             sTag=self.paths.tag2)
        self.data = self.pickle.LoadData()

    def calculateLpcCoefs(self, data):
        """Solve ``toeplitz(data[:-1]) @ a = -data[1:]`` for ``a``.

        Args:
            data: 1-D array of autocorrelation values.

        Raises:
            numpy.linalg.LinAlgError: if the Toeplitz matrix is singular.
        """
        toep = scipy.linalg.toeplitz(data[0:-1])
        # Solving the system avoids forming the explicit inverse.
        return np.linalg.solve(toep, -data[1:])

    def calculateGain(self, s, Rs0, lpcCoef):
        """Return ``sqrt((1 + lpcCoef . Rs) * Rs0)``.

        ``Rs`` holds the autocorrelation values 1 .. ``param.p - 1`` of ``s``.
        """
        Rs = self.m.autocorrelation(s)[1:self.param.p]
        return np.sqrt((1 + lpcCoef.dot(Rs)) * Rs0)

    def voicedPreprocesing(self, data):
        """High-pass filter: ``y[n] = x[n] - param.u * x[n-1]``.

        The first output sample is duplicated so the length is preserved.
        """
        dLen = len(data)
        spp = data[1:dLen] - self.param.u * data[0:dLen - 1]
        return np.insert(spp, 0, spp[0])

    def _impulseResponse(self, lpcCoefs, G, length):
        """Return the recursion driven by ``G`` (used for diagnostics only)."""
        p = self.param.p
        h = np.zeros(self.param.pf)
        for it in range(length):
            if it < p - 1:
                h[it] = 0
            elif it == p:
                # NOTE(review): the excitation is placed at index p while the
                # response is later shifted by p - 1; confirm this is intended.
                h[it] = G
            else:
                for ic, c in enumerate(lpcCoefs):
                    h[it] -= c * h[it - ic - 1]
        return h

    def _showStep(self, step, idx, trama, tramaHp, tramaHpHam, tramaHpHamAc,
                  lpcCoefs, G):
        """Print and plot the intermediate signals of one frame."""
        p = self.param.p
        h = self._impulseResponse(lpcCoefs, G, len(trama))
        hShift = np.append(h[p - 1:], np.zeros(p - 1))
        hShiftFft = np.fft.fft(hShift)
        trFft = np.fft.fft(tramaHp)
        hShiftAc = self.m.autocorrelation(hShift)

        self.p.prnt(2, str(step) + "------------------ start", 1)
        self.p.prnt(4, str("In Second Cycle"), 1)
        self.p.prnt(6, "Current voice pitch: " + str(self.data.pitch[step]),
                    1)
        self.p.plot([(self.data.raw, 'speech', 'y', 0),
                     (trama, 'trama', 'r', idx)])
        # Magnitude in dB: take abs() first, the FFT values are complex.
        ptrFft = 20 * np.log10(np.abs(trFft[0:int(len(trFft) / 2)]))
        phShiftFft = 20 * np.log10(
            np.abs(hShiftFft[0:int(len(hShiftFft) / 2)]))
        self.p.plot([(tramaHp, 'trama - high pass', 'b', 0)])
        self.p.plot([(tramaHpHam, 'trama - hamming', 'b', 0)])
        self.p.plot([(tramaHpHamAc, 'trama - auto correlation', 'b', 0),
                     (tramaHpHamAc[0:p],
                      'trama - auto correlation (p=' + str(p) + ")", 'r', 0)])
        self.p.plot([(h, 'h', 'm', 0)])
        self.p.plot([(hShift, 'hShift', 'm', 0)])
        self.p.plot([(ptrFft, 'trFft dB', 'b', 0),
                     (phShiftFft, 'phShiftFft dB', 'r', 0)], 0)
        self.p.plot([(hShiftAc[:30], 'hShiftAc 30', 'r', 0),
                     (tramaHpHamAc[:30], 'tramaHpHamAc 30', 'b', 0)], 0)
        self.p.plot([(hShiftAc[:p], 'hShiftAc p', 'r', 0),
                     (tramaHpHamAc[:p], 'tramaHpHamAc p', 'b', 0)], 0)
        self.p.prnt(2, str(step) + "------------------ end", 1)
        if self.pc.stop2:
            input(" ...")

    def run(self, save=1):
        """Analyse every frame and store LPC coefficients and gains.

        On return ``data.lpc`` holds one coefficient row per frame and
        ``data.pitch`` / ``data.gain`` are column matrices.

        Args:
            save: when truthy (default) the data set is pickled.
        """
        stp = self.param.step
        lpcRows = []
        for step, idx in enumerate(range(0, len(self.data.raw), stp)):
            pitch = self.data.pitch[step]
            trama = self.data.raw[idx:idx + self.param.pf]
            # Only frames with a non-zero pitch are high-pass filtered.
            tramaHp = self.voicedPreprocesing(trama) if pitch else trama
            tramaHpHam = tramaHp * np.hamming(len(tramaHp))
            tramaHpHamAc = self.m.autocorrelation(tramaHpHam)
            lpcCoefs = self.calculateLpcCoefs(tramaHpHamAc[0:self.param.p])
            energy = self.data.power[step] * self.param.step
            G = self.calculateGain(trama, energy, lpcCoefs)

            self.data.gain.append(G)
            lpcRows.append(lpcCoefs)

            if (step in self.pc.stepInto2 or step in self.pc.stepIntoAll):
                self._showStep(step, idx, trama, tramaHp, tramaHpHam,
                               tramaHpHamAc, lpcCoefs, G)

        if len(lpcRows) == 1:
            # A single frame stays a 1-D coefficient vector.
            self.data.lpc = lpcRows[0]
        elif lpcRows:
            self.data.lpc = np.vstack(lpcRows)

        # np.asmatrix replaces np.mat, which NumPy 2.0 removed.
        self.data.pitch = np.asmatrix(self.data.pitch).T
        self.data.gain = np.asmatrix(self.data.gain).T
        if save:
            self.pickle.SaveData(self.data)
class Analysis:
    """Compare the original parameters with the decoded ones.

    Loads the encoded, decoded and original data sets, pairs their pitch,
    gain and LPC values and synthesizes a signal from both parameter sets.
    """

    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.am = AudioManager()
        self.p = Printer(1)
        self.S = Synthesizer()
        self.pickle = Pickle(self.paths.pickle)
        self.decoded, self.original, self.coded = self.loadAll()
        self.cP, self.cG, self.cLpc = self.organize()
        self.cSn = self.SynthAll()

    def loadAll(self):
        """Return ``(decoded, original, coded)`` loaded from the pickles."""
        coded = self.pickle.LoadEncoded(self.paths.tag4)
        decoded = self.pickle.LoadDecoded(self.paths.tag5)
        data = self.pickle.LoadData(self.paths.tag3)
        return decoded, data, coded

    def SynthAll(self):
        """Synthesize from original and decoded parameters.

        Returns:
            ``ComaparedData`` holding both signals, their sample-wise
            difference and the raw audio read from ``paths.file``.
        """
        snO = self.S.synth(self.cLpc.o, self.cP.o, self.cG.o)
        snD = self.S.synth(self.cLpc.d, self.cP.d, self.cG.d)
        # Indexing snD keeps a length mismatch visible as an IndexError.
        snE = [sample - snD[i] for i, sample in enumerate(snO)]
        rw = self.am.readAudio(self.paths.file)
        return ComaparedData(snO, snD, snE, rw)

    def organize(self):
        """Pair original and decoded pitch, gain and LPC values.

        Returns:
            ``(cP, cG, cLpc)``: ``ComaparedData`` objects for pitch, gain and
            LPC. Pitch and gain carry the difference original - decoded; the
            LPC container has no error entry.
        """
        oPitch = []
        dPitch = []
        ePitch = []
        oGain = []
        dGain = []
        eGain = []
        for i in range(len(self.original.lpc)):
            op = int(self.original.pitch[i][0, 0])
            dp = self.decoded.pitch[i]
            oPitch.append(op)
            dPitch.append(dp)
            ePitch.append(op - dp)

            og = round(self.original.gain[i][0, 0], 3)
            dg = self.decoded.gain[i]
            oGain.append(og)
            dGain.append(dg)
            eGain.append(og - dg)
        cP = ComaparedData(oPitch, dPitch, ePitch)
        cG = ComaparedData(oGain, dGain, eGain)
        cLpc = ComaparedData(self.original.lpc, self.decoded.lpc, None)
        return cP, cG, cLpc

    def compareVisu(self):
        """Print and plot the comparison, then report the compression."""
        for i in range(len(self.cP.o)):
            if not (i in self.pc.stepInto6 or i in self.pc.stepIntoAll):
                continue
            self.p.prnt(2, str(i) + "------------------ start", 1)
            self.p.prnt(4, str("In Sixth Cycle"), 1)
            self.p.prnt(2, ' ', 1)
            self.p.prnt(
                2, ' gain ' + str(i) + " " + " -> " +
                str(round(self.cG.o[i], 0)) + "\t" + str(int(self.cG.d[i])) +
                "\t" + str(int(self.cG.e[i])), 1)
            self.p.prnt(
                2, ' pitch ' + str(i) + " " + " -> " +
                str(round(self.cP.o[i], 0)) + "\t" + str(int(self.cP.d[i])) +
                "\t" + str(int(self.cP.e[i])), 1)
            for j in range(len(self.cLpc.o[i])):
                self.p.prnt(
                    2, ' lpc ' + str(i) + " " + str(j) + " -> " +
                    str(round(self.cLpc.o[i][j], 3)) + "\t" +
                    str(round(self.cLpc.d[i][j], 3)), 1)
            start = i * self.param.step
            end = start + self.param.step
            tag = str(i) + "th "
            self.p.plot([
                (self.cSn.raw[start:end], ' raw audio', 'k', 0),
                (self.cSn.o[start:end], tag + ' original synth', 'b', 0),
                (self.cSn.d[start:end], tag + ' decoded synth', 'g', 0),
                (self.cSn.e[start:end], tag + ' error synth', 'r', 0)
            ])

        self.p.plot([(self.cSn.raw, ' raw audio', 'k', 0),
                     (self.cSn.o, ' original synth', 'b', 0),
                     (self.cSn.d, ' decoded synth', 'g', 0),
                     (self.cSn.e, ' error synth', 'r', 0)])
        # This plot shows the pitch data, so it is labelled as pitch.
        self.p.plot([(self.cP.o, ' original pitch', 'b', 0),
                     (self.cP.d, 'decoded pitch', 'g*', 0),
                     (self.cP.e, 'error', 'r--', 0)])
        self.p.plot([(self.cG.o, ' original gain', 'b', 0),
                     (self.cG.d, 'decoded gain', 'g*', 0),
                     (self.cG.e, 'error', 'r--', 0)])

        # getFileSize() is multiplied by 8 to compare against the bit count.
        originalFileSize = self.pickle.getFileSize(self.paths.file) * 8
        codedFileSize = len(self.coded.binaries)
        self.p.prnt(
            2, ' --------------------------------------------------- ', 1)
        self.p.prnt(
            2, ' original file size -> ' + str(originalFileSize) + ' bits', 1)
        self.p.prnt(
            2, ' coded file size -> ' + str(codedFileSize) + ' bits', 1)
        self.p.prnt(
            2, ' compression -> ' +
            str(round((codedFileSize / originalFileSize) * 100, 3)) + ' %', 1)

    def compareAudio(self):
        """Play the original file and both synthesized signals in turn."""
        self.p.prnt(4, "", 1)
        self.p.prnt(4, str("Listen"), 1)
        self.p.prnt(4, " 1. original file", 1)
        self.p.prnt(4, " 2. sytesized before coding", 1)
        self.p.prnt(4, " 3. sytesized after decoding", 1)
        self.am.playOriginalAudio(self.paths.file)
        self.am.playSyntesizedAudio(self.cSn.o)
        self.am.playSyntesizedAudio(self.cSn.d)
class PreProcessing:
    """Estimate the pitch and the power of every frame of the audio file."""

    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.data = SpeachData()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle, sTag=self.paths.tag1)
        self.data.raw = self.am.readAudio(self.paths.file)

    def filterPitch(self, p):
        """Remove isolated pitch values and fill isolated gaps, in place.

        A non-zero value between two zeros is set to 0; a zero between two
        non-zero values becomes the mean of its neighbours. The first and the
        last element are never changed. Elements are processed left to right,
        so a correction is visible to the next element.

        Returns:
            The same (mutated) sequence ``p``.
        """
        for i in range(1, len(p) - 1):
            before, value, after = p[i - 1], p[i], p[i + 1]
            if before == 0 and value != 0 and after == 0:
                p[i] = 0
            elif before != 0 and value == 0 and after != 0:
                p[i] = np.mean([before, after])
        return p

    def run(self, save=1):
        """Compute pitch and power for every frame of ``data.raw``.

        The pitch is the lag of the autocorrelation maximum within
        ``[param.pi, param.pfN)`` when that maximum exceeds
        ``param.threshold``, and 0 otherwise.

        Args:
            save: when truthy (default) the data set is pickled.
        """
        stp = self.param.step
        for step, idx in enumerate(range(0, len(self.data.raw), stp)):
            trama = self.data.raw[idx:idx + self.param.pf]
            tramaAC = self.m.autocorrelation(trama)
            power = np.sum(self.data.raw[idx:idx + stp]**2) / self.param.step

            pitch = 0
            lags = tramaAC[self.param.pi:self.param.pfN]
            # A short trailing frame can leave no lags to search; it then
            # keeps pitch 0 instead of failing in max().
            if len(lags) > 0 and max(lags) > self.param.threshold:
                pitch = np.argmax(lags) + self.param.pi

            self.data.pitch.append(pitch)
            self.data.power.append(power)

            if (step in self.pc.stepInto1 or step in self.pc.stepIntoAll):
                self.p.prnt(2, str(step) + "------------------ start", 1)
                self.p.prnt(4, str("In First Cycle"), 1)
                self.p.plot([(self.data.raw, 'speech', 'c', 0),
                             (trama, 'trama', 'r', idx)])
                self.p.plot([(trama, 'trama', 'c', 0)])
                self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])
                self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])
                self.p.prnt(2, str(step) + "------------------ end", 1)
                if self.pc.stop1:
                    input(" ...")

        self.data.pitch = self.filterPitch(self.data.pitch)
        if save:
            self.pickle.SaveData(self.data)
class Synthesizer:
    """Rebuild a signal from per-frame LPC coefficients, pitch and gain.

    Frames with pitch 0 are excited with Gaussian noise, all others with a
    train of pulses; the excitation is then passed through the recursive
    linear predictor.
    """

    def __init__(self):
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.data = SpeachData()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag2,
                             sTag=self.paths.tag3)
        self.data = self.pickle.LoadData()

    @staticmethod
    def _scalar(value):
        """Return ``value`` (number or one-element array) as a Python scalar."""
        return np.asarray(value).item()

    def gNoise(self, length, val=1):
        """Return ``int(length)`` standard-normal samples scaled by ``val``."""
        return np.array([np.random.normal()
                         for _ in range(int(length))]) * val

    def linearPredictor(self, lpcCoefs, Sni, SniPrev):
        """Filter the excitation ``Sni`` through the recursive predictor.

        Each output sample is the excitation minus the weighted sum of the
        preceding ``len(lpcCoefs)`` output samples.

        Args:
            lpcCoefs: predictor coefficients.
            Sni: excitation samples of the current frame.
            SniPrev: previously synthesized samples; only the last
                ``len(lpcCoefs)`` are used.

        Returns:
            The filtered samples, always as long as ``Sni``.
        """
        lpcLen = len(lpcCoefs)
        history = np.asarray(SniPrev, dtype=float)
        history = history[len(history) - lpcLen:] if lpcLen <= len(
            history) else history
        if len(history) < lpcLen:
            # Too little history: pad with silence instead of consuming the
            # first samples of the frame as history (which dropped them).
            history = np.append(np.zeros(lpcLen - len(history)), history)
        SniPrefixed = np.append(history, Sni)
        for i, n in enumerate(range(lpcLen, len(SniPrefixed))):
            SniLastN = SniPrefixed[i:n]
            SniPrefixed[n] += -np.dot(lpcCoefs, SniLastN[::-1])
        return SniPrefixed[lpcLen:]

    def glutealPulse(self, pitch, gain=1):
        """Return one pulse of ``int(pitch)`` samples scaled by ``gain``.

        The first ``int(2/3 * N0)`` samples follow the polynomial below and
        the remaining samples are zero.
        """
        # Built-in int(): the np.int alias was removed in NumPy 1.24.
        N0 = int(pitch)
        Nop = int(N0 * 2 / 3)
        pulse = np.zeros(N0)
        for n in range(Nop):
            pulse[n] = ((2 * Nop - 1) * n - 3 * n**2) / (Nop**2 - 3 * Nop + 2)
        return pulse * gain

    def _synthFrames(self, LPC, Pitch, Gain):
        """Yield ``(step, Sni, Sn)`` after synthesizing each frame.

        ``Sni`` is the newly synthesized frame and ``Sn`` the whole signal so
        far. Pulse trains may overshoot the frame; the overshoot shortens the
        next frame.
        """
        stp = self.param.step
        Sn = np.array([])
        SniPrev = np.zeros(stp)
        stepInset = 0
        for step, pitch in enumerate(Pitch):
            pitch = self._scalar(pitch)
            gain = float(self._scalar(Gain[step]))
            stepLeftover = stp - stepInset
            if pitch == 0:
                G = float(np.sqrt(1 / stp) * gain)
                Sni = self.gNoise(stepLeftover, G)
                stepInset = 0
            else:
                G = float(np.sqrt(pitch / stp) * gain)
                innerJumps = int(np.ceil(stepLeftover / pitch))
                Sni = np.array([])
                spannedStep = 0
                for _ in range(innerJumps):
                    Sni = np.append(Sni, self.glutealPulse(pitch, G))
                    spannedStep += pitch
                stepInset = spannedStep - stepLeftover
            Sni = self.linearPredictor(LPC[step], Sni, SniPrev)
            Sn = np.append(Sn, np.array(Sni))
            # Take the history from the whole signal so a short frame does
            # not leave the predictor without enough previous samples.
            SniPrev = Sn[-stp:]
            yield step, Sni, Sn

    def synth(self, LPC, Pitch, Gain):
        """Synthesize and return the signal for the given parameters.

        Args:
            LPC: per-frame predictor coefficients.
            Pitch: per-frame pitch (0 selects the noise excitation).
            Gain: per-frame gain.
        """
        Sn = np.array([])
        for _step, _Sni, Sn in self._synthFrames(LPC, Pitch, Gain):
            pass
        return Sn

    def run(self, save=1):
        """Synthesize the loaded data set, plotting the configured frames.

        The result is stored on ``self.syntesized``.

        Args:
            save: when truthy (default) the data set and the synthesized
                signal are pickled.
        """
        Sn = np.array([])
        frames = self._synthFrames(self.data.lpc, self.data.pitch,
                                   self.data.gain)
        for step, Sni, Sn in frames:
            if not (step in self.pc.stepInto3
                    or step in self.pc.stepIntoAll):
                continue
            self.p.prnt(2, str(step) + "------------------ start", 1)
            self.p.prnt(4, str("In Third Cycle"), 1)
            self.p.plot([(Sn, 'Sn', 'b', 0),
                         (Sni, 'Sni', 'r', len(Sn) - len(Sni)),
                         (self.data.raw[:len(Sn)] * 1, 'raw*0.3', 'c', 0)], 0)
            prev = 3
            since = len(Sn) - len(Sni) * prev
            since = 0 if since < 0 else since
            self.p.plot(
                [(self.data.raw[since:len(Sn)] * 2, 'raw*2', 'c', since),
                 (Sn[since:len(Sn) - len(Sni)], 'Sn', 'b', since),
                 (Sni, 'Sni', 'r', len(Sn) - len(Sni))], 0)
            if self.pc.stop3:
                input(" ...")

        self.syntesized = Sn
        if save:
            self.pickle.SaveData(self.data)
            self.pickle.save('syntesized', self.syntesized)