def __init__(self):
    """Create the helper objects and load the data saved by an earlier stage.

    The pickle helper is built with ``paths.tag1`` as ``lTag`` and
    ``paths.tag2`` as ``sTag`` (presumably the load and save tags -- confirm
    against ``Pickle``); ``self.data`` is whatever ``LoadData()`` returns.
    """
    paths = Paths()
    self.paths = paths
    self.param = Params()
    self.pc = PrintConfig()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()
    # Built last because it needs the pickle location and tags from ``paths``.
    store = Pickle(paths.pickle, lTag=paths.tag1, sTag=paths.tag2)
    self.pickle = store
    self.data = store.LoadData()
def __init__(self):
    """Create the helper objects and read the raw audio into ``self.data.raw``.

    ``self.data`` starts as a fresh ``SpeachData`` and only its ``raw``
    attribute is filled here, from ``AudioManager.readAudio(paths.file)``.
    """
    paths = Paths()
    self.paths = paths
    self.param = Params()
    self.pc = PrintConfig()
    self.data = SpeachData()
    self.p = Printer(1)
    audio = AudioManager()
    self.am = audio
    self.m = Math()
    self.pickle = Pickle(paths.pickle, sTag=paths.tag1)
    self.data.raw = audio.readAudio(paths.file)
def __init__(self):
    """Create the helper objects, then load, organise and synthesise everything.

    All the work happens at construction time: ``loadAll()`` fills
    ``decoded`` / ``original`` / ``coded``, ``organize()`` fills
    ``cP`` / ``cG`` / ``cLpc`` and ``SynthAll()`` fills ``cSn``. The order
    matters because each call reads attributes set by the previous one.
    """
    self.paths = Paths()
    self.param = Params()
    self.pc = PrintConfig()
    self.am = AudioManager()
    self.p = Printer(1)
    self.S = Synthesizer()
    self.pickle = Pickle(self.paths.pickle)

    loaded = self.loadAll()
    self.decoded, self.original, self.coded = loaded

    organized = self.organize()
    self.cP, self.cG, self.cLpc = organized

    self.cSn = self.SynthAll()
def __init__(self):
    """Create the helper objects and load the encoded bit stream.

    The pickle helper is built with ``paths.tag4`` as ``lTag`` and
    ``paths.tag5`` as ``sTag`` (presumably the load and save tags -- confirm
    against ``Pickle``); ``self.encoded`` is whatever ``LoadEncoded()``
    returns.
    """
    paths = Paths()
    self.paths = paths
    self.param = Params()
    self.pc = PrintConfig()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()
    store = Pickle(paths.pickle, lTag=paths.tag4, sTag=paths.tag5)
    self.pickle = store
    self.cc = CodeConfig()
    self.cu = CodingUtils()
    self.encoded = store.LoadEncoded()
class PreProcessing:
    """Pitch and power extraction over the raw audio.

    The audio is walked in hops of ``param.step`` samples; every hop
    contributes one pitch value and one power value to ``self.data``.
    """

    def __init__(self):
        """Create the helper objects and read the raw audio into ``self.data.raw``."""
        paths = Paths()
        self.paths = paths
        self.param = Params()
        self.pc = PrintConfig()
        self.data = SpeachData()
        self.p = Printer(1)
        audio = AudioManager()
        self.am = audio
        self.m = Math()
        self.pickle = Pickle(paths.pickle, sTag=paths.tag1)
        self.data.raw = audio.readAudio(paths.file)

    def filterPitch(self, p):
        """Remove isolated pitch values and fill isolated gaps, in place.

        For every interior position: a non-zero value between two zeros is
        set to zero, and a zero between two non-zero values is replaced by
        the mean of its neighbours. The first and last entries are never
        changed. Because ``p`` is edited while it is scanned, each decision
        sees the already-filtered left neighbour.

        Returns:
            The same sequence object ``p``.
        """
        for i in range(1, len(p) - 1):
            before, current, after = p[i - 1], p[i], p[i + 1]
            if before == 0 and after == 0:
                if current != 0:
                    p[i] = 0
            elif before != 0 and after != 0 and current == 0:
                p[i] = np.mean([before, after])
        return p

    def run(self, save=1):
        """Estimate pitch and power for every hop and optionally save the result.

        Args:
            save: when truthy, ``self.data`` is passed to ``Pickle.SaveData``
                after the pitch track has been filtered.
        """
        hop = self.param.step
        raw = self.data.raw
        for step, idx in enumerate(range(0, len(raw), hop)):
            trama = raw[idx:idx + self.param.pf]
            tramaAC = self.m.autocorrelation(trama)
            power = np.sum(raw[idx:idx + hop]**2) / self.param.step

            # Pitch is the lag of the autocorrelation peak inside
            # [param.pi, param.pfN), or 0 when the peak is not above the
            # threshold.
            lagWindow = tramaAC[self.param.pi:self.param.pfN]
            if max(lagWindow) > self.param.threshold:
                pitch = np.argmax(lagWindow) + self.param.pi
            else:
                pitch = 0

            self.data.pitch.append(pitch)
            self.data.power.append(power)

            debugging = (step in self.pc.stepInto1
                         or step in self.pc.stepIntoAll)
            if debugging:
                self.p.prnt(2, str(step) + "------------------ start", 1)
                self.p.prnt(4, str("In First Cycle"), 1)
                self.p.plot([(raw, 'speech', 'c', 0),
                             (trama, 'trama', 'r', idx)])
                self.p.plot([(trama, 'trama', 'c', 0)])
                self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])
                self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])
                self.p.prnt(2, str(step) + "------------------ end", 1)
                if self.pc.stop1:
                    input(" ...")

        self.data.pitch = self.filterPitch(self.data.pitch)
        if save:
            self.pickle.SaveData(self.data)
class Decoder:
    """Turn the encoded bit stream back into gain, pitch and LPC values.

    The bit stream is a concatenation of fixed-width frames; each frame is
    5 gain bits, 7 pitch bits and 34 LSP bits (see ``separateBins``).
    """

    def __init__(self):
        """Create the helper objects and load the encoded bit stream."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag4,
                             sTag=self.paths.tag5)
        self.cc = CodeConfig()
        self.cu = CodingUtils()
        self.encoded = self.pickle.LoadEncoded()

    def separateBins(self):
        """Split ``self.encoded.binaries`` into per-frame bit strings.

        Returns:
            Three parallel lists ``(gain, pitch, lsp)`` with one bit string
            per frame. A trailing partial frame is sliced like any other,
            so its fields may be short or empty.
        """
        binary = self.encoded.binaries
        innerL = [5, 7, 34]  # field widths in bits: gain, pitch, LSP
        frameBits = sum(innerL)
        gain = []
        pitch = []
        lsp = []
        for start in range(0, len(binary), frameBits):
            fields = []
            cursor = start
            for width in innerL:
                fields.append(binary[cursor:cursor + width])
                cursor += width
            gain.append(fields[0])
            pitch.append(fields[1])
            lsp.append(fields[2])
        return gain, pitch, lsp

    def debinariseLsp(self, bLsp):
        """Decode every frame's LSP bit string into quantised LSP values.

        Each frame is cut into consecutive fields of ``frame.bits`` bits,
        one per ``CodeFrame`` from ``setupLspFrames``. The decoded value is
        divided by 0.5 and multiplied by pi, which undoes the
        ``coef * 0.5 / np.pi`` scaling applied in ``Coder.binariseLsp``.

        Returns:
            ``(qLsp, bcLsp)``: per-frame lists of decoded values and of the
            bit-string field each value came from.
        """
        frames = self.setupLspFrames()
        qLsp = []
        bcLsp = []
        for b in bLsp:
            idx = 0
            qlsp = []
            bclsp = []
            for f in frames:
                bc = b[idx:idx + f.bits]
                qb = self.cu.debinarise([bc], f)[0] / 0.5 * np.pi
                idx = idx + f.bits
                qlsp.append(qb)
                bclsp.append(bc)
            qLsp.append(qlsp)
            bcLsp.append(bclsp)
        return qLsp, bcLsp

    def debinariseGain(self, bGain, maxGain):
        """Decode the gain bit strings with a gain frame scaled to ``maxGain``."""
        gainFrame = CodeFrame(self.cc.gainSegments)
        gainFrame.scale(maxGain)
        return self.cu.debinarise(bGain, gainFrame)

    def debinarisePitch(self, bPitch):
        """Decode pitch bit strings into pitch values.

        Code 0 means "no pitch" and decodes to 0. Any other code ``q``
        decodes to ``q + param.pi - 1``, the exact inverse of the
        ``p - param.pi + 1`` shift applied by ``Coder.binarisePitch``.
        The offset used to be the literal 19, which is only right while
        ``param.pi`` is 20.
        """
        offset = self.param.pi - 1
        qPitch = []
        for b in bPitch:
            q = int(b, 2)
            qPitch.append(q if q == 0 else q + offset)
        return qPitch

    def lspToLpc(self, lsp):
        """Convert LSP values to LPC coefficients via ``CodingUtils.lsf_to_lpc``."""
        return self.cu.lsf_to_lpc(np.array(lsp))

    def setupLspFrames(self):
        """Build one ``CodeFrame`` per entry of ``CodeConfig.lspSegments``."""
        return [CodeFrame(s) for s in self.cc.lspSegments]

    def Save(self, qlpc, qpitch, qgain):
        """Store the decoded values in ``self.decoded`` and persist them."""
        self.decoded = DecodedData()
        self.decoded.gain = qgain
        self.decoded.pitch = qpitch
        self.decoded.lpc = qlpc
        self.pickle.SaveDecoded(self.decoded)

    def run(self, save=1):
        """Decode the whole bit stream and optionally save the result.

        Args:
            save: when truthy (the default) the decoded data is stored via
                ``Save``. The flag used to be ignored and the data was
                always written.
        """
        maxGain = self.encoded.maxGain
        bGain, bPitch, bLsp = self.separateBins()
        qGain = self.debinariseGain(bGain, maxGain)
        qPitch = self.debinarisePitch(bPitch)
        qLsp, bcLsp = self.debinariseLsp(bLsp)
        qLpc = self.lspToLpc(qLsp)
        qLpc = self.cu.removeLpcPrefix(qLpc)
        if save:
            self.Save(qLpc, qPitch, qGain)

        for step in range(len(bGain)):
            if (step in self.pc.stepInto5 or step in self.pc.stepIntoAll):
                self.p.prnt(2, ' ', 1)
                self.p.prnt(
                    2, ' bitcount -> ' + str(len(self.encoded.binaries)), 1)
                self.p.prnt(
                    2, ' gain -> ' + str(bGain[step]) + " - " +
                    str(qGain[step]), 1)
                self.p.prnt(
                    2, ' pitch -> ' + str(bPitch[step]) + " - " +
                    str(qPitch[step]), 1)
                self.p.prnt(2, ' lsp -> ' + str(bLsp[step]), 1)
                self.p.prnt(2, ' ', 1)
                for i, lspCoef in enumerate(qLsp[step]):
                    self.p.prnt(
                        2, ' lsp ' + str(i) + ' -> ' + str(bcLsp[step][i]) +
                        " - " + str(lspCoef), 1)
                self.p.prnt(2, ' ', 1)
                for i, lpcCoef in enumerate(qLpc[step]):
                    self.p.prnt(2, ' lpc ' + str(i) + ' -> ' + str(lpcCoef),
                                1)


# Decoder().run()
class Coder:
    """Quantise gain, pitch and LSP values and pack them into a bit string."""

    def __init__(self):
        """Create the helper objects and load the data to encode."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag3,
                             sTag=self.paths.tag4)
        self.cc = CodeConfig()
        self.cu = CodingUtils()
        self.data = self.pickle.LoadData()

    def binariseGain(self):
        """Quantise ``self.data.gain`` with a frame scaled to the largest gain.

        Returns:
            ``(binary, maxGain)``: the bit strings produced by
            ``CodingUtils.binarise`` and the maximum gain used for scaling.
        """
        gainFrame = CodeFrame(self.cc.gainSegments)
        maxGain = np.max(self.data.gain)
        gainFrame.scale(maxGain)
        binary, _quanta, _indice = self.cu.binarise(self.data.gain, gainFrame)
        return binary, maxGain

    def binarisePitch(self):
        """Encode every pitch value as a 7-bit string.

        A pitch of 0 is encoded as 0; any other pitch ``p`` is encoded as
        ``p - param.pi + 1``. Each item of ``self.data.pitch`` is indexed
        with ``[0, 0]`` after the shift, so items must be 2-D (as produced
        by iterating an ``np.matrix``).

        NOTE(review): values are cast to ``uint8`` but only 7 bits are
        written; a shifted pitch above 127 would yield a longer string and
        break the fixed frame layout -- confirm the pitch range.
        """
        bits = 7
        binary = []
        for p in self.data.pitch:
            shifted = (p - self.param.pi + 1 if p != 0 else p).astype(
                np.uint8)
            binary.append(np.binary_repr(shifted[0, 0], width=bits))
        return binary

    def lpcToLsp(self, lpc):
        """Convert LPC coefficients to LSP values via ``CodingUtils.lpc_to_lsf``."""
        return self.cu.lpc_to_lsf(lpc)

    def setupLspFrames(self):
        """Build one ``CodeFrame`` per entry of ``CodeConfig.lspSegments``."""
        return [CodeFrame(s) for s in self.cc.lspSegments]

    def binariseLsp(self, LSP):
        """Quantise every LSP coefficient with its own ``CodeFrame``.

        Coefficients are scaled by ``0.5 / pi`` before quantisation.

        Returns:
            One list of bit strings per input frame.
        """
        frames = self.setupLspFrames()
        lspBinaries = []
        for lsp in LSP:
            lspBin = []
            for i, coef in enumerate(lsp):
                c = [coef * 0.5 / np.pi]
                binary, _quanta, _indice = self.cu.binarise(c, frames[i])
                lspBin.append(binary[0])
            lspBinaries.append(lspBin)
        return lspBinaries

    def comoposeBinaries(self, gain, pitch, lsp):
        """Concatenate gain, pitch and LSP bit strings frame by frame.

        Args:
            gain: one bit string per frame.
            pitch: one bit string per frame.
            lsp: one list of bit strings per frame.

        Returns:
            One combined bit string per frame, in gain/pitch/LSP order.
        """
        return [
            g + pitch[i] + ''.join(lsp[i])
            for i, g in enumerate(gain)
        ]

    def zipBinaries(self, coded, maxGain):
        """Join all frame bit strings into one ``EncodedData`` object."""
        encoded = EncodedData()
        encoded.maxGain = maxGain
        encoded.binaries = ''.join(coded)
        return encoded

    def run(self, save=1):
        """Encode the loaded data and optionally save the bit stream.

        Args:
            save: when truthy (the default) the result is stored with
                ``Pickle.SaveEncoded``. The flag used to be ignored and the
                data was always written.
        """
        bGain, maxGain = self.binariseGain()
        bPitch = self.binarisePitch()
        LPC1 = self.cu.prefixLpcWith1(self.data.lpc)
        LSP = self.lpcToLsp(LPC1)
        bLSP = self.binariseLsp(LSP)
        coded = self.comoposeBinaries(bGain, bPitch, bLSP)
        encoded = self.zipBinaries(coded, maxGain)
        if save:
            self.pickle.SaveEncoded(encoded)

        for step in range(len(bGain)):
            if (step in self.pc.stepInto4 or step in self.pc.stepIntoAll):
                self.p.prnt(2, str(step) + "------------------ start", 1)
                self.p.prnt(4, str("In Forth Cycle"), 1)
                self.p.prnt(2, ' ', 1)
                self.p.prnt(2, ' gain max -> ' + str(maxGain), 1)
                self.p.prnt(
                    2, ' gain -> ' + str(self.data.gain[step, 0]) + " == " +
                    str(bGain[step]), 1)
                self.p.prnt(
                    2, ' pitch -> ' + str(self.data.pitch[step, 0]) +
                    " == " + str(bPitch[step]), 1)
                self.p.prnt(2, ' ', 1)
                for i, c in enumerate(self.data.lpc[step]):
                    self.p.prnt(
                        2, ' lpc ' + str(i) + ' -> ' + str(round(c, 4)) +
                        "\t" + bLSP[step][i], 1)
                self.p.prnt(2, ' ', 1)
                for i, c in enumerate(LSP[step]):
                    self.p.prnt(
                        2, ' lsp ' + str(i) + ' -> ' + str(round(c, 4)) +
                        "\t" + bLSP[step][i], 1)
                self.p.prnt(2, ' ', 1)
                self.p.prnt(2, ' lsp -> ' + coded[step], 1)
                self.p.prnt(2, ' ', 1)
class LpcProcessing:
    """Per-frame LPC analysis: coefficients and gain for every hop."""

    def __init__(self):
        """Create the helper objects and load the data from the previous stage."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag1,
                             sTag=self.paths.tag2)
        self.data = self.pickle.LoadData()

    def calculateLpcCoefs(self, data):
        """Solve the Toeplitz normal equations for the LPC coefficients.

        Args:
            data: 1-D array of autocorrelation values ``R[0..n]``.

        Returns:
            The ``n`` coefficients ``a`` satisfying
            ``toeplitz(R[0..n-1]) @ a = -R[1..n]``.

        The system is solved directly rather than through an explicit
        matrix inverse, which is cheaper and numerically better behaved. A
        singular matrix raises ``numpy.linalg.LinAlgError`` either way.
        """
        toep = scipy.linalg.toeplitz(data[0:-1])
        return np.linalg.solve(toep, -data[1:])

    def calculateGain(self, s, Rs0, lpcCoef):
        """Compute the frame gain as ``sqrt((1 + lpcCoef . Rs) * Rs0)``.

        ``Rs`` holds lags ``1 .. param.p - 1`` of the autocorrelation of
        ``s`` as returned by ``Math.autocorrelation``.
        """
        Rs = self.m.autocorrelation(s)[1:self.param.p]
        return np.sqrt((1 + lpcCoef.dot(Rs)) * Rs0)

    def voicedPreprocesing(self, data):
        """High-pass filter ``data`` as ``y[n] = x[n] - param.u * x[n-1]``.

        The first filtered sample is duplicated at the front so the output
        has the same length as the input.
        """
        dLen = len(data)
        spp = data[1:dLen] - self.param.u * data[0:dLen - 1]
        return np.insert(spp, 0, spp[0])

    def _impulseResponse(self, G, lpcCoefs, frameLen):
        """Return the impulse response that is shown in the debug plots.

        The buffer has ``param.pf`` samples, of which the first
        ``frameLen`` are visited. The impulse ``G`` is placed at index
        ``param.p``; indices before it stay zero and indices after it follow
        ``h[n] = -sum(c[k] * h[n - k - 1])``.

        NOTE(review): callers drop the first ``param.p - 1`` samples, so the
        impulse ends up at index 1 of the shifted response, not index 0 --
        confirm whether ``it == param.p`` was meant to be ``param.p - 1``.
        """
        p = self.param.p
        h = np.zeros(self.param.pf)
        for it in range(frameLen):
            if it < p - 1:
                h[it] = 0
            elif it == p:
                h[it] = G
            else:
                for ic, c in enumerate(lpcCoefs):
                    h[it] -= c * h[it - ic - 1]
        return h

    def _debugFrame(self, step, idx, trama, tramaHp, tramaHpHam,
                    tramaHpHamAc, G, lpcCoefs):
        """Print and plot the intermediate signals of one frame."""
        p = self.param.p
        # Only the plots below need the synthesis-filter response and the
        # spectra, so they are computed here and not for every frame.
        h = self._impulseResponse(G, lpcCoefs, len(trama))
        hShift = np.append(h[p - 1:], np.zeros(p - 1))
        hShiftFft = np.fft.fft(hShift)
        trFft = np.fft.fft(tramaHp)
        hShiftAc = self.m.autocorrelation(hShift)

        self.p.prnt(2, str(step) + "------------------ start", 1)
        self.p.prnt(4, str("In Second Cycle"), 1)
        self.p.prnt(6, "Current voice pitch: " + str(self.data.pitch[step]),
                    1)
        self.p.plot([(self.data.raw, 'speech', 'y', 0),
                     (trama, 'trama', 'r', idx)])
        # dB of the magnitude: the FFT is complex and log10 of a complex
        # value only carries the magnitude in its real part.
        ptrFft = 20 * np.log10(np.abs(trFft[0:len(trFft) // 2]))
        phShiftFft = 20 * np.log10(np.abs(hShiftFft[0:len(hShiftFft) // 2]))
        self.p.plot([(tramaHp, 'trama - high pass', 'b', 0)])
        self.p.plot([(tramaHpHam, 'trama - hamming', 'b', 0)])
        self.p.plot([(tramaHpHamAc, 'trama - auto correlation', 'b', 0),
                     (tramaHpHamAc[0:p],
                      'trama - auto correlation (p=' + str(p) + ")", 'r', 0)])
        self.p.plot([(h, 'h', 'm', 0)])
        self.p.plot([(hShift, 'hShift', 'm', 0)])
        self.p.plot([(ptrFft, 'trFft dB', 'b', 0),
                     (phShiftFft, 'phShiftFft dB', 'r', 0)], 0)
        self.p.plot([(hShiftAc[:30], 'hShiftAc 30', 'r', 0),
                     (tramaHpHamAc[:30], 'tramaHpHamAc 30', 'b', 0)], 0)
        self.p.plot([(hShiftAc[:p], 'hShiftAc p', 'r', 0),
                     (tramaHpHamAc[:p], 'tramaHpHamAc p', 'b', 0)], 0)
        self.p.prnt(2, str(step) + "------------------ end", 1)
        if self.pc.stop2:
            input(" ...")

    def run(self, save=1):
        """Compute LPC coefficients and gain for every hop.

        For each hop of ``param.step`` samples a frame of ``param.pf``
        samples is taken, high-pass filtered when the stored pitch is
        non-zero, Hamming-windowed and autocorrelated; the first
        ``param.p`` lags give the LPC coefficients.

        On return ``self.data.lpc`` is a 2-D array with one row per frame,
        and ``self.data.pitch`` / ``self.data.gain`` are column
        ``np.matrix`` objects.

        Args:
            save: when truthy, ``self.data`` is stored with
                ``Pickle.SaveData``.
        """
        stp = self.param.step
        lpcRows = []
        for step, idx in enumerate(range(0, len(self.data.raw), stp)):
            pitch = self.data.pitch[step]
            trama = self.data.raw[idx:idx + self.param.pf]
            tramaHp = self.voicedPreprocesing(trama) if pitch else trama

            tramaHpHam = tramaHp * np.hamming(len(tramaHp))
            tramaHpHamAc = self.m.autocorrelation(tramaHpHam)
            lpcCoefs = self.calculateLpcCoefs(tramaHpHamAc[0:self.param.p])

            energy = self.data.power[step] * self.param.step
            G = self.calculateGain(trama, energy, lpcCoefs)

            self.data.gain.append(G)
            lpcRows.append(lpcCoefs)

            if (step in self.pc.stepInto2 or step in self.pc.stepIntoAll):
                self._debugFrame(step, idx, trama, tramaHp, tramaHpHam,
                                 tramaHpHamAc, G, lpcCoefs)

        if lpcRows:
            # Stacking once avoids re-copying the array on every frame and
            # keeps the result 2-D even when there is a single frame.
            self.data.lpc = np.vstack(lpcRows)
        # np.mat was removed in NumPy 2.0; asmatrix is the same function.
        self.data.pitch = np.asmatrix(self.data.pitch).T
        self.data.gain = np.asmatrix(self.data.gain).T
        if save:
            self.pickle.SaveData(self.data)
class Analysis:
    """Compare the original parameters with the decoded ones.

    Everything is loaded, organised and synthesised in the constructor;
    ``compareVisu`` and ``compareAudio`` only present the results.
    """

    def __init__(self):
        """Create the helper objects and prepare all comparison data."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.am = AudioManager()
        self.p = Printer(1)
        self.S = Synthesizer()
        self.pickle = Pickle(self.paths.pickle)
        self.decoded, self.original, self.coded = self.loadAll()
        self.cP, self.cG, self.cLpc = self.organize()
        self.cSn = self.SynthAll()

    def loadAll(self):
        """Load the encoded, decoded and original data sets.

        Returns:
            ``(decoded, data, coded)`` -- note the order differs from the
            order in which they are loaded.
        """
        coded = self.pickle.LoadEncoded(self.paths.tag4)
        decoded = self.pickle.LoadDecoded(self.paths.tag5)
        data = self.pickle.LoadData(self.paths.tag3)
        return decoded, data, coded

    def SynthAll(self):
        """Synthesise audio from the original and the decoded parameters.

        Returns:
            A ``ComaparedData`` holding the original synthesis, the decoded
            synthesis, their sample-wise difference and the raw audio.

        The difference covers only the samples both signals have: the two
        syntheses can differ in length when the decoded pitch track
        differs, which previously raised ``IndexError``.
        """
        snO = self.S.synth(self.cLpc.o, self.cP.o, self.cG.o)
        snD = self.S.synth(self.cLpc.d, self.cP.d, self.cG.d)
        snE = [o - d for o, d in zip(snO, snD)]
        rw = self.am.readAudio(self.paths.file)
        return ComaparedData(snO, snD, snE, rw)

    def organize(self):
        """Pair original and decoded pitch, gain and LPC values per frame.

        Original pitch and gain entries are read with ``[0, 0]`` (they are
        2-D per item); the original gain is rounded to three decimals.

        Returns:
            ``(cP, cG, cLpc)`` as ``ComaparedData`` objects; the pitch and
            gain ones also carry the per-frame error ``original - decoded``.
        """
        oPitch = []
        dPitch = []
        ePitch = []
        oGain = []
        dGain = []
        eGain = []
        oLpc = self.original.lpc
        dLpc = self.decoded.lpc
        for i in range(len(self.original.lpc)):
            op = int(self.original.pitch[i][0, 0])
            dp = self.decoded.pitch[i]
            oPitch.append(op)
            dPitch.append(dp)
            ePitch.append(op - dp)

            og = round(self.original.gain[i][0, 0], 3)
            dg = self.decoded.gain[i]
            oGain.append(og)
            dGain.append(dg)
            eGain.append(og - dg)
        cP = ComaparedData(oPitch, dPitch, ePitch)
        cG = ComaparedData(oGain, dGain, eGain)
        cLpc = ComaparedData(oLpc, dLpc, None)
        return cP, cG, cLpc

    def compareVisu(self):
        """Print and plot the comparison, then report the compression ratio.

        Frames listed in ``pc.stepInto6`` / ``pc.stepIntoAll`` get a
        detailed print-out and a per-frame plot; the whole-signal plots and
        the size report are always produced.
        """
        for i in range(len(self.cP.o)):
            if (i in self.pc.stepInto6 or i in self.pc.stepIntoAll):
                self.p.prnt(2, str(i) + "------------------ start", 1)
                self.p.prnt(4, str("In Sixth Cycle"), 1)
                self.p.prnt(2, ' ', 1)
                self.p.prnt(
                    2, ' gain ' + str(i) + " " + " -> " +
                    str(round(self.cG.o[i], 0)) + "\t" +
                    str(int(self.cG.d[i])) + "\t" + str(int(self.cG.e[i])),
                    1)
                self.p.prnt(
                    2, ' pitch ' + str(i) + " " + " -> " +
                    str(round(self.cP.o[i], 0)) + "\t" +
                    str(int(self.cP.d[i])) + "\t" + str(int(self.cP.e[i])),
                    1)
                for j in range(len(self.cLpc.o[i])):
                    self.p.prnt(
                        2, ' lpc ' + str(i) + " " + str(j) + " -> " +
                        str(round(self.cLpc.o[i][j], 3)) + "\t" +
                        str(round(self.cLpc.d[i][j], 3)), 1)
                start = i * self.param.step
                end = start + self.param.step
                tag = str(i) + "th "
                self.p.plot([
                    (self.cSn.raw[start:end], ' raw audio', 'k', 0),
                    (self.cSn.o[start:end], tag + ' original synth', 'b', 0),
                    (self.cSn.d[start:end], tag + ' decoded synth', 'g', 0),
                    (self.cSn.e[start:end], tag + ' error synth', 'r', 0)
                ])

        self.p.plot([(self.cSn.raw, ' raw audio', 'k', 0),
                     (self.cSn.o, ' original synth', 'b', 0),
                     (self.cSn.d, ' decoded synth', 'g', 0),
                     (self.cSn.e, ' error synth', 'r', 0)])
        # This plot shows the pitch track; it used to carry the gain labels.
        self.p.plot([(self.cP.o, ' original pitch', 'b', 0),
                     (self.cP.d, 'decoded pitch', 'g*', 0),
                     (self.cP.e, 'error', 'r--', 0)])
        self.p.plot([(self.cG.o, ' original gain', 'b', 0),
                     (self.cG.d, 'decoded gain', 'g*', 0),
                     (self.cG.e, 'error', 'r--', 0)])

        originalFileSize = self.pickle.getFileSize(self.paths.file) * 8
        codedFileSize = len(self.coded.binaries)
        self.p.prnt(
            2, ' --------------------------------------------------- ', 1)
        self.p.prnt(
            2, ' original file size -> ' + str(originalFileSize) + ' bits', 1)
        self.p.prnt(
            2, ' coded file size -> ' + str(codedFileSize) + ' bits', 1)
        self.p.prnt(
            2, ' compression -> ' +
            str(round((codedFileSize / originalFileSize) * 100, 3)) + ' %',
            1)

    def compareAudio(self):
        """Play the original file and both synthesised signals in turn."""
        self.p.prnt(4, "", 1)
        self.p.prnt(4, str("Listen"), 1)
        self.p.prnt(4, " 1. original file", 1)
        self.p.prnt(4, " 2. sytesized before coding", 1)
        self.p.prnt(4, " 3. sytesized after decoding", 1)
        self.am.playOriginalAudio(self.paths.file)
        self.am.playSyntesizedAudio(self.cSn.o)
        self.am.playSyntesizedAudio(self.cSn.d)
class Synthesizer:
    """Rebuild a waveform from per-frame LPC coefficients, pitch and gain.

    Frames with pitch 0 are excited with Gaussian noise, all others with a
    train of pulses of ``pitch`` samples; the excitation is then passed
    through the all-pole filter given by the frame's LPC coefficients.
    """

    def __init__(self):
        """Create the helper objects and load the analysis data."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle,
                             lTag=self.paths.tag2,
                             sTag=self.paths.tag3)
        self.data = self.pickle.LoadData()

    @staticmethod
    def _scalar(value):
        """Return ``value`` as a plain Python scalar.

        Accepts Python numbers, NumPy scalars and single-element arrays or
        matrices (iterating an ``np.matrix`` column yields 1x1 matrices).
        """
        return np.asarray(value).item()

    def gNoise(self, length, val=1):
        """Return ``int(length)`` standard-normal samples scaled by ``val``.

        A non-positive length yields an empty array.
        """
        count = max(int(length), 0)
        return np.random.normal(size=count) * val

    def linearPredictor(self, lpcCoefs, Sni, SniPrev):
        """Run the excitation ``Sni`` through the all-pole LPC filter.

        Each output sample is ``x[n] - sum(lpcCoefs[k] * y[n - 1 - k])``.

        Args:
            lpcCoefs: filter coefficients; their count is the filter order.
            Sni: excitation samples for the current frame.
            SniPrev: previous output samples; only the last ``order`` of
                them are used as the filter state.

        Returns:
            The filtered frame, with the same length as ``Sni``.

        When fewer than ``order`` previous samples are available the
        missing state is taken as zeros. Previously the state was indexed
        with a negative start in that case and the beginning of the frame
        was silently dropped.
        """
        order = len(lpcCoefs)
        history = np.asarray(SniPrev, dtype=float)
        missing = order - len(history)
        if missing > 0:
            history = np.concatenate((np.zeros(missing), history))
        else:
            history = history[len(history) - order:]

        signal = np.append(history, Sni)
        for n in range(order, len(signal)):
            recent = signal[n - order:n]
            signal[n] -= np.dot(lpcCoefs, recent[::-1])
        return signal[order:]

    def glutealPulse(self, pitch, gain=1):
        """Return one excitation pulse of ``int(pitch)`` samples.

        The first two thirds of the period follow
        ``((2*Nop - 1)*n - 3*n**2) / (Nop**2 - 3*Nop + 2)``; the rest is
        zero. The pulse is scaled by ``gain``.

        NOTE(review): the denominator is zero for ``Nop`` of 1 or 2 (a
        pitch of 2 to 4 samples), which raises ``ZeroDivisionError``.
        """
        # np.int was removed in NumPy 1.24; it was an alias of int.
        N0 = int(self._scalar(pitch))
        Nop = int(N0 * 2 / 3)
        pulse = np.zeros(N0)
        for n in range(Nop):
            pulse[n] = ((2 * Nop - 1) * n - 3 * n**2) / (Nop**2 - 3 * Nop +
                                                         2)
        return pulse * gain

    def _excitation(self, pitch, gain, stepLeftover):
        """Build the excitation for one frame.

        Returns:
            ``(samples, stepInset)`` where ``stepInset`` is the number of
            samples generated beyond ``stepLeftover``. It is always 0 for
            noise frames.
        """
        stp = self.param.step
        if pitch == 0:
            G = float(np.sqrt(1 / stp) * gain)
            return self.gNoise(stepLeftover, G), 0

        G = float(np.sqrt(pitch / stp) * gain)
        innerJumps = max(int(np.ceil(stepLeftover / pitch)), 0)
        if innerJumps:
            # Every pulse in the frame is identical, so build it once.
            samples = np.tile(self.glutealPulse(pitch, G), innerJumps)
        else:
            samples = np.array([])
        # Count the samples really produced: for a fractional pitch the
        # pulse has int(pitch) samples, so counting in units of ``pitch``
        # would drift away from the true position.
        return samples, len(samples) - stepLeftover

    def _frames(self, LPC, Pitch, Gain):
        """Yield ``(step, samples)`` for every frame, in order.

        Whole pulses are never cut, so a voiced frame may overshoot the
        nominal ``param.step`` samples; the overshoot shortens the next
        frame.
        """
        stp = self.param.step
        history = np.zeros(stp)
        stepInset = 0
        for step, pitch in enumerate(Pitch):
            pitch = self._scalar(pitch)
            gain = float(self._scalar(Gain[step]))
            stepLeftover = stp - stepInset
            excitation, stepInset = self._excitation(pitch, gain,
                                                     stepLeftover)
            Sni = self.linearPredictor(LPC[step], excitation, history)
            # Keep the tail of everything produced so far, so a frame
            # shorter than the filter order does not lose the filter state.
            history = np.concatenate((history, Sni))[-stp:]
            yield step, Sni

    def synth(self, LPC, Pitch, Gain):
        """Synthesise a waveform from the given per-frame parameters.

        Args:
            LPC: per-frame LPC coefficient sequences.
            Pitch: per-frame pitch values; 0 selects noise excitation.
            Gain: per-frame gains.

        Returns:
            A 1-D float array with all frames concatenated (empty when
            there are no frames).
        """
        chunks = [Sni for _, Sni in self._frames(LPC, Pitch, Gain)]
        return np.concatenate(chunks) if chunks else np.array([])

    def run(self, save=1):
        """Synthesise from ``self.data`` and optionally save the result.

        The waveform is stored in ``self.syntesized``. Frames listed in
        ``pc.stepInto3`` / ``pc.stepIntoAll`` are plotted against the raw
        audio as they are produced.

        Args:
            save: when truthy, ``self.data`` and the waveform are written
                through the pickle helper.
        """
        chunks = []
        frames = self._frames(self.data.lpc, self.data.pitch, self.data.gain)
        for step, Sni in frames:
            chunks.append(Sni)
            if (step in self.pc.stepInto3 or step in self.pc.stepIntoAll):
                Sn = np.concatenate(chunks)
                frameStart = len(Sn) - len(Sni)
                self.p.prnt(2, str(step) + "------------------ start", 1)
                self.p.prnt(4, str("In Third Cycle"), 1)
                self.p.plot([(Sn, 'Sn', 'b', 0),
                             (Sni, 'Sni', 'r', frameStart),
                             (self.data.raw[:len(Sn)] * 1, 'raw*0.3', 'c',
                              0)], 0)
                # Zoom on the last three frame lengths.
                since = max(len(Sn) - len(Sni) * 3, 0)
                self.p.plot(
                    [(self.data.raw[since:len(Sn)] * 2, 'raw*2', 'c', since),
                     (Sn[since:frameStart], 'Sn', 'b', since),
                     (Sni, 'Sni', 'r', frameStart)], 0)
                if self.pc.stop3:
                    input(" ...")

        self.syntesized = np.concatenate(chunks) if chunks else np.array([])
        if save:
            self.pickle.SaveData(self.data)
            self.pickle.save('syntesized', self.syntesized)