def __init__(self):
    """Build the helper objects, then index the available audio files.

    The pair returned by ``self.scanForAudioFiles()`` is stored as
    ``self.trainingDesc`` and ``self.testingDesc`` (in that order).
    """
    # Helpers first: the scan at the end relies on self.am and self.paths.
    self.p = Printer(1)
    self.param = Params()
    self.m = Math()
    self.am = AudioManager()
    self.paths = Paths()

    scanned = self.scanForAudioFiles()
    self.trainingDesc, self.testingDesc = scanned
def __init__(self):
    """Create the helpers and read the input audio into ``self.data.raw``.

    The pickle helper is configured with ``self.paths.pickle`` and the
    ``sTag`` keyword set to ``self.paths.tag1``.
    """
    self.paths = Paths()
    self.param = Params()
    self.pc = PrintConfig()
    self.data = SpeachData()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()

    pickleLocation = self.paths.pickle
    saveTag = self.paths.tag1
    self.pickle = Pickle(pickleLocation, sTag=saveTag)

    # The audio named by self.paths.file is read once, at construction.
    audioFile = self.paths.file
    self.data.raw = self.am.readAudio(audioFile)
def __init__(self):
    """Create the helpers, load the stored results and synthesize them.

    Three steps run in order, each one feeding the next:
    ``self.loadAll()``, ``self.organize()`` and ``self.SynthAll()``.
    """
    self.paths = Paths()
    self.param = Params()
    self.pc = PrintConfig()
    self.am = AudioManager()
    self.p = Printer(1)
    self.S = Synthesizer()
    self.pickle = Pickle(self.paths.pickle)

    loaded = self.loadAll()
    self.decoded, self.original, self.coded = loaded

    organized = self.organize()
    self.cP, self.cG, self.cLpc = organized

    self.cSn = self.SynthAll()
class PreProcessing:
    """Frame-by-frame estimation of pitch and power for one audio file.

    The results are accumulated in ``self.data`` (``pitch`` and ``power``
    lists) and optionally persisted through ``self.pickle``.
    """

    def __init__(self):
        """Create the helpers and read the input audio into ``self.data.raw``."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.data = SpeachData()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        self.pickle = Pickle(self.paths.pickle, sTag=self.paths.tag1)
        self.data.raw = self.am.readAudio(self.paths.file)

    def filterPitch(self, p):
        """Smooth a pitch track in place and return it.

        Interior samples only (the first and last entries are never touched):

        * a non-zero value whose two neighbours are zero is reset to 0;
        * a zero value whose two neighbours are non-zero becomes their mean.

        The pass runs left to right on the list itself, so a sample changed
        at one position is what the next position sees as its left neighbour.
        """
        lastIdx = len(p) - 1
        for k in range(1, lastIdx):
            before, here, after = p[k - 1], p[k], p[k + 1]
            loneValue = before == 0 and here != 0 and after == 0
            loneGap = before != 0 and here == 0 and after != 0
            if loneValue:
                p[k] = 0
            elif loneGap:
                p[k] = np.mean([before, after])
        return p

    def run(self, save=1):
        """Estimate pitch and power for every frame of ``self.data.raw``.

        Frames start every ``self.param.step`` samples. The pitch is read
        from the autocorrelation of ``self.param.pf`` samples, the power from
        the ``step`` samples starting at the same index.

        Args:
            save: when truthy, ``self.data`` is stored through
                ``self.pickle.SaveData`` once all frames are processed.
        """
        raw = self.data.raw
        hop = self.param.step

        for step, idx in enumerate(range(0, len(raw), hop)):
            trama = raw[idx:idx + self.param.pf]
            tramaAC = self.m.autocorrelation(trama)
            power = np.sum(raw[idx:idx + hop]**2) / self.param.step

            # Only lags in [pi, pfN) are candidates for the pitch period.
            lagZone = tramaAC[self.param.pi:self.param.pfN]
            if max(lagZone) > self.param.threshold:
                pitch = np.argmax(lagZone) + self.param.pi
            else:
                pitch = 0

            self.data.pitch.append(pitch)
            self.data.power.append(power)

            inspect = (step in self.pc.stepInto1
                       or step in self.pc.stepIntoAll)
            if not inspect:
                continue

            # Step-into output for the frames selected in the print config.
            self.p.prnt(2, str(step) + "------------------ start", 1)
            self.p.prnt(4, "In First Cycle", 1)
            self.p.plot([(raw, 'speech', 'c', 0),
                         (trama, 'trama', 'r', idx)])
            self.p.plot([(trama, 'trama', 'c', 0)])
            # The autocorrelation is plotted twice, as in the original code.
            self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])
            self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])
            self.p.prnt(2, str(step) + "------------------ end", 1)
            if self.pc.stop1:
                input(" ...")

        self.data.pitch = self.filterPitch(self.data.pitch)
        if save:
            self.pickle.SaveData(self.data)
def __init__(self):
    """Create the helpers and load the data saved by the previous stage.

    The pickle helper receives ``self.paths.tag1`` as ``lTag`` and
    ``self.paths.tag2`` as ``sTag``; ``self.data`` is whatever its
    ``LoadData()`` returns.
    """
    self.paths = Paths()
    self.param = Params()
    self.pc = PrintConfig()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()

    self.pickle = Pickle(
        self.paths.pickle,
        lTag=self.paths.tag1,
        sTag=self.paths.tag2,
    )
    self.data = self.pickle.LoadData()
def __init__(self):
    """Create the helpers and load the previously encoded data.

    The pickle helper receives ``self.paths.tag4`` as ``lTag`` and
    ``self.paths.tag5`` as ``sTag``; ``self.encoded`` is whatever its
    ``LoadEncoded()`` returns.
    """
    self.paths = Paths()
    self.param = Params()
    self.pc = PrintConfig()
    self.p = Printer(1)
    self.am = AudioManager()
    self.m = Math()

    self.pickle = Pickle(
        self.paths.pickle,
        lTag=self.paths.tag4,
        sTag=self.paths.tag5,
    )

    self.cc = CodeConfig()
    self.cu = CodingUtils()
    self.encoded = self.pickle.LoadEncoded()
class Preprocess:
    """Compare recordings through a band-limited distance map.

    Each recording is reduced to its most prominent high-energy span, the
    span is described by per-frame autocorrelation coefficients, and two
    recordings are compared by walking a greedy path through the matrix of
    frame-to-frame distances.

    A descriptor (``desc``) is indexed as follows by this class: ``desc[2]``
    is the audio path, ``desc[1][0]`` the content label, ``desc[1][1]`` the
    version and ``desc[1][2]`` the orator.
    """

    def __init__(self):
        """Create the helpers and index the training and testing folders."""
        self.p = Printer(1)
        self.param = Params()
        self.m = Math()
        self.am = AudioManager()
        self.paths = Paths()
        self.trainingDesc, self.testingDesc = self.scanForAudioFiles()

    def scanForAudioFiles(self):
        """Return ``(trainDescriptors, testDescriptors)`` from both folders."""
        trainPaths = self.am.scanDirectory(self.paths.folderTrain)
        testPaths = self.am.scanDirectory(self.paths.folderTest)
        return trainPaths, testPaths

    def readAudioFile(self, desc):
        """Read and return the audio stored at ``desc[2]``."""
        return self.am.readAudio(desc[2])

    def getSignalEnergy(self, raw):
        """Return the sum of squared samples of each ``step``-long frame.

        Args:
            raw: sample array supporting slicing and ``**``.

        Returns:
            A list with one energy value per frame (the last frame may be
            shorter than ``self.param.step``).
        """
        stp = self.param.step
        return [np.sum(raw[idx:idx + stp]**2)
                for idx in range(0, len(raw), stp)]

    def getSpeech(self, raw, energy):
        """Locate the most prominent high-energy span of a recording.

        Frames are scanned with a two-level activation scheme, the resulting
        spans are merged when they are close, short spans are dropped and
        the span with the largest peak amplitude is kept.

        Args:
            raw: sample array; ``abs(raw)`` must be defined.
            energy: per-frame energies as returned by ``getSignalEnergy``.

        Returns:
            ``(speech, speechIdx)``: two one-element lists holding the
            selected samples and the index of their first sample in ``raw``.

        Raises:
            ValueError: if no span survives the length filter.
        """
        rawAbs = abs(raw)
        stp = self.param.step

        # Thresholds are expressed as multiples of a white-noise reference.
        whiteNoiseRef = 100
        activationScale = [50, 1000, 100]
        dectivationScale = [100]
        firstLevel = whiteNoiseRef * activationScale[0]
        secondLevel = whiteNoiseRef * activationScale[1]
        offLevel = whiteNoiseRef * dectivationScale[0]
        wait = 0  # minimum number of frames between activation and release

        # Peak absolute amplitude of every frame.
        maxRaw = [max(rawAbs[i * stp:(i + 1) * stp])
                  for i in range(len(energy))]

        activated = [0, 0, 0]
        lastActivated = 0
        spans = []
        spanMaxRef = []
        maxRef = 0
        for i, e in enumerate(energy):
            if e >= firstLevel and activated[0] == 0:
                # passed the minimum activation
                activated[0] = 1
                lastActivated = i
            elif (e < offLevel and i - lastActivated > wait
                  and activated[0] == 1):
                # below the deactivation value: keep the span only when the
                # second activation level was reached while it was open
                if activated[1] == 1:
                    spans.append([lastActivated * stp, i * stp])
                    spanMaxRef.append(maxRef)
                activated = [0, 0, 0]
                maxRef = maxRaw[i]
            if activated[0] == 1 and e >= secondLevel:
                # passed the second activation
                activated[1] = 1
                maxRef = max([maxRef, maxRaw[i]])

        # join spans which are close (gap shorter than maxG samples)
        joinedSpans = []
        joinedSpanMaxRef = []
        join = []
        jmaxRef = 0
        maxG = 1000
        for i, s in enumerate(spans):
            if i == 0:
                join = [s[0], s[1]]
                jmaxRef = spanMaxRef[i]
            elif s[0] - join[1] < maxG:
                join[1] = s[1]
                jmaxRef = max([jmaxRef, spanMaxRef[i]])
            else:
                joinedSpans.append(join)
                joinedSpanMaxRef.append(jmaxRef)
                jmaxRef = spanMaxRef[i]
                join = s
            if i == len(spans) - 1 and len(join) == 2:
                # flush the span still being built
                joinedSpans.append(join)
                joinedSpanMaxRef.append(jmaxRef)

        # remove short spans
        minL = 1500
        longEnoughSpans = []
        longEnoughMaxRef = []
        for s, ref in zip(joinedSpans, joinedSpanMaxRef):
            if s[1] - s[0] > minL:
                longEnoughSpans.append(s)
                longEnoughMaxRef.append(ref)

        if not longEnoughSpans:
            # np.argmax on an empty list raises an opaque ValueError; keep
            # the exception type but say what actually went wrong.
            raise ValueError("no speech span detected in the recording")

        # the most probable span is the one with the largest peak amplitude
        best = longEnoughSpans[np.argmax(longEnoughMaxRef)]
        speech = [raw[best[0]:best[1]]]
        speechIdx = [best[0]]
        return speech, speechIdx

    def extractSpeech(self, desc, visu=False):
        """Read a recording and return its detected speech span(s).

        Args:
            desc: descriptor of the recording.
            visu: when true, the raw signal and the detected span are shown
                through ``self.p.plotSpeech``.

        Returns:
            The ``speech`` list produced by ``getSpeech``.
        """
        raw = self.readAudioFile(desc)
        energy = self.getSignalEnergy(raw)
        speech, speechIdx = self.getSpeech(raw, energy)
        if visu:
            title = str(" content: " + desc[1][0]) + " | orator: " + str(
                desc[1][2]) + " | version: " + str(desc[1][1])
            self.p.plotSpeech(raw, speech, speechIdx, title)
        return speech

    def getDistanceMap(self, sR, sT):
        """Return the band-limited absolute-difference map of two sequences.

        ``D[r, t]`` is ``|sR[r] - sT[t]|`` when ``t`` lies between the two
        slope limits computed for row ``r`` and ``np.inf`` otherwise.
        """
        R = len(sR)
        T = len(sT)
        D = np.zeros([R, T])
        for r in range(R):
            # The limits depend on the row only.
            tMin = max(r * (T / (R * 2)), (r - R * 0.5) * (2 * T / R))
            tMax = min(r * (2 * T / R), (r + R) * (T / 2 / R))
            for t in range(T):
                if tMin <= t <= tMax:
                    D[r, t] = np.sqrt((sR[r] - sT[t])**2)
                else:
                    D[r, t] = np.inf
        return D

    def processSpeech(self, raw):
        """Return the first ``p`` autocorrelation values of every frame.

        Frames start every ``self.param.step`` samples and span
        ``self.param.window`` samples. A frame that runs past the end of
        ``raw`` is extended by repeating itself until it fills the window.

        Returns:
            A 2-D array with one row per frame, or an empty list when
            ``raw`` is empty.
        """
        stp = self.param.step
        wndw = self.param.window
        p = self.param.p
        rows = []
        for idx in range(0, len(raw), stp):
            trama = raw[idx:idx + wndw]
            if len(trama) < wndw:
                copies = int(np.ceil(wndw / len(trama)))
                trama = np.hstack([trama] * copies)[0:wndw]
            tAC = self.m.autocorrelation(trama)
            rows.append(tAC[:p])
        # Stack once instead of re-stacking the whole matrix on every frame.
        return np.vstack(rows) if rows else []

    def getExpandedDistanceMap(self, D):
        """Return ``D`` framed by an extra first row and column.

        The added cells are ``np.inf`` except the origin, which is 0, so a
        route starting at ``[0, 0]`` has a single finite first move.
        """
        eD = np.full(tuple(n + 1 for n in D.shape), np.inf)
        eD[1:, 1:] = D
        eD[0, 0] = 0
        return eD

    def getDistanceMapOfAc(self, sR, sT):
        """Return the band-limited Euclidean distance map of two sequences.

        ``sR`` and ``sT`` are sequences of coefficient vectors. Cells outside
        the slope limits (widened by one column on each side) are
        ``np.inf``.
        """
        R = len(sR)
        T = len(sT)
        D = np.zeros([R, T])
        for r in range(R):
            tMin = max(r * (T / (R * 2)), (r - R * 0.5) * (2 * T / R))
            tMax = min(r * (2 * T / R), (r + R) * (T / 2 / R))
            for t in range(T):
                if tMin <= t + 1 and t - 1 <= tMax:
                    D[r, t] = sum((sT[t] - sR[r])**2)**(0.5)
                else:
                    D[r, t] = np.inf
        return D

    def stepOne(self, dist, position, arround):
        """Advance one cell towards the cheapest neighbour.

        Args:
            dist: distance accumulated so far.
            position: current ``[row, col]`` as an array.
            arround: the (up to) 2x2 window of the map whose top-left cell
                is the current position.

        Returns:
            ``(position, dist, minVal)`` after the move. When the window is
            a single cell there is nowhere to go: the inputs are returned
            unchanged together with a step cost of 0.
        """
        dim = arround.shape
        minVal = 0  # defined even when no move is possible
        if 2 < sum(dim):
            if 1 < dim[0] and 1 < dim[1]:
                dirs = np.array([[1, 0], [0, 1], [1, 1]])
            elif 1 < dim[0]:
                dirs = np.array([[1, 0]])
            else:
                dirs = np.array([[0, 1]])
            minDir = dirs[0]
            minVal = arround[minDir[0], minDir[1]]
            for d in dirs:
                thisVal = arround[d[0], d[1]]
                # "<=" makes ties go to the later candidate (the diagonal).
                if thisVal <= minVal:
                    minDir = d
                    minVal = thisVal
            dist = dist + minVal
            position = position + minDir
        return position, dist, minVal

    def getDistanceRoute(self, expD):
        """Walk greedily from the origin to the last cell of ``expD``.

        Returns:
            ``(globalDist, Route, expDRoute)`` where ``globalDist`` is the
            accumulated distance divided by the number of steps (``np.inf``
            when no step was taken), ``Route`` marks the visited cells with
            ``0.5 + step cost`` and ``expDRoute`` is a copy of ``expD`` with
            3 added to the visited cells.
        """
        lastRow = expD.shape[0] - 1
        lastCol = expD.shape[1] - 1
        Route = np.zeros(expD.shape)
        expDRoute = np.array(expD)
        baseline = 0.5
        pos = np.array([0, 0])
        dist = 0
        Route[pos[0], pos[1]] = baseline
        step = 0
        # Both coordinates must reach the end. The previous condition tested
        # the row twice, so the route stopped on reaching the last row even
        # when columns were left.
        while pos[0] != lastRow or pos[1] != lastCol:
            around = expD[pos[0]:pos[0] + 2, pos[1]:pos[1] + 2]
            pos, dist, delta = self.stepOne(dist, pos, around)
            step = step + 1
            Route[pos[0], pos[1]] = baseline + delta
            expDRoute[pos[0], pos[1]] = expDRoute[pos[0], pos[1]] + 3
        globalDist = np.inf
        if 0 < step:
            globalDist = dist / step
        return globalDist, Route, expDRoute

    def getDistance(self, sR, sT):
        """Return ``(globalDist, expD, route, expdRoute)`` for two sequences."""
        D = self.getDistanceMapOfAc(sR, sT)
        expD = self.getExpandedDistanceMap(D)
        globalDist, route, expdRoute = self.getDistanceRoute(expD)
        return globalDist, expD, route, expdRoute

    def processAll(self, descs):
        """Return the autocorrelation matrix of every descriptor in ``descs``."""
        return [self.processSpeech(self.extractSpeech(d, False)[0])
                for d in descs]

    def compareAC(self, speechA_AC, speechB_AC):
        """Compare two autocorrelation matrices; see ``getDistance``."""
        return self.getDistance(speechA_AC, speechB_AC)

    def compare(self, descA, descB, visu=False, speechAlreadyProcessed=False):
        """Compare the recordings described by ``descA`` and ``descB``.

        Args:
            descA, descB: descriptors of the two recordings.
            visu: forwarded to ``extractSpeech``.
            speechAlreadyProcessed: accepted for compatibility; not used.

        Returns:
            ``(globalDistance, expD, route, expdRoute)``.
        """
        speechA = self.extractSpeech(descA, visu)[0]
        speechB = self.extractSpeech(descB, visu)[0]
        speechA_AC = self.processSpeech(speechA)
        speechB_AC = self.processSpeech(speechB)
        return self.compareAC(speechA_AC, speechB_AC)

    def compare1toN(self, one, many, visu=False):
        """Compare ``one`` against every descriptor in ``many``.

        Nothing is returned; with ``visu`` set, the three maps of every
        comparison are shown through ``self.p.imShow``.
        """
        dA = one
        for dK in many:
            globalDistance, expD, route, expdRoute = self.compare(
                dA, dK, visu)
            if visu:
                pair = (str(dA[1]) + " v " + str(dK[1]) + " dist="
                        + str(round(globalDistance, 3)))
                self.p.imShow(expD, "expD of " + pair)
                self.p.imShow(expdRoute, "expdRoute of " + pair)
                self.p.imShow(route, "route of " + pair)

    def compareTestToTrain(self, test, train, visu=False):
        """Classify every test recording by its nearest training recording.

        A test is a match when the content label of the closest training
        recording equals its own. The score maps are shown through
        ``self.p.imShow`` and the match ratio is printed.

        Args:
            test: descriptors to classify.
            train: reference descriptors.
            visu: accepted for compatibility; not used.
        """
        testACs = self.processAll(test)
        trainACs = self.processAll(train)
        rows = len(test)
        cols = len(train)
        scoreMap = np.zeros([rows, cols])
        matchMap = np.zeros([rows, cols])
        # Three rows per test: distances, label agreement, spare (-inf).
        matchScoreMap = np.zeros([rows * 3, cols]) - np.inf
        matchCount = 0
        testCount = 0
        # Report the set sizes; indexing test[0] here crashed on empty input.
        print("compareTestToTrain - ", len(test), len(train))
        for i, iTest in enumerate(test):
            iexp = i * 3
            iTestAC = testACs[i]
            scores = []
            for j, jTrain in enumerate(train):
                globalDistance, expD, route, expdRoute = self.compareAC(
                    iTestAC, trainACs[j])
                scoreMap[i, j] = globalDistance
                matchScoreMap[iexp, j] = globalDistance
                scores.append(globalDistance)
                isSame = iTest[1][0] == jTrain[1][0]
                matchScoreMap[iexp + 1, j] = 1 if isSame else np.inf
            lowestScoreIdx = np.argmin(scores)
            isMatch = iTest[1][0] == train[lowestScoreIdx][1][0]
            matchCount = matchCount + (1 if isMatch else 0)
            testCount = testCount + 1
            print(" -> ", i, lowestScoreIdx, " - ", iTest[1][0], " v ",
                  train[lowestScoreIdx][1][0], " \t",
                  round(scores[lowestScoreIdx], 3), " \t", isMatch)
            matchMap[i, lowestScoreIdx] = 1 if isMatch else -1
            matchScoreMap[iexp + 1, lowestScoreIdx] = 2 if isMatch else -0.5
        matchRatio = (matchCount / testCount) if testCount != 0 else 0
        self.p.imShow(scoreMap, "scoreMap ")
        self.p.imShow(matchMap, "matchMap ")
        self.p.imShow(matchScoreMap, "matchScoreMap ")
        print(" matchRatio ", matchRatio, " ", matchCount, testCount)

    def compareAll(self, data):
        """Compare every pair of recordings in ``data`` and show the maps.

        Only pairs with ``i >= j`` are computed; the distance is mirrored
        into the confusion map, while the match and threshold maps are
        filled for the computed cell only.
        """
        numOfSamples = len(data)
        confusionMap = np.ones([numOfSamples, numOfSamples])
        matchMap = np.zeros([numOfSamples, numOfSamples])
        xMap = np.zeros([numOfSamples, numOfSamples])
        ACs = self.processAll(data)
        for i, di in enumerate(data):
            iAC = ACs[i]
            iValue = di[1][0]
            for j, dj in enumerate(data):
                if i > j - 1:
                    jValue = dj[1][0]
                    globalDistance, expD, route, expdRoute = self.compareAC(
                        iAC, ACs[j])
                    confusionMap[i, j] = globalDistance
                    confusionMap[j, i] = globalDistance
                    matchMap[i, j] = (jValue == iValue) and (globalDistance
                                                              < 0.8)
                    xMap[i, j] = (globalDistance < 0.5)
                    print(" -> ", i, j, " - ", di[1][0], " v ", dj[1][0],
                          " \t", round(globalDistance, 3))
        self.p.imShow(confusionMap, "confusionMap ")
        self.p.imShow(matchMap, "matchMap ")
        self.p.imShow(xMap, "xMap ")

    def run(self):
        """Classify all testing recordings against all training recordings.

        ``compare1toN`` and ``compareAll`` are alternative experiments that
        can be called by hand with slices of the same descriptor lists.
        """
        self.compareTestToTrain(self.testingDesc[:], self.trainingDesc[:])
class Analysis:
    """Compare the original parameters with the coded-then-decoded ones.

    On construction the stored results are loaded, paired up per frame
    (pitch, gain, LPC) and both parameter sets are synthesized so they can
    be inspected (``compareVisu``) or listened to (``compareAudio``).
    """

    def __init__(self):
        """Create the helpers, load the stored results and synthesize them."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.am = AudioManager()
        self.p = Printer(1)
        self.S = Synthesizer()
        self.pickle = Pickle(self.paths.pickle)
        self.decoded, self.original, self.coded = self.loadAll()
        self.cP, self.cG, self.cLpc = self.organize()
        self.cSn = self.SynthAll()

    def loadAll(self):
        """Load the stored results and return ``(decoded, data, coded)``.

        The encoded data is read with ``paths.tag4``, the decoded data with
        ``paths.tag5`` and the original analysis data with ``paths.tag3``.
        """
        coded = self.pickle.LoadEncoded(self.paths.tag4)
        decoded = self.pickle.LoadDecoded(self.paths.tag5)
        data = self.pickle.LoadData(self.paths.tag3)
        return decoded, data, coded

    def SynthAll(self):
        """Synthesize both parameter sets and return them with their error.

        Returns:
            ``ComaparedData(original synth, decoded synth, difference,
            raw audio)``; the difference is taken element by element over
            the length of the original synthesis.
        """
        snO = self.S.synth(self.cLpc.o, self.cP.o, self.cG.o)
        snD = self.S.synth(self.cLpc.d, self.cP.d, self.cG.d)
        snE = [snO[i] - snD[i] for i in range(len(snO))]
        rw = self.am.readAudio(self.paths.file)
        return ComaparedData(snO, snD, snE, rw)

    def organize(self):
        """Pair original and decoded pitch, gain and LPC values per frame.

        The original pitch and gain entries are read with ``[0, 0]``; the
        pitch is truncated to ``int`` and the gain rounded to 3 decimals
        before the error (original minus decoded) is computed.

        Returns:
            ``(cP, cG, cLpc)`` as ``ComaparedData`` objects; the LPC one
            carries no error term.
        """
        oPitch = []
        dPitch = []
        ePitch = []
        oGain = []
        dGain = []
        eGain = []
        for i in range(len(self.original.lpc)):
            op = int(self.original.pitch[i][0, 0])
            dp = self.decoded.pitch[i]
            oPitch.append(op)
            dPitch.append(dp)
            ePitch.append(op - dp)

            og = round(self.original.gain[i][0, 0], 3)
            dg = self.decoded.gain[i]
            oGain.append(og)
            dGain.append(dg)
            eGain.append(og - dg)
        cP = ComaparedData(oPitch, dPitch, ePitch)
        cG = ComaparedData(oGain, dGain, eGain)
        cLpc = ComaparedData(self.original.lpc, self.decoded.lpc, None)
        return cP, cG, cLpc

    def compareVisu(self):
        """Print and plot the comparison, then report the compression ratio.

        Frames listed in ``pc.stepInto6`` or ``pc.stepIntoAll`` get a
        detailed printout and a per-frame plot. The whole-signal, pitch and
        gain plots and the size report are always produced.
        """
        for i in range(len(self.cP.o)):
            if not (i in self.pc.stepInto6 or i in self.pc.stepIntoAll):
                continue
            self.p.prnt(2, str(i) + "------------------ start", 1)
            self.p.prnt(4, str("In Sixth Cycle"), 1)
            self.p.prnt(2, ' ', 1)
            self.p.prnt(
                2, ' gain ' + str(i) + " " + " -> " +
                str(round(self.cG.o[i], 0)) + "\t" + str(int(self.cG.d[i])) +
                "\t" + str(int(self.cG.e[i])), 1)
            self.p.prnt(
                2, ' pitch ' + str(i) + " " + " -> " +
                str(round(self.cP.o[i], 0)) + "\t" + str(int(self.cP.d[i])) +
                "\t" + str(int(self.cP.e[i])), 1)
            for j in range(len(self.cLpc.o[i])):
                self.p.prnt(
                    2, ' lpc ' + str(i) + " " + str(j) + " -> " +
                    str(round(self.cLpc.o[i][j], 3)) + "\t" +
                    str(round(self.cLpc.d[i][j], 3)), 1)
            # Samples of frame i in the synthesized signals.
            start = i * self.param.step
            end = start + self.param.step
            tag = str(i) + "th "
            self.p.plot([
                (self.cSn.raw[start:end], ' raw audio', 'k', 0),
                (self.cSn.o[start:end], tag + ' original synth', 'b', 0),
                (self.cSn.d[start:end], tag + ' decoded synth', 'g', 0),
                (self.cSn.e[start:end], tag + ' error synth', 'r', 0)
            ])

        self.p.plot([(self.cSn.raw, ' raw audio', 'k', 0),
                     (self.cSn.o, ' original synth', 'b', 0),
                     (self.cSn.d, ' decoded synth', 'g', 0),
                     (self.cSn.e, ' error synth', 'r', 0)])
        # These curves are the pitch values; they used to carry the gain
        # labels copied from the plot below.
        self.p.plot([(self.cP.o, ' original pitch', 'b', 0),
                     (self.cP.d, 'decoded pitch', 'g*', 0),
                     (self.cP.e, 'error', 'r--', 0)])
        self.p.plot([(self.cG.o, ' original gain', 'b', 0),
                     (self.cG.d, 'decoded gain', 'g*', 0),
                     (self.cG.e, 'error', 'r--', 0)])

        # getFileSize is multiplied by 8 and reported as bits, while
        # len(coded.binaries) is used directly as a bit count.
        originalFileSize = self.pickle.getFileSize(self.paths.file) * 8
        codedFileSize = len(self.coded.binaries)
        self.p.prnt(
            2, ' --------------------------------------------------- ', 1)
        self.p.prnt(
            2, ' original file size -> ' + str(originalFileSize) + ' bits', 1)
        self.p.prnt(
            2, ' coded file size -> ' + str(codedFileSize) + ' bits', 1)
        self.p.prnt(
            2, ' compression -> ' +
            str(round((codedFileSize / originalFileSize) * 100, 3)) + ' %', 1)

    def compareAudio(self):
        """Play the original file, then both synthesized signals in turn."""
        self.p.prnt(4, "", 1)
        self.p.prnt(4, str("Listen"), 1)
        self.p.prnt(4, " 1. original file", 1)
        self.p.prnt(4, " 2. sytesized before coding", 1)
        self.p.prnt(4, " 3. sytesized after decoding", 1)
        self.am.playOriginalAudio(self.paths.file)
        self.am.playSyntesizedAudio(self.cSn.o)
        self.am.playSyntesizedAudio(self.cSn.d)