Python AudioManagerの例、core.AudioManager Pythonの例

コード例 #1

0

ファイルを表示

ファイル: part1.py プロジェクト: padamban/Speech-Processing

 def __init__(self):
     """Create the helper objects and collect the training/testing descriptors."""
     self.p = Printer(1)
     self.param = Params()
     self.m = Math()
     self.am = AudioManager()
     self.paths = Paths()
     # scanForAudioFiles is not shown in this excerpt; its result is unpacked
     # into exactly two values (training first, testing second).
     self.trainingDesc, self.testingDesc = self.scanForAudioFiles()

コード例 #2

0

ファイルを表示

ファイル: part1.py プロジェクト: padamban/Speech-Processing

    def __init__(self):
        """Create the helper objects and read the raw audio signal."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.data = SpeachData()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        # The pickle helper is given paths.pickle and paths.tag1 (as sTag).
        self.pickle = Pickle(self.paths.pickle, sTag=self.paths.tag1)

        # Read the file named by paths.file once and keep it on the data object.
        self.data.raw = self.am.readAudio(self.paths.file)

コード例 #3

0

ファイルを表示

ファイル: part6.py プロジェクト: padamban/Speech-Processing

    def __init__(self):
        """Create the helper objects, then load, organize and synthesize the data."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.am = AudioManager()

        self.p = Printer(1)
        self.S = Synthesizer()
        self.pickle = Pickle(self.paths.pickle)
        # The three calls below run in this order; each result is stored on
        # the instance before the next call is made.
        self.decoded, self.original, self.coded = self.loadAll()
        self.cP, self.cG, self.cLpc = self.organize()
        self.cSn = self.SynthAll()

コード例 #4

0

ファイルを表示

ファイル: part1.py プロジェクト: padamban/Speech-Processing

class PreProcessing:
    def __init__(self):
        """Create the helper objects and read the raw audio signal."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.data = SpeachData()
        self.p = Printer(1)
        self.am = AudioManager()
        self.m = Math()
        # The pickle helper is given paths.pickle and paths.tag1 (as sTag);
        # run() later calls its SaveData method.
        self.pickle = Pickle(self.paths.pickle, sTag=self.paths.tag1)

        # Read the file named by paths.file once; run() frames this signal.
        self.data.raw = self.am.readAudio(self.paths.file)

    def filterPitch(self, p):
        for i, v in enumerate(p):
            if i == 0 or i == len(p) - 1:
                continue
            if p[i - 1] == 0 and v != 0 and p[i + 1] == 0:
                p[i] = 0
            elif p[i - 1] != 0 and v == 0 and p[i + 1] != 0:
                p[i] = np.mean([p[i - 1], p[i + 1]])
        return p

    def run(self, save=1):

        stp = self.param.step
        for step, idx in enumerate(range(0, len(self.data.raw), stp)):

            trama = self.data.raw[idx:idx + self.param.pf]
            tramaAC = self.m.autocorrelation(trama)
            power = np.sum(self.data.raw[idx:idx + stp]**2) / self.param.step
            pitch = 0
            if (max(tramaAC[self.param.pi:self.param.pfN]) >
                    self.param.threshold):
                pitch = np.argmax(
                    tramaAC[self.param.pi:self.param.pfN]) + self.param.pi

            self.data.pitch.append(pitch)
            self.data.power.append(power)

            if (step in self.pc.stepInto1 or step in self.pc.stepIntoAll):
                self.p.prnt(2, str(step) + "------------------ start", 1)
                self.p.prnt(4, str("In First Cycle"), 1)
                self.p.plot([(self.data.raw, 'speech', 'c', 0),
                             (trama, 'trama', 'r', idx)])
                self.p.plot([(trama, 'trama', 'c', 0)])
                self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])
                self.p.plot([(tramaAC, 'tramaAC', 'c', 0)])

                self.p.prnt(2, str(step) + "------------------ end", 1)
                if self.pc.stop1:
                    input("   ...")

        self.data.pitch = self.filterPitch(self.data.pitch)

        if save:
            self.pickle.SaveData(self.data)

コード例 #5

0

ファイルを表示

ファイル: part2.py プロジェクト: padamban/Speech-Processing

 def __init__(self):
     """Create the helper objects, then call ``LoadData`` on the pickle helper."""
     self.paths = Paths()
     self.param = Params()
     self.pc = PrintConfig()
     self.p = Printer(1)
     self.am = AudioManager()
     self.m = Math()
     self.pickle = Pickle(
         self.paths.pickle,
         lTag=self.paths.tag1,
         sTag=self.paths.tag2,
     )

     self.data = self.pickle.LoadData()

コード例 #6

0

ファイルを表示

ファイル: part5.py プロジェクト: padamban/Speech-Processing

 def __init__(self):
     """Create the helper objects and load the encoded data via the pickle helper."""
     self.paths = Paths()
     self.param = Params()
     self.pc = PrintConfig()
     self.p = Printer(1)
     self.am = AudioManager()
     self.m = Math()
     # The pickle helper receives paths.tag4 as lTag and paths.tag5 as sTag.
     self.pickle = Pickle(self.paths.pickle,
                          lTag=self.paths.tag4,
                          sTag=self.paths.tag5)
     self.cc = CodeConfig()
     self.cu = CodingUtils()
     self.encoded = self.pickle.LoadEncoded()

コード例 #7

0

ファイルを表示

ファイル: part1.py プロジェクト: padamban/Speech-Processing

class Preprocess:
    def __init__(self):
        """Create the helper objects and collect the training/testing descriptors."""
        self.p = Printer(1)
        self.param = Params()
        self.m = Math()
        self.am = AudioManager()
        self.paths = Paths()
        # scanForAudioFiles reads self.am and self.paths, so it must run
        # after both have been assigned.
        self.trainingDesc, self.testingDesc = self.scanForAudioFiles()

    def scanForAudioFiles(self):
        trainPaths = self.am.scanDirectory(self.paths.folderTrain)
        testPaths = self.am.scanDirectory(self.paths.folderTest)
        return trainPaths, testPaths

    def readAudioFile(self, desc):
        path = desc[2]
        raw = self.am.readAudio(path)
        return raw

    def getSignalEnergy(self, raw):
        energy = []
        stp = self.param.step
        for step, idx in enumerate(range(0, len(raw), stp)):
            e = np.sum(raw[idx:idx + stp]**2)
            energy.append(e)
        return energy

#

    def getSpeech(self, raw, energy):
        """Locate the most prominent speech segment in ``raw``.

        ``energy`` holds one value per frame of ``param.step`` samples.  A
        two-threshold activation scheme over the frame energies yields
        candidate spans (in sample indices); spans closer than ``maxG``
        samples are merged, spans not longer than ``minL`` samples are
        dropped, and the remaining span with the largest peak absolute
        amplitude is selected.

        Returns ``(speech, speechIdx)``: a one-element list with the
        selected slice of ``raw`` and a one-element list with its start
        sample index.
        """
        rawAbs = abs(raw)
        stp = self.param.step

        # Energy thresholds are expressed as multiples of whiteNoiseRef.
        whiteNoiseRef = 100
        activationScale = [50, 1000, 100]
        dectivationScale = [100]

        # Only activated[0] and activated[1] are ever set to 1 below.
        activated = [0, 0, 0]

        lastActivated = 0
        spans = []
        span = []
        spanMaxRef = []
        maxRef = 0

        # Peak absolute amplitude of every frame (both branches of the
        # conditional slice the same way; for i == 0 the start is implicit).
        maxRaw = []
        for i, e in enumerate(energy):
            mx = max(rawAbs[:(i + 1) *
                            stp]) if i == 0 else max(rawAbs[(i) * stp:(i + 1) *
                                                            stp])
            maxRaw.append(mx)

        for i, e in enumerate(energy):
            wait = 0
            # passed the minimum activation
            if e >= whiteNoiseRef * activationScale[0] and activated[0] == 0:
                activated[0] = 1
                lastActivated = i
            # below the deactivation value
            elif e < whiteNoiseRef * dectivationScale[
                    0] and i - lastActivated > wait and activated[0] == 1:
                # A span is recorded only if the second threshold was also hit.
                if activated[0] == 1 and activated[1] == 1:
                    span = [lastActivated * stp, i * stp]
                    spans.append(span)
                    spanMaxRef.append(maxRef)
                activated = [0, 0, 0]
                maxRef = maxRaw[i]
            # passed the second activation
            if activated[0] == 1 and e >= whiteNoiseRef * activationScale[1]:
                activated[1] = 1
                maxRef = max([maxRef, maxRaw[i]])
        # NOTE(review): spans are appended only in the deactivation branch, so
        # a segment that is still active at the last frame is never recorded.

        # join spans, which are close
        joinedSpans = []
        joinedSpanMaxRef = []
        join = []
        jmaxRef = 0
        maxG = 1000
        for i, s in enumerate(spans):

            if i == 0:
                join = [s[0], s[1]]
                jmaxRef = spanMaxRef[i]
            elif s[0] - join[1] < maxG:
                # Gap to the running span is small: extend it.
                join[1] = s[1]
                jmaxRef = max([jmaxRef, spanMaxRef[i]])
            else:
                joinedSpans.append(join)
                joinedSpanMaxRef.append(jmaxRef)
                jmaxRef = spanMaxRef[i]
                # join now aliases the list stored in spans; later extensions
                # write into that same list.
                join = s
            # Flush the running span after the last input span.
            if i == len(spans) - 1 and len(join) == 2:
                joinedSpans.append(join)
                joinedSpanMaxRef.append(jmaxRef)

        # remove short spans
        minL = 1500
        longEnoughSpans = []
        longEnoughMaxRef = []
        for i, s in enumerate(joinedSpans):
            if s[1] - s[0] > minL:
                longEnoughSpans.append(s)
                longEnoughMaxRef.append(joinedSpanMaxRef[i])

        # the most probable span
        # NOTE(review): np.argmax raises on an empty list, i.e. when no span
        # survived the filters above.
        bestSpan = [longEnoughSpans[np.argmax(longEnoughMaxRef)]]
        speech = []
        speechIdx = []
        for s in bestSpan:
            speech.append(raw[s[0]:s[1]])
            speechIdx.append(s[0])
        return speech, speechIdx

    def extractSpeech(self, desc, visu=False):
        raw = self.readAudioFile(desc)
        energy = self.getSignalEnergy(raw)
        speech, speechIdx = self.getSpeech(raw, energy)
        if (visu):
            title = str(" content: " + desc[1][0]) + " | orator: " + str(
                desc[1][2]) + " |  version: " + str(desc[1][1])
            self.p.plotSpeech(raw, speech, speechIdx, title)
        return speech

    def getDistanceMap(self, sR, sT):
        R = len(sR)
        T = len(sT)
        D = np.zeros([R, T])
        for r in range(R):
            for t in range(T):
                tMin = (max(r * (T / (R * 2)), (r - R * 0.5) * (2 * T / R)))
                tMax = (min(r * (2 * T / R), (r + R) * (T / 2 / R)))
                if tMin <= t and t <= tMax:
                    D[r, t] = np.sqrt((sR[r] - sT[t])**2)
                else:
                    D[r, t] = np.Inf
        return D

    def processSpeech(self, raw):
        stp = self.param.step
        wndw = self.param.window
        p = self.param.p

        tramasAC = []

        for step, idx in enumerate(range(0, len(raw), stp)):
            trama = raw[idx:idx + wndw]

            if len(trama) < wndw:
                expTrama = trama
                for i in range(0,
                               np.ceil((wndw / len(trama)) - 1).astype(int)):
                    expTrama = np.hstack([expTrama, trama])
                expTrama = expTrama[0:wndw]
                trama = expTrama

            tAC = self.m.autocorrelation(trama)
            ptAC = tAC[:p]
            tramasAC = np.vstack([ptAC] if step == 0 else [tramasAC, ptAC])
#            if step == 20:
#                self.p.plot([ (tAC, 'all', 'b*-',  0), (ptAC, 'order p='+str(p), 'y',  0) ], 0, 'Autocorrelation of segment');
#                title = str( " trama: " + str(step))
#                self.p.plot([ (raw, 'speech', 'r',  0), (trama, 'segment ', 'b',  idx) ], 0, title)

        return tramasAC

    def getExpandedDistanceMap(self, D):
        eD = np.zeros(np.array(D.shape) + 1) + np.Inf
        eD[1:, 1:] = D
        eD[0, 0] = 0
        return eD

    def getDistanceMapOfAc(self, sR, sT):
        R = len(sR)
        T = len(sT)
        D = np.zeros([R, T])
        for r in range(R):
            for t in range(T):
                tMin = (max(r * (T / (R * 2)), (r - R * 0.5) * (2 * T / R)))
                tMax = (min(r * (2 * T / R), (r + R) * (T / 2 / R)))
                if not (tMin <= t + 1 and t - 1 <= tMax):
                    D[r, t] = np.Inf
                else:
                    D[r, t] = (sum((sT[t] - sR[r])**2)**(0.5))
        return D

    def stepOne(self, dist, position, arround):
        dim = arround.shape
        if 2 < sum(dim):
            dirs = np.array([])
            if 1 < dim[0] and 1 < dim[1]:
                dirs = np.array([[1, 0], [0, 1], [1, 1]])
            elif 1 < dim[0]:
                dirs = np.array([[1, 0]])
            elif 1 < dim[1]:
                dirs = np.array([[0, 1]])

            minDir = dirs[0]
            minVal = arround[minDir[0], minDir[1]]

            for d in dirs:
                thisVal = arround[d[0], d[1]]
                if thisVal <= minVal:
                    minDir = d
                    minVal = thisVal

            dist = dist + minVal
            position = position + minDir

        return position, dist, minVal

    def getDistanceRoute(self, expD):
        target = expD.shape
        Route = np.zeros(expD.shape)
        expDRoute = np.array(expD)

        baseline = 0.5
        pos = np.array([0, 0])
        dist = 0
        Route[pos[0], pos[1]] = baseline
        step = 0

        while ((target[0] - 1) - pos[0] + (target[0] - 1) - pos[0]) != 0:
            around = expD[pos[0]:pos[0] + 2, pos[1]:pos[1] + 2]
            pos, dist, delta = self.stepOne(dist, pos, around)
            step = step + 1
            Route[pos[0], pos[1]] = baseline + delta
            expDRoute[pos[0], pos[1]] = expDRoute[pos[0], pos[1]] + 3
        globalDist = np.inf
        if 0 < step:
            globalDist = dist / step

        return globalDist, Route, expDRoute

    def getDistance(self, sR, sT):
        D = self.getDistanceMapOfAc(sR, sT)
        expD = self.getExpandedDistanceMap(D)
        globalDist, route, expdRoute = self.getDistanceRoute(expD)
        return globalDist, expD, route, expdRoute

    def processAll(self, descs):
        ACs = []
        for d in descs:
            speech = self.extractSpeech(d, False)[0]
            speechAC = self.processSpeech(speech)
            ACs.append(speechAC)
        return ACs

    def compareAC(self, speechA_AC, speechB_AC):
        globalDistance, expD, route, expdRoute = self.getDistance(
            speechA_AC, speechB_AC)
        return globalDistance, expD, route, expdRoute

    def compare(self, descA, descB, visu=False, speechAlreadyProcessed=False):
        speechA = self.extractSpeech(descA, visu)[0]
        speechB = self.extractSpeech(descB, visu)[0]
        speechA_AC = self.processSpeech(speechA)
        speechB_AC = self.processSpeech(speechB)
        globalDistance, expD, route, expdRoute = self.compareAC(
            speechA_AC, speechB_AC)
        return globalDistance, expD, route, expdRoute

    def compare1toN(self, one, many, visu=False):
        dA = one
        for dK in many:
            globalDistance, expD, route, expdRoute = self.compare(
                dA, dK, visu)
            if (visu):
                self.p.imShow(
                    expD, "expD of " + str(dA[1]) + " v " + str(dK[1]) +
                    "      dist=" + str(round(globalDistance, 3)))

                self.p.imShow(
                    expdRoute, "expdRoute of " + str(dA[1]) + " v " +
                    str(dK[1]) + "      dist=" + str(round(globalDistance, 3)))

                self.p.imShow(
                    route, "route of " + str(dA[1]) + " v " + str(dK[1]) +
                    "      dist=" + str(round(globalDistance, 3)))

    def compareTestToTrain(self, test, train, visu=False):
        """Classify every test descriptor by its nearest training descriptor.

        Features of all test and training items are computed once with
        ``processAll``; every test item is then compared against every
        training item with ``compareAC``.  The training item with the
        lowest distance is the prediction, and it counts as a match when
        the ``desc[1][0]`` values of both items are equal.

        The score map, match map and match-score map are shown with
        ``self.p.imShow`` and the match ratio is printed.  ``visu`` is
        accepted but not used.  Returns ``None``.
        """

        testACs = self.processAll(test)
        trainACs = self.processAll(train)

        rows = len(test)
        cols = len(train)
        # scoreMap[i, j]: distance between test i and train j.
        scoreMap = np.zeros([rows, cols])
        # matchMap[i, j]: +1 / -1 at the nearest train item (right / wrong).
        matchMap = np.zeros([rows, cols])
        # Three rows per test item: row 3*i holds the distances, row 3*i + 1
        # the label markers, row 3*i + 2 is never written and stays -inf.
        matchScoreMap = np.zeros([rows * 3, cols]) - np.inf

        #        print("compareTestToTrain 1 - ", testACs)

        matchCount = 0
        testCount = 0

        # NOTE(review): test[0] / train[0] raise IndexError on empty inputs.
        print("compareTestToTrain  - ", len(test[0]), len(train[0]))
        for i, iTest in enumerate(test):
            iexp = i * 3

            iTestAC = testACs[i]
            scores = []
            for j, jTrain in enumerate(train):
                jTrainAC = trainACs[j]
                globalDistance, expD, route, expdRoute = self.compareAC(
                    iTestAC, jTrainAC)

                scoreMap[i, j] = globalDistance
                matchScoreMap[iexp, j] = globalDistance

                scores.append(globalDistance)
                # Mark training items that carry the same desc[1][0] value.
                isSame = iTest[1][0] == jTrain[1][0]
                matchScoreMap[iexp + 1, j] = 1 if isSame else np.inf


#                print("   -> ", i, j, "   -  ", iTest[1][0]," v ", jTrain[1][0], "  \t",round(globalDistance, 3) )

            # Nearest training item is the prediction for this test item.
            lowestScoreIdx = np.argmin(scores)

            isMatch = iTest[1][0] == train[lowestScoreIdx][1][0]

            matchCount = matchCount + (1 if isMatch else 0)
            testCount = testCount + 1

            print("   -> ", i, lowestScoreIdx, "   -  ", iTest[1][0], " v ",
                  train[lowestScoreIdx][1][0], "  \t",
                  round(scores[lowestScoreIdx], 3), "  \t", isMatch)

            matchMap[i, lowestScoreIdx] = 1 * (1 if isMatch else -1)
            # Overwrite the label marker at the predicted column.
            matchScoreMap[iexp + 1,
                          lowestScoreIdx] = 1 * (2 if isMatch else -0.5)

        matchRatio = (matchCount / testCount) if testCount != 0 else 0

        self.p.imShow(scoreMap, "scoreMap ")

        self.p.imShow(matchMap, "matchMap ")
        self.p.imShow(matchScoreMap, "matchScoreMap ")
        print(" matchRatio ", matchRatio, "    ", matchCount, testCount)

    def compareAll(self, data):

        numOfSamples = len(data)

        confusionMap = np.ones([numOfSamples, numOfSamples])
        #        matchMap = np.ones([numOfSamples, numOfSamples])
        matchMap = np.zeros([numOfSamples, numOfSamples])

        xMap = np.zeros([numOfSamples, numOfSamples])

        ACs = self.processAll(data)

        for i, di in enumerate(data):
            iAC = ACs[i]
            iValue = di[1][0]

            for j, dj in enumerate(data):
                if i > j - 1:
                    jValue = dj[1][0]
                    jAC = ACs[j]
                    globalDistance, expD, route, expdRoute = self.compareAC(
                        iAC, jAC)
                    confusionMap[i, j] = globalDistance
                    confusionMap[j, i] = globalDistance
                    matchMap[i,
                             j] = (jValue == iValue) and (globalDistance < 0.8)
                    xMap[i, j] = (globalDistance < 0.5)
                    print("   -> ", i, j, "   -  ", di[1][0], " v ", dj[1][0],
                          "  \t", round(globalDistance, 3))

        self.p.imShow(confusionMap, "confusionMap ")
        self.p.imShow(matchMap, "matchMap ")
        self.p.imShow(xMap, "xMap ")

    def run(self):
        #        self.compare1toN(self.testingDesc[21], [self.trainingDesc[30]], True)
        #        self.compare1toN(self.trainingDesc[1], [self.trainingDesc[0]], True)

        #        self.compareAll(self.trainingDesc[:40])
        self.compareTestToTrain(self.testingDesc[:], self.trainingDesc[:])

コード例 #8

0

ファイルを表示

ファイル: part6.py プロジェクト: padamban/Speech-Processing

class Analysis:
    def __init__(self):
        """Create the helper objects, then load, organize and synthesize the data."""
        self.paths = Paths()
        self.param = Params()
        self.pc = PrintConfig()
        self.am = AudioManager()

        self.p = Printer(1)
        self.S = Synthesizer()
        self.pickle = Pickle(self.paths.pickle)
        # Order matters: organize() reads self.original / self.decoded set by
        # the loadAll() line, and SynthAll() reads self.cP / self.cG /
        # self.cLpc set by the organize() line.
        self.decoded, self.original, self.coded = self.loadAll()
        self.cP, self.cG, self.cLpc = self.organize()
        self.cSn = self.SynthAll()

    def loadAll(self):
        coded = self.pickle.LoadEncoded(self.paths.tag4)
        decoded = self.pickle.LoadDecoded(self.paths.tag5)
        data = self.pickle.LoadData(self.paths.tag3)
        return decoded, data, coded

    def SynthAll(self):
        """Synthesize from the original and the decoded parameters.

        ``self.S.synth`` is called once with the original LPC / pitch /
        gain values and once with the decoded ones.  The error signal is
        the element-wise difference original minus decoded, taken over the
        length of the original synthesis.  The raw audio file is read
        afterwards.

        Returns ``ComaparedData(original, decoded, error, raw)``.
        """
        synthesize = self.S.synth
        snO = synthesize(self.cLpc.o, self.cP.o, self.cG.o)
        snD = synthesize(self.cLpc.d, self.cP.d, self.cG.d)
        snE = [value - snD[k] for k, value in enumerate(snO)]

        rawAudio = self.am.readAudio(self.paths.file)
        return ComaparedData(snO, snD, snE, rawAudio)

    def organize(self):
        """Pair original and decoded pitch, gain and LPC values per frame.

        For every frame of the original data the pitch is read from
        element ``[0, 0]`` of the stored value and cast to ``int``, and the
        gain from element ``[0, 0]`` rounded to three decimals.  The
        decoded values are used as stored, and the error is original minus
        decoded.  LPC values are passed through without an error term.

        Returns ``(cP, cG, cLpc)`` as ``ComaparedData`` objects.
        """
        source = self.original
        restored = self.decoded
        oLpc = source.lpc
        dLpc = restored.lpc

        # Each triple holds the (original, decoded, error) series.
        pitchSeries = ([], [], [])
        gainSeries = ([], [], [])

        for i in range(len(source.lpc)):
            op = int(source.pitch[i][0, 0])
            dp = restored.pitch[i]
            for series, value in zip(pitchSeries, (op, dp, op - dp)):
                series.append(value)

            og = round(source.gain[i][0, 0], 3)
            dg = restored.gain[i]
            for series, value in zip(gainSeries, (og, dg, og - dg)):
                series.append(value)

        cP = ComaparedData(*pitchSeries)
        cG = ComaparedData(*gainSeries)
        cLpc = ComaparedData(oLpc, dLpc, None)
        return cP, cG, cLpc

    def compareVisu(self):

        for i in range(len(self.cP.o)):
            if (i in self.pc.stepInto6 or i in self.pc.stepIntoAll):
                self.p.prnt(2, str(i) + "------------------ start", 1)
                self.p.prnt(4, str("In Sixth Cycle"), 1)
                self.p.prnt(2, '  ', 1)
                self.p.prnt(
                    2, '    gain   ' + str(i) + "  " + " ->    " +
                    str(round(self.cG.o[i], 0)) + "\t" +
                    str(int(self.cG.d[i])) + "\t" + str(int(self.cG.e[i])), 1)
                self.p.prnt(
                    2, '    pitch  ' + str(i) + "  " + " ->    " +
                    str(round(self.cP.o[i], 0)) + "\t" +
                    str(int(self.cP.d[i])) + "\t" + str(int(self.cP.e[i])), 1)

                for j in range(len(self.cLpc.o[i])):
                    self.p.prnt(
                        2, '     lpc   ' + str(i) + " " + str(j) + " ->   " +
                        str(round(self.cLpc.o[i][j], 3)) + "\t" +
                        str(round(self.cLpc.d[i][j], 3)), 1)

                start = i * self.param.step
                end = start + self.param.step
                tag = str(i) + "th "
                self.p.plot([
                    (self.cSn.raw[start:end], ' raw audio', 'k', 0),
                    (self.cSn.o[start:end], tag + ' original synth', 'b', 0),
                    (self.cSn.d[start:end], tag + '  decoded synth', 'g', 0),
                    (self.cSn.e[start:end], tag + '    error synth', 'r', 0)
                ])

        self.p.plot([(self.cSn.raw, ' raw audio', 'k', 0),
                     (self.cSn.o, ' original synth', 'b', 0),
                     (self.cSn.d, '  decoded synth', 'g', 0),
                     (self.cSn.e, '    error synth', 'r', 0)])
        self.p.plot([(self.cP.o, ' original gain', 'b', 0),
                     (self.cP.d, 'decoded gain', 'g*', 0),
                     (self.cP.e, 'error', 'r--', 0)])
        self.p.plot([(self.cG.o, ' original gain', 'b', 0),
                     (self.cG.d, 'decoded gain', 'g*', 0),
                     (self.cG.e, 'error', 'r--', 0)])
        originalFileSize = self.pickle.getFileSize(self.paths.file) * 8
        codedFileSize = len(self.coded.binaries)
        self.p.prnt(
            2, '  ---------------------------------------------------   ', 1)
        self.p.prnt(
            2, '    original file size    ->    ' + str(originalFileSize) +
            ' bits', 1)
        self.p.prnt(
            2,
            '       coded file size    ->    ' + str(codedFileSize) + ' bits',
            1)
        self.p.prnt(
            2, '           compression    ->    ' +
            str(round((codedFileSize / originalFileSize) * 100, 3)) + ' %', 1)

    def compareAudio(self):
        self.p.prnt(4, "", 1)

        self.p.prnt(4, str("Listen"), 1)
        self.p.prnt(4, "      1.  original file", 1)
        self.p.prnt(4, "      2.  sytesized before coding", 1)
        self.p.prnt(4, "      3.  sytesized after decoding", 1)

        self.am.playOriginalAudio(self.paths.file)
        self.am.playSyntesizedAudio(self.cSn.o)
        self.am.playSyntesizedAudio(self.cSn.d)