Code Example #1
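All the examples below use bare names such as newaxis, float32, int32, Variable, and functions without showing their imports. A preamble along these lines would make them runnable (an assumption; the original module headers are not part of this listing):

import itertools

import numpy as np
from numpy import newaxis, float32, int32

import chainer
from chainer import Variable, functions, optimizers, serializers

try:
    import cupy  # optional GPU backend; gpu_id < 0 falls back to numpy
except ImportError:
    cupy = None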
def compLongRepresentationSingle(net, waves, xp, trimInputLen=True):
    net.reset()

    x = waves
    batchSize, waveLen = x.shape

    inputLen = totalInputLength(net.structure)
    x = np.concatenate((np.zeros((batchSize, inputLen - 1), x.dtype), x),
                       axis=1)

    x = x[:, newaxis, :, newaxis]
    x = xp.asarray(x, float32)
    x = Variable(x)
    layerRepre = compRepresentationSingle(net, x)
    for li, r in enumerate(layerRepre):
        r = r[..., 0]
        if xp != np: r = cupy.asnumpy(r)

        r = r[..., -waveLen:]
        if trimInputLen:
            r = r[..., inputLen - 1:]
        layerRepre[li] = r

    layerRepre = np.stack(layerRepre, axis=0)
    return layerRepre
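A minimal CPU-only usage sketch (hypothetical shapes; net and the helper loadNet() come from the later examples):

# Hypothetical usage; net = loadNet(architecture, fileModel) as in the examples below.
waves = np.random.randn(4, 16000)  # (batch, samples)
repre = compLongRepresentationSingle(net, waves, np)  # xp=np keeps everything on the CPU
print(repre.shape)  # (layer, batch, channel, time), time trimmed by inputLen - 1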
Code Example #2
def compNoiseAmAveSyn(stimSec, waveFs, fileModel, architecture, gpu_id,
                      trainingRms):
    with chainer.using_config("enable_backprop", False):
        if cupy is not None and gpu_id >= 0:
            xp = cupy
            cupy.cuda.Device(gpu_id).use()
        else:
            xp = np

        modDepth = 1
        waveLen = int(stimSec * waveFs)

        np.random.seed(0)

        times = np.arange(waveLen) / waveFs
        freqs = np.logspace(np.log10(1), np.log10(2000), 2**8)
        meanSize = 2**2  # we used 2**4 in our paper

        batchSizeUpper = meanSize
        # split the meanSize noise tokens into batches of at most batchSizeUpper waves
        batchSize = np.array_split(np.arange(meanSize),
                                   int(np.ceil(meanSize / batchSizeUpper)))
        batchSize = [len(x) for x in batchSize]
        segmentSecUpper = 2  # decrease this for a GPU with smaller memory
        segmentLenUpper = int(segmentSecUpper * waveFs)

        net = loadNet(architecture, fileModel)
        if gpu_id >= 0: net.to_gpu(gpu_id)
        inputLen = totalInputLength(net.structure)

        freqResponse = []
        for fi, freq in enumerate(freqs):
            print("Conducting physiology:", "stimulus AM freq:", freq)
            cos, sin = vectorCosSin(freq, inputLen, waveLen, waveFs)

            batchResponse = []
            for bi, bs in enumerate(batchSize):
                waves = np.random.randn(bs, waveLen)
                # amplitude-modulate the noise carrier at the current AM frequency
                waves *= (1 - modDepth * np.cos(freq * 2 * np.pi * times))
                waves = scaleRms(waves, trainingRms)
                # 				repre=compLongRepresentation(net, waves, segmentLenUpper, waveLen, xp) #compLongRepresentationSingle() returns the same result when trimInputLen=True
                repre = compLongRepresentationSingle(net, waves, xp)

                repre += 1  # ELU outputs lie in (-1, inf); shift to non-negative rates
                ave = repre.mean(axis=-1)
                s = repre.sum(axis=-1)
                # synchrony = vector strength of the response at the AM frequency
                syn = (((repre * cos).sum(axis=-1) / s)**2 +
                       ((repre * sin).sum(axis=-1) / s)**2)**0.5
                syn[s == 0] = 0
                resp = np.stack((ave, syn),
                                axis=0)  #shape=(type, layer, batch, channel)
                batchResponse.append(resp)
            batchResponse = np.concatenate(batchResponse, axis=-2)

            batchResponse = batchResponse.mean(
                axis=-2)  #shape=(type, layer, channel)
            freqResponse.append(batchResponse)
        freqResponse = np.array(
            freqResponse)  #shape=(freq, type, layer, channel)

        return freqResponse
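The ave/syn pair computed above is a mean activation and a vector strength at the stimulus AM frequency. A self-contained toy version of the synchrony measure (all names local to this sketch):

import numpy as np

fs, f = 16000, 100.0  # sample rate and analysis frequency (Hz)
t = np.arange(fs) / fs
r = 1 + np.cos(2 * np.pi * f * t)  # non-negative response, fully modulated at f
c, s_ = np.cos(2 * np.pi * f * t), np.sin(2 * np.pi * f * t)
s = r.sum()
syn = np.hypot((r * c).sum() / s, (r * s_).sum() / s)
print(syn)  # ~0.5, the vector strength of a fully modulated raised cosine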
Code Example #3
def train(architecture, waves, infos, gpu_id, waveFs, numEpoch, seed):
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np

    inputLength = totalInputLength(architecture)
    labels = getLabels()
    numLabel = len(labels)
    groupFold = ((0, 1, 2), (3, ), (4, ))

    insLabelSize = 2**2

    np.random.seed(seed)
    net = Net(numLabel, architecture, functions.elu)
    # 	opt=Eve(1e-4)
    opt = optimizers.Adam(1e-4)
    opt.setup(net)
    if gpu_id >= 0: net.to_gpu(gpu_id)

    insFold = set(itertools.chain.from_iterable(groupFold[:2]))
    insLabelWave = groupLabelWave((insFold, ), infos)[0]
    insLabelWaveIndex = [[] for i in range(len(labels))]
    for li, la in enumerate(labels):
        for i in insLabelWave[la]:
            wave = waves[i]
            timeIndex = np.arange(len(wave))
            waveIndex = np.ones(len(wave), int32) * i
            index = np.stack((waveIndex, timeIndex), axis=1)
            insLabelWaveIndex[li].append(index)
        insLabelWaveIndex[li] = np.concatenate(insLabelWaveIndex[li], axis=0)

    insRemainingLabelWave = [
        np.random.permutation(insLabelWaveIndex[li])
        for li in range(len(labels))
    ]

    for epoch in range(numEpoch):
        print("Training: Epoch", epoch, "/", numEpoch)

        x, tr = makeInpTru(insLabelWaveIndex, waves, insRemainingLabelWave,
                           inputLength, insLabelSize, numLabel)
        x = x[:, newaxis, :, newaxis]
        x = xp.asarray(x)
        x = Variable(x)
        x = net.callSingle(x, True)
        tr = tr[..., newaxis, newaxis]
        tr = xp.asarray(tr)
        e = functions.softmax_cross_entropy(x, tr)

        net.cleargrads()
        e.backward()
        e.unchain_backward()
        opt.update()
        # opt.update(loss=e.data)  # Eve variant; Adam ignores a loss kwarg

    return net
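The evaluation examples below load parameters with serializers.load_hdf5(), so the trained net presumably gets saved with the matching call (an assumption; the save step is not part of this listing, and fileParam is a hypothetical path):

net = train(architecture, waves, infos, gpu_id, waveFs, numEpoch, seed)
net.to_cpu()  # move parameters off the GPU before serializing
serializers.save_hdf5(fileParam, net)  # counterpart of load_hdf5() in evaluate()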
Code Example #4
def compLongRepresentation(net,
                           waves,
                           segmentLenUpper,
                           waveLen,
                           xp,
                           trimInputLen=True):
    '''
    Deprecated: compLongRepresentationSingle() returns the same result
    when trimInputLen=True.
    '''
    segmentTimes = np.array_split(np.arange(waveLen),
                                  int(np.ceil(waveLen / segmentLenUpper)))
    segmentTimes = [(x[0], x[-1] + 1) for x in segmentTimes]
    net.reset()
    repre = None
    for si, (t0, t1) in enumerate(segmentTimes):
        x = waves[:, t0:t1]
        x = x[:, newaxis, :, newaxis]
        x = xp.asarray(x, float32)
        x = Variable(x)
        r = compRepresentation(net, x)
        r = xp.stack(r, axis=0)
        r = r[..., 0]
        if repre is None:
            repre = np.empty((r.shape[0], len(waves), r.shape[2], waveLen),
                             r.dtype)
        if xp != np: r = cupy.asnumpy(r)
        repre[..., t0:t1] = r

    if trimInputLen:
        inputLen = totalInputLength(net.structure)
        repre = repre[..., inputLen - 1:]

    return repre
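Given the docstring's equivalence claim, a quick consistency check between the two representation functions (hypothetical sketch; small sizes for speed):

waves = np.random.randn(2, 8000)
a = compLongRepresentationSingle(net, waves, np)
b = compLongRepresentation(net, waves, segmentLenUpper=4000,
                           waveLen=waves.shape[1], xp=np)
assert np.allclose(a, b, atol=1e-5)  # segmented and single-pass results agree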
Code Example #5
def evaluate(architecture, waves, trues, labels, infos, gpu_id, waveFs,
             fileParam):
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np

    valIndex = coreTestIndex(infos)

    devBatchSizeUpper = 2**8
    devSegmentSecUpper = 0.1
    devSegmentLenUpper = int(devSegmentSecUpper * waveFs)

    devIndex = sorted(valIndex, key=lambda i: len(waves[i]))
    devIndex = np.array(devIndex)
    devBatchIndex = np.array_split(
        devIndex, int(np.ceil(len(devIndex) / devBatchSizeUpper)))
    devLabelSize = np.zeros(len(labels), int32)
    for i in devIndex:
        for li, la in enumerate(labels):
            devLabelSize[li] += (trues[i] == li).sum()

    net = Net(len(labels), architecture, functions.elu)
    serializers.load_hdf5(fileParam, net)
    if gpu_id >= 0: net.to_gpu(gpu_id)
    inputLength = totalInputLength(architecture)

    with chainer.using_config("enable_backprop", False):
        confusion = np.zeros((len(labels), len(labels)), int32)
        for index in devBatchIndex:
            waveLen = len(waves[index[-1]])
            segmentTimes = np.array_split(
                np.arange(waveLen), int(np.ceil(waveLen / devSegmentLenUpper)))
            net.reset()
            for si, segTime in enumerate(segmentTimes):
                t0 = segTime[0]
                t1 = segTime[-1] + 1
                x = np.zeros((len(index), t1 - t0), float32)
                tr = -np.ones((len(index), t1 - t0), int32)
                for xi, wi in enumerate(index):
                    if len(waves[wi]) > t0:
                        w = waves[wi][t0:t1]
                        x[xi, :len(w)] = w
                        tr[xi, :len(w)] = trues[wi][t0:t1]

                x = x[:, newaxis, :, newaxis]
                x = xp.asarray(x)
                x = Variable(x)
                x = net(x, False)

                x = xp.argmax(x.data, axis=1)
                if xp is not np: x = cupy.asnumpy(x)
                x = x.flatten()
                tr = tr.flatten()
                for xi, ti in zip(x, tr):
                    if ti >= 0: confusion[ti, xi] += 1

        assert (np.sum(confusion, axis=1) == devLabelSize).all()
        return confusion
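The returned confusion matrix has true labels on rows and predictions on columns, so per-class recall follows directly (sketch):

confusion = evaluate(architecture, waves, trues, labels, infos, gpu_id,
                     waveFs, fileParam)
recall = np.diag(confusion) / confusion.sum(axis=1)  # per-class accuracy
for la, r in zip(labels, recall):
    print(la, round(float(r), 3))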
Code Example #6
def train(architecture, waves, trues, labels, infos, gpu_id, waveFs, numEpoch,
          seed):
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np

    valIndex = coreTestIndex(infos)
    np.random.seed(0)
    insIndex, = traGroupIndex(infos, 1)
    insIndex = np.array(insIndex)
    insLabelIndexTime = makeLabelIndexTime(insIndex, labels, trues)

    insLabelSize = 2**2  #la12 tot4096 ch128

    inputLength = totalInputLength(architecture)

    np.random.seed(seed)
    net = Net(len(labels), architecture, functions.elu)
    opt = optimizers.Adam(1e-4)
    # 	opt=Eve(1e-4)
    opt.setup(net)
    if gpu_id >= 0: net.to_gpu(gpu_id)

    remainingInsLabelIndexTime = [
        np.random.permutation(lt) for lt in insLabelIndexTime
    ]
    for epoch in range(numEpoch):
        print("Training: Epoch", epoch, "/", numEpoch)
        for li, lit in enumerate(remainingInsLabelIndexTime):
            if len(lit) < insLabelSize:
                remainingInsLabelIndexTime[li] = np.concatenate(
                    (lit, np.random.permutation(insLabelIndexTime[li])))
        x, tr = makeInpTru(labels, insLabelSize, inputLength,
                           remainingInsLabelIndexTime, waves, trues)

        x = x[:, newaxis, :, newaxis]
        x = xp.asarray(x)
        x = Variable(x)
        x = net.callSingle(x, True)
        tr = tr[..., newaxis, newaxis]
        tr = xp.asarray(tr)
        e = functions.softmax_cross_entropy(x, tr, normalize=True)

        net.cleargrads()
        e.backward()
        e.unchain_backward()
        opt.update()
        # opt.update(loss=e.data)  # Eve variant

    return net
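Throughout these examples, targets of -1 mark samples to exclude (padding in the evaluation loops, positions outside the labeled span). Chainer's softmax_cross_entropy skips them by default (ignore_label=-1). A toy demonstration:

import numpy as np
from chainer import functions

logits = np.array([[2.0, 0.0], [2.0, 0.0]], np.float32)
t_full = np.array([0, 0], np.int32)
t_pad = np.array([0, -1], np.int32)  # second sample is padding
print(functions.softmax_cross_entropy(logits, t_full).data)
print(functions.softmax_cross_entropy(logits, t_pad).data)  # same value: -1 is ignored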
Code Example #7
def compToneAveSyn(stimSec, waveFs, fileModel, architecture, gpu_id,
                   trainingRms):
    with chainer.using_config("enable_backprop", False):
        if cupy is not None and gpu_id >= 0:
            xp = cupy
            cupy.cuda.Device(gpu_id).use()
        else:
            xp = np

        waveLen = int(stimSec * waveFs)
        batchSizeUpper = 1

        times = np.arange(waveLen) / waveFs
        freqs = np.logspace(np.log10(100), np.log10(5000), 2**8)
        ampScale = np.logspace(np.log10(1 / 512), np.log10(1), 2**8)

        batchAmps = np.array_split(
            ampScale, int(np.ceil(len(ampScale) / batchSizeUpper)))
        segmentSecUpper = 2
        segmentLenUpper = int(segmentSecUpper * waveFs)

        net = loadNet(architecture, fileModel)
        if gpu_id >= 0: net.to_gpu(gpu_id)
        inputLen = totalInputLength(net.structure)

        freqResponse = []
        for fi, freq in enumerate(freqs):
            cos, sin = vectorCosSin(freq, inputLen, waveLen, waveFs)
            batchResponse = []
            for bi, ba in enumerate(batchAmps):
                waves = np.sin(freq * 2 * np.pi * times)
                waves = scaleRms(waves, trainingRms)
                waves = waves * ba[:, newaxis]  #shape=(amp, length)
                # 				repre=compLongRepresentation(net, waves, segmentLenUpper, waveLen, xp) #compLongRepresentationSingle() returns the same result when trimInputLen=True
                repre = compLongRepresentationSingle(net, waves, xp)

                repre += 1  #elu
                ave = repre.mean(axis=-1)
                s = repre.sum(axis=-1)
                syn = (((repre * cos).sum(axis=-1) / s)**2 +
                       ((repre * sin).sum(axis=-1) / s)**2)**0.5
                syn[s == 0] = 0
                resp = np.stack((ave, syn),
                                axis=0)  #shape=(type, layer, amp, channel)

                batchResponse.append(resp)
            batchResponse = np.concatenate(
                batchResponse, axis=-2)  #shape=(type, layer, amp, channel)
            freqResponse.append(batchResponse)
        freqResponse = np.array(
            freqResponse)  #shape=(freq, type, layer, amp, channel)

        return freqResponse
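freqResponse is a (freq, type, layer, amp, channel) grid, so standard tuning summaries fall out by indexing; e.g., a per-channel best frequency at the highest stimulus level (sketch; the layer index is hypothetical):

fr = compToneAveSyn(stimSec, waveFs, fileModel, architecture, gpu_id,
                    trainingRms)
freqs = np.logspace(np.log10(100), np.log10(5000), 2**8)  # same grid as above
ave = fr[:, 0]  # type 0 = time-averaged response, shape (freq, layer, amp, channel)
layer = 3  # hypothetical layer index
bf = freqs[np.argmax(ave[:, layer, -1, :], axis=0)]  # best frequency per channel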
Code Example #8
def compSilenceAveSyn(stimSec, waveFs, fileModel, architecture, gpu_id):
    with chainer.using_config("enable_backprop", False):
        if cupy is not None and gpu_id >= 0:
            xp = cupy
            cupy.cuda.Device(gpu_id).use()
        else:
            xp = np

        waveLen = int(stimSec * waveFs)

        freqs = np.logspace(np.log10(100), np.log10(5000), 2**8)

        segmentSecUpper = 2
        segmentLenUpper = int(segmentSecUpper * waveFs)

        net = loadNet(architecture, fileModel)
        if gpu_id >= 0: net.to_gpu(gpu_id)
        inputLen = totalInputLength(net.structure)

        waves = np.zeros((1, waveLen), float32)
        # 		repre=compLongRepresentation(net, waves, segmentLenUpper, waveLen, xp) #shape=(layer, 1, channel, length) #compLongRepresentationSingle() returns the same result when trimInputLen=True
        repre = compLongRepresentationSingle(net, waves, xp)
        repre += 1  #elu
        repre = repre[:, 0, :, :]  #shape=(layer, channel, length)

        ave = repre.mean(axis=-1)  #shape=(layer, channel)

        freqResponse = []
        for fi, freq in enumerate(freqs):
            cos, sin = vectorCosSin(freq, inputLen, waveLen, waveFs)

            s = repre.sum(axis=-1)
            syn = (((repre * cos).sum(axis=-1) / s)**2 +
                   ((repre * sin).sum(axis=-1) / s)**2)**0.5
            syn[s == 0] = 0
            freqResponse.append(syn)
        freqResponse = np.array(freqResponse)  #shape=(freq, layer, channel)

        return ave, freqResponse
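vectorCosSin() itself is not shown in this listing; from the way its outputs are multiplied against the trimmed representation, one plausible implementation (an assumption, not the original code) is:

def vectorCosSin(freq, inputLen, waveLen, waveFs):
    # Assumed behavior: cos/sin reference vectors aligned with the
    # representation after the first inputLen - 1 samples are trimmed.
    t = np.arange(inputLen - 1, waveLen) / waveFs
    phase = 2 * np.pi * freq * t
    return np.cos(phase), np.sin(phase)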
Code Example #9
def findNumEpoch(architecture, waves, infos, gpu_id, waveFs):
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np

    inputLength = totalInputLength(architecture)
    labels = getLabels()
    numLabel = len(labels)
    groupFold = ((0, 1, 2), (3, ), (4, ))

    np.random.seed()  # reseed from OS entropy, then record the drawn seed
    seed = np.random.randint(0, np.iinfo(int32).max)
    np.random.seed(seed)  # so the selected model can be retrained reproducibly
    net = Net(numLabel, architecture, functions.elu)
    # 	opt=Eve(1e-4)
    opt = optimizers.Adam(1e-4)
    opt.setup(net)
    if gpu_id >= 0: net.to_gpu(gpu_id)

    insLabelSize = 2**2
    devSize = 2**1
    devSegmentSecUpper = 10

    devEpoch = 2**5
    convergenceEpoch = 2**5 * devEpoch
    devSegmentLenUpper = int(devSegmentSecUpper * waveFs)
    devFold = sorted(set(groupFold[1]))
    devLabelWave = groupLabelWave((devFold, ), infos)[0]
    devLabelWave = list(
        itertools.chain.from_iterable([[(li, i) for i in devLabelWave[la]]
                                       for li, la in enumerate(labels)]))
    devLabelWave = sorted(devLabelWave, key=lambda lw: len(waves[lw[1]]))
    devBatchIndex = np.array_split(np.arange(len(devLabelWave)),
                                   int(np.ceil(len(devLabelWave) / devSize)))
    devLabelSize = np.zeros(numLabel, int32)
    for li, wi in devLabelWave:
        devLabelSize[li] += len(waves[wi])

    devWaves = {}
    for li, wi in devLabelWave:
        wave = waves[wi]
        wave = np.concatenate((wave, np.zeros((inputLength - 1) // 2,
                                              float32)))
        devWaves[wi] = wave

    insFold = sorted(set(groupFold[0]))
    insLabelWave = groupLabelWave((insFold, ), infos)[0]
    insLabelWaveIndex = [[] for i in range(len(labels))]
    for li, la in enumerate(labels):
        for i in insLabelWave[la]:
            wave = waves[i]
            timeIndex = np.arange(len(wave))
            waveIndex = np.ones(len(wave), int32) * i
            index = np.stack((waveIndex, timeIndex), axis=1)
            insLabelWaveIndex[li].append(index)
        insLabelWaveIndex[li] = np.concatenate(insLabelWaveIndex[li], axis=0)

    insRemainingLabelWave = [
        np.random.permutation(insLabelWaveIndex[li])
        for li in range(len(labels))
    ]

    epoch = 0
    bestEpoch = 0
    epochIncorrect = {}
    while epoch < bestEpoch + convergenceEpoch:
        x, tr = makeInpTru(insLabelWaveIndex, waves, insRemainingLabelWave,
                           inputLength, insLabelSize, numLabel)
        x = x[:, newaxis, :, newaxis]
        x = xp.asarray(x)
        x = Variable(x)
        x = net.callSingle(x, True)
        tr = tr[..., newaxis, newaxis]
        tr = xp.asarray(tr)
        e = functions.softmax_cross_entropy(x, tr)

        net.cleargrads()
        e.backward()
        e.unchain_backward()
        # 		opt.update(loss=e.data)
        opt.update()

        if epoch % devEpoch != devEpoch - 1:
            epoch += 1
            continue
        incorrect = xp.zeros(numLabel, int32)
        with chainer.using_config("enable_backprop", False):
            for bi, index in enumerate(devBatchIndex):
                waveIndex = np.array([devLabelWave[i][1] for i in index])
                tru = np.array([devLabelWave[i][0] for i in index])
                waveLen = len(devWaves[waveIndex[-1]])
                segmentTimes = np.array_split(
                    np.arange(waveLen),
                    int(np.ceil((waveLen) / devSegmentLenUpper)))
                net.reset()
                for si, segTime in enumerate(segmentTimes):
                    t0 = segTime[0]
                    t1 = segTime[-1] + 1
                    x = np.zeros((len(index), t1 - t0), float32)
                    tr = -np.ones((len(index), t1 - t0), int32)
                    for xi, wi in enumerate(waveIndex):
                        if len(devWaves[wi]) <= t0: continue
                        w = devWaves[wi][t0:t1]
                        x[xi, :len(w)] = w
                        tr[xi, :len(w)] = tru[xi]
                    if t0 < (inputLength - 1) // 2:
                        tr[:, :(inputLength - 1) // 2 - t0] = -1

                    x = x[:, newaxis, :, newaxis]
                    x = xp.asarray(x)
                    x = Variable(x)
                    x = net(x, False)
                    x.unchain_backward()

                    x = xp.argmax(x.data, axis=1)
                    tr = tr[..., newaxis]
                    tr = xp.asarray(tr)
                    for li, la in enumerate(labels):
                        incorrect[li] += (x[tr == li] != li).sum()

            net.reset()
            if gpu_id >= 0: incorrect = cupy.asnumpy(incorrect)
            incorrect = (incorrect / devLabelSize).mean()
            print("epoch", epoch, "incorrect", incorrect)

        if len(epochIncorrect) == 0 or incorrect < epochIncorrect[bestEpoch]:
            bestEpoch = epoch
        epochIncorrect[epoch] = incorrect
        epoch += 1

    devEpochs = np.array(sorted(epochIncorrect), int32)
    bestScore = epochIncorrect[bestEpoch]
    epochIncorrect = np.array([epochIncorrect[ep] for ep in devEpochs])

    return bestEpoch, bestScore, seed
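The stopping rule trains until convergenceEpoch steps pass without a new best dev score. In isolation the logic looks like this (toy sketch; devScore is a hypothetical evaluation callback, and the real loop only evaluates every devEpoch steps):

bestEpoch, best = 0, float("inf")
epoch, patience = 0, 2**5 * 2**5  # convergenceEpoch = 2**5 * devEpoch
while epoch < bestEpoch + patience:
    score = devScore(epoch)  # hypothetical: dev-set error at this epoch
    if score < best:
        bestEpoch, best = epoch, score
    epoch += 1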
Code Example #10
def evaluate(architecture, waves, infos, gpu_id, waveFs, fileParam):
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np

    inputLength = totalInputLength(architecture)
    labels = getLabels()
    numLabel = len(labels)
    groupFold = ((0, 1, 2), (3, ), (4, ))

    devSize = 2**1
    devSegmentSecUpper = 10

    net = Net(numLabel, architecture, functions.elu)
    serializers.load_hdf5(fileParam, net)
    if gpu_id >= 0: net.to_gpu(gpu_id)

    devSegmentLenUpper = int(devSegmentSecUpper * waveFs)
    devFold = sorted(set(groupFold[2]))
    devLabelWave = groupLabelWave((devFold, ), infos)[0]
    devLabelWave = list(
        itertools.chain.from_iterable([[(li, i) for i in devLabelWave[la]]
                                       for li, la in enumerate(labels)]))
    devLabelWave = sorted(devLabelWave, key=lambda lw: len(waves[lw[1]]))
    devBatchIndex = np.array_split(np.arange(len(devLabelWave)),
                                   int(np.ceil(len(devLabelWave) / devSize)))
    devLabelSize = np.zeros(numLabel, int32)
    for li, wi in devLabelWave:
        devLabelSize[li] += len(waves[wi])

    devWaves = {}
    for li, wi in devLabelWave:
        wave = waves[wi]
        wave = np.concatenate((wave, np.zeros((inputLength - 1) // 2,
                                              float32)))
        devWaves[wi] = wave

    with chainer.using_config("enable_backprop", False):
        confusion = np.zeros((numLabel, numLabel), int32)
        for bi, index in enumerate(devBatchIndex):
            waveIndex = np.array([devLabelWave[i][1] for i in index])
            tru = np.array([devLabelWave[i][0] for i in index])
            waveLen = len(devWaves[waveIndex[-1]])
            segmentTimes = np.array_split(
                np.arange(waveLen), int(np.ceil(
                    (waveLen) / devSegmentLenUpper)))
            net.reset()
            for si, segTime in enumerate(segmentTimes):
                t0 = segTime[0]
                t1 = segTime[-1] + 1
                x = np.zeros((len(index), t1 - t0), float32)
                tr = -np.ones((len(index), t1 - t0), int32)
                for xi, wi in enumerate(waveIndex):
                    if len(devWaves[wi]) <= t0: continue
                    w = devWaves[wi][t0:t1]
                    x[xi, :len(w)] = w
                    tr[xi, :len(w)] = tru[xi]
                if t0 < (inputLength - 1) // 2:
                    tr[:, :(inputLength - 1) // 2 - t0] = -1

                x = x[:, newaxis, :, newaxis]
                x = xp.asarray(x)
                x = Variable(x)
                x = net(x, False)
                x.unchain_backward()

                x = xp.argmax(x.data, axis=1)
                if gpu_id >= 0: x = cupy.asnumpy(x)
                x = x.flatten()
                tr = tr.flatten()
                for xi, ti in zip(x[tr >= 0], tr[tr >= 0]):
                    confusion[ti, xi] += 1

        net.reset()
        assert (np.sum(confusion, axis=1) == devLabelSize).all()
        return confusion
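Because the assert guarantees each row of the confusion matrix sums to the per-class sample count, the balanced error used for model selection can be recovered from it directly (sketch; here computed on the test fold):

confusion = evaluate(architecture, waves, infos, gpu_id, waveFs, fileParam)
perClassError = 1 - np.diag(confusion) / confusion.sum(axis=1)
print(perClassError.mean())  # same form as the dev 'incorrect' score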
Code Example #11
def findNumEpoch(architecture, waves, trues, labels, infos, gpu_id, waveFs):
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np

    valIndex = coreTestIndex(infos)
    np.random.seed(0)
    insIndex, devIndex = traGroupIndex(infos, 2)
    insIndex = np.array(insIndex)
    insLabelIndexTime = makeLabelIndexTime(insIndex, labels, trues)

    insLabelSize = 2**2
    devEpoch = 2**5
    convergenceEpoch = 2**5 * devEpoch

    devBatchSizeUpper = 2**8
    devSegmentSecUpper = 0.1
    devSegmentLenUpper = int(devSegmentSecUpper * waveFs)

    devIndex = sorted(devIndex, key=lambda i: len(waves[i]))
    devIndex = np.array(devIndex)
    devBatchIndex = np.array_split(
        devIndex, int(np.ceil(len(devIndex) / devBatchSizeUpper)))
    devLabelSize = np.zeros(len(labels), int32)
    for i in devIndex:
        for li, la in enumerate(labels):
            devLabelSize[li] += (trues[i] == li).sum()

    inputLength = totalInputLength(architecture)

    np.random.seed()
    seed = np.random.randint(0, np.iinfo(int32).max)
    np.random.seed(seed)

    net = Net(len(labels), architecture, functions.elu)
    opt = optimizers.Adam(1e-4)
    # 	opt=Eve(1e-4)
    opt.setup(net)
    if gpu_id >= 0: net.to_gpu(gpu_id)

    remainingInsLabelIndexTime = [
        np.random.permutation(lt) for lt in insLabelIndexTime
    ]

    epoch = 0
    bestEpoch = 0
    epochIncorrect = {}
    while epoch < bestEpoch + convergenceEpoch:
        for li, lit in enumerate(remainingInsLabelIndexTime):
            if len(lit) < insLabelSize:
                remainingInsLabelIndexTime[li] = np.concatenate(
                    (lit, np.random.permutation(insLabelIndexTime[li])))
        x, tr = makeInpTru(labels, insLabelSize, inputLength,
                           remainingInsLabelIndexTime, waves, trues)

        x = x[:, newaxis, :, newaxis]
        x = xp.asarray(x)
        x = Variable(x)
        x = net.callSingle(x, True)
        tr = tr[..., newaxis, newaxis]
        tr = xp.asarray(tr)
        e = functions.softmax_cross_entropy(x, tr, normalize=True)

        net.cleargrads()
        e.backward()
        e.unchain_backward()
        opt.update()
        # 		opt.update(loss=e.data)

        if epoch % devEpoch != devEpoch - 1:
            epoch += 1
            continue
        incorrect = xp.zeros(len(labels), int32)
        with chainer.using_config("enable_backprop", False):
            for index in devBatchIndex:
                waveLen = len(waves[index[-1]])
                segmentTimes = np.array_split(
                    np.arange(waveLen),
                    int(np.ceil(waveLen / devSegmentLenUpper)))
                net.reset()
                for si, segTime in enumerate(segmentTimes):
                    t0 = segTime[0]
                    t1 = segTime[-1] + 1
                    x = np.zeros((len(index), t1 - t0), float32)
                    tr = -np.ones((len(index), t1 - t0), int32)
                    for xi, wi in enumerate(index):
                        if len(waves[wi]) > t0:
                            w = waves[wi][t0:t1]
                            x[xi, :len(w)] = w
                            tr[xi, :len(w)] = trues[wi][t0:t1]

                    x = x[:, newaxis, :, newaxis]
                    x = xp.asarray(x)
                    x = Variable(x)
                    x = net(x, False)
                    x.unchain_backward()

                    x = xp.argmax(x.data, axis=1)
                    tr = tr[..., newaxis]
                    tr = xp.asarray(tr)
                    for li, la in enumerate(labels):
                        incorrect[li] += (x[tr == li] != li).sum()

            net.reset()
            if gpu_id >= 0: incorrect = cupy.asnumpy(incorrect)
            incorrect = (incorrect / devLabelSize).mean()
            print("epoch", epoch, "incorrect", incorrect)

        if len(epochIncorrect) == 0 or incorrect < min(epochIncorrect.values()):
            bestEpoch = epoch
        epochIncorrect[epoch] = incorrect
        epoch += 1

    devEpochs = np.array(sorted(epochIncorrect), int32)
    epochIncorrect = np.array([epochIncorrect[ep] for ep in devEpochs])
    bestIncorrect = epochIncorrect.min()

    return bestEpoch, bestIncorrect, seed
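As with the earlier variant, the selected epoch and recorded seed feed a deterministic retraining run, whose parameters the matching evaluate() can then load (sketch; fileParam is a hypothetical path):

bestEpoch, bestIncorrect, seed = findNumEpoch(architecture, waves, trues,
                                              labels, infos, gpu_id, waveFs)
net = train(architecture, waves, trues, labels, infos, gpu_id, waveFs,
            numEpoch=bestEpoch + 1, seed=seed)  # bestEpoch is 0-indexed
net.to_cpu()
serializers.save_hdf5(fileParam, net)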