Example #1
def main():
	sess = tf.InteractiveSession()
	bg = batchGenerator.batchGenerator("../features/input", "../features/output")
	
	x = tf.placeholder(tf.float32, shape=[None, 650])
	y_ = tf.placeholder(tf.float32, shape=[None, 2])
	
	W_conv1 = weight_variable([5, 5, 1, 32])
	b_conv1 = bias_variable([32])
	
	x_image = tf.reshape(x, [-1, 50, 13, 1])  # presumably 50 frames x 13 MFCC coefficients per window
	
	h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
	h_pool1 = max_pool_2x2(h_conv1)
	
	W_conv2 = weight_variable([3, 3, 32, 64])
	b_conv2 = bias_variable([64])

	h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
	h_pool2 = max_pool_2x2(h_conv2)
	
	W_fc1 = weight_variable([13 * 4 * 64, 1024])
	b_fc1 = bias_variable([1024])

	h_pool2_flat = tf.reshape(h_pool2, [-1, 13*4*64])
	h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
	
	keep_prob = tf.placeholder(tf.float32)
	h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
	
	W_fc2 = weight_variable([1024, 2])
	b_fc2 = bias_variable([2])

	y_logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
	y_conv = tf.nn.softmax(y_logits)

	# softmax_cross_entropy_with_logits expects raw logits, not the softmax output,
	# so the loss is computed from y_logits rather than y_conv.
	cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_logits, labels=y_))
	# Alternative losses to try if this one misbehaves:
	#cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))
	#cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
	train_step = tf.train.AdamOptimizer(1e-5).minimize(cross_entropy)
	correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
	accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
	saver = tf.train.Saver()
	sess.run(tf.initialize_all_variables())
	for i in range(100):
		print(i)
		batch = bg.getBatch(0, 64)	
		if i%2 == 0:
			train_accuracy = sess.run(accuracy, feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
			train_loss = sess.run(cross_entropy, feed_dict={x:batch[0], y_: batch[1], keep_prob: 1.0})
			print("step %d, training accuracy %g, loss %g"%(i, train_accuracy, train_loss))
		train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
	
	randomTestSet = bg.getBatch(1, 20)  # samples may repeat within this batch, but the dataset is large enough that it hardly matters
	print("test accuracy %g"%accuracy.eval(feed_dict={x: randomTestSet[0], y_: randomTestSet[1], keep_prob: 1.0}))
	saveNetwork(saver, sess)
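
This main() (and the one in the next example) calls helper constructors (weight_variable, bias_variable, conv2d, max_pool_2x2) and imports that are not shown in the listing. A minimal sketch of how they are commonly defined, following the TF 1.x MNIST tutorial; the initialization, stride, and padding values here are assumptions, although SAME padding matches the flatten sizes used above:

import tensorflow as tf

def weight_variable(shape):
    # Small truncated-normal initialization, as in the TF 1.x MNIST tutorial.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    # Stride-1 convolution with SAME padding keeps the spatial size unchanged.
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # 2x2 max pooling halves each spatial dimension (rounding up under SAME padding),
    # which is what the 13*4*64 flatten size above assumes for the 50x13 input.
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
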
Example #2

def main():
    sess = tf.InteractiveSession()
    if RANDOM_TRAINING:
        bg = batchGenerator.batchGenerator("train/features/", "jamendo_lab")
    else:
        statictrainbg = staticBatchGenerator.staticBatchGenerator(
            "train/features/", "jamendo_lab")

    x = tf.placeholder(tf.float32, shape=[None, 3360])
    y_ = tf.placeholder(tf.float32, shape=[None, 2])

    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])

    x_image = tf.reshape(x, [-1, 84, 40, 1])

    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    W_conv2 = weight_variable([3, 3, 32, 64])
    b_conv2 = bias_variable([64])

    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)
    h_pool2_flat = tf.reshape(h_pool2, [-1, 21 * 10 * 64])

    W_fc1 = weight_variable([21 * 10 * 64, 1024])
    b_fc1 = bias_variable([1024])

    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    W_fc2 = weight_variable([1024, 2])
    b_fc2 = bias_variable([2])

    h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

    W_fc3 = weight_variable([2, 2])
    b_fc3 = bias_variable([2])

    # Feed the dropped-out activations into the output layer and keep the raw logits,
    # since softmax_cross_entropy_with_logits expects logits rather than softmax output.
    y_logits = tf.matmul(h_fc2_drop, W_fc3) + b_fc3
    y_conv = tf.nn.softmax(y_logits)
    #y_conv = tf.nn.sigmoid(y_logits)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_logits, labels=y_))
    # Alternative losses:
    #cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))  # did not seem to work well
    #cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))  # also works well
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Use the softmax probability of the vocal class (column 1) as the per-window prediction score.
    #prediction = y_conv
    #prediction = tf.argmax(y_conv, 1)
    prediction = y_conv[:, 1]

    sess.run(tf.initialize_all_variables())
    saver = tf.train.Saver()

    # args[0] selects the mode: 0 = train, 1 = tune the decision threshold on the
    # validation set, anything else = evaluate on the test set.
    args = sys.argv[1:]
    if int(args[0]) == 1 or int(args[0]) == 2:
        # Validation and testing restore a previously trained network.
        loadNetwork(saver, sess)

    if int(args[0]) == 0:  # Training
        if RANDOM_TRAINING:
            for i in range(20000):
                batch = bg.getBatch(64)
                if i % 100 == 0:
                    train_accuracy = sess.run(accuracy,
                                              feed_dict={
                                                  x: batch[0],
                                                  y_: batch[1],
                                                  keep_prob: 1.0
                                              })
                    train_loss = sess.run(cross_entropy,
                                          feed_dict={
                                              x: batch[0],
                                              y_: batch[1],
                                              keep_prob: 1.0
                                          })
                    print("step %d, training accuracy %g, loss %g" %
                          (i, train_accuracy, train_loss))
                train_step.run(feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    keep_prob: 0.5
                })

        else:  # Static training (much faster and ensures that every sample in the training set is used)
            traininput, trainoutput = statictrainbg.getSet()
            # Input normalization is currently disabled (mean 0, std 1 leaves the data unchanged);
            # uncomment the next two lines to normalize with training-set statistics instead.
            #traininputMean = np.mean(traininput, axis=0)
            #traininputStd = np.std(traininput, axis=0)
            traininputMean = 0
            traininputStd = 1
            normalizedtrainInput = (traininput -
                                    traininputMean) / traininputStd

            staticvalidbg = staticBatchGenerator.staticBatchGenerator(
                "valid/features/", "jamendo_lab")
            validinput, validoutput = staticvalidbg.getSet()

            normalizedvalidInput = (validinput -
                                    traininputMean) / traininputStd

            i = 0
            loopCount = 0
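            # Mini-batches of 64 consecutive windows are taken from the training set;
            # LOOPAMOUNT controls how many passes (epochs) are made, with a reshuffle between passes.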
            while i < traininput.shape[0] - 64:
                if i % 100 == 0:

                    train_accuracy = sess.run(accuracy,
                                              feed_dict={
                                                  x: normalizedtrainInput[i:i +
                                                                          64],
                                                  y_: trainoutput[i:i + 64],
                                                  keep_prob: 1.0
                                              })
                    train_loss = sess.run(cross_entropy,
                                          feed_dict={
                                              x:
                                              normalizedtrainInput[i:i + 64],
                                              y_: trainoutput[i:i + 64],
                                              keep_prob: 1.0
                                          })
                    print("loop %d, step %d, training accuracy %g, loss %g" %
                          (loopCount + 1, i, train_accuracy, train_loss))

                train_step.run(
                    feed_dict={
                        x: normalizedtrainInput[i:i + 64],
                        y_: trainoutput[i:i + 64],
                        keep_prob: 0.5
                    })

                if i % 6400 == 0:
                    idx = random.randint(0,
                                         normalizedvalidInput.shape[0] - 500)

                    valid_accuracy = sess.run(
                        accuracy,
                        feed_dict={
                            x: normalizedvalidInput[idx:idx + 500],
                            y_: validoutput[idx:idx + 500],
                            keep_prob: 1.0
                        })
                    valid_loss = sess.run(cross_entropy,
                                          feed_dict={
                                              x:
                                              normalizedvalidInput[idx:idx +
                                                                   500],
                                              y_:
                                              validoutput[idx:idx + 500],
                                              keep_prob:
                                              1.0
                                          })
                    print("loop %d, step %d, valid accuracy %g, loss %g" %
                          (loopCount + 1, i, valid_accuracy, valid_loss))

                i += 64

                if i >= traininput.shape[0] - 64:
                    if loopCount < LOOPAMOUNT - 1:
                        # Reshuffle inputs and labels together before the next pass.
                        p = np.random.permutation(len(traininput))
                        normalizedtrainInput = normalizedtrainInput[p]
                        trainoutput = trainoutput[p]
                        loopCount += 1
                        i = 0
                    else:
                        break

        saveNetwork(saver, sess)
    elif int(args[0]) == 1:  # Validating
        MFCCFileNames = glob.glob("valid/features/" + "/*h5")

        thresholds = np.arange(0.4, 0.62, 0.01)
        bestThreshold = 0.5
        bestAccuracy = 0
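        # Sweep the candidate thresholds over all validation songs and keep the one
        # that gives the highest mean per-song accuracy.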

        for threshold in thresholds:
            totalSongAccuracy = 0
            songCount = 0
            for MFCCFileName in MFCCFileNames:
                index = MFCCFileName.rfind("/")
                # Map e.g. "valid/features/song.h5" to the matching "jamendo_lab/song.lab" label file.
                labelFileName = "jamendo_lab/" + MFCCFileName[index + 1:-2] + "lab"

                sbg = songBatchGenerator.songBatchGenerator(
                    MFCCFileName, labelFileName)
                songBatch = sbg.getBatch()

                songPrediction = prediction.eval(feed_dict={
                    x: songBatch[0],
                    y_: songBatch[1],
                    keep_prob: 1.0
                })
                songPrediction = songPrediction > threshold
                y = songBatch[1]

                songCount += 1
                totalSongAccuracy += float(np.sum(
                    songPrediction == y[:, 1])) / songPrediction.shape[0]

            if totalSongAccuracy / songCount > bestAccuracy:
                bestAccuracy = totalSongAccuracy / songCount
                bestThreshold = threshold

        #print(bestAccuracy)
        print('Best threshold found on the validation set: %g' % bestThreshold)

    else:  # Testing
        MFCCFileNames = glob.glob("test/features/" + "/*h5")

        totalAccuracy = 0
        totalSpec = 0
        totalRecall = 0
        totalPrecision = 0
        totalFmeasure = 0

        totalSongAccuracy = 0

        vocalWindowCount = 0
        totalClassifications = 0
        songCount = 0
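        # Evaluate each test song: predict a per-window vocal probability, optionally smooth it,
        # threshold it, and accumulate accuracy, specificity, recall, precision and f-measure.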

        for MFCCFileName in MFCCFileNames:
            index = MFCCFileName.rfind("/")
            # Map e.g. "test/features/song.h5" to the matching "jamendo_lab/song.lab" label file.
            labelFileName = "jamendo_lab/" + MFCCFileName[index + 1:-2] + "lab"

            sbg = songBatchGenerator.songBatchGenerator(
                MFCCFileName, labelFileName)
            songBatch = sbg.getBatch()
            y = songBatch[1]
            songLength = y.shape[0]

            print(MFCCFileName)
            print(songBatch[0].shape)

            songPrediction = prediction.eval(feed_dict={
                x: songBatch[0],
                y_: songBatch[1],
                keep_prob: 1.0
            })
            if SHOW_PLOTS:
                fig = pp.figure()
                pp.plot(songPrediction)
                pp.axis([0, songLength, 0, 0.6])
                pp.show()

            songPredictionThreshold = songPrediction > THRESHOLD
            if SHOW_PLOTS:
                fig = pp.figure()
                pp.imshow(np.logical_not(songPredictionThreshold).reshape(
                    1, songLength),
                          aspect='auto',
                          cmap=pp.cm.gray,
                          interpolation='nearest')
                pp.axis([0, songLength, 0, 0.5])
                pp.show()

            vocalWindowCount += sum(y[:, 1])
            if SHOW_PLOTS:
                fig = pp.figure()
                pp.imshow(np.logical_not(y[:, 1]).reshape(1, songLength),
                          aspect='auto',
                          cmap=pp.cm.gray,
                          interpolation='nearest')
                pp.axis([0, songLength, 0, 0.5])
                pp.show()

            #acc = accuracy.eval(feed_dict={x: songBatch[0], y_: songBatch[1], keep_prob: 1.0})  # without a tuned threshold; seems to perform about as well or slightly better
            if GAUSSIAN_SMOOTHING:
                songPredictionSmoothed = ndimage.gaussian_filter1d(
                    songPrediction, 1)

                if SHOW_PLOTS:
                    fig = pp.figure()
                    pp.plot(songPredictionSmoothed)
                    pp.axis([0, songLength, 0, 0.6])
                    pp.show()

            if MEDIAN_SMOOTHING:
                songPredictionSmoothed = medfilt(songPrediction, 9)

                if SHOW_PLOTS:
                    fig = pp.figure()
                    pp.plot(songPredictionSmoothed)
                    pp.axis([0, songLength, 0, 0.6])
                    pp.show()

            # Score the thresholded prediction: use the smoothed curve when smoothing is
            # enabled, otherwise the raw thresholded output.
            if GAUSSIAN_SMOOTHING or MEDIAN_SMOOTHING:
                songPredictionThresholded = songPredictionSmoothed > THRESHOLD
            else:
                songPredictionThresholded = songPredictionThreshold

            # Confusion-matrix counts for accuracy, specificity, recall, precision and f-measure.
            counts = Counter(zip(songPredictionThresholded, y[:, 1]))
            true_pos = float(counts[1, 1])
            false_pos = float(counts[1, 0])
            false_neg = float(counts[0, 1])
            true_neg = float(counts[0, 0])

            acc = (true_pos + true_neg) / songLength
            spec = true_neg / (false_pos + true_neg)
            recall = true_pos / (true_pos + false_neg)
            precision = true_pos / (true_pos + false_pos)
            fmeasure = 2 * (precision * recall) / (precision + recall)

            print "test accuracy: %g" % acc
            print "f-measure: %g" % fmeasure
            print ''
            totalAccuracy += songLength * acc
            totalSpec += songLength * spec
            totalRecall += songLength * recall
            totalPrecision += songLength * precision
            totalFmeasure += songLength * fmeasure
            totalSongAccuracy += acc

            totalClassifications += songLength
            songCount += 1

        print('Average accuracy over all songs, not taking song length into account: %g' %
              (totalSongAccuracy / songCount))
        print('Average accuracy over all songs, taking song length into account: %g' %
              (totalAccuracy / totalClassifications))
        print('Average specificity over all songs, taking song length into account: %g' %
              (totalSpec / totalClassifications))
        print('Average recall over all songs, taking song length into account: %g' %
              (totalRecall / totalClassifications))
        print('Average precision over all songs, taking song length into account: %g' %
              (totalPrecision / totalClassifications))
        print('Average f-measure over all songs, taking song length into account: %g' %
              (totalFmeasure / totalClassifications))
        print('Percentage of vocal windows in the test set: %g' %
              (float(vocalWindowCount) / totalClassifications))
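
The saveNetwork and loadNetwork helpers called in both examples are not shown either; a minimal sketch built on the tf.train.Saver the scripts already create, with the checkpoint path as a placeholder assumption:

CHECKPOINT_PATH = "model/model.ckpt"  # hypothetical location

def saveNetwork(saver, sess):
    # Persist all graph variables to disk.
    saver.save(sess, CHECKPOINT_PATH)

def loadNetwork(saver, sess):
    # Restore the variables written by a previous training run.
    saver.restore(sess, CHECKPOINT_PATH)
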
Example #3
import batchGenerator

bg = batchGenerator.batchGenerator('../features/input', '../features/output')
print(bg.getBatch(0, 64))
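
The batchGenerator interface is not documented in these snippets; judging from how Example #1 feeds the result into placeholders of shape [None, 650] and [None, 2], getBatch(setIndex, batchSize) presumably returns a (features, one-hot labels) pair, along these lines:

# Assumed shapes, inferred from the placeholders in Example #1:
features, labels = bg.getBatch(0, 64)
print(features.shape)  # expected: (64, 650) MFCC feature windows
print(labels.shape)    # expected: (64, 2) one-hot non-vocal / vocal labels
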
    print('\n\nResuming training from file: ' + lastModelPath +
          ' and epoch: ' + str(init_epoch - 1))

    if init_epoch >= cfg['train_epochs']:
        print(
            'The target number of epochs specified (' +
            str(cfg['train_epochs']) +
            ') has already been reached. To continue training, increase the number of epochs in the cfg json file'
        )
        exit()

######################## TRAINING AND VALIDATION DATA GENERATORS CREATION
train_pair_array = np.load(cfg['train_data_pairs'])
val_pair_array = np.load(cfg['val_data_pairs'])

train_generator = batchGenerator(train_pair_array, cfg['batch_size'],
                                 cfg['input_shape'])
val_generator = batchGenerator(val_pair_array, cfg['batch_size'],
                               cfg['input_shape'])

##### MODEL FITTING ######
# subprocess.Popen(["tensorboard", "--port","3468","--logdir",summariesPath])
# subprocess.Popen(["google-chrome","http://localhost:3468"])

print('\n\nTensorboard command:\ntensorboard --port 3468 --logdir=' +
      summariesPath + '\n\n')

model.fit_generator(train_generator.generator(),
                    steps_per_epoch=train_generator.steps_per_epoch,
                    initial_epoch=init_epoch,
                    epochs=cfg['train_epochs'],
                    validation_data=val_generator.generator(),