#                 |
#                 V
#
#   ******** Fully connected layer
#                 |
#                 V
#
#   ABC Per-instrument classification neurons (outputs)
#
# We vary the following parameters to find the best accuracy.
# - MFCC computation time intervals: 5ms, 10ms, 20ms, 25ms
#   (matching various of the intervals in papers above)
# - MFCC intervals in a sliding window presented to the neural network: 3, 4, 5
#   (i.e. 15ms up to 125ms when multiplied by the time intervals).
# - Training batch size

# Build the instrument collection from the samples directory. The second
# argument is an empty list here; NOTE(review): presumably that means "no
# predefined label ordering" - confirm against InstrumentLoader.
instruments = InstrumentLoader(samplesDirPath, [])

# Report the extremes of MFCC row counts across the loaded instruments.
Log(
    "Max, min MFCC rows across all instruments: ",
    instruments.maxMfccRows,
    instruments.minMfccRows,
)

# Report how many samples share each MFCC row count, in ascending order of
# row count. When exactly one sample has a given length, its wav file name is
# appended in parentheses so that sample can be identified.
Log("Number of instruments by length in MFCC rows:")
for mfccRows, samples in sorted(instruments.mfccLenToSamplesMap.items()):
    singleSampleSuffix = (
        '(' + os.path.basename(samples[0].wavPath) + ')'
        if len(samples) == 1
        else ''
    )
    Log(" ", mfccRows, ": ", len(samples), singleSampleSuffix)

# Abort the run when any wav file has a sample rate below the configured
# minimum.
if instruments.minWavHz < wavMinAllowedHz:
    print(
        "ERROR: One or more wav files found with rate in Hz less than configured minimum. Min found:",
        instruments.minWavHz,
        " allowed min:",
        wavMinAllowedHz,
    )
    exit(1)
import json
import os
import sys

import keras.models

from KerasTensorFlowAnalyzer import KerasTensorFlowAnalyzer
import MfccComparisonAnalyzer
from SoundStreamAnalyzer import SoundStreamAnalyzer

# NOTE(review): InstrumentLoader is used below but is not imported anywhere in
# the visible part of this script - confirm it is brought into scope elsewhere,
# or add the appropriate project import.

# Command-line entry: all four positional arguments are required.
if len(sys.argv) < 5:
    print('Usage:')
    print(' <wav-file-path> <instruments-folder-path> <model-file-path> <model-params-json-path>')
    # sys.exit is used rather than the bare exit() builtin, which is only
    # injected by the site module and is not guaranteed to exist.
    sys.exit(1)

wavFilePath = sys.argv[1]
instrumentsFolderPath = sys.argv[2]
modelFilePath = sys.argv[3]
modelParamsPath = sys.argv[4]

trainedModel = keras.models.load_model(modelFilePath)

# See SoundModelParams.py
# The context manager closes the params file as soon as it has been parsed.
with open(modelParamsPath) as f:
    modelParams = json.load(f)

# The "instruments" entry of the params file is passed to InstrumentLoader as
# the label list for the results.
orderedResultInstrumentLabels = modelParams["instruments"]
print("Ordered labels:", orderedResultInstrumentLabels)

instruments = InstrumentLoader(instrumentsFolderPath, orderedResultInstrumentLabels)

# Run the trained Keras model alongside the MFCC-comparison analyzers built
# from the loaded instruments.
analyzers = [
    KerasTensorFlowAnalyzer(trainedModel, modelParams),
] + list(MfccComparisonAnalyzer.constructFromInstruments(instruments))

# NOTE(review): 0.9 is presumably a match threshold - confirm against
# SoundStreamAnalyzer's constructor.
analyzer = SoundStreamAnalyzer(wavFilePath, instruments, analyzers, 0.9)

# The return value of getMatches() is not used here.
analyzer.getMatches()