import math
import os

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab          # mlab.normpdf requires matplotlib < 3.1
from hmmlearn import hmm

# read_wave, read_raw, get_mfcc, emHMM_algorithm and viterbi are project helpers
# assumed to be defined (or imported) elsewhere in this module.


def train_classifier(dataset, n_classes, Fs=16000):
    """Extract MFCC features from every .wav/.raw file in `dataset` and fit an HMM."""
    datalist = os.listdir(dataset)
    n_data_set = len(datalist)
    for i in range(n_data_set):
        filepath = dataset + '/' + datalist[i]
        if datalist[i].endswith('.wav'):
            try:
                [x, Fs, n_channels, n_samples] = read_wave(filepath)
            except Exception as e:
                print(e)
                continue
        elif datalist[i].endswith('.raw'):
            try:
                x = read_raw(filepath)
                if len(x.shape) > 1:
                    n_channels = x.shape[1]
                    n_samples = x.shape[0]
                else:
                    n_channels = 1
                    n_samples = len(x)
            except Exception as e:
                print(e)
                continue
        else:
            continue  # skip files that are neither .wav nor .raw
        features = get_mfcc(x, Fs)
        dimension = features.shape[1]
        size = features.shape[0]
        emHMM_algorithm(features, dimension, 2, size)
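# Illustrative sketch only: train_classifier depends on a read_raw helper that is
# not shown here. The loader below is a hypothetical stand-in that assumes
# headerless 16-bit little-endian PCM; the project's actual read_raw may differ.
def read_raw_sketch(filepath, n_channels=1, dtype=np.int16):
    """Hypothetical raw-PCM loader returning float samples scaled to [-1, 1]."""
    samples = np.fromfile(filepath, dtype=dtype)
    if n_channels > 1:
        # Interleaved channels -> shape (n_samples, n_channels)
        samples = samples.reshape(-1, n_channels)
    return samples.astype(np.float32) / np.iinfo(dtype).max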
def main():
    # dimension, number_of_components and size are assumed to be module-level globals.
    if dimension == 1:
        gmm = np.zeros((1, number_of_components * size), dtype=float)
        mu = np.zeros((number_of_components, 1), dtype=float)
        sigma = np.zeros((number_of_components, 1, 1), dtype=float)
        matrix = np.zeros((number_of_components, number_of_components), dtype=float)
        # for i in range(number_of_components):
        #     gmm[i*size:(i+1)*size], mu[i, 0], sigma[i, 0, 0] = create_data(dimension, size, i)
    else:
        gmm = np.zeros((dimension, number_of_components * size), dtype=float)
        mu = np.zeros((number_of_components, dimension), dtype=float)
        sigma = np.zeros((number_of_components, dimension, dimension), dtype=float)
        matrix = np.zeros((number_of_components, number_of_components), dtype=float)
        # for i in range(number_of_components):
        #     gmm[:, i*size:(i+1)*size], mu[i, :], sigma[i, :, :] = create_data(dimension, size, i)

    # Build a reference HMM and sample observations (gmm) and hidden states (Z) from it.
    weights = np.array([0.6, 0.4])
    matrix = np.array([[0.7, 0.3], [0.1, 0.9]])
    model = hmm.GaussianHMM(n_components=2, covariance_type="full")
    model.startprob_ = weights
    model.transmat_ = matrix
    model.means_ = mu
    model.covars_ = sigma
    gmm, Z = model.sample(number_of_components * size)

    # Re-estimate the parameters from the sampled data.
    means, variances, pi, a = emHMM_algorithm(gmm, dimension, number_of_components,
                                              number_of_components * size)

    # num_bins = 50
    # n, bins, patches = plt.hist(gmm, num_bins, normed=1, facecolor='green', alpha=0.5)
    # for i in range(number_of_components):        # add a 'best fit' line
    #     y = mlab.normpdf(bins, means[i], variances[i])
    #     plt.plot(bins, y, 'r--')
    # plt.xlabel('Values')
    # plt.ylabel('Probability')
    # plt.title('Data Histogram vs predicted distribution')
    # plt.subplots_adjust(left=0.15)               # prevent clipping of ylabel
    # plt.show()

    # Evaluate the emission probabilities b[i, n] = P(observation n | state i).
    b = np.zeros((number_of_components, number_of_components * size))
    if dimension == 1:
        for i in range(number_of_components):
            pdf = pi[i] * mlab.normpdf(gmm, means[i], variances[i, 0])
            b[i, :] = pdf[:, 0]
    else:
        centered_data = np.zeros((number_of_components, number_of_components * size, dimension))
        den = np.zeros((number_of_components, number_of_components * size))
        num = np.zeros((number_of_components, number_of_components * size))
        for i in range(number_of_components):
            for n in range(number_of_components * size):
                centered_data[i, n, :] = gmm[n, :] - means[i, :]
                den[i, n] = np.sqrt((2 * math.pi) ** dimension * np.linalg.det(variances[i, :, :]))
                num[i, n] = np.exp(-0.5 * np.dot(np.dot(centered_data[i, n, :][np.newaxis],
                                                        np.linalg.inv(variances[i, :, :])),
                                                 centered_data[i, n, :][:, np.newaxis]))
                b[i, n] = num[i, n] / den[i, n]

    # Predict the most likely state path and compare it with the true state sequence Z.
    path, x, y = viterbi(size * number_of_components, a, b, pi)
    plt.figure()
    plt.plot(path[0, :], 'ro')
    plt.plot(path[0, :], 'r')
    plt.plot(Z, 'g')
    plt.show()

    if dimension == 1:
        print("initial means:", mu[:, 0], "\ninitial variances:", sigma[:, 0, 0],
              "\ninitial weights:", weights, "\n")
    else:
        print("initial means:", mu, "\ninitial variances:", sigma,
              "\ninitial weights:", weights, "\n")
    print("means:", means, "\nsigmas:", variances, "\nweights:", pi, "\n")
    print("initial mixing matrix:", matrix, "\n")
    print("mixing matrix:", a, "\n")