def predict_wav(input_file, window_size=WINDOW_SIZE, fft_size=FFT_SIZE, hop_size=HOP_SIZE):
    time_limit = 132300  #truncate input to 3 seconds at 44.1 kHz
    sr, input_vector = uf.wavread(input_file)
    input_vector = input_vector[:time_limit]
    mags = uf.gen_mags_spectrum(input_vector)
    mags = np.asarray(mags)
    mags = mags / np.max(mags)  #normalize magnitudes to [0, 1]
    reshaped_spectrum = mags.reshape(1, mags.shape[0], mags.shape[1], 1)  #add batch and channel dims
    prediction = conv_model.predict(reshaped_spectrum)
    if prediction[0][0] >= prediction[0][1]:
        verdict = 'CHAOS'
    else:
        verdict = 'ORDERED'
    outstring = 'chaos_prob: ' + str(prediction[0][0]) + ' | order_prob: ' + str(prediction[0][1]) + ' | verdict: ' + verdict
    print(outstring)
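#a minimal sketch of a magnitude-spectrum helper in the spirit of uf.gen_mags_spectrum,
#assuming a plain STFT with the module-level analysis parameters and that librosa and
#numpy (np) are imported as elsewhere in the repo; the real helper may window or scale differently
def gen_mags_spectrum_sketch(input_vector, fft_size=FFT_SIZE, hop_size=HOP_SIZE):
    stft = librosa.stft(input_vector, n_fft=fft_size, hop_length=hop_size)  #complex STFT
    return np.abs(stft).T  #magnitudes, one row per analysis frame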
def create_synth_datapoint(addr, tags, data, client_address):
    '''
    -fetch the recorded wav file through ssh (generated by gen_random_synth_settings > Max)
    -segment the file to fit the input shape required by the classification NN model
    -predict the chaos class of every segment and compute the mean
    '''
    global wait_status
    global temp_datapoint_settings
    global temp_datapoint_verdict
    time.sleep(0.5)
    bash_string = 'scp ' + TEMP_SAMPLE_CLIENT + ' ' + TEMP_SAMPLE_SERVER
    os.system(bash_string)  #fetch the sound through ssh
    sr, temp_sample = uf.wavread(TEMP_SAMPLE_SERVER)
    loudness = np.sqrt(np.mean(temp_sample ** 2))  #RMS loudness
    if loudness >= 0.05:
        verdicts = []
        segment_matrix = uf.cutter(temp_sample, DUR)  #segment temp_sample
        with graph.as_default():  #use the graph of the other thread, otherwise prediction doesn't run
            for segment in segment_matrix:  #classify every sound segment
                temp_verdict = conv.predict_vector_regression(segment)
                temp_verdict = np.argmax(temp_verdict)
                verdicts.append(temp_verdict)
        verdict = int(np.round(np.mean(verdicts)))  #mean of the per-segment verdicts
        print('Verdict: ' + str(verdict))
        temp_datapoint_settings = data
        temp_datapoint_verdict = verdict
        wait_status = 'go!'  #tells create_synth_dataset to proceed
    else:
        print('Sound too low: discarded')
        print('')
        gen_random_synth_settings()
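#a minimal sketch of a segmentation helper in the spirit of uf.cutter, assuming
#non-overlapping fixed-length windows with the trailing remainder discarded; the
#real helper may overlap or zero-pad instead
def cutter_sketch(samples, segment_len):
    num_segments = len(samples) // segment_len  #how many whole segments fit
    trimmed = samples[:num_segments * segment_len]
    return trimmed.reshape(num_segments, segment_len)  #one segment per row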
def main(num_extensions, input_dir, output_dir):
    #creates alternative versions of the sounds of an entire dataset, trying to preserve the chaos/order feature content
    contents = os.listdir(input_dir)
    num_sounds = len(list(filter(lambda x: x[-3:] == 'wav', contents)))
    count = 0  #count of processed input files
    for file in os.listdir(input_dir):
        if file[-3:] == 'wav':  #take only .wav files
            in_file = os.path.join(input_dir, file)
            output_filename = file.split('.')[0] + '.original.wav'
            out_file = os.path.join(output_dir, output_filename)
            sr, original_sound = uf.wavread(in_file)
            #normalize the original sound
            original_sound = np.divide(original_sound, np.max(original_sound))
            original_sound = np.multiply(original_sound, 0.9)
            uf.wavwrite(original_sound, global_sr, out_file)
            status = [num_sounds, count]  #pass the input folder's sound count and the current processed-files count to extend_datapoint()
            extend_datapoint(file_name=in_file, output_dir=output_dir, num_extensions=num_extensions, status=status)  #create the extension files
            count = count + 1  #update the processed-files count
    print('Dataset successfully augmented from ' + str(num_sounds) + ' to ' + str(num_sounds * num_extensions + num_sounds) + ' sounds')
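#example invocation (hypothetical directory names): write every original sound plus
#5 augmented versions of it into 'augmented_dataset'
#main(num_extensions=5, input_dir='original_dataset', output_dir='augmented_dataset')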
def predict_wav_mfcc(input_file, window_size=WINDOW_SIZE, fft_size=FFT_SIZE, hop_size=HOP_SIZE):
    sr, input_vector = uf.wavread(input_file)
    input_vector = input_vector[:DUR]  #truncate to the model's expected duration
    MFCCS = mfcc(y=input_vector, sr=sr, n_mfcc=NUM_MFCCS)
    print(MFCCS.shape)
    reshaped_MFCCS = MFCCS.reshape(1, MFCCS.shape[1], MFCCS.shape[0], 1)  #add batch and channel dims
    prediction = conv_model.predict(reshaped_MFCCS)
    print(prediction)
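#note: librosa.feature.mfcc returns an array of shape (n_mfcc, frames); if the model
#expects a time-major layout, a transpose (rather than a bare reshape, which scrambles
#the frame order) would be the safer way to swap the axes, e.g.:
#reshaped_MFCCS = MFCCS.T[np.newaxis, :, :, np.newaxis]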
def process_sound(soundfile):
    sr, samples = uf.wavread(soundfile)
    dur = len(samples)
    samples = uf.strip_silence(samples, threshold=31)  #cut initial and final silence
    dur_stripped = len(samples)
    hfc, centroid, energy, F0, salience = compute_description(samples, fs=sr)
    yell_factor = compute_yell_factor(energy, F0, centroid)
    F0 = cut_silence(F0, threshold=0.05)
    salience = squarify(salience)
    description = {
        'mean_hfc': np.mean(hfc),
        'std_hfc': np.std(hfc),
        'mean_centroid': np.mean(centroid),
        'std_centroid': np.std(centroid),
        'mean_energy': np.mean(energy),
        'std_energy': np.std(energy),
        'mean_F0': np.mean(F0),
        'std_F0': np.std(F0),
        'mean_yell_factor': np.mean(yell_factor),
        'std_yell_factor': np.std(yell_factor),
        'perc_salience': np.mean(salience),
        'dur': dur,
        'dur_stripped': dur_stripped
    }
    return description
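#example usage (hypothetical file name): build and inspect the description of one sound
#description = process_sound('scream_01.wav')
#print('mean F0: ' + str(description['mean_F0']) + ' | duration: ' + str(description['dur']))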
def extend_datapoint(file_name, output_dir, num_extensions=1, status=[1, 0]):
    #creates alternative versions of a single sound, trying to preserve the chaos/order feature content
    internal_sr = 44100
    sound_name = file_name.split('/')[-1]
    sound_string = sound_name[:-4]
    label_string = sound_name[-4:]
    label = label_string[1]
    sr, vector_input = uf.wavread(file_name)
    #resample to 44100 for better filters
    vector_input = librosa.core.resample(vector_input, orig_sr=SR, target_sr=internal_sr)
    DUR = len(vector_input)
    funcs = [random_stretch, bg_noise, random_eq]
    for new_sound in range(num_extensions):
        np.random.shuffle(funcs)  #scramble the order of the augmentation functions
        rev_prob = np.random.randint(1, 4)  #1/3 of the files will have reverb (high bound is exclusive)
        rand_samp_prob = np.random.randint(1, 4)  #1/3 of the files will have random samples
        num_nodes = np.random.randint(1, 4)  #chain 1 to 3 augmentation nodes
        random_appendix = np.random.randint(10000)  #random number to append to the filename, so the validation split of the dataset is composed of random sounds
        #feed the input through the randomly ordered augmentation chain
        vector_output = vector_input
        for node in range(num_nodes):
            vector_output = funcs[node](vector_output, dur=DUR)
        if rev_prob == 1:
            vector_output = random_rev(vector_output, dur=DUR)
        #disabled: random sample insertion
        #if rand_samp_prob == 1:
        #    vector_output = random_samples(vector_output, dur=DUR)
        if NORMALIZATION:  #output normalization
            vector_output = np.divide(vector_output, np.max(vector_output))
            vector_output = np.multiply(vector_output, 0.8)
        #resample back to the original sr
        vector_output = librosa.core.resample(vector_output, orig_sr=internal_sr, target_sr=SR)
        #format the progress strings
        success_string = sound_string + ' augmented: ' + str(new_sound + 1)  #describe the last processed sound
        infolder_num_files = status[0]  #number of input files to extend
        current_batch = status[1]  #count of processed input files
        total_num_files = infolder_num_files * num_extensions  #total number of extension files to create
        current_processed_file = (num_extensions * current_batch) + (new_sound + 1)  #number of currently processed files
        perc_progress = int(current_processed_file * 100 / total_num_files)  #percentage of progress
        status_string = 'status: ' + str(perc_progress) + '% | processed files: ' + str(current_processed_file) + '/' + str(total_num_files)
        base_name = sound_name.split('.')[0]
        output_file_name = output_dir + '/' + base_name + '.' + str(random_appendix) + '.augmented_' + str(new_sound + 1) + '.mp4.wav'  #build the output file name
        uf.wavwrite(vector_output, global_sr, output_file_name)  #create the output file
        #print progress and status
        print(success_string)
        print(status_string)
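#a minimal sketch of one augmentation node in the spirit of random_stretch, assuming
#librosa's phase-vocoder time stretching with a mild random rate; the real function may
#use a different range or algorithm, but every node must return a vector of length dur
def random_stretch_sketch(vector_input, dur):
    rate = np.random.uniform(0.8, 1.25)  #mild random tempo change
    stretched = librosa.effects.time_stretch(vector_input, rate=rate)
    if len(stretched) >= dur:
        return stretched[:dur]  #trim to the expected duration
    return np.pad(stretched, (0, dur - len(stretched)), mode='constant')  #zero-pad if shorter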