Example #1
def predict_wav(input_file,
                window_size=WINDOW_SIZE,
                fft_size=FFT_SIZE,
                hop_size=HOP_SIZE):

    time_limit = 132300  #number of samples to keep from the start of the file
    sr, input_vector = uf.wavread(input_file)
    input_vector = input_vector[:time_limit]
    mags = uf.gen_mags_spectrum(input_vector)
    mags = np.asarray(mags)
    mags = mags / np.max(mags)  #normalize to the maximum magnitude
    reshaped_spectrum = mags.reshape(1, mags.shape[0], mags.shape[1], 1)
    prediction = conv_model.predict(reshaped_spectrum)

    if prediction[0][0] >= prediction[0][1]:
        verdict = 'CHAOS'
    else:
        verdict = 'ORDERED'

    outstring = 'chaos_prob: ' + str(
        prediction[0][0]) + ' | order_prob: ' + str(
            prediction[0][1]) + " | verdict: " + verdict

    print(prediction)
    print(outstring)
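uf.gen_mags_spectrum is defined elsewhere in the project; as a hedged sketch only, it could be approximated with a plain STFT magnitude computed from the same window constants (the helper name, orientation and scaling here are assumptions, not the actual uf code):

#hypothetical sketch of uf.gen_mags_spectrum: plain STFT magnitude frames
import numpy as np
import librosa

def gen_mags_spectrum_sketch(samples, window_size=WINDOW_SIZE,
                             fft_size=FFT_SIZE, hop_size=HOP_SIZE):
    #complex STFT with shape (1 + fft_size // 2, num_frames)
    stft = librosa.stft(samples, n_fft=fft_size,
                        win_length=window_size, hop_length=hop_size)
    #magnitude spectrogram, transposed to (num_frames, num_bins)
    return np.abs(stft).T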
Example #2
def create_synth_datapoint(addr, tags, data, client_address):
    '''
    -take the recorded wav file through ssh (generated by gen_random_synth_settings > Max)
    -segment the file to fit the shape required by the classification NN model
    -predict chaos class of every segment and compute the mean
    '''
    global wait_status
    global temp_datapoint_settings
    global temp_datapoint_verdict
    time.sleep(0.5)
    bash_string = "scp " + TEMP_SAMPLE_CLIENT + " " + TEMP_SAMPLE_SERVER
    os.system(bash_string)  #get sound through ssh
    sr, temp_sample = uf.wavread(TEMP_SAMPLE_SERVER)
    loudness = np.sqrt(np.mean(temp_sample**2))
    if loudness >= 0.05:
        verdicts = []
        segment_matrix = uf.cutter(temp_sample, DUR)  #segment temp_sample

        with graph.as_default():  #use the graph on the other thread, otherwise it doesn't run
            for segment in segment_matrix:  #classify every sound segment
                temp_verdict = conv.predict_vector_regression(segment)
                temp_verdict = np.argmax(temp_verdict)
                verdicts.append(temp_verdict)

        verdict = int(np.round(np.mean(verdicts)))  #mean of the per-segment verdicts
        print("Verdict: " + str(verdict))
        temp_datapoint_settings = data
        temp_datapoint_verdict = verdict
        wait_status = "go!"  #tells create_synth_dataset to proceed
    else:
        print("Sound too low: discarded")
        print("")
        gen_random_synth_settings()
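uf.cutter is not shown; a minimal sketch of a segmenter with the behaviour assumed above (non-overlapping chunks of DUR samples, trailing remainder dropped) could look like this — an assumption, not the real helper:

#hypothetical sketch of uf.cutter: split a 1-D signal into fixed-length segments
import numpy as np

def cutter_sketch(samples, segment_len):
    num_segments = len(samples) // segment_len              #full segments only
    trimmed = np.asarray(samples)[:num_segments * segment_len]  #drop the trailing remainder
    return trimmed.reshape(num_segments, segment_len)       #(num_segments, segment_len)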
Example #3
def main(num_extensions, input_dir, output_dir):
    #creates alternative versions of every sound in a dataset, trying to preserve the chaos/order character
    contents = os.listdir(input_dir)

    num_sounds = len([x for x in contents if x.endswith(".wav")])  #count the input .wav files

    count = 0  #processed input files count
    for file in os.listdir(input_dir):
        if file.endswith(".wav"):  #process only .wav files
            in_file = os.path.join(input_dir, file)
            output_filename = file.split('.')[0] + '.original.wav'
            out_file = os.path.join(output_dir, output_filename)
            sr, original_sound = uf.wavread(in_file)
            #normalize original sound
            original_sound = np.divide(original_sound, np.max(original_sound))
            original_sound = np.multiply(original_sound, 0.9)
            #copyfile(in_file, out_file)
            uf.wavwrite(original_sound, global_sr, out_file)
            status = [
                num_sounds, count
            ]  #pass total input file count and current processed file count to extend_datapoint()
            extend_datapoint(file_name=in_file,
                             output_dir=output_dir,
                             num_extensions=num_extensions,
                             status=status)  #create extension files

            count = count + 1  #advance the processed input files count

    print('Dataset successfully augmented from ' + str(num_sounds) + ' to ' +
          str(num_sounds * num_extensions + num_sounds) + ' sounds')
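The entry point that calls main() is not included; a hedged sketch of a possible command-line wrapper (the script name in the usage comment and the argument order are assumptions):

#hypothetical command-line wrapper for main(); argument order is an assumption
import sys

if __name__ == '__main__':
    #usage: python extend_dataset.py <num_extensions> <input_dir> <output_dir>
    main(num_extensions=int(sys.argv[1]),
         input_dir=sys.argv[2],
         output_dir=sys.argv[3])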
Example #4
def predict_wav_mfcc(input_file,
                     window_size=WINDOW_SIZE,
                     fft_size=FFT_SIZE,
                     hop_size=HOP_SIZE):

    sr, input_vector = uf.wavread(input_file)
    input_vector = input_vector[:DUR]
    MFCCS = mfcc(input_vector, sr=sr, n_mfcc=NUM_MFCCS)
    print(MFCCS.shape)
    reshaped_MFCCS = MFCCS.reshape(1, MFCCS.shape[1], MFCCS.shape[0], 1)
    prediction = conv_model.predict(reshaped_MFCCS)

    print(prediction)
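librosa's mfcc returns an array shaped (n_mfcc, num_frames), so the reshape above refolds the values rather than swapping the axes; if the model expects time-major frames, a transpose-based variant (a sketch, not the original pipeline) keeps each frame intact:

#hypothetical time-major variant: transpose instead of refolding, assuming the
#model was trained on input shaped (1, num_frames, NUM_MFCCS, 1)
time_major = MFCCS.T                                   #(num_frames, NUM_MFCCS)
reshaped_MFCCS = time_major.reshape(1, time_major.shape[0],
                                    time_major.shape[1], 1)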
Example #5
def process_sound(soundfile):
    sr, samples = uf.wavread(soundfile)
    dur = len(samples)
    samples = uf.strip_silence(samples,
                               threshold=31)  #cut initial and final silence
    dur_stripped = len(samples)

    hfc, centroid, energy, F0, salience = compute_description(samples, fs=sr)

    yell_factor = compute_yell_factor(energy, F0, centroid)
    F0 = cut_silence(F0, threshold=0.05)
    salience = squarify(salience)

    mean_hfc = np.mean(hfc)
    std_hfc = np.std(hfc)
    mean_centroid = np.mean(centroid)
    std_centroid = np.std(centroid)
    mean_energy = np.mean(energy)
    std_energy = np.std(energy)
    mean_F0 = np.mean(F0)
    std_F0 = np.std(F0)
    mean_yell_factor = np.mean(yell_factor)
    std_yell_factor = np.std(yell_factor)
    perc_salience = np.mean(salience)

    description = {
        'mean_hfc': mean_hfc,
        'std_hfc': std_hfc,
        'mean_centroid': mean_centroid,
        'std_centroid': std_centroid,
        'mean_energy': mean_energy,
        'std_energy': std_energy,
        'mean_F0': mean_F0,
        'std_F0': std_F0,
        'mean_yell_factor': mean_yell_factor,
        'std_yell_factor': std_yell_factor,
        'perc_salience': perc_salience,
        'dur': dur,
        'dur_stripped': dur_stripped
    }

    return description
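A hedged usage sketch: building a table of descriptions for every wav file in a folder (the folder path and the loop are assumptions, not part of the original script):

#hypothetical usage: describe every .wav file in a folder
import os

sound_dir = '/path/to/sounds'  #assumed location
descriptions = {}
for name in os.listdir(sound_dir):
    if name.endswith('.wav'):
        descriptions[name] = process_sound(os.path.join(sound_dir, name))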
Example #6
def extend_datapoint(file_name, output_dir, num_extensions=1, status=[1, 0]):
    #creates alternative versions of a single sound, trying to preserve the chaos/order character

    internal_sr = 44100
    sound_name = file_name.split('/')[-1]
    sound_string = sound_name[:-4]
    label_string = sound_name[-4:]
    label = label_string[1]
    sr, vector_input = uf.wavread(file_name)

    #resample to 44100 for better filters
    vector_input = librosa.core.resample(vector_input, SR, internal_sr)

    DUR = len(vector_input)
    funcs = [random_stretch, bg_noise, random_eq]  #augmentation functions defined elsewhere in this module

    for new_sound in range(num_extensions):

        np.random.shuffle(funcs)  #scramble order of functions
        rev_prob = np.random.randint(1, 3)  #half of the files will have reverb (upper bound of randint is exclusive)
        rand_samp_prob = np.random.randint(
            1, 3)  #half of the files would get random samples (disabled below)
        num_nodes = np.random.randint(1, 4)  #apply 1 to 3 augmentation nodes
        random_appendix = np.random.randint(
            10000
        )  #random number appended to the filename, so the validation split of the dataset is composed of random sounds

        #apply the first num_nodes augmentation functions in the shuffled order
        vector_output = vector_input
        for node_func in funcs[:num_nodes]:
            vector_output = node_func(vector_output, dur=DUR)

        if rev_prob == 1:
            vector_output = random_rev(vector_output, dur=DUR)
        '''
        if rand_samp_prob == 1:
            vector_output = random_samples(vector_output, dur=DUR)
        '''

        if NORMALIZATION:
            #output_normalization
            vector_output = np.divide(vector_output, np.max(vector_output))
            vector_output = np.multiply(vector_output, 0.8)

        #resample to original sr
        vector_output = librosa.core.resample(vector_output, internal_sr, SR)

        #formatting strings to print
        success_string = sound_string + ' augmented: ' + str(
            new_sound + 1)  #describe the last processed sound
        infolder_num_files = status[0]  #number of input files to extend
        current_batch = status[1]  #count of processed input files
        total_num_files = infolder_num_files * num_extensions  #total number of datapoint extension files to create
        current_processed_file = (num_extensions * current_batch) + (
            new_sound + 1)  #number of currently processed files
        perc_progress = (current_processed_file * 100
                         ) / total_num_files  #compute percentage of progress
        status_string = 'status: ' + str(
            perc_progress) + '% | ' + 'processed files: ' + str(
                current_processed_file) + '/' + str(
                    total_num_files)  #format progress string

        sound_name = sound_name.split('.')[0]
        output_file_name = output_dir + '/' + sound_name + '.' + str(
            random_appendix) + '.augmented_' + str(
                new_sound + 1) + '.mp4.wav'  #build output file name with the random appendix
        uf.wavwrite(vector_output, global_sr,
                    output_file_name)  #create output file
        #print progress and status strings
        print(success_string)
        print(status_string)
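random_stretch, bg_noise and random_eq are defined elsewhere in the module; to illustrate the interface they share (signal in, dur keyword, signal out), here is a hedged sketch of what an additive-noise node might look like — the gain range is an assumption, not the original implementation:

#hypothetical sketch of an augmentation node with the same call signature;
#the noise level is an assumption, not the original bg_noise
import numpy as np

def bg_noise_sketch(samples, dur):
    gain = np.random.uniform(0.001, 0.01)         #assumed background noise level
    noise = np.random.normal(0.0, 1.0, size=dur)  #white Gaussian noise
    return samples[:dur] + gain * noise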