Beispiel #1
0
def about():
    """Render the About page with a 3D spectrogram of a random test clip.

    A clip index is drawn uniformly from ``[0, 4176)`` (presumably the size
    of the 'test' partition -- confirm against the dataset), its log
    spectrogram is standardised along axis 0, and the result is embedded in
    ``about.html`` as a Plotly surface ``<div>``.
    """
    # Pick the clip first so the random draw happens before any feature I/O.
    clip_index = np.random.randint(0, 4176)
    _text, _feature, _audio_path, sample_rate, samples = make_predictions.vis_audio_features(
        index=clip_index, partition='test')

    # Only the spectrogram itself is plotted; the frequency/time vectors
    # returned alongside it are not used here.
    _freqs, _times, spectrogram = make_predictions.log_spectrogram_feature(
        samples, sample_rate)

    # Standardise with statistics taken along axis 0.
    column_mean = np.mean(spectrogram, axis=0)
    column_std = np.std(spectrogram, axis=0)
    spectrogram = (spectrogram - column_mean) / column_std

    # Build the surface plot: amplitude over the two spectrogram axes.
    surface = go.Surface(z=spectrogram.T, colorscale='Viridis')
    figure_layout = go.Layout(title='3D Spectrogram',
                              autosize=True,
                              width=700,
                              height=700,
                              margin=dict(l=50, r=50, b=50, t=50))
    figure = go.Figure(data=[surface], layout=figure_layout)

    # plotly.js is not bundled into the div (include_plotlyjs=False), so the
    # template must load it itself. Markup keeps the div from being escaped.
    plot_div = plot(figure, output_type='div', include_plotlyjs=False)
    return render_template('about.html', spectrogram_3d=Markup(plot_div))
def about():
    """Render the About page with a 3D spectrogram of test clip 95.

    The clip's log spectrogram is standardised along axis 0 and drawn as a
    Plotly surface, which is embedded in ``about.html`` as a ``<div>``.
    """
    vis_text, vis_spectrogram_feature, vis_audio_path, sample_rate, samples = make_predictions.vis_audio_features(
        index=95, partition='test')
    freqs, times, log_spectrogram = log_spectrogram_feature(
        samples, sample_rate)
    mean = np.mean(log_spectrogram, axis=0)
    std = np.std(log_spectrogram, axis=0)
    log_spectrogram = (log_spectrogram - mean) / std

    def plot_3d_spectrogram(log_spectrogram):
        """Return an HTML ``<div>`` holding the 3D surface plot."""
        # Give the surface real coordinates instead of passing the full
        # coordinate vectors as the axis ``range`` (which must be a
        # [min, max] pair). With x/y set, Plotly derives the axis extents
        # from the data and the tick values match the axis titles.
        # NOTE(review): assumes log_spectrogram is (time, frequency), so
        # that its transpose has one row per entry of ``freqs`` -- confirm
        # against log_spectrogram_feature.
        data = [go.Surface(x=times,
                           y=freqs,
                           z=log_spectrogram.T,
                           colorscale='Viridis')]
        layout = go.Layout(
            title='3D Spectrogram',
            scene=dict(
                yaxis=dict(title='Frequency'),
                xaxis=dict(title='Time (s)'),
                zaxis=dict(title='Log Amplitude'),
            ),
        )
        fig = go.Figure(data=data, layout=layout)
        # plotly.js is not bundled into the div; the template must load it.
        div_output = plot(fig, output_type='div', include_plotlyjs=False)
        return div_output

    # Markup keeps the generated div from being HTML-escaped by the template.
    spectrogram_3d = Markup(plot_3d_spectrogram(log_spectrogram))

    return render_template('about.html',
                           title='Hey, Jetson!',
                           spectrogram_3d=spectrogram_3d)
Beispiel #3
0
def index():
    """Render the inference page and, on a valid submission, its results.

    On a valid ``InferenceForm`` submission the view fetches the ground-truth
    and predicted transcriptions for the chosen instance, renders the raw
    waveform and spectrogram as base64-encoded PNGs, and computes the cosine
    similarity and word error rate between the two transcriptions. Without a
    valid submission every result value passed to the template is ``None``.
    """
    form = InferenceForm()

    truth_transcription = None
    prediction_transcription = None
    raw_plot = None
    raw_shape = None
    spectrogram_plot = None
    spectrogram_shape = None
    error_rate = None
    similarity = None

    def figure_to_base64(fig):
        """Encode *fig* as a base64 PNG string and release the figure."""
        buffer = BytesIO()
        try:
            fig.savefig(buffer, format='png')
        finally:
            # pyplot keeps every figure alive until it is closed; without
            # this each request would leak a figure in the server process.
            plt.close(fig)
        return base64.b64encode(buffer.getvalue()).decode('utf8')

    def plot_raw_audio(vis_raw_audio):
        """Plot the raw audio signal and return it as a base64 PNG string."""
        fig = plt.figure(figsize=(7, 3))
        ax = fig.add_subplot(111)
        steps = len(vis_raw_audio)
        ax.plot(np.linspace(1, steps, steps), vis_raw_audio)
        ax.set_title('Raw Audio Signal')
        ax.set_xlabel('Time')
        ax.set_ylabel('Amplitude')
        return figure_to_base64(fig)

    def plot_spectrogram_feature(vis_spectrogram_feature):
        """Plot the spectrogram feature and return it as a base64 PNG string."""
        fig = plt.figure(figsize=(7, 3))
        ax = fig.add_subplot(111)
        im = ax.imshow(vis_spectrogram_feature, cmap=plt.cm.jet, aspect='auto')
        ax.set_title('Spectrogram')
        ax.set_ylabel('Time')
        ax.set_xlabel('Frequency')
        # Attach the colour bar in its own narrow axes to the right.
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        fig.colorbar(im, cax=cax)
        return figure_to_base64(fig)

    def wer_calc(ref, pred):
        """Return the word error rate of *pred* against *ref*, in percent.

        Strings are split on whitespace so the edit distance is counted in
        words rather than characters; other sequences are compared element
        by element. With an empty reference the distance is reported
        against a length of one instead of dividing by zero.
        """
        ref_words = ref.split() if isinstance(ref, str) else list(ref)
        pred_words = pred.split() if isinstance(pred, str) else list(pred)
        rows = len(ref_words) + 1
        cols = len(pred_words) + 1

        # d[i][j] is the edit distance between the first i reference words
        # and the first j predicted words.
        d = np.zeros((rows, cols), dtype=int)
        d[0, :] = np.arange(cols)
        d[:, 0] = np.arange(rows)
        for i in range(1, rows):
            for j in range(1, cols):
                if ref_words[i - 1] == pred_words[j - 1]:
                    d[i][j] = d[i - 1][j - 1]
                else:
                    substitution = d[i - 1][j - 1] + 1
                    insertion = d[i][j - 1] + 1
                    deletion = d[i - 1][j] + 1
                    d[i][j] = min(substitution, insertion, deletion)
        return float(d[rows - 1][cols - 1]) / max(len(ref_words), 1) * 100

    if form.validate_on_submit():
        partition = form.partition.data
        instance_number = form.instance_number.data

        truth_transcription = make_predictions.get_ground_truth(
            index=instance_number,
            partition=partition,
            input_to_softmax=make_predictions.final_keras,
            model_path='./results/final_keras.h5')
        prediction_transcription = make_predictions.get_prediction(
            index=instance_number,
            partition=partition,
            input_to_softmax=make_predictions.final_keras,
            model_path='./results/final_keras.h5')

        vis_text, vis_raw_audio, vis_spectrogram_feature, vis_audio_path = make_predictions.vis_audio_features(
            index=instance_number, partition=partition)

        raw_plot = plot_raw_audio(vis_raw_audio)
        raw_shape = 'The shape of the waveform of the chosen audio file: ' + str(vis_raw_audio.shape)

        spectrogram_plot = plot_spectrogram_feature(vis_spectrogram_feature)
        spectrogram_shape = 'The shape of the spectrogram of the chosen audio file: ' + str(vis_spectrogram_feature.shape)

        # The vocabulary is fitted on the ground truth only, so predicted
        # words that never occur in the truth are ignored by the similarity.
        cv = CountVectorizer()
        ground_truth_vec = cv.fit_transform([truth_transcription])
        pred_transcription_vec = cv.transform([prediction_transcription])
        similarity = cosine_similarity(ground_truth_vec, pred_transcription_vec)

        error_rate = wer_calc(truth_transcription, prediction_transcription)

    return render_template('index.html',
                           title='Hey, Jetson!',
                           form=form,
                           truth_transcription=truth_transcription,
                           prediction_transcription=prediction_transcription,
                           raw_plot=raw_plot,
                           raw_shape=raw_shape,
                           spectrogram_plot=spectrogram_plot,
                           spectrogram_shape=spectrogram_shape,
                           error_rate=error_rate,
                           similarity=similarity)
Beispiel #4
0
def visualization():
    """Render the visualization page and, on a valid submission, its results.

    On a valid ``VisualizationForm`` submission the view:

    * fetches ground-truth and predicted transcriptions from the selected
      model ('model_10', otherwise model 8),
    * renders the waveform, spectrogram, log-spectrogram and 3D spectrogram,
    * posts the audio to the Microsoft Speech endpoint and extracts the
      first recognition hypothesis, and
    * derives the relative audio path for the page's audio player.

    Without a valid submission every result value passed to the template is
    ``None``.
    """
    # Initializing form for user input
    visualization_form = VisualizationForm()
    # Initializing variables passed to HTML files
    truth_transcription = None
    prediction_transcription = None
    raw_plot = None
    spectrogram_plot = None
    spectrogram_shape = None
    log_spectrogram_plot = None
    spectrogram_3d = None
    cortana_transcription = None
    confidence = None
    lexical = None
    itn = None
    maskeditn = None
    display = None
    play_audio = None

    def plot_3d_spectrogram(log_spectrogram):
        """Return an HTML ``<div>`` with a 3D surface of the spectrogram."""
        data = [go.Surface(z=log_spectrogram.T, colorscale='Viridis')]
        layout = go.Layout(title='3D Spectrogram',
                           autosize=True,
                           width=700,
                           height=700,
                           margin=dict(l=50, r=50, b=50, t=50))
        fig = go.Figure(data=data, layout=layout)
        # plotly.js is not bundled into the div; the template must load it.
        return plot(fig, output_type='div', include_plotlyjs=False)

    # Form for visualization engine
    if visualization_form.validate_on_submit():
        v_model_number = visualization_form.viz_model_number.data
        v_partition = visualization_form.viz_partition.data
        v_instance_number = visualization_form.viz_instance_number.data

        # The two model choices differ only in architecture and weights file.
        if v_model_number == 'model_10':
            input_to_softmax = make_predictions.model_10
            model_path = './results/model_10.h5'
        else:
            input_to_softmax = make_predictions.model_8
            model_path = './results/model_8.h5'

        # Get ground truth and predicted transcriptions
        truth_transcription = make_predictions.get_ground_truth(
            index=v_instance_number,
            partition=v_partition,
            input_to_softmax=input_to_softmax,
            model_path=model_path)
        prediction_transcription = make_predictions.get_prediction(
            index=v_instance_number,
            partition=v_partition,
            input_to_softmax=input_to_softmax,
            model_path=model_path)

        # Get features for visualizations
        vis_text, vis_spectrogram_feature, vis_audio_path, sample_rate, samples = make_predictions.vis_audio_features(
            index=v_instance_number, partition=v_partition)
        # Plot the audio waveform
        raw_plot = make_predictions.plot_raw_audio(sample_rate, samples)
        # Plot the spectrogram of the audio file
        spectrogram_plot = make_predictions.plot_spectrogram_feature(
            vis_spectrogram_feature)
        spectrogram_shape = 'The shape of the spectrogram of the chosen audio file: ' + str(
            vis_spectrogram_feature.shape)
        # 2nd way to plot the spectrogram: log spectrogram standardised
        # along axis 0.
        freqs, times, log_spectrogram = make_predictions.log_spectrogram_feature(
            samples, sample_rate)
        mean = np.mean(log_spectrogram, axis=0)
        std = np.std(log_spectrogram, axis=0)
        log_spectrogram = (log_spectrogram - mean) / std
        log_spectrogram_plot = make_predictions.plot_log_spectrogram_feature(
            freqs, times, log_spectrogram)

        # 3d spectrogram plot; Markup keeps the div from being HTML-escaped.
        spectrogram_3d = Markup(plot_3d_spectrogram(log_spectrogram))

        # Connecting to Microsoft Speech API for Cortana's predicted transcription
        filepath = make_predictions.azure_inference(index=v_instance_number,
                                                    partition=v_partition)
        # The chunk generator is fully consumed while the request is sent,
        # so the file can be closed as soon as post() returns.
        with open(filepath, 'rb') as audiofile:
            response = requests.post(
                'https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1',
                headers=headers,
                params=params,
                data=make_predictions.read_in_chunks(audiofile))
        cortana_transcription = response.content

        # An error response may not carry a JSON body, and a response without
        # recognised speech may omit "NBest" (NOTE(review): confirm against
        # the service documentation). In both cases the hypothesis fields
        # stay None and the page still renders.
        try:
            val = json.loads(response.text)
        except ValueError:
            val = {}
        nbest = val.get("NBest") if isinstance(val, dict) else None
        if nbest:
            best = nbest[0]
            confidence = best.get("Confidence")
            lexical = best.get("Lexical")
            itn = best.get("ITN")
            maskeditn = best.get("MaskedITN")
            display = best.get("Display")

        # Serve the audio file for the audio player
        play_audio = filepath.replace("/home/brice/Hey-Jetson/app/", "")

    # Render the html page.
    return render_template('visualization.html',
                           visualization_form=visualization_form,
                           truth_transcription=truth_transcription,
                           prediction_transcription=prediction_transcription,
                           raw_plot=raw_plot,
                           spectrogram_plot=spectrogram_plot,
                           log_spectrogram_plot=log_spectrogram_plot,
                           spectrogram_shape=spectrogram_shape,
                           spectrogram_3d=spectrogram_3d,
                           cortana_transcription=cortana_transcription,
                           confidence=confidence,
                           lexical=lexical,
                           itn=itn,
                           maskeditn=maskeditn,
                           display=display,
                           play_audio=play_audio)
def index():
    """Render the inference page and, on a valid submission, its results.

    On a valid ``InferenceForm`` submission the view collects model 8's
    ground-truth and predicted transcriptions, three plots of the chosen
    clip, two cosine similarities (count and tf-idf vectors) and the word
    error rate. Without a valid submission every result value handed to
    ``index.html`` is ``None``.
    """
    form = InferenceForm()

    # Everything the template receives; results stay None until a form
    # has been submitted and validated.
    context = {
        'title': 'Hey, Jetson!',
        'form': form,
        'truth_transcription': None,
        'prediction_transcription': None,
        'raw_plot': None,
        'spectrogram_plot': None,
        'log_spectrogram_plot': None,
        'spectrogram_shape': None,
        'error_rate': None,
        'cv_similarity': None,
        'tfidf_similarity': None,
    }

    if form.validate_on_submit():
        chosen_partition = form.partition.data
        chosen_instance = form.instance_number.data

        # Ground truth and prediction share the same lookup arguments.
        lookup = dict(index=chosen_instance,
                      partition=chosen_partition,
                      input_to_softmax=make_predictions.model_8,
                      model_path='./results/model_8.h5')
        truth = make_predictions.get_ground_truth(**lookup)
        prediction = make_predictions.get_prediction(**lookup)

        # Features used by the plots below.
        _text, spectrogram_feature, _audio_path, sample_rate, samples = make_predictions.vis_audio_features(
            index=chosen_instance, partition=chosen_partition)

        # Waveform plot.
        waveform_png = plot_raw_audio(sample_rate, samples)

        # Spectrogram plot plus a human-readable note on its shape.
        spectrogram_png = plot_spectrogram_feature(spectrogram_feature)
        shape_note = 'The shape of the spectrogram of the chosen audio file: ' + str(
            spectrogram_feature.shape)

        # Log spectrogram, standardised along axis 0 before plotting.
        freqs, times, log_spec = log_spectrogram_feature(samples, sample_rate)
        axis_mean = np.mean(log_spec, axis=0)
        axis_std = np.std(log_spec, axis=0)
        log_spec = (log_spec - axis_mean) / axis_std
        log_spectrogram_png = plot_log_spectrogram_feature(
            freqs, times, log_spec)

        # Cosine similarity on raw term counts; the vocabulary is fitted on
        # the ground truth only.
        count_vectorizer = CountVectorizer()
        truth_counts = count_vectorizer.fit_transform([truth])
        prediction_counts = count_vectorizer.transform([prediction])
        count_similarity = cosine_similarity(truth_counts, prediction_counts)

        # Same comparison on tf-idf weighted vectors.
        tfidf_vectorizer = TfidfVectorizer()
        truth_tfidf = tfidf_vectorizer.fit_transform([truth])
        prediction_tfidf = tfidf_vectorizer.transform([prediction])
        weighted_similarity = cosine_similarity(truth_tfidf, prediction_tfidf)

        # Word error rate of the prediction against the ground truth.
        word_error_rate = wer_calc(truth, prediction)

        context.update(
            truth_transcription=truth,
            prediction_transcription=prediction,
            raw_plot=waveform_png,
            spectrogram_plot=spectrogram_png,
            log_spectrogram_plot=log_spectrogram_png,
            spectrogram_shape=shape_note,
            error_rate=word_error_rate,
            cv_similarity=count_similarity,
            tfidf_similarity=weighted_similarity,
        )

    return render_template('index.html', **context)