Example 1
def sentiment():
    # Initializing form for user input
    sentiment_form = SentimentForm()
    # Initializing variables passed to HTML files
    truth_transcription = None
    prediction_transcription = None
    cortana_transcription = None
    recognitionstatus = None
    offset = None
    duration = None
    nbest = None
    confidence = None
    lexical = None
    itn = None
    maskeditn = None
    display = None
    sentiments = None
    documents = None
    errors = None
    truth_score = None
    truth_id = None
    prediction_score = None
    prediction_id = None
    cortana_score = None
    cortana_id = None
    play_audio = None

    # Form for sentiment engine
    if sentiment_form.validate_on_submit():
        s_model_number = sentiment_form.sent_model_number.data
        s_partition = sentiment_form.sent_partition.data
        s_instance_number = sentiment_form.sent_instance_number.data
        # Get ground truth and predicted transcriptions
        if s_model_number == 'model_10':
            truth_transcription = make_predictions.get_ground_truth(
                index=s_instance_number,
                partition=s_partition,
                input_to_softmax=make_predictions.model_10,
                model_path='./results/model_10.h5')
            prediction_transcription = make_predictions.get_prediction(
                index=s_instance_number,
                partition=s_partition,
                input_to_softmax=make_predictions.model_10,
                model_path='./results/model_10.h5')
        else:
            truth_transcription = make_predictions.get_ground_truth(
                index=s_instance_number,
                partition=s_partition,
                input_to_softmax=make_predictions.model_8,
                model_path='./results/model_8.h5')
            prediction_transcription = make_predictions.get_prediction(
                index=s_instance_number,
                partition=s_partition,
                input_to_softmax=make_predictions.model_8,
                model_path='./results/model_8.h5')
        # Connecting to Microsoft Speech API for Cortana's predicted transcription
        filepath = make_predictions.azure_inference(index=s_instance_number,
                                                    partition=s_partition)
        with open(filepath, 'rb') as audiofile:
            response = requests.post(
                'https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1',
                headers=headers,
                params=params,
                data=make_predictions.read_in_chunks(audiofile))
        cortana_transcription = response.content
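        # Parse the JSON returned by the Speech API for the recognition details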
        val = json.loads(response.text)
        recognitionstatus = val["RecognitionStatus"]
        offset = val["Offset"]
        duration = val["Duration"]
        nbest = val["NBest"]
        confidence = val["NBest"][0]["Confidence"]
        lexical = val["NBest"][0]["Lexical"]
        itn = val["NBest"][0]["ITN"]
        maskeditn = val["NBest"][0]["MaskedITN"]
        display = val["NBest"][0]["Display"]
        # Connecting to Microsoft Text Analytics API for sentiment analysis
        text_documents = {
            'documents': [{
                'id': 'Ground Truth Transcription',
                'language': 'en',
                'text': truth_transcription
            }, {
                'id': 'Predicted Transcription',
                'language': 'en',
                'text': prediction_transcription
            }, {
                'id': 'Cortana Transcription',
                'language': 'en',
                'text': lexical
            }]
        }
        sentiment_response = requests.post(sentiment_api_url,
                                           headers=text_headers,
                                           json=text_documents)
        sentiments = sentiment_response.json()
        documents = sentiments["documents"]
        errors = sentiments["errors"]
        truth_score = sentiments["documents"][0]["score"]
        truth_id = sentiments["documents"][0]["id"]
        prediction_score = sentiments["documents"][1]["score"]
        prediction_id = sentiments["documents"][1]["id"]
        cortana_score = sentiments["documents"][2]["score"]
        cortana_id = sentiments["documents"][2]["id"]
        # Serve the audio file for the audio player
        play_audio = filepath.replace("/home/brice/Hey-Jetson/app/", "")

    # Render the html page
    return render_template('sentiment.html',
                           sentiment_form=sentiment_form,
                           truth_transcription=truth_transcription,
                           prediction_transcription=prediction_transcription,
                           cortana_transcription=cortana_transcription,
                           confidence=confidence,
                           lexical=lexical,
                           itn=itn,
                           maskeditn=maskeditn,
                           display=display,
                           truth_score=truth_score,
                           prediction_score=prediction_score,
                           cortana_score=cortana_score,
                           play_audio=play_audio)
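
The handler above depends on several module-level objects that are not part of this excerpt: headers and params for the Speech-to-Text request, and sentiment_api_url and text_headers for the Text Analytics request. The sketch below shows one plausible configuration; the environment-variable names, region, and exact endpoints are assumptions, but format=detailed and the v2.x sentiment schema are consistent with the response fields (NBest, Confidence, score) that the handler reads.

import os

# Sketch of the module-level Azure configuration assumed by these handlers.
# The environment-variable names are placeholders.
speech_api_key = os.environ.get('AZURE_SPEECH_KEY')
text_api_key = os.environ.get('AZURE_TEXT_KEY')

# Speech-to-Text REST API request configuration; format=detailed yields the
# NBest list with Confidence/Lexical/ITN/MaskedITN/Display fields parsed above.
headers = {
    'Ocp-Apim-Subscription-Key': speech_api_key,
    'Content-Type': 'audio/wav; codecs=audio/pcm; samplerate=16000',
    'Accept': 'application/json',
}
params = {'language': 'en-US', 'format': 'detailed'}

# Text Analytics sentiment endpoint (v2.x schema, which returns a per-document
# "score"); the region and API version are assumptions.
sentiment_api_url = 'https://westus.api.cognitive.microsoft.com/text/analytics/v2.1/sentiment'
text_headers = {
    'Ocp-Apim-Subscription-Key': text_api_key,
    'Content-Type': 'application/json',
}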
Example 2
def visualization():
    # Initializing form for user input
    visualization_form = VisualizationForm()
    # Initializing variables passed to HTML files
    truth_transcription = None
    prediction_transcription = None
    raw_plot = None
    spectrogram_plot = None
    spectrogram_shape = None
    log_spectrogram_plot = None
    spectrogram_3d = None
    cortana_transcription = None
    recognitionstatus = None
    offset = None
    duration = None
    nbest = None
    confidence = None
    lexical = None
    itn = None
    maskeditn = None
    display = None
    play_audio = None

    # Form for visualization engine
    if visualization_form.validate_on_submit():
        v_model_number = visualization_form.viz_model_number.data
        v_partition = visualization_form.viz_partition.data
        v_instance_number = visualization_form.viz_instance_number.data
        # Get ground truth and predicted transcriptions
        if v_model_number == 'model_10':
            truth_transcription = make_predictions.get_ground_truth(
                index=v_instance_number,
                partition=v_partition,
                input_to_softmax=make_predictions.model_10,
                model_path='./results/model_10.h5')
            prediction_transcription = make_predictions.get_prediction(
                index=v_instance_number,
                partition=v_partition,
                input_to_softmax=make_predictions.model_10,
                model_path='./results/model_10.h5')
        else:
            truth_transcription = make_predictions.get_ground_truth(
                index=v_instance_number,
                partition=v_partition,
                input_to_softmax=make_predictions.model_8,
                model_path='./results/model_8.h5')
            prediction_transcription = make_predictions.get_prediction(
                index=v_instance_number,
                partition=v_partition,
                input_to_softmax=make_predictions.model_8,
                model_path='./results/model_8.h5')
        # Get features for visualizations
        vis_text, vis_spectrogram_feature, vis_audio_path, sample_rate, samples = make_predictions.vis_audio_features(
            index=v_instance_number, partition=v_partition)
        # Plot the audio waveform
        raw_plot = make_predictions.plot_raw_audio(sample_rate, samples)
        # Plot the spectrogram of the audio file
        spectrogram_plot = make_predictions.plot_spectrogram_feature(
            vis_spectrogram_feature)
        spectrogram_shape = 'The shape of the spectrogram of the chosen audio file: ' + str(
            vis_spectrogram_feature.shape)
        # 2nd way to plot the spectrogram of the audio file
        freqs, times, log_spectrogram = make_predictions.log_spectrogram_feature(
            samples, sample_rate)
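        # Standardize the log spectrogram (zero mean, unit variance) before plotting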
        mean = np.mean(log_spectrogram, axis=0)
        std = np.std(log_spectrogram, axis=0)
        log_spectrogram = (log_spectrogram - mean) / std
        log_spectrogram_plot = make_predictions.plot_log_spectrogram_feature(
            freqs, times, log_spectrogram)

        # 3D plot of the spectrogram of the chosen audio file, plotting amplitude over frequency over time.
        def plot_3d_spectrogram(log_spectrogram):
            data = [go.Surface(z=log_spectrogram.T, colorscale='Viridis')]
            layout = go.Layout(title='3D Spectrogram',
                               autosize=True,
                               width=700,
                               height=700,
                               margin=dict(l=50, r=50, b=50, t=50))
            fig = go.Figure(data=data, layout=layout)
            div_output = plot(fig, output_type='div', include_plotlyjs=False)
            return div_output

        # 3d spectrogram plot
        spectrogram_3d = plot_3d_spectrogram(log_spectrogram)
        spectrogram_3d = Markup(spectrogram_3d)
        # Connecting to Microsoft Speech API for Cortana's predicted transcription
        filepath = make_predictions.azure_inference(index=v_instance_number,
                                                    partition=v_partition)
        with open(filepath, 'rb') as audiofile:
            response = requests.post(
                'https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1',
                headers=headers,
                params=params,
                data=make_predictions.read_in_chunks(audiofile))
        cortana_transcription = response.content
        val = json.loads(response.text)
        recognitionstatus = val["RecognitionStatus"]
        offset = val["Offset"]
        duration = val["Duration"]
        nbest = val["NBest"]
        confidence = val["NBest"][0]["Confidence"]
        lexical = val["NBest"][0]["Lexical"]
        itn = val["NBest"][0]["ITN"]
        maskeditn = val["NBest"][0]["MaskedITN"]
        display = val["NBest"][0]["Display"]
        # Serve the audio file for the audio player
        play_audio = filepath.replace("/home/brice/Hey-Jetson/app/", "")

    # Render the html page.
    return render_template('visualization.html',
                           visualization_form=visualization_form,
                           truth_transcription=truth_transcription,
                           prediction_transcription=prediction_transcription,
                           raw_plot=raw_plot,
                           spectrogram_plot=spectrogram_plot,
                           log_spectrogram_plot=log_spectrogram_plot,
                           spectrogram_shape=spectrogram_shape,
                           spectrogram_3d=spectrogram_3d,
                           cortana_transcription=cortana_transcription,
                           confidence=confidence,
                           lexical=lexical,
                           itn=itn,
                           maskeditn=maskeditn,
                           display=display,
                           play_audio=play_audio)
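
Each of these handlers streams the audio file to the Speech API with data=make_predictions.read_in_chunks(audiofile), which lets requests upload the body with chunked transfer encoding rather than reading the whole file into memory at once. The real helper lives in the project's make_predictions module; a minimal sketch of such a generator, with an assumed 1024-byte chunk size, looks like this:

def read_in_chunks(file_object, chunk_size=1024):
    # Illustrative sketch of make_predictions.read_in_chunks (chunk size assumed):
    # yield the open audio file in fixed-size pieces so requests can stream it.
    while True:
        chunk = file_object.read(chunk_size)
        if not chunk:
            break
        yield chunk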
Example 3
def performance():
    # Initializing form for user input
    performance_form = PerformanceForm()
    # Initializing variables passed to HTML files
    truth_transcription = None
    prediction_transcription = None
    word_error_rate = None
    cv_similarity = None
    tfidf_similarity = None
    jetson_time_to_predict = None
    cortana_time_to_predict = None
    cortana_transcription = None
    recognitionstatus = None
    offset = None
    duration = None
    nbest = None
    confidence = None
    lexical = None
    itn = None
    maskeditn = None
    display = None
    cortana_cv = None
    cortana_tfidf = None
    cortana_wer = None
    play_audio = None

    # Form for performance engine
    if performance_form.validate_on_submit():
        p_model_number = performance_form.perf_model_number.data
        p_partition = performance_form.perf_partition.data
        p_instance_number = performance_form.perf_instance_number.data
        # Get ground truth and predicted transcriptions
        if p_model_number == 'model_10':
            truth_transcription = make_predictions.get_ground_truth(
                index=p_instance_number,
                partition=p_partition,
                input_to_softmax=make_predictions.model_10,
                model_path='./results/model_10.h5')
            start = time.time()
            prediction_transcription = make_predictions.get_prediction(
                index=p_instance_number,
                partition=p_partition,
                input_to_softmax=make_predictions.model_10,
                model_path='./results/model_10.h5')
            end = time.time()
            jetson_time_to_predict = end - start
        else:
            truth_transcription = make_predictions.get_ground_truth(
                index=p_instance_number,
                partition=p_partition,
                input_to_softmax=make_predictions.model_8,
                model_path='./results/model_8.h5')
            start = time.time()
            prediction_transcription = make_predictions.get_prediction(
                index=p_instance_number,
                partition=p_partition,
                input_to_softmax=make_predictions.model_8,
                model_path='./results/model_8.h5')
            end = time.time()
            jetson_time_to_predict = end - start
        # Calculate cosine similarity of individual transcriptions using Count Vectorizer
        cv = CountVectorizer()
        cv_ground_truth_vec = cv.fit_transform([truth_transcription])
        cv_pred_transcription_vec = cv.transform([prediction_transcription])
        cv_similarity = cosine_similarity(cv_ground_truth_vec,
                                          cv_pred_transcription_vec)
        # Calculate cosine similarity of individual transcriptions using Tfidf Vectorizer
        tfidf = TfidfVectorizer()
        tfidf_ground_truth_vec = tfidf.fit_transform([truth_transcription])
        tfidf_pred_transcription_vec = tfidf.transform(
            [prediction_transcription])
        tfidf_similarity = cosine_similarity(tfidf_ground_truth_vec,
                                             tfidf_pred_transcription_vec)
        # Calculate word error rate of individual transcription
        word_error_rate = make_predictions.wer_calc(truth_transcription,
                                                    prediction_transcription)
        # Connecting to Microsoft Speech API for Cortana's predicted transcription
        c_start = time.time()
        filepath = make_predictions.azure_inference(index=p_instance_number,
                                                    partition=p_partition)
        with open(filepath, 'rb') as audiofile:
            response = requests.post(
                'https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1',
                headers=headers,
                params=params,
                data=make_predictions.read_in_chunks(audiofile))
        cortana_transcription = response.content
        c_end = time.time()
        cortana_time_to_predict = c_end - c_start
        val = json.loads(response.text)
        recognitionstatus = val["RecognitionStatus"]
        offset = val["Offset"]
        duration = val["Duration"]
        nbest = val["NBest"]
        confidence = val["NBest"][0]["Confidence"]
        lexical = val["NBest"][0]["Lexical"]
        itn = val["NBest"][0]["ITN"]
        maskeditn = val["NBest"][0]["MaskedITN"]
        display = val["NBest"][0]["Display"]
        # Calculate performance measures on AZURE transcript
        cv_cortana_vec = cv.transform([lexical])
        cortana_cv = cosine_similarity(cv_ground_truth_vec, cv_cortana_vec)
        tfidf_cortana_vec = tfidf.transform([lexical])
        cortana_tfidf = cosine_similarity(tfidf_ground_truth_vec,
                                          tfidf_cortana_vec)
        cortana_wer = make_predictions.wer_calc(truth_transcription, lexical)
        # Serve the audio file for the audio player
        play_audio = filepath.replace("/home/brice/Hey-Jetson/app/", "")

    # Render the html page
    return render_template('performance.html',
                           performance_form=performance_form,
                           truth_transcription=truth_transcription,
                           prediction_transcription=prediction_transcription,
                           word_error_rate=word_error_rate,
                           cv_similarity=cv_similarity,
                           tfidf_similarity=tfidf_similarity,
                           jetson_time_to_predict=jetson_time_to_predict,
                           cortana_transcription=cortana_transcription,
                           cortana_time_to_predict=cortana_time_to_predict,
                           confidence=confidence,
                           lexical=lexical,
                           itn=itn,
                           maskeditn=maskeditn,
                           display=display,
                           cortana_cv=cortana_cv,
                           cortana_tfidf=cortana_tfidf,
                           cortana_wer=cortana_wer,
                           play_audio=play_audio)
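
The performance view calls make_predictions.wer_calc(truth, hypothesis) for both the local model's transcription and Azure's. Word error rate is conventionally the word-level edit distance (substitutions, insertions, and deletions) divided by the number of words in the reference transcription; the sketch below illustrates that standard calculation, not necessarily the project's exact implementation.

def wer_calc(reference, hypothesis):
    # Illustrative word error rate: word-level Levenshtein distance divided by
    # the number of reference words. The project's wer_calc may differ in detail.
    ref_words = reference.split()
    hyp_words = hypothesis.split()
    d = [[0] * (len(hyp_words) + 1) for _ in range(len(ref_words) + 1)]
    for i in range(len(ref_words) + 1):
        d[i][0] = i
    for j in range(len(hyp_words) + 1):
        d[0][j] = j
    for i in range(1, len(ref_words) + 1):
        for j in range(1, len(hyp_words) + 1):
            cost = 0 if ref_words[i - 1] == hyp_words[j - 1] else 1
            d[i][j] = min(d[i - 1][j] + 1,          # deletion
                          d[i][j - 1] + 1,          # insertion
                          d[i - 1][j - 1] + cost)   # substitution
    return d[len(ref_words)][len(hyp_words)] / max(len(ref_words), 1)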
Example 4
def index():
    # Initializing form for user input
    audio_form = AudioForm(CombinedMultiDict((request.files, request.form)))
    # Initializing variables passed to HTML files
    filename = None
    prediction_transcription = None
    raw_plot = None
    spectrogram_plot = None
    spectrogram_shape = None
    log_spectrogram_plot = None
    spectrogram_3d = None
    word_error_rate = None
    cv_similarity = None
    jetson_time_to_predict = None
    cortana_time_to_predict = None
    cortana_transcription = None
    recognitionstatus = None
    offset = None
    duration = None
    nbest = None
    confidence = None
    lexical = None
    itn = None
    maskeditn = None
    display = None
    sentiments = None
    documents = None
    errors = None
    prediction_score = None
    prediction_id = None
    cortana_score = None
    cortana_id = None

    # Form for inference engine
    if audio_form.validate_on_submit():
        f = audio_form.audio_file.data
        filename = os.path.join('app/static/audio/', 'tmp.wav')
        f.save(filename)
        # Connecting to Microsoft Speech API for Cortana's predicted transcription
        c_start = time.time()
        with open(filename, 'rb') as audiofile:
            response = requests.post(
                'https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1',
                headers=headers,
                params=params,
                data=make_predictions.read_in_chunks(audiofile))
        cortana_transcription = response.content
        c_end = time.time()
        cortana_time_to_predict = c_end - c_start
        val = json.loads(response.text)
        recognitionstatus = val["RecognitionStatus"]
        offset = val["Offset"]
        duration = val["Duration"]
        nbest = val["NBest"]
        confidence = val["NBest"][0]["Confidence"]
        lexical = val["NBest"][0]["Lexical"]
        itn = val["NBest"][0]["ITN"]
        maskeditn = val["NBest"][0]["MaskedITN"]
        display = val["NBest"][0]["Display"]
        # Producing Hey, Jetson! predicted transcription
        s_start = time.time()
        prediction_transcription = make_predictions.run_inference(
            audio_path=filename,
            input_to_softmax=make_predictions.model_10,
            model_path='./results/model_10.h5')
        s_end = time.time()
        jetson_time_to_predict = s_end - s_start
        vis_spectrogram_feature, sample_rate, samples = make_predictions.inference_vis_audio_features(
            index=filename)
        # Plot the audio waveform
        raw_plot = make_predictions.plot_raw_audio(sample_rate, samples)
        # Plot the spectrogram of the audio file
        spectrogram_plot = make_predictions.plot_spectrogram_feature(
            vis_spectrogram_feature)
        spectrogram_shape = 'The shape of the spectrogram of the uploaded audio file: ' + str(
            vis_spectrogram_feature.shape)
        # 2nd way to plot the spectrogram of the audio file
        freqs, times, log_spectrogram = make_predictions.log_spectrogram_feature(
            samples, sample_rate)
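        # Standardize the log spectrogram (zero mean, unit variance) before plotting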
        mean = np.mean(log_spectrogram, axis=0)
        std = np.std(log_spectrogram, axis=0)
        log_spectrogram = (log_spectrogram - mean) / std
        log_spectrogram_plot = make_predictions.plot_log_spectrogram_feature(
            freqs, times, log_spectrogram)

        # 3D plot of the spectrogram of the uploaded audio file, plotting amplitude over frequency over time.
        def plot_3d_spectrogram(log_spectrogram):
            data = [go.Surface(z=log_spectrogram.T, colorscale='Viridis')]
            layout = go.Layout(title='3D Spectrogram',
                               autosize=True,
                               width=700,
                               height=700,
                               margin=dict(l=50, r=50, b=50, t=50))
            fig = go.Figure(data=data, layout=layout)
            div_output = plot(fig, output_type='div', include_plotlyjs=False)
            return div_output

        # 3d spectrogram plot
        spectrogram_3d = plot_3d_spectrogram(log_spectrogram)
        spectrogram_3d = Markup(spectrogram_3d)
        # Connecting to Microsoft Text Analytics API for sentiment analysis
        text_documents = {
            'documents': [{
                'id': 'Predicted Transcription',
                'language': 'en',
                'text': prediction_transcription
            }, {
                'id': 'Cortana Transcription',
                'language': 'en',
                'text': lexical
            }]
        }
        sentiment_response = requests.post(sentiment_api_url,
                                           headers=text_headers,
                                           json=text_documents)
        sentiments = sentiment_response.json()
        documents = sentiments["documents"]
        errors = sentiments["errors"]
        prediction_score = sentiments["documents"][0]["score"]
        prediction_id = sentiments["documents"][0]["id"]
        cortana_score = sentiments["documents"][1]["score"]
        cortana_id = sentiments["documents"][1]["id"]

    # Render the html page.
    return render_template('index.html',
                           audio_form=audio_form,
                           filename=filename,
                           prediction_transcription=prediction_transcription,
                           raw_plot=raw_plot,
                           spectrogram_plot=spectrogram_plot,
                           log_spectrogram_plot=log_spectrogram_plot,
                           spectrogram_shape=spectrogram_shape,
                           spectrogram_3d=spectrogram_3d,
                           jetson_time_to_predict=jetson_time_to_predict,
                           cortana_time_to_predict=cortana_time_to_predict,
                           confidence=confidence,
                           lexical=lexical,
                           itn=itn,
                           maskeditn=maskeditn,
                           display=display,
                           prediction_score=prediction_score,
                           cortana_score=cortana_score)
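
These four view functions are excerpts from the Hey, Jetson! Flask application; the @app.route decorators and the Flask-WTF form classes they use (SentimentForm, VisualizationForm, PerformanceForm, AudioForm) are defined elsewhere and are not shown here. The sketch below is a hypothetical reconstruction of the upload form and the route wiring, with field names taken from the attributes the handlers read; the select-field choices (other than model_8/model_10, which the handlers test against) are assumptions.

from flask import Flask
from flask_wtf import FlaskForm
from flask_wtf.file import FileField, FileRequired
from wtforms import IntegerField, SelectField, SubmitField

app = Flask(__name__)
app.config['SECRET_KEY'] = 'change-me'  # placeholder secret key for Flask-WTF


class AudioForm(FlaskForm):
    # Field name matches audio_form.audio_file.data read in index()
    audio_file = FileField('Audio file (.wav)', validators=[FileRequired()])
    submit = SubmitField('Transcribe')


class SentimentForm(FlaskForm):
    # Field names match the attributes read in sentiment(); the partition choices
    # are assumptions, the model choices mirror the model_10/model_8 branch above
    sent_model_number = SelectField('Model', choices=[('model_8', 'Model 8'),
                                                      ('model_10', 'Model 10')])
    sent_partition = SelectField('Partition', choices=[('train', 'train'),
                                                       ('validation', 'validation'),
                                                       ('test', 'test')])
    sent_instance_number = IntegerField('Instance number')
    submit = SubmitField('Run')


# Hypothetical route registration; the real app wires these views up itself.
app.add_url_rule('/', view_func=index, methods=['GET', 'POST'])
app.add_url_rule('/sentiment', view_func=sentiment, methods=['GET', 'POST'])
app.add_url_rule('/visualization', view_func=visualization, methods=['GET', 'POST'])
app.add_url_rule('/performance', view_func=performance, methods=['GET', 'POST'])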