def about():
    """Render ``about.html`` with a 3D spectrogram of a randomly picked test clip.

    A random index is drawn with ``np.random.randint(0, 4176)`` from the
    ``'test'`` partition, the clip's log spectrogram is standardized along
    axis 0, and the result is drawn as a Plotly surface whose HTML ``div`` is
    handed to the template as markup.
    """
    # Only the sample rate and raw samples are needed from the returned
    # features; the other three values are discarded.
    random_index = np.random.randint(0, 4176)
    _, _, _, sample_rate, samples = make_predictions.vis_audio_features(
        index=random_index, partition='test')

    # Log spectrogram, standardized with the per-axis-0 mean and deviation.
    _, _, log_spec = make_predictions.log_spectrogram_feature(
        samples, sample_rate)
    axis_mean = np.mean(log_spec, axis=0)
    axis_std = np.std(log_spec, axis=0)
    normalized = (log_spec - axis_mean) / axis_std

    # Surface plot of the transposed, standardized spectrogram.
    surface = go.Surface(z=normalized.T, colorscale='Viridis')
    figure = go.Figure(
        data=[surface],
        layout=go.Layout(
            title='3D Spectrogram',
            autosize=True,
            width=700,
            height=700,
            margin=dict(l=50, r=50, b=50, t=50),
        ),
    )

    # The div is emitted without the plotly.js bundle (include_plotlyjs=False)
    # and wrapped in Markup before being passed to the template.
    plot_div = plot(figure, output_type='div', include_plotlyjs=False)
    return render_template('about.html', spectrogram_3d=Markup(plot_div))
def visualization():
    """Render ``visualization.html`` for a dataset instance chosen in the form.

    On a valid form submission this:

    * obtains ground-truth and predicted transcriptions for the selected
      partition/instance using ``model_10`` (or ``model_8`` for any other
      model choice),
    * builds the waveform, spectrogram, log-spectrogram and 3D spectrogram
      plots for that instance,
    * posts the instance's audio file to the Microsoft Speech API and pulls
      the first ``NBest`` entry out of the JSON reply.

    Without a valid submission every template value except the form stays
    ``None``.

    Returns:
        The rendered ``visualization.html`` template.
    """
    # Initializing form for user input
    visualization_form = VisualizationForm()

    # Values passed to the template; they remain None until a valid form
    # has been submitted.
    truth_transcription = None
    prediction_transcription = None
    raw_plot = None
    spectrogram_plot = None
    spectrogram_shape = None
    log_spectrogram_plot = None
    spectrogram_3d = None
    cortana_transcription = None
    confidence = None
    lexical = None
    itn = None
    maskeditn = None
    display = None
    play_audio = None

    # Form for visualization engine
    if visualization_form.validate_on_submit():
        v_model_number = visualization_form.viz_model_number.data
        v_partition = visualization_form.viz_partition.data
        v_instance_number = visualization_form.viz_instance_number.data

        # Select the network and its weights once; any choice other than
        # 'model_10' falls back to model_8.
        if v_model_number == 'model_10':
            input_to_softmax = make_predictions.model_10
            model_path = './results/model_10.h5'
        else:
            input_to_softmax = make_predictions.model_8
            model_path = './results/model_8.h5'

        # Get ground truth and predicted transcriptions
        truth_transcription = make_predictions.get_ground_truth(
            index=v_instance_number,
            partition=v_partition,
            input_to_softmax=input_to_softmax,
            model_path=model_path)
        prediction_transcription = make_predictions.get_prediction(
            index=v_instance_number,
            partition=v_partition,
            input_to_softmax=input_to_softmax,
            model_path=model_path)

        # Get features for visualizations (the text and audio path that are
        # also returned are not needed here).
        _, vis_spectrogram_feature, _, sample_rate, samples = (
            make_predictions.vis_audio_features(
                index=v_instance_number, partition=v_partition))

        # Plot the audio waveform
        raw_plot = make_predictions.plot_raw_audio(sample_rate, samples)

        # Plot the spectrogram of the audio file
        spectrogram_plot = make_predictions.plot_spectrogram_feature(
            vis_spectrogram_feature)
        spectrogram_shape = (
            'The shape of the spectrogram of the chosen audio file: '
            + str(vis_spectrogram_feature.shape))

        # 2nd way to plot the spectrogram of the audio file: a log
        # spectrogram standardized along axis 0.
        freqs, times, log_spectrogram = make_predictions.log_spectrogram_feature(
            samples, sample_rate)
        mean = np.mean(log_spectrogram, axis=0)
        std = np.std(log_spectrogram, axis=0)
        log_spectrogram = (log_spectrogram - mean) / std
        log_spectrogram_plot = make_predictions.plot_log_spectrogram_feature(
            freqs, times, log_spectrogram)

        # 3D surface plot of the standardized log spectrogram of the chosen
        # audio file, emitted as an HTML div without the plotly.js bundle.
        fig = go.Figure(
            data=[go.Surface(z=log_spectrogram.T, colorscale='Viridis')],
            layout=go.Layout(
                title='3D Spectrogram',
                autosize=True,
                width=700,
                height=700,
                margin=dict(l=50, r=50, b=50, t=50)))
        spectrogram_3d = Markup(
            plot(fig, output_type='div', include_plotlyjs=False))

        # Connecting to Microsoft Speech API for Cortana's predicted
        # transcription. The file stays open for the whole request because
        # the request body is produced from it, and is closed as soon as the
        # request returns (it was previously never closed).
        filepath = make_predictions.azure_inference(
            index=v_instance_number, partition=v_partition)
        with open(filepath, 'rb') as audiofile:
            response = requests.post(
                'https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1',
                headers=headers,
                params=params,
                data=make_predictions.read_in_chunks(audiofile))
        cortana_transcription = response.content

        # NOTE(review): this raises KeyError/IndexError when the reply has no
        # non-empty "NBest" list - confirm how the service answers for
        # unrecognized audio and whether that case should render gracefully.
        val = json.loads(response.text)
        best = val["NBest"][0]
        confidence = best["Confidence"]
        lexical = best["Lexical"]
        itn = best["ITN"]
        maskeditn = best["MaskedITN"]
        display = best["Display"]

        # Serve the audio file for the audio player: strip the hard-coded
        # application directory prefix from the path.
        play_audio = filepath.replace("/home/brice/Hey-Jetson/app/", "")

    # Render the html page.
    return render_template(
        'visualization.html',
        visualization_form=visualization_form,
        truth_transcription=truth_transcription,
        prediction_transcription=prediction_transcription,
        raw_plot=raw_plot,
        spectrogram_plot=spectrogram_plot,
        log_spectrogram_plot=log_spectrogram_plot,
        spectrogram_shape=spectrogram_shape,
        spectrogram_3d=spectrogram_3d,
        cortana_transcription=cortana_transcription,
        confidence=confidence,
        lexical=lexical,
        itn=itn,
        maskeditn=maskeditn,
        display=display,
        play_audio=play_audio)
def index():
    """Render ``index.html`` with transcriptions and plots for an uploaded clip.

    On a valid form submission this:

    * saves the uploaded audio to ``app/static/audio/tmp.wav``,
    * posts it to the Microsoft Speech API and times the request,
    * runs ``make_predictions.run_inference`` with ``model_10`` and times it,
    * builds the waveform, spectrogram, log-spectrogram and 3D spectrogram
      plots,
    * sends both transcriptions to the sentiment API and reads back one
      score per transcription.

    Without a valid submission every template value except the form stays
    ``None``.

    Returns:
        The rendered ``index.html`` template.
    """
    # Initializing form for user input
    audio_form = AudioForm(CombinedMultiDict((request.files, request.form)))

    # Values passed to the template; they remain None until a valid form
    # has been submitted.
    filename = None
    prediction_transcription = None
    raw_plot = None
    spectrogram_plot = None
    spectrogram_shape = None
    log_spectrogram_plot = None
    spectrogram_3d = None
    jetson_time_to_predict = None
    cortana_time_to_predict = None
    confidence = None
    lexical = None
    itn = None
    maskeditn = None
    display = None
    prediction_score = None
    cortana_score = None

    # Form for inference engine
    if audio_form.validate_on_submit():
        f = audio_form.audio_file.data
        # NOTE(review): every upload is written to the same fixed path, so
        # overlapping requests would overwrite each other's audio - confirm
        # whether this app is ever served concurrently.
        filename = os.path.join('app/static/audio/', 'tmp.wav')
        f.save(filename)

        # Connecting to Microsoft Speech API for Cortana's predicted
        # transcription. The file stays open for the whole request because
        # the request body is produced from it, and is closed as soon as the
        # request returns (it was previously never closed).
        c_start = time.time()
        with open(filename, 'rb') as audiofile:
            response = requests.post(
                'https://westus.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1',
                headers=headers,
                params=params,
                data=make_predictions.read_in_chunks(audiofile))
        cortana_time_to_predict = time.time() - c_start

        # NOTE(review): this raises KeyError/IndexError when the reply has no
        # non-empty "NBest" list - confirm how the service answers for
        # unrecognized audio and whether that case should render gracefully.
        val = json.loads(response.text)
        best = val["NBest"][0]
        confidence = best["Confidence"]
        lexical = best["Lexical"]
        itn = best["ITN"]
        maskeditn = best["MaskedITN"]
        display = best["Display"]

        # Producing Hey, Jetson! predicted transcription
        s_start = time.time()
        prediction_transcription = make_predictions.run_inference(
            audio_path=filename,
            input_to_softmax=make_predictions.model_10,
            model_path='./results/model_10.h5')
        jetson_time_to_predict = time.time() - s_start

        vis_spectrogram_feature, sample_rate, samples = (
            make_predictions.inference_vis_audio_features(index=filename))

        # Plot the audio waveform
        raw_plot = make_predictions.plot_raw_audio(sample_rate, samples)

        # Plot the spectrogram of the audio file
        spectrogram_plot = make_predictions.plot_spectrogram_feature(
            vis_spectrogram_feature)
        spectrogram_shape = (
            'The shape of the spectrogram of the uploaded audio file: '
            + str(vis_spectrogram_feature.shape))

        # 2nd way to plot the spectrogram of the audio file: a log
        # spectrogram standardized along axis 0.
        freqs, times, log_spectrogram = make_predictions.log_spectrogram_feature(
            samples, sample_rate)
        mean = np.mean(log_spectrogram, axis=0)
        std = np.std(log_spectrogram, axis=0)
        log_spectrogram = (log_spectrogram - mean) / std
        log_spectrogram_plot = make_predictions.plot_log_spectrogram_feature(
            freqs, times, log_spectrogram)

        # 3D surface plot of the standardized log spectrogram of the uploaded
        # audio file, emitted as an HTML div without the plotly.js bundle.
        fig = go.Figure(
            data=[go.Surface(z=log_spectrogram.T, colorscale='Viridis')],
            layout=go.Layout(
                title='3D Spectrogram',
                autosize=True,
                width=700,
                height=700,
                margin=dict(l=50, r=50, b=50, t=50)))
        spectrogram_3d = Markup(
            plot(fig, output_type='div', include_plotlyjs=False))

        # Connecting to Microsoft Text Analytics API for sentiment analysis
        text_documents = {
            'documents': [
                {
                    'id': 'Predicted Transcription',
                    'language': 'en',
                    'text': prediction_transcription,
                },
                {
                    'id': 'Cortana Transcription',
                    'language': 'en',
                    'text': lexical,
                },
            ]
        }
        sentiment_response = requests.post(
            sentiment_api_url, headers=text_headers, json=text_documents)
        sentiments = sentiment_response.json()
        # Scores are read by position, i.e. in the order the documents were
        # submitted above.
        # NOTE(review): assumes the service returns both documents in request
        # order - verify, or look the scores up by their "id".
        prediction_score = sentiments["documents"][0]["score"]
        cortana_score = sentiments["documents"][1]["score"]

    # Render the html page.
    return render_template(
        'index.html',
        audio_form=audio_form,
        filename=filename,
        prediction_transcription=prediction_transcription,
        raw_plot=raw_plot,
        spectrogram_plot=spectrogram_plot,
        log_spectrogram_plot=log_spectrogram_plot,
        spectrogram_shape=spectrogram_shape,
        spectrogram_3d=spectrogram_3d,
        jetson_time_to_predict=jetson_time_to_predict,
        cortana_time_to_predict=cortana_time_to_predict,
        confidence=confidence,
        lexical=lexical,
        itn=itn,
        maskeditn=maskeditn,
        display=display,
        prediction_score=prediction_score,
        cortana_score=cortana_score)