def classifyLexical():
    """Run the lexical classifier on submitted text and return JSON.

    Reads the form fields ``textfile``, ``text`` and ``lexical_folder``
    from the current request. When ``textfile`` is non-empty the text is
    read from that file under ``route_folder/TEXT_DATA``; otherwise the raw
    ``text`` field is used.

    Side effects visible here: ``load_config`` is called for the chosen
    lexical model, and the per-token probabilities are written to a
    ``<name>.result`` file under ``route_folder/TEXT_DATA`` (``<name>`` is
    ``custom_input`` when no file was selected).

    Returns:
        The ``json.dumps`` serialization of the converter output.
    """
    # NOTE(review): assert is stripped under ``python -O``; if the route is
    # not already restricted to POST elsewhere, this guard disappears.
    assert request.method == 'POST'

    text_file = request.form['textfile']
    if text_file:
        # NOTE(review): text_file is client-supplied and joined into a path
        # unchecked -- confirm it cannot escape TEXT_DATA.
        source_path = os.path.join(route_folder, TEXT_DATA, text_file)
        text = InputTextReader().readFile(source_path)
    else:
        text = request.form['text']

    # Predict punctuation with the selected lexical model.
    load_config(LEXICAL_MODEL_FOLDER, request.form['lexical_folder'])
    input_text = InputText(text)
    punctuations_probs = lexical_classifier.predict(input_text)

    window_size, punctuation_pos, pos_tagging = (
        lexical_classifier.get_lexical_parameter()
    )
    use_question_mark = sbd.config.getboolean('features', 'use_question_mark')
    classes = get_lexical_classes(use_question_mark)

    all_probs = convert_probabilities(
        len(input_text.tokens),
        punctuation_pos,
        punctuations_probs,
        classes,
    )

    # The audio-related converter arguments are None on this lexical-only path.
    converter = JsonConverter(
        punctuation_pos, window_size, None, None, pos_tagging
    )
    data = converter.convert_lexical(input_text.tokens, all_probs)

    # Persist the result next to the input data; free-form input gets a
    # fixed file name.
    result_stem = text_file or "custom_input"
    result_path = os.path.join(route_folder, TEXT_DATA, result_stem + ".result")
    ResultWriter(classes).writeToFile(result_path, input_text.tokens, all_probs)

    return json.dumps(data)
def classifyAudioLexical():
    """Run audio and lexical classifiers on an example, fuse them, return JSON.

    Reads the form fields ``example``, ``audio_folder`` and
    ``lexical_folder`` from the current request. The example's CTM file is
    parsed into talks, which feed both the audio and the lexical classifier.

    Side effects visible here: ``load_config`` is called first for the
    audio model and then for the lexical model, and the audio-only
    probabilities are written to ``<example>.result`` under
    ``route_folder/AUDIO_EXAMPLE_FOLDER``.

    Returns:
        The ``json.dumps`` serialization of the fused, lexical and audio
        probabilities as produced by ``JsonConverter.convert_fusion``.
    """
    # NOTE(review): assert is stripped under ``python -O``; if the route is
    # not already restricted to POST elsewhere, this guard disappears.
    assert request.method == 'POST'

    # Locate the files of the requested example.
    # NOTE(review): the example name is client-supplied and joined into
    # paths unchecked -- confirm it cannot escape AUDIO_EXAMPLE_FOLDER.
    example_name = request.form['example']
    example_folder = os.path.join(
        route_folder, AUDIO_EXAMPLE_FOLDER, example_name
    )
    # Only the CTM file is parsed below; the pitch and energy paths are
    # returned by get_audio_files but not used in this function.
    ctm_file, _pitch_file, _energy_file = get_audio_files(example_folder)
    talks = AudioParser().parse(ctm_file)

    # Audio prediction (the audio config must be loaded before predicting).
    load_config(AUDIO_MODEL_FOLDER, request.form['audio_folder'])
    audio_probs = audio_classifier.predict(InputAudio(talks))

    # Lexical prediction (loads the lexical config after the audio one).
    load_config(LEXICAL_MODEL_FOLDER, request.form['lexical_folder'])
    input_text = InputText(talks)
    lexical_probs = lexical_classifier.predict(input_text)

    # Window sizes and punctuation positions of both models.
    lexical_window_size, lexical_punctuation_pos, pos_tagging = (
        lexical_classifier.get_lexical_parameter()
    )
    audio_window_size, audio_punctuation_pos = (
        audio_classifier.get_audio_parameter()
    )

    # Write the audio-only results to disk.
    audio_classes = ["NONE", "PERIOD"]
    all_audio_probs = convert_probabilities(
        len(input_text.tokens),
        audio_punctuation_pos,
        audio_probs,
        audio_classes,
    )
    result_path = os.path.join(
        route_folder, AUDIO_EXAMPLE_FOLDER, example_name + ".result"
    )
    ResultWriter(audio_classes).writeToFile(
        result_path, input_text.tokens, all_audio_probs
    )

    # Fuse lexical and audio probabilities.
    fusion = ThresholdFusion()
    fusion.init_parameters(
        lexical_punctuation_pos,
        lexical_window_size,
        audio_punctuation_pos,
        audio_window_size,
    )
    fusion_probs = fusion.fuse(
        len(input_text.tokens), lexical_probs, audio_probs
    )

    # Convert everything into the JSON structure returned to the client.
    converter = JsonConverter(
        lexical_punctuation_pos,
        lexical_window_size,
        audio_punctuation_pos,
        audio_window_size,
        pos_tagging,
    )
    data = converter.convert_fusion(
        input_text.tokens, fusion_probs, lexical_probs, audio_probs
    )
    return json.dumps(data)