Ejemplo n.º 1
0
def classifyLexical():
    """Run the lexical classifier for a POST request and return JSON.

    Form fields read from ``request.form``:

    * ``textfile`` -- name of a file below the ``TEXT_DATA`` folder. When it
      is empty, the input is taken from the ``text`` field instead.
    * ``text`` -- raw input text (only read when ``textfile`` is empty).
    * ``lexical_folder`` -- passed to ``load_config`` together with
      ``LEXICAL_MODEL_FOLDER``.

    Side effect: the tokens and their probabilities are written to
    ``<textfile>.result`` (``custom_input.result`` when no file was given)
    inside the ``TEXT_DATA`` folder.

    Returns:
        The ``json.dumps`` string of the converted lexical result.

    Raises:
        AssertionError: if the request method is not POST (only while
            assertions are enabled).
        ValueError: if ``textfile`` resolves to a location outside the
            ``TEXT_DATA`` folder.
    """
    # NOTE(review): the route decorator is not visible here; if it does not
    # already restrict the methods, this assert disappears under ``python -O``.
    assert request.method == 'POST'
    text_file = request.form['textfile']
    text_data_root = os.path.join(route_folder, TEXT_DATA)

    if not text_file:
        text = request.form['text']
        result_name = "custom_input"
    else:
        file_name = os.path.join(text_data_root, text_file)
        result_name = text_file

        # ``textfile`` comes straight from the client: an absolute path or
        # ``..`` components would otherwise let the request read (and later
        # write a ``.result`` file) anywhere on disk.
        root_prefix = os.path.join(os.path.abspath(text_data_root), '')
        guarded_paths = (
            file_name,
            os.path.join(text_data_root, text_file + ".result"),
        )
        for candidate in guarded_paths:
            if not os.path.abspath(candidate).startswith(root_prefix):
                raise ValueError(
                    "textfile must stay inside the text data folder: %r"
                    % (text_file,)
                )

        reader = InputTextReader()
        text = reader.readFile(file_name)

    # predict lexical
    load_config(LEXICAL_MODEL_FOLDER, request.form['lexical_folder'])
    input_text = InputText(text)
    punctuations_probs = lexical_classifier.predict(input_text)
    (window_size, punctuation_pos, pos_tagging) = lexical_classifier.get_lexical_parameter()
    question_mark = sbd.config.getboolean('features', 'use_question_mark')
    classes = get_lexical_classes(question_mark)

    all_probs = convert_probabilities(len(input_text.tokens), punctuation_pos, punctuations_probs, classes)

    # The two ``None`` arguments sit where the audio handler passes its audio
    # parameters; this handler has no audio model.
    json_converter = JsonConverter(punctuation_pos, window_size, None, None, pos_tagging)
    data = json_converter.convert_lexical(input_text.tokens, all_probs)

    # Persist the per-token probabilities next to the input data.
    result_file = os.path.join(route_folder, TEXT_DATA, result_name + ".result")
    result_writer = ResultWriter(classes)
    result_writer.writeToFile(result_file, input_text.tokens, all_probs)

    return json.dumps(data)
Ejemplo n.º 2
0
def classifyAudioLexical():
    """Run the audio and lexical classifiers on an example and return JSON.

    Form fields read from ``request.form``:

    * ``example`` -- name of a folder below ``AUDIO_EXAMPLE_FOLDER``.
    * ``audio_folder`` -- passed to ``load_config`` together with
      ``AUDIO_MODEL_FOLDER``.
    * ``lexical_folder`` -- passed to ``load_config`` together with
      ``LEXICAL_MODEL_FOLDER``.

    Side effect: the audio probabilities are written to ``<example>.result``
    inside the ``AUDIO_EXAMPLE_FOLDER`` folder.

    Returns:
        The ``json.dumps`` string of the converted fusion result.

    Raises:
        AssertionError: if the request method is not POST (only while
            assertions are enabled).
        ValueError: if ``example`` is empty or resolves to a location that is
            not strictly inside the ``AUDIO_EXAMPLE_FOLDER`` folder.
    """
    # NOTE(review): the route decorator is not visible here; if it does not
    # already restrict the methods, this assert disappears under ``python -O``.
    assert request.method == 'POST'

    # get example folder
    example_name = request.form['example']
    examples_root = os.path.join(route_folder, AUDIO_EXAMPLE_FOLDER)
    example_folder = os.path.join(examples_root, example_name)
    result_file = os.path.join(examples_root, example_name + ".result")

    # ``example`` comes straight from the client: an absolute path or ``..``
    # components would otherwise let the request read from, and write a
    # ``.result`` file to, arbitrary locations.
    root_prefix = os.path.join(os.path.abspath(examples_root), '')
    for candidate in (example_folder, result_file):
        if not os.path.abspath(candidate).startswith(root_prefix):
            raise ValueError(
                "example must stay inside the audio example folder: %r"
                % (example_name,)
            )

    # Only the ctm file is parsed below; the pitch and energy files are
    # returned by get_audio_files but not used in this handler.
    ctm_file, _pitch_file, _energy_file = get_audio_files(example_folder)

    # parse ctm_file
    parser = AudioParser()
    talks = parser.parse(ctm_file)

    # predict audio
    load_config(AUDIO_MODEL_FOLDER, request.form['audio_folder'])
    audio_probs = audio_classifier.predict(InputAudio(talks))

    # predict lexical
    load_config(LEXICAL_MODEL_FOLDER, request.form['lexical_folder'])
    input_text = InputText(talks)
    lexical_probs = lexical_classifier.predict(input_text)

    # get config parameter
    (lexical_window_size, lexical_punctuation_pos, pos_tagging) = lexical_classifier.get_lexical_parameter()
    (audio_window_size, audio_punctuation_pos) = audio_classifier.get_audio_parameter()

    # write audio results
    audio_classes = ["NONE", "PERIOD"]
    all_audio_probs = convert_probabilities(len(input_text.tokens), audio_punctuation_pos, audio_probs, audio_classes)
    result_writer = ResultWriter(audio_classes)
    result_writer.writeToFile(result_file, input_text.tokens, all_audio_probs)

    # fusion
    fusion = ThresholdFusion()
    fusion.init_parameters(lexical_punctuation_pos, lexical_window_size, audio_punctuation_pos, audio_window_size)
    fusion_probs = fusion.fuse(len(input_text.tokens), lexical_probs, audio_probs)

    # convert it into json
    json_converter = JsonConverter(lexical_punctuation_pos, lexical_window_size, audio_punctuation_pos, audio_window_size, pos_tagging)
    data = json_converter.convert_fusion(input_text.tokens, fusion_probs, lexical_probs, audio_probs)
    return json.dumps(data)