def main(model_folder, example_folder):
    """Run the audio classifier on a single example and print its prediction.

    Loads the SBD configuration found in *model_folder*, parses the
    example's ctm/pitch/energy files, and prints whatever
    ``predict_audio`` returns for the parsed example.
    """
    cfg_path, _caffemodel_path, _net_proto_path = get_filenames(model_folder)
    sbd.SbdConfig(cfg_path)

    # Locate and parse the three per-example input files.
    ctm_path, pitch_path, energy_path = get_audio_files(example_folder)
    audio_parser = AudioParser(ctm_path, pitch_path, energy_path)
    audio_parser.parse()

    # Load the trained model and classify the parsed example.
    classifier = load_audio_classifier(model_folder)
    prediction = classifier.predict_audio(audio_parser)
    print(prediction)
# ----- Example #2 (separate pasted snippet begins below) -----
def classifyAudioLexical():
    """Run audio and lexical punctuation classifiers on one example and fuse them.

    Expects a POST request whose form supplies 'example', 'audio_folder' and
    'lexical_folder'. Writes the audio-only result to "<example>.result"
    beside the example data and returns the fused audio+lexical
    probabilities serialized as a JSON string.
    """
    # NOTE(review): `assert` is stripped when Python runs with -O; an explicit
    # method check with an error response would be more robust -- confirm intent.
    assert request.method == 'POST'
    # get example folder
    example_folder = os.path.join(route_folder, AUDIO_EXAMPLE_FOLDER, request.form['example'])
    ctm_file, pitch_file, energy_file = get_audio_files(example_folder)

    # parse ctm_file, pitch_file and energy_file
    # NOTE(review): pitch_file and energy_file are retrieved but never passed
    # to the parser here -- verify parse() obtains them some other way.
    parser = AudioParser()
    talks = parser.parse(ctm_file)

    # predict audio
    load_config(AUDIO_MODEL_FOLDER, request.form['audio_folder'])
    audio_probs = audio_classifier.predict(InputAudio(talks))

    # predict lexical
    load_config(LEXICAL_MODEL_FOLDER, request.form['lexical_folder'])
    input_text = InputText(talks)
    lexical_probs = lexical_classifier.predict(input_text)

    # get config parameter
    (lexical_window_size, lexical_punctuation_pos, pos_tagging) = lexical_classifier.get_lexical_parameter()
    (audio_window_size, audio_punctuation_pos) = audio_classifier.get_audio_parameter()

    # write audio results
    audio_classes = ["NONE", "PERIOD"]
    all_audio_probs = convert_probabilities(len(input_text.tokens), audio_punctuation_pos, audio_probs, audio_classes)
    file_name = os.path.join(route_folder, AUDIO_EXAMPLE_FOLDER, request.form['example'] + ".result")
    resultWriter = ResultWriter(audio_classes)
    resultWriter.writeToFile(file_name, input_text.tokens, all_audio_probs)

    # fusion
    # Combine the lexical and audio probability streams into one sequence.
    fusion = ThresholdFusion()
    fusion.init_parameters(lexical_punctuation_pos, lexical_window_size, audio_punctuation_pos, audio_window_size)
    fusion_probs = fusion.fuse(len(input_text.tokens), lexical_probs, audio_probs)

    # convert it into json
    jsonConverter = JsonConverter(lexical_punctuation_pos, lexical_window_size, audio_punctuation_pos, audio_window_size, pos_tagging)
    data = jsonConverter.convert_fusion(input_text.tokens, fusion_probs, lexical_probs, audio_probs)
    return json.dumps(data)
    # NOTE(review): everything from here on is UNREACHABLE -- it sits after the
    # `return` above yet is still indented inside classifyAudioLexical(). It
    # reads like a pasted command-line entry point for a fusion evaluation
    # script; confirm whether it should be deleted or moved into its own
    # module-level main() guarded by `if __name__ == "__main__":`.
    parser = argparse.ArgumentParser(description='evaluates the fusion.')
    # All positional arguments carry defaults (nargs='?'), so the script can
    # be invoked with no arguments at all.
    parser.add_argument('ctm_file', help="path to ctm_file", default="evaluation_data/data/tst2011_0.ctm", nargs='?')
    parser.add_argument('vectorfile', help='the google news word vector', default='evaluation_data/GoogleNews-vectors-negative300.bin', nargs='?')
    parser.add_argument('lexical_model_folder', help="path to lexical models", default="evaluation_data/lexical_models", nargs='?')
    parser.add_argument('audio_model_folder', help="path to audio models", default="evaluation_data/audio_models", nargs='?')
    parser.add_argument('--release', help="whether to test in release mode", action='store_true')
    args = parser.parse_args()

    # The word-vector file is only loaded in release mode; otherwise the
    # vector is left unset.
    if args.release:
        vector = Word2VecFile(args.vectorfile)
    else:
        vector = None

    # get all talks
    print("Reading all talks ...")
    audio_parser = AudioParser()
    talks = audio_parser.parse(args.ctm_file)


    # get all lexical models
    # Every subdirectory (at any depth) of the model folder counts as one model.
    lexical_models = []
    for dirname, dirnames, filenames in os.walk(args.lexical_model_folder):
        for subdirname in dirnames:
            lexical_models.append(os.path.join(dirname, subdirname))

    # get all audio models
    audio_models = []
    for dirname, dirnames, filenames in os.walk(args.audio_model_folder):
        for subdirname in dirnames:
            audio_models.append(os.path.join(dirname, subdirname))