def main(in_dataset):
    # Hough and Schlangen 2015 config
    disf = DeepDisfluencyTagger(
        config_file=
        "deep_disfluency/deep_disfluency/experiments/experiment_configs.csv",
        config_number=21,
        saved_model_dir=
        "deep_disfluency/deep_disfluency/experiments/021/epoch_40")
    for idx, row in in_dataset.iterrows():
        print ' '.join(row['utterance'])
        tagger_input = [(word, pos, None)
                        for word, pos in zip(row['utterance'], row['pos'])]
        import pdb
        pdb.set_trace()
        print ' '.join(disf.tag_utterance(tagger_input))
コード例 #2
0
def tag_words(txt_file):
    """Incrementally tag the words of a plain-text file.

    Each line of *txt_file* is split into sentences on '.', each sentence
    is whitespace-tokenised, and every token is fed to the tagger one at
    a time.  Returns the tagger's accumulated output tags.
    """
    disf = DeepDisfluencyTagger(
        config_file="../deep_disfluency/experiments/experiment_configs.csv",
        config_number=21,
        saved_model_dir="../deep_disfluency/experiments/021/epoch_40")
    with open(txt_file, "r") as f:
        for raw_line in f:
            for sentence in raw_line.split("."):
                for token in sentence.split():
                    disf.tag_new_word(token)

    return disf.output_tags
def main(in_dataset, in_mode):
    # Hough and Schlangen 2015 config
    disf = DeepDisfluencyTagger(
        config_file=
        "deep_disfluency/deep_disfluency/experiments/experiment_configs.csv",
        config_number=21,
        saved_model_dir=
        "deep_disfluency/deep_disfluency/experiments/021/epoch_40")
    for key, value in eval_babi(disf, in_dataset).iteritems():
        print '{}:\t{:.3f}'.format(key, value)
コード例 #4
0
from deep_disfluency.tagger.deep_tagger import DeepDisfluencyTagger

# Initialize the tagger from the config file with a config number
# and saved model directory
MESSAGE = """1. Disfluency tagging on pre-segmented utterances
tags repair structure incrementally and other edit terms <e/>
(Hough and Schlangen Interspeech 2015 with an RNN)
"""
print MESSAGE
disf = DeepDisfluencyTagger(config_file="experiments/experiment_configs.csv",
                            config_number=21,
                            saved_model_dir="experiments/021/epoch_40")

# Tag each word incrementally
# Notice the incremental diff
# Set diff_only to False if you want the whole utterance's tag each time
with_pos = False
print "tagging..."
if with_pos:
    # if POS is provided use this:
    print disf.tag_new_word("john", pos="NNP")
    print disf.tag_new_word("likes", pos="VBP")
    print disf.tag_new_word("uh", pos="UH")
    print disf.tag_new_word("loves", pos="VBP")
    print disf.tag_new_word("mary", pos="NNP")
else:
    # else the internal POS tagger tags the words incrementally
    print disf.tag_new_word("john")
    print disf.tag_new_word("likes")
    print disf.tag_new_word("uh")
    print disf.tag_new_word("loves")
コード例 #5
0
                    word_graph[
                        new_output_start:],
                    self.disf.get_output_tags(with_words=False)[
                        new_output_start:],
                    range(
                        new_output_start, len(word_graph))
                        ):
                    print w, h, i
                    if ("<e" in h or "<rps" in h) and \
                            i not in self.words_with_disfluency:
                        self.label.config(text=str(self.counter.next()))
                        self.master.update()
                        self.words_with_disfluency.append(i)
            except:
                print "FAILED TO UPDATE"

        self.master.update()
        self.asr = IBMWatsonASR("credentials.json",
                                new_word_hypotheses_handler)
        self.asr.listen()

if __name__ == '__main__':
    # Config 35: model variant that also consumes word-timing data.
    tagger = DeepDisfluencyTagger(
        config_file="deep_disfluency/experiments/experiment_configs.csv",
        config_number=35,
        saved_model_dir="deep_disfluency/experiments/035/epoch_6",
        use_timing_data=True,
    )
    tk_root = Tk()
    gui = DisfluencyGUI(tk_root, tagger)
コード例 #6
0
ファイル: EACL_2017.py プロジェクト: zpppy/deep_disfluency
    print 'Finished extracting features.'

# 4. Train the model on the transcripts (and audio data if available)
# NB each of these experiments can take up to 24 hours
systems_best_epoch = {}
if train_models:
    feature_matrices_filepath = THIS_DIR + '/../data/disfluency_detection/' + \
        'feature_matrices/train'
    validation_filepath = THIS_DIR + '/../data/disfluency_detection/' + \
        'feature_matrices/heldout'
    # train until convergence
    # on the settings according to the numbered experiments in
    # experiments/config.csv file
    for exp in experiments:
        disf = DeepDisfluencyTagger(config_file=THIS_DIR +
                                    "/experiment_configs.csv",
                                    config_number=exp)
        exp_str = '%03d' % exp
        e = disf.train_net(train_dialogues_filepath=feature_matrices_filepath,
                           validation_dialogues_filepath=validation_filepath,
                           model_dir=THIS_DIR + '/' + exp_str,
                           tag_accuracy_file_path=THIS_DIR +
                           '/results/tag_accuracies/{}.text'.format(exp_str))
        systems_best_epoch[exp] = e
else:
    # 33 RNN simple tags, disf + utt joint
    # 34 RNN complex tags, disf + utt joint
    # 35 LSTM simple tags, disf + utt joint
    # 36 LSTM complex tags, disf + utt joint
    # 37 LSTM simple tags, disf only
    # 38 LSTM simple tags, utt only
コード例 #7
0
ファイル: EACL_2017.py プロジェクト: aascode/deep_disfluency
    # (38, 8, 'LSTM (TTO only)'),
    # (39, 2, 'LSTM (complex tags)')

# 4. Test the models on the test transcripts according to the best epochs
# from training.
# The output from the models is made in the folders
# For now all use timing data

if test_models:
    print "testing models..."
    for exp, best_epoch in sorted(systems_best_epoch.items(), lambda x: x[0]):
        exp_str = '%03d' % exp
        # load the model
        disf = DeepDisfluencyTagger(
            config_file=THIS_DIR + '/experiment_configs.csv',
            config_number=exp,
            saved_model_dir=THIS_DIR +
            '/{0}/epoch_{1}'.format(exp_str, best_epoch))
        # simulating (or using real) ASR results
        # for now just saving these in the same folder as the best epoch
        # also outputs the speed
        partial_string = '_partial' if partial else ''
        disf.incremental_output_from_file(
            THIS_DIR + '/../data/disfluency_detection/switchboard/' +
            'swbd_disf_heldout{}_data_timings.csv'.format(partial_string),
            target_file_path=THIS_DIR +
            '/{0}/epoch_{1}/'.format(exp_str, best_epoch) +
            'heldout_output_increco.text')

# 5. To get the numbers run the notebook:
# experiments/analysis/EACL_2017/EACL_2017.ipynb