Example 1

# These snippets share the following imports; config, grammarutils and
# datasets are project-local modules of the repository they come from.
import os
import time

import nltk
import numpy as np

import config
import datasets
import grammarutils

def test_valid():
    paths = config.Paths()
    grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad',
                                'stacking_objects.pcfg')

    # sentence = 'null reaching moving placing'
    # grammar = grammarutils.read_grammar(grammar_file, index=False)
    # test_earley(grammar, sentence.split())

    sentence = 'null reaching'
    tokens = sentence.split()
    grammar = grammarutils.read_grammar(
        grammar_file,
        index=True,
        mapping=datasets.cad_metadata.subactivity_index)
    seg_length = 15
    correct_prob = 0.8
    classifier_output = np.ones((seg_length * 2, 10)) * 1e-10
    first_label = datasets.cad_metadata.subactivity_index[tokens[0]]
    second_label = datasets.cad_metadata.subactivity_index[tokens[1]]
    # Each segment of frames is dominated by its ground-truth class...
    classifier_output[:seg_length, first_label] = correct_prob
    classifier_output[seg_length:, second_label] = correct_prob
    # ...with the leftover probability mass on a neighboring class.
    classifier_output[:seg_length, first_label + 1] = 1 - correct_prob
    classifier_output[seg_length:, second_label + 1] = 1 - correct_prob
    test_generalized_earley(grammar, classifier_output)
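
The test above hand-builds the frame-by-frame probability matrix: each segment of 15 frames is dominated by the class of its ground-truth token, with the leftover mass pushed onto a neighboring class. A small helper makes the same construction reusable for longer sentences; make_classifier_output and its parameters are hypothetical, not part of the project, and assume a subactivity_index-style token-to-class mapping as above.

import numpy as np

def make_classifier_output(tokens, mapping, num_classes=10,
                           seg_length=15, correct_prob=0.8):
    # One seg_length block of frames per token; near-zero probability
    # everywhere else, as in the hand-built matrix above.
    output = np.ones((len(tokens) * seg_length, num_classes)) * 1e-10
    for i, token in enumerate(tokens):
        rows = slice(i * seg_length, (i + 1) * seg_length)
        label = mapping[token]
        output[rows, label] = correct_prob
        # Leftover mass goes to a neighboring class, wrapping at the end.
        output[rows, (label + 1) % num_classes] = 1 - correct_prob
    return output

With such a helper the call above becomes test_generalized_earley(grammar, make_classifier_output(tokens, datasets.cad_metadata.subactivity_index)).
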
Example 2
def test_time():
    paths = config.Paths()
    start_time = time.time()
    np.random.seed(int(start_time))
    classifier_output = np.random.rand(100000, 10)
    classifier_output = classifier_output / np.sum(
        classifier_output, axis=1)[:, None]  # Normalize to probability
    for pcfg in os.listdir(os.path.join(paths.tmp_root, 'grammar', 'cad')):
        if not pcfg.endswith('.pcfg'):
            continue
        grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad', pcfg)
        grammar = grammarutils.read_grammar(
            grammar_file,
            index=True,
            mapping=datasets.cad_metadata.subactivity_index)
        test_generalized_earley(grammar, classifier_output)
    print('Time elapsed: {}s'.format(time.time() - start_time))
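
For per-grammar timings instead of one aggregate figure, time.perf_counter() is the clock intended for benchmarking (monotonic and higher resolution than time.time()). A minimal, generic wrapper; the name timed is hypothetical, not part of the project:

import time

def timed(fn, *args, **kwargs):
    # Run fn and return (result, elapsed wall-clock seconds).
    start = time.perf_counter()
    result = fn(*args, **kwargs)
    return result, time.perf_counter() - start

Inside the loop above this would be used as _, elapsed = timed(test_generalized_earley, grammar, classifier_output), printing one elapsed time per .pcfg file.
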
Example 3
def visualize_grammar():
    paths = config.Paths()
    dataset_name = 'wnp'
    for pcfg in os.listdir(
            os.path.join(paths.tmp_root, 'grammar', dataset_name)):
        if not pcfg.endswith('.pcfg'):
            continue
        grammar_file = os.path.join(paths.tmp_root, 'grammar', dataset_name,
                                    pcfg)
        grammar = grammarutils.read_grammar(grammar_file, insert=False)
        dot_filename = os.path.join(paths.tmp_root, 'visualize',
                                    'grammar', dataset_name,
                                    pcfg.replace('.pcfg', '.dot'))
        pdf_filename = os.path.join(paths.tmp_root, 'visualize',
                                    'grammar', dataset_name,
                                    pcfg.replace('.pcfg', '.pdf'))
        grammarutils.grammar_to_dot(grammar, dot_filename)
        os.system('dot -Tpdf {} -o {}'.format(dot_filename, pdf_filename))
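
os.system interpolates the file names into a shell command string, which breaks on paths containing spaces and silently discards a failing exit status from Graphviz. A sketch of a safer replacement for the last line, assuming the dot binary is on PATH; render_dot is a hypothetical helper, not part of the project:

import os
import subprocess

def render_dot(dot_filename, pdf_filename):
    # Make sure the output directory exists, then render the .dot to PDF.
    os.makedirs(os.path.dirname(pdf_filename), exist_ok=True)
    # An argument list avoids shell quoting; check=True raises on failure.
    subprocess.run(['dot', '-Tpdf', dot_filename, '-o', pdf_filename],
                   check=True)
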
Example 4
def test_grammar():
    paths = config.Paths()
    for pcfg in os.listdir(os.path.join(paths.tmp_root, 'grammar', 'cad')):
        if not pcfg.endswith('.pcfg'):
            continue
        grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad', pcfg)
        grammar = grammarutils.read_grammar(
            grammar_file,
            index=True,
            mapping=datasets.cad_metadata.subactivity_index)
        corpus_file = os.path.join(paths.tmp_root, 'corpus', 'cad',
                                   pcfg.replace('pcfg', 'txt'))
        with open(corpus_file, 'r') as f:
            for line in f:
                tokens = [
                    str(datasets.cad_metadata.subactivity_index[token])
                    for token in line.strip(' *#\n').split(' ')
                ]
                earley_parser = nltk.EarleyChartParser(grammar, trace=0)
                e_chart = earley_parser.chart_parse(tokens)
                print(e_chart.edges()[-1])
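
Example 4 depends on the project's grammar and corpus files, but the same nltk machinery can be exercised on a self-contained toy grammar; everything below is standard nltk API, only the grammar itself is made up:

import nltk

grammar = nltk.CFG.fromstring("""
S -> NP VP
NP -> 'the' N
VP -> V NP
N -> 'dog' | 'ball'
V -> 'chases'
""")
parser = nltk.EarleyChartParser(grammar, trace=0)
chart = parser.chart_parse('the dog chases the ball'.split())
print(chart.edges()[-1])           # last edge added to the chart
for tree in chart.parses(grammar.start()):
    print(tree)                    # complete parse tree(s), if any

Printing the last edge, as the test above does, is a quick way to eyeball whether parsing completed; chart.parses() is the more direct check for a full-span parse.
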