def test_valid():
    """Sanity-check the generalized Earley parser on a short CAD sequence.

    Builds a synthetic classifier output where the ground-truth subactivity
    receives high probability in each of two equal-length segments (the
    remaining mass goes to a distractor label), then runs
    test_generalized_earley on it.
    """
    paths = config.Paths()
    grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad',
                                'stacking_objects.pcfg')
    sentence = 'null reaching'
    tokens = sentence.split()
    grammar = grammarutils.read_grammar(
        grammar_file, index=True,
        mapping=datasets.cad_metadata.subactivity_index)

    seg_length = 15      # frames per segment
    correct_prob = 0.8   # probability assigned to the ground-truth label
    # Near-zero base probability everywhere; boost the true label in each
    # segment and put the remaining mass on an adjacent distractor label.
    classifier_output = np.ones((seg_length * 2, 10)) * 1e-10
    first_idx = datasets.cad_metadata.subactivity_index[tokens[0]]
    second_idx = datasets.cad_metadata.subactivity_index[tokens[1]]
    classifier_output[:seg_length, first_idx] = correct_prob
    classifier_output[seg_length:, second_idx] = correct_prob
    classifier_output[:seg_length, first_idx + 1] = 1 - correct_prob
    classifier_output[seg_length:, second_idx + 1] = 1 - correct_prob
    test_generalized_earley(grammar, classifier_output)
def test_time():
    """Benchmark the generalized Earley parser over every CAD grammar.

    Feeds a large random (row-normalized) classifier output through each
    .pcfg grammar found under tmp_root/grammar/cad and reports wall time.
    """
    paths = config.Paths()
    start_time = time.time()
    np.random.seed(int(start_time))
    # Random per-frame distribution over 10 classes, normalized row-wise
    # so each frame is a valid probability vector.
    classifier_output = np.random.rand(100000, 10)
    classifier_output = classifier_output / np.sum(
        classifier_output, axis=1)[:, None]
    grammar_dir = os.path.join(paths.tmp_root, 'grammar', 'cad')
    for pcfg in os.listdir(grammar_dir):
        if not pcfg.endswith('.pcfg'):
            continue
        grammar = grammarutils.read_grammar(
            os.path.join(grammar_dir, pcfg), index=True,
            mapping=datasets.cad_metadata.subactivity_index)
        test_generalized_earley(grammar, classifier_output)
    print('Time elapsed: {}s'.format(time.time() - start_time))
def visualize_grammar():
    """Render each .pcfg grammar of the dataset as a PDF via Graphviz dot.

    Reads grammars from tmp_root/grammar/<dataset> and writes .dot / .pdf
    pairs under tmp_root/visualize/grammar/<dataset>.
    """
    paths = config.Paths()
    dataset_name = 'wnp'
    grammar_dir = os.path.join(paths.tmp_root, 'grammar', dataset_name)
    output_dir = os.path.join(paths.tmp_root, 'visualize', 'grammar',
                              dataset_name)
    # Ensure the output directory exists before any files are written
    # (py2-compatible check; no makedirs(exist_ok=...) available).
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for pcfg in os.listdir(grammar_dir):
        if not pcfg.endswith('.pcfg'):
            continue
        grammar_file = os.path.join(grammar_dir, pcfg)
        grammar = grammarutils.read_grammar(grammar_file, insert=False)
        dot_filename = os.path.join(output_dir, pcfg.replace('.pcfg', '.dot'))
        pdf_filename = os.path.join(output_dir, pcfg.replace('.pcfg', '.pdf'))
        grammarutils.grammar_to_dot(grammar, dot_filename)
        # NOTE(review): shells out to Graphviz; assumes 'dot' is on PATH.
        os.system('dot -Tpdf {} -o {}'.format(dot_filename, pdf_filename))
def test_grammar():
    """Parse every CAD corpus sentence with its matching grammar.

    For each .pcfg under tmp_root/grammar/cad, reads the same-named corpus
    file, maps subactivity tokens to string-encoded indices, runs nltk's
    Earley chart parser, and prints the final chart edge per sentence.
    """
    paths = config.Paths()
    for pcfg in os.listdir(os.path.join(paths.tmp_root, 'grammar', 'cad')):
        if not pcfg.endswith('.pcfg'):
            continue
        grammar_file = os.path.join(paths.tmp_root, 'grammar', 'cad', pcfg)
        grammar = grammarutils.read_grammar(
            grammar_file, index=True,
            mapping=datasets.cad_metadata.subactivity_index)
        corpus_file = os.path.join(paths.tmp_root, 'corpus', 'cad',
                                   pcfg.replace('pcfg', 'txt'))
        with open(corpus_file, 'r') as f:
            for line in f:
                # Map each subactivity token to its string-encoded index,
                # stripping corpus markers (* and #) plus the newline.
                tokens = [
                    str(datasets.cad_metadata.subactivity_index[token])
                    for token in line.strip(' *#\n').split(' ')
                ]
                earley_parser = nltk.EarleyChartParser(grammar, trace=0)
                e_chart = earley_parser.chart_parse(tokens)
                # Single-argument parenthesized print is valid under both
                # Python 2 and Python 3 (original py2-only print statement).
                print(e_chart.edges()[-1])