def test_pqa_english_noamb_diled_no_generalization(self):
    """POC-English-NoAmb corpus, discrete word space ("DILEd"), generalization off.

    Learns a grammar from manually fixed MST parses, then checks
    parse-ability * recall against the gold-standard parses.
    """
    parses = module_path + '/tests/data/POC-English-NoAmb/MST-fixed-manually/'
    batch = module_path + '/output/test_grammar_learner_' + str(UTC())[:10]
    project = batch + '/noamb_pqa_diled_no_generalization/'
    if check_dir(project, create=True, verbose='max'):
        out_dir = project
    # Test corpus and gold-standard reference parses:
    corpus = module_path + '/data/POC-English-NoAmb/poc_english_noamb_corpus.txt'
    reference = parses + '/poc-english_noAmb-parses-gold.txt'
    settings = {
        'input_parses': parses,
        'output_grammar': out_dir,
        'left_wall': '',
        'period': False,
        'context': 2,
        'word_space': 'discrete',
        'dim_reduction': 'none',
        'clustering': 'group',
        'grammar_rules': 2,
        'categories_generalization': 'off',
        'rules_generalization': 'off',
        'tmpath': module_path + '/tmp/',
        'linkage_limit': 1000,
        'verbose': 'min',
    }
    result = learn_grammar(**settings)
    pa, f1, precision, recall = pqa_meter(
        result['grammar_file'], out_dir, corpus, reference, **settings)
    self.assertTrue(pa * recall > 0.99,
                    str(pa) + " * " + str(recall) + " > 0.99")
def test_pqa_turtle_ddrkd_no_generalization(self):
    """POC-Turtle corpus, vectors + SVD + K-means ("dDRKd"), generalization off.

    Learns a grammar from manually fixed MST parses, then checks
    parse-ability * recall against the gold-standard parses.
    """
    parses = module_path + '/tests/data/POC-Turtle/MST-fixed-manually/'
    batch = module_path + '/output/test_grammar_learner_' + str(UTC())[:10]
    project = batch + '/turtle_pqa_ddrkd_no_generalization/'
    if check_dir(project, create=True, verbose='max'):
        out_dir = project
    # Test corpus and gold-standard reference parses:
    corpus = module_path + '/tests/data/POC-Turtle/poc-turtle-corpus.txt'
    reference = parses + '/poc-turtle-parses-gold.txt'
    settings = {
        'input_parses': parses,
        'output_grammar': out_dir,
        'left_wall': '',
        'period': False,
        'context': 2,
        'word_space': 'vectors',
        'dim_reduction': 'svd',
        'clustering': ('kmeans', 'kmeans++', 18),
        # alternative previously tried: 'cluster_range': (2, 50, 9)
        'cluster_range': (20, 2, 9),
        'grammar_rules': 2,
        'categories_generalization': 'off',
        'rules_generalization': 'off',
        'tmpath': module_path + '/tmp/',
        'linkage_limit': 1000,
        'verbose': 'min',
    }
    result = learn_grammar(**settings)
    pa, f1, precision, recall = pqa_meter(
        result['grammar_file'], out_dir, corpus, reference, **settings)
    self.assertTrue(pa * recall > 0.99,
                    str(pa) + " * " + str(recall) + " > 0.99")
def test_pqa_turtle_diled_no_generalization(self):
    """POC-Turtle corpus, discrete word space ("DILEd"), generalization off.

    Learns a grammar from manually fixed MST parses, then checks
    parse-ability * recall against the gold-standard parses.
    """
    input_parses = module_path + '/tests/data/POC-Turtle/MST-fixed-manually'
    batch_dir = module_path + '/output/test_grammar_learner_' + str(UTC())[:10]
    prj_dir = batch_dir + '/turtle_pqa_diled_no_generalization/'
    if check_dir(prj_dir, create=True, verbose='max'):
        outpath = prj_dir
    # cp,rp :: (test) corpus_path and reference_path:
    cp = module_path + '/tests/data/POC-Turtle/poc-turtle-corpus.txt'
    rp = input_parses + '/poc-turtle-parses-gold.txt'
    kwargs = {
        'input_parses': input_parses,
        'output_grammar': outpath,
        'left_wall': '',
        'period': False,
        'context': 2,
        'word_space': 'discrete',
        'dim_reduction': 'none',
        'clustering': 'group',
        'grammar_rules': 2,
        'categories_generalization': 'off',
        'rules_generalization': 'off',
        'tmpath': module_path + '/tmp/',
        'linkage_limit': 1000,
        'verbose': 'min'
    }
    re = learn_grammar(**kwargs)
    # pa, f1, precision, recall: <float> 0.0 - 1.0
    pa, f1, precision, recall = pqa_meter(re['grammar_file'], outpath,
                                          cp, rp, **kwargs)
    self.assertTrue(pa * recall > 0.99,
                    str(pa) + " * " + str(recall) + " > 0.99")
def main(argv):
    """Grammar Tester CLI entry point.

    Usage: python tstr.py config.json

    Loads test settings from the JSON config, runs pqa_meter on the
    configured input grammar, and writes stats to the output directory.
    Exits via sys.exit() on usage errors or missing configuration.
    """
    print('\nGrammar Tester v.' + __version__, 'started', UTC(),
          '| Python v.' + platform.python_version(), '\n')
    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        print('''Usage: tstr <json-config-file>''')
        sys.exit()
    # FIX: getopt returns (option, value) pairs; comparing the bare tuple
    # against '-h' never matched, so the help flag was silently ignored.
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            print('''Usage: tstr <json-config-file>''')
            sys.exit()
    config_json = args[0]
    with open(config_json) as f:
        kwargs = json.load(f)
    re = {}  # run log / results dictionary
    if 'input_grammar' in kwargs:  # Test .dict file  # 90123
        ig = module_path + kwargs['input_grammar']
        og = module_path + kwargs['out_path']    # og: output grammar
        rp = module_path + kwargs['reference']   # rp: reference path
        if 'test_corpus' in kwargs:
            cp = module_path + kwargs['test_corpus']  # cp: corpus path
        else:
            cp = rp  # test corpus path = reference parses path
        print('Input grammar:', ig, '\nOutput directory:', og)
        if check_dir(og, True, 'max'):
            print('Grammar test started', UTC(), '\n')
            start = time.time()
            a, f1, precision, q = pqa_meter(ig, og, cp, rp, **kwargs)
            re.update({'grammar_test_time': sec2string(time.time() - start)})
        else:
            print('Output path error:', og)
            # FIX: without exiting here, the final print below would
            # raise NameError on the undefined a / q / f1.
            sys.exit()
    else:
        print('Please set "input grammar" in config.json')
        sys.exit()
    stats = []
    if 'grammar_test_time' in re:
        stats.append(['Grammar test time ', re['grammar_test_time']])
    if len(stats) > 0:
        list2file(stats, og + '/test_stats.txt')
        copy(config_json, og)
    print('\nGrammar learning and the learned grammar test ended', UTC())
    print(f'PA = {int(round(a*100,0))}%, PQ = {int(round(q*100,0))}%, '
          f'F1 = {round(f1,2)}')
def test_pqa_english_noamb_ddrkd_no_generalization(self):
    """POC-English-NoAmb corpus, vectors + SVD + K-means ("dDRKd"), generalization off.

    Learns a grammar from manually fixed MST parses, then checks
    parse-ability * recall against the gold-standard parses.
    K-means is stochastic, so the learn/test cycle is retried (up to 25
    times) while the score stays near zero.
    """
    parses = module_path + '/tests/data/POC-English-NoAmb/MST-fixed-manually/'
    batch = module_path + '/output/test_grammar_learner_' + str(UTC())[:10]
    project = batch + '/noamb_pqa_ddrkd_no_generalization/'
    if check_dir(project, create=True, verbose='max'):
        out_dir = project
    # Test corpus and gold-standard reference parses:
    corpus = module_path + '/data/POC-English-NoAmb/poc_english_noamb_corpus.txt'
    reference = parses + '/poc-english_noAmb-parses-gold.txt'
    settings = {
        'input_parses': parses,
        'output_grammar': out_dir,
        'left_wall': '',
        'period': False,
        'context': 2,
        'word_space': 'vectors',
        'dim_reduction': 'svd',
        'clustering': ('kmeans', 'kmeans++', 18),
        'cluster_range': (12, 12, 5),
        'grammar_rules': 2,
        'categories_generalization': 'off',
        'rules_generalization': 'off',
        'tmpath': module_path + '/tmp/',
        'linkage_limit': 1000,
        'verbose': 'min',
    }
    # Sometimes pqa_meter (with test_grammar updated 2018-10-19) returns
    # pa, recall = 0, 0.
    # FIXME: check with further test_grammar updates and delete.
    score = 0.
    attempts = 0
    while score < 0.1:
        result = learn_grammar(**settings)
        pa, f1, precision, recall = pqa_meter(
            result['grammar_file'], out_dir, corpus, reference, **settings)
        print(
            f'\nnoAmb dDRKd: pa {round(pa,3)}, f1 {round(f1,3)}, precision {round(precision,3)}, recall {round(recall,3)} \n'
        )
        score = pa * recall
        attempts += 1
        if attempts > 24:
            break
    self.assertTrue(pa * recall > 0.99,
                    str(pa) + " * " + str(recall) + " > 0.99")
def main(argv):
    """Grammar Learner + Tester pipeline CLI entry point.

    Usage: python ppln.py config.json

    Loads settings from the JSON config, resolves input/output paths
    (paths containing '/home/' are treated as absolute, others as
    relative to module_path), runs the Grammar Learner, and — when
    'linkage_limit' > 0 — tests the learned grammar with pqa_meter.
    Stats, the config copy, and a JSON log are written to the project
    directory. Exits via sys.exit() on usage or configuration errors.
    """
    print('\nGrammar Learner + Tester ppln v.' + __version__, 'started', UTC(),
          '| Python v.' + platform.python_version(), '\n')
    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        print('''Usage: ppln <json-config-file>''')
        sys.exit()
    # FIX: getopt returns (option, value) pairs; comparing the bare tuple
    # against '-h' never matched, so the help flag was silently ignored.
    for opt, _ in opts:
        if opt in ('-h', '--help'):
            print('''Usage: ppln <json-config-file>''')
            sys.exit()
    config_json = args[0]
    with open(config_json) as f:
        kwargs = json.load(f)
    corpus = kwargs.pop('corpus')
    dataset = kwargs.pop('dataset')
    # Resolve input_parses: default location, or prefix relative paths.
    if 'input_parses' not in kwargs:
        kwargs['input_parses'] = module_path + '/data/' + corpus + '/' + dataset
    elif '/home/' not in kwargs['input_parses']:
        # '/home/' marks an absolute path and is kept as-is;
        # anything else is relative to module_path.
        kwargs['input_parses'] = module_path + kwargs['input_parses']
    if 'output_grammar' not in kwargs:
        if 'out_path' in kwargs:
            if '/home/' in kwargs['out_path']:
                kwargs['output_grammar'] = kwargs['out_path']
            else:
                kwargs['output_grammar'] = module_path + kwargs['out_path']
        else:
            print('Please set "output_grammar" or "out_path" in config.json')
            sys.exit()
    # Resolve the temporary directory; fall back to '' when unset,
    # empty, or not creatable.
    if 'tmpath' not in kwargs or len(kwargs['tmpath']) == 0:
        kwargs['tmp_dir'] = ''
    else:
        if 'home' in kwargs['tmpath']:
            tmpath = kwargs['tmpath']
        else:
            tmpath = module_path + kwargs['tmpath']
        kwargs['tmp_dir'] = tmpath if check_dir(tmpath, True, 'none') else ''
    rules, re = learn(**kwargs)
    if 'error' in re:
        print('Grammar Learner error log:\n', re)
        sys.exit()
    if kwargs['linkage_limit'] > 0:  # 0 disables the grammar test
        og = module_path + kwargs['out_path']
        rp = module_path + kwargs['reference']
        if 'test_corpus' in kwargs:
            cp = module_path + kwargs['test_corpus']
        else:
            cp = rp  # test corpus path = reference parses path
        start = time.time()
        a, f1, precision, q = pqa_meter(re['grammar_file'], og, cp, rp,
                                        **kwargs)
        re.update({'grammar_test_time': sec2string(time.time() - start)})
    stats = []
    if 'grammar_learn_time' in re:
        stats.append(['Grammar learn time', re['grammar_learn_time']])
    if 'grammar_test_time' in re:
        stats.append(['Grammar test time ', re['grammar_test_time']])
    if len(stats) > 0:
        x = re['corpus_stats_file']
        list2file(stats, x[:x.rfind('/')] + '/learn_&_test_stats.txt')
    copy(config_json, re['project_directory'])
    with open(re['project_directory'] + '/grammar_learner_log.json', 'w') as f:
        f.write(json.dumps(re))
    print('\nGrammar learning and the learned grammar test ended', UTC())
    print(test_stats(re))
    print('Output directory:', re['project_directory'], '\n')