def test_start(ace_mismatch, tmp_path, monkeypatch):
    """Check that a failed ACE start-up surfaces as ``ACEProcessError``.

    ``ace.Popen`` is swapped for the object built by ``mock_popen`` (return
    code 255, the ``ace_mismatch`` fixture text as its stderr stream) and
    ``ace._ace_version`` is pinned to ``(0, 9, 29)``.  Under those patches
    both constructing an ``ACEParser`` and calling ``ace.parse`` on an empty
    grammar file are expected to raise.
    """
    fake_popen = mock_popen(
        pid=10,
        returncode=255,
        stdout=io.StringIO(),
        stderr=io.StringIO(ace_mismatch),
    )

    # An empty placeholder file stands in for the grammar image.
    grammar_file = tmp_path / 'grm.dat'
    grammar_file.write_text('')
    grammar_path = str(grammar_file)

    with monkeypatch.context() as patcher:
        patcher.setattr(ace, 'Popen', fake_popen)
        patcher.setattr(ace, '_ace_version', lambda x: (0, 9, 29))

        with pytest.raises(ace.ACEProcessError):
            ace.ACEParser(grammar_path)

        with pytest.raises(ace.ACEProcessError):
            ace.parse(grammar_path, 'Dogs sleep.')
def full_parse(sent, selected_grammar, max_parses):
    """Parse *sent* with ACE and serialise every reading that comes back.

    Args:
        sent: The sentence handed to ``parser.interact``.
        selected_grammar: Grammar image file name; it is appended to
            ``'delphin/'`` and resolved relative to ``ROOT``.
        max_parses: Either the string ``'max'`` or any value accepted by
            ``int()`` (e.g. ``'5'`` or ``5``).  ``'max'`` and every value
            above 50 are capped at 50 readings.

    Returns:
        A mapping from reading index (``0 .. n-1``) to a dict with the keys
        ``'deriv_json'``, ``'mrs_json'``, ``'mrs_simplemrs'``,
        ``'dmrs_json'`` (``False`` when the MRS is not well formed) and
        ``'errors'`` (the value returned by ``check_nodes``).  The mapping is
        empty when ACE produced no results.

    Raises:
        ValueError, TypeError: Propagated from ``int(max_parses)`` when the
            value is neither ``'max'`` nor convertible to an integer.
    """
    results = dd(lambda: dd())

    ###################################################################
    # ACE cmdargs (currently only the number of parses varies).
    # Per the original author's note the value can come from HTML/JS
    # source, so it is capped at 50 and always passed on as a string.
    ###################################################################
    if max_parses == 'max':
        n_readings = 50
    else:
        n_readings = min(int(max_parses), 50)

    ace_cmdargs = [
        '-n', str(n_readings),
        '--timeout=20',
        '--rooted-derivations',
        '--udx',
        '--max-chart-megabytes=3000',
        '--max-unpack-megabytes=3000',
    ]

    ###################################################################
    # To silence ACE we need to give it a file to stream its own stderr.
    # NOTE(review): `ace_stderr` is not defined inside this function;
    # presumably it is a module-level file object -- confirm.
    ###################################################################
    with ace.ACEParser(path.join(ROOT, 'delphin/' + selected_grammar),
                       executable=path.join(ROOT, ACE),
                       cmdargs=ace_cmdargs,
                       stderr=ace_stderr) as parser:

        erg_parse = parser.interact(sent)

        if erg_parse['results']:
            for n in range(len(erg_parse['results'])):
                reading = erg_parse.result(n)

                deriv = reading.derivation()
                deriv_json = json.dumps(deriv.to_dict())

                mrs = reading.mrs()
                mrs_json = mrsjson.encode(mrs)
                mrs_simplemrs = simplemrs.encode(mrs)

                ########################################################
                # The DMRS conversion was breaking too often, throwing
                # KeyErrors for handles, so the MRS is checked for
                # well-formedness before it is converted.
                ########################################################
                if delphin.mrs.is_well_formed(mrs):
                    dmrs = delphin.dmrs.from_mrs(mrs)
                    dmrs_json = dmrsjson.encode(dmrs)
                else:
                    dmrs_json = False

                sent_struct = sent_leaf_ids(deriv)
                errors = check_nodes(deriv, [], sent_struct)

                # Store everything only once the whole reading has been
                # processed, so a failure above leaves no partial entry.
                results[n].update({
                    'deriv_json': deriv_json,
                    'mrs_json': mrs_json,
                    'mrs_simplemrs': mrs_simplemrs,
                    'dmrs_json': dmrs_json,
                    'errors': errors,
                })

    return results
def check_sents(sent_list):
    """
    Parse every sentence in *sent_list* and collect its error tags.

    Two ACE parsers are opened: one on ``ERG`` and one on ``MAL_ERG`` (the
    latter with ``--udx``).  The double-parsing method (try ``ERG`` first,
    fall back to ``MAL_ERG``) is switched off through the local flag
    ``DOUBLE_GRAMMAR_PARSE``; while it is ``False`` every sentence is treated
    as if ``ERG`` had produced no parse and goes straight to ``MAL_ERG``.

    Returns a list with one ``(sentence, tags)`` pair per input sentence:

        [(sent1, [error1.1, error1.2]), (sent2, [error2.1, error2.2])]

    where ``tags`` is

    * the value returned by ``check_nodes`` when ``MAL_ERG`` found a parse,
    * ``[('NoParse', '')]`` when it did not,
    * ``[(sf, '')]`` when ``ERG`` parsed the sentence and its ``SF``
      property is not ``'prop'`` (``'noSF-MRSerror'`` if it could not be
      read), or
    * ``[]`` when ``ERG`` parsed the sentence as a proposition.
    """
    ###################################################################
    # We are eliminating the double parsing method for now.  This is
    # the same as always assuming that ERG produces no parse.  The
    # decision is reversible by setting DOUBLE_GRAMMAR_PARSE = True.
    ###################################################################
    DOUBLE_GRAMMAR_PARSE = False

    erg_results = []

    with ace.ACEParser(path.join(ROOT, ERG),
                       executable=path.join(ROOT, ACE),
                       cmdargs=['-1', '--timeout=20',
                                '--max-chart-megabytes=3000',
                                '--max-unpack-megabytes=3000']) as parser, \
            ace.ACEParser(path.join(ROOT, MAL_ERG),
                          executable=path.join(ROOT, ACE),
                          cmdargs=['-1', '--timeout=20', '--udx',
                                   '--max-chart-megabytes=3000',
                                   '--max-unpack-megabytes=3000']) as mal:

        for sent in sent_list:
            if DOUBLE_GRAMMAR_PARSE:
                erg_parse = parser.interact(sent)
            else:
                erg_parse = {'results': None}

            if not erg_parse['results']:  # if there were no parses
                mal_result = mal.interact(sent)

                if mal_result['results']:
                    # The mal-grammar got a parse: decode its top
                    # derivation once and reuse it for both helpers.
                    deriv = mal_result.result(0).derivation()
                    sent_struct = sent_leaf_ids(deriv)
                    error_tags = check_nodes(deriv, [], sent_struct)
                    # NOTE: a loop that joined list-valued tags with ':'
                    # used to sit here; it was already disabled and has
                    # been dropped.
                    erg_results.append((sent, error_tags))
                else:
                    # Only a general NoParse tag can be given.
                    erg_results.append((sent, [('NoParse', '')]))

            else:
                # Check for Mood (Imperative and Interrogative).
                try:
                    mrs = erg_parse.result(0).mrs()
                    sf = mrs.properties(mrs.index)['SF']
                except Exception:
                    # Best effort: report the sentence and carry on, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    print("MRS ERROR: " + sent, file=sys.stderr)
                    sf = 'noSF-MRSerror'

                if sf != 'prop':
                    erg_results.append((sent, [(sf, '')]))
                else:
                    # Propositions are good.
                    erg_results.append((sent, []))

    return erg_results
def check_sents(sent_list):
    """
    Given a list of sentences, try to parse each one with the default ERG
    and, if that fails, parse the same input with the ERG enhanced with
    mal-rules.

    Returns a list with one ``(sentence, tags)`` pair per input sentence:

        [(sent1, [error1.1, error1.2]), (sent2, [error2.1, error2.2])]

    where ``tags`` is

    * ``[]`` when the ERG parsed the sentence and its ``SF`` property is
      ``'prop'``,
    * ``[(sf, '')]`` when the ERG parsed it with any other ``SF`` value
      (``sf`` is ``[]`` if the property could not be read),
    * the value returned by ``check_nodes`` when only the mal-rule grammar
      found a parse, or
    * ``[('NoParse', '')]`` when neither grammar did.
    """
    erg_results = []

    with ace.ACEParser(path.join(ROOT, ERG),
                       executable=path.join(ROOT, ACE),
                       cmdargs=['-1', '--timeout=10']) as parser, \
            ace.ACEParser(path.join(ROOT, MAL_ERG),
                          executable=path.join(ROOT, ACE),
                          cmdargs=['-1', '--timeout=10', '--udx']) as mal:

        for sent in sent_list:
            erg_parse = parser.interact(sent)

            if not erg_parse['results']:  # if there were no parses
                mal_result = mal.interact(sent)

                if mal_result['results']:
                    # The mal-grammar got a parse.
                    error_tags = check_nodes(
                        mal_result.result(0).derivation(), [])
                    # NOTE: a loop used to follow that rebound its own
                    # loop variable to a ':'-joined string for list-valued
                    # tags.  It never modified `error_tags`, so it has
                    # been removed; the tags are passed through unchanged.
                    erg_results.append((sent, error_tags))
                else:
                    # Only a general NoParse tag can be given.
                    erg_results.append((sent, [('NoParse', '')]))

            else:
                # Check for Mood (Imperative and Interrogative).
                try:
                    mrs = erg_parse.result(0).mrs()
                    sf = mrs.properties(mrs.index)['SF']
                except Exception:
                    # Best effort: report the sentence and carry on, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    print("MRS ERROR: " + sent, file=sys.stderr)
                    sf = []

                if sf != 'prop':
                    erg_results.append((sent, [(sf, '')]))
                else:
                    # Propositions are good.
                    erg_results.append((sent, []))

    return erg_results
from delphin import ace
from delphin import tsdb
from delphin import itsdb

# Open the test suite stored under 'sample-200-py'.
ts = itsdb.TestSuite('sample-200-py')

# Start ACE on the 'terg-mac.dat' grammar image in full-forest mode and let
# the test suite process its items with it; the parser is closed when the
# `with` block exits.
with ace.ACEParser(
    'terg-mac.dat',
    cmdargs=['--disable-generalization'],
    full_forest=True,
) as cpu:
    ts.process(cpu)