def test_convert_file():
    """Convert the sample parse file and check the first line of the output.

    The sample path is relative, so it is resolved against the current
    working directory.
    """
    # NOTE(review): a later definition with the same name is visible in this
    # source; if both live in one module, only the later one is collected.
    expected_first_row = '1\tPierre\tpierre\tNNP\t_\t2\tcompound\t_\t_\t_'
    sample_path = os.path.abspath('sample/wsj_0001.parse')

    result = SubprocessConverter().convert_file(sample_path, debug=True)

    # The conversion must yield something truthy before its content is inspected.
    assert result
    first_row = result.partition('\n')[0]
    assert first_row == expected_first_row
def test_convert_trees():
    """Convert two bracketed parse trees and check the result.

    Verifies that one output entry is produced per input tree and that the
    first line of the first entry matches the expected tab-separated row.
    """
    # NOTE(review): a later definition with the same name is visible in this
    # source; if both live in one module, only the later one is collected.
    first_tree = '(TOP (S (NP-SBJ (NP (NNP Pierre) (NNP Vinken)) (, ,) (ADJP (NML (CD 61) (NNS years)) (JJ old)) (, ,)) (VP (MD will) (VP (VB join) (NP (DT the) (NN board)) (PP-CLR (IN as) (NP (DT a) (JJ nonexecutive) (NN director))) (NP-TMP (NNP Nov.) (CD 29)))) (. .)))'
    second_tree = '(TOP (S (NP-SBJ (NNP Mr.) (NNP Vinken)) (VP (VBZ is) (NP-PRD (NP (NN chairman)) (PP (IN of) (NP (NP (NNP Elsevier) (NNP N.V.)) (, ,) (NP (DT the) (NNP Dutch) (VBG publishing) (NN group)))))) (. .)))'
    trees = [first_tree, second_tree]
    expected_first_row = '1\tPierre\tpierre\tNNP\t_\t2\tcompound\t_\t_\t_'

    results = SubprocessConverter().convert_trees(trees, debug=True)

    assert len(results) == len(trees)
    first_row = results[0].partition('\n')[0]
    assert first_row == expected_first_row
def test_convert_trees():
    """Check that convert_trees returns one entry per tree with the expected head row."""
    # Adjacent literals are concatenated by the parser, so each tree is still
    # a single one-line string.
    vinken_joins_board = (
        '(TOP (S (NP-SBJ (NP (NNP Pierre) (NNP Vinken)) (, ,) '
        '(ADJP (NML (CD 61) (NNS years)) (JJ old)) (, ,)) '
        '(VP (MD will) (VP (VB join) (NP (DT the) (NN board)) '
        '(PP-CLR (IN as) (NP (DT a) (JJ nonexecutive) (NN director))) '
        '(NP-TMP (NNP Nov.) (CD 29)))) (. .)))'
    )
    vinken_is_chairman = (
        '(TOP (S (NP-SBJ (NNP Mr.) (NNP Vinken)) '
        '(VP (VBZ is) (NP-PRD (NP (NN chairman)) '
        '(PP (IN of) (NP (NP (NNP Elsevier) (NNP N.V.)) (, ,) '
        '(NP (DT the) (NNP Dutch) (VBG publishing) (NN group)))))) (. .)))'
    )
    trees = [vinken_joins_board, vinken_is_chairman]

    converter = SubprocessConverter()
    converted = converter.convert_trees(trees, debug=True)

    # One converted entry is expected for every input tree.
    assert len(converted) == len(trees)
    head_row, _, _ = converted[0].partition('\n')
    assert head_row == '1\tPierre\tpierre\tNNP\t_\t2\tcompound\t_\t_\t_'
def _convert(py_cleannlp_path, ontonotes_folder):
    """Run the Java converter over ``ontonotes_folder`` and return its exit code.

    The Java executable, classpath, main class and head-rule file are taken
    from a default-constructed ``clearnlp.converter.SubprocessConverter``.

    Args:
        py_cleannlp_path: Directory added to ``sys.path`` so that the
            ``clearnlp`` package can be imported.
        ontonotes_folder: Path passed to the Java program after ``-i``.

    Returns:
        The exit status of the Java process. A non-zero status is returned,
        not raised. The captured stdout/stderr are discarded.
    """
    # Imported locally so this function does not rely on a module-level
    # ``import os`` being present.
    import os

    # Avoid growing sys.path with the same entry on every call.
    if py_cleannlp_path not in sys.path:
        sys.path.append(py_cleannlp_path)
    from clearnlp.converter import SubprocessConverter

    converter = SubprocessConverter()
    command = [
        converter.java_command,
        '-ea',  # JVM flag: enable assertions
        # Java expects the platform's path-list separator between classpath
        # entries (':' on POSIX, ';' on Windows); os.pathsep supplies it.
        '-cp', os.pathsep.join(converter.classpath),
        converter.class_name,
        '-h', converter.head_rule_path,
        '-r',  # NOTE(review): presumably "recurse into folder" - confirm against the Java CLI
        '-i', ontonotes_folder,
    ]
    proc = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    return proc.returncode
def test_command_string():
    """Check the default ``java_command`` and that the keyword argument overrides it."""
    # With no arguments the converter should fall back to plain ``java``.
    default_converter = SubprocessConverter()
    assert default_converter.java_command == 'java'

    # An explicit java_command must be stored as given.
    custom_converter = SubprocessConverter(java_command='java8')
    assert custom_converter.java_command == 'java8'
def test_convert_file():
    """Run convert_file on the sample parse and compare the first output line."""
    converter = SubprocessConverter()
    # Relative path: resolved against the directory the tests are run from.
    parse_file = os.path.abspath('sample/wsj_0001.parse')
    converted = converter.convert_file(parse_file, debug=True)

    assert converted
    head_row, _, _ = converted.partition('\n')
    assert head_row == '1\tPierre\tpierre\tNNP\t_\t2\tcompound\t_\t_\t_'