def test_convert_file():
    """Convert ``sample/wsj_0001.parse`` and check the first output row.

    The converter must return a non-empty result whose first line is the
    expected tab-separated row for the token "Pierre".
    """
    converter = SubprocessConverter()
    sample_path = os.path.abspath('sample/wsj_0001.parse')
    result = converter.convert_file(sample_path, debug=True)

    assert result
    first_row = result.split('\n')[0]
    assert first_row == '1\tPierre\tpierre\tNNP\t_\t2\tcompound\t_\t_\t_'
def test_convert_trees():
    """Convert two bracketed parse trees given as in-memory strings.

    Checks that one output is produced per input tree and that the first
    line of the first output is the expected tab-separated row for "Pierre".
    """
    # The tree literals are whitespace-sensitive input; keep them verbatim.
    first_tree = '''(TOP (S (NP-SBJ (NP (NNP Pierre)
                    (NNP Vinken))
                (, ,)
                (ADJP (NML (CD 61)
                           (NNS years))
                      (JJ old))
                (, ,))
        (VP (MD will)
            (VP (VB join)
                (NP (DT the)
                    (NN board))
                (PP-CLR (IN as)
                        (NP (DT a)
                            (JJ nonexecutive)
                            (NN director)))
                (NP-TMP (NNP Nov.)
                        (CD 29))))
        (. .)))'''
    second_tree = '''(TOP (S (NP-SBJ (NNP Mr.)
                (NNP Vinken))
        (VP (VBZ is)
            (NP-PRD (NP (NN chairman))
                    (PP (IN of)
                        (NP (NP (NNP Elsevier)
                                (NNP N.V.))
                            (, ,)
                            (NP (DT the)
                                (NNP Dutch)
                                (VBG publishing)
                                (NN group))))))
        (. .)))'''

    converter = SubprocessConverter()
    trees = [first_tree, second_tree]
    results = converter.convert_trees(trees, debug=True)

    assert len(results) == len(trees)
    first_row = results[0].split('\n')[0]
    assert first_row == '1\tPierre\tpierre\tNNP\t_\t2\tcompound\t_\t_\t_'
def test_convert_trees():
    """Run ``convert_trees`` on two parse-tree strings and verify the output.

    The result must contain as many entries as there are input trees, and
    the first entry must start with the expected row for the token "Pierre".
    """
    expected_first_row = '1\tPierre\tpierre\tNNP\t_\t2\tcompound\t_\t_\t_'
    converter = SubprocessConverter()
    # Tree text is passed to the converter as-is, so the literals below are
    # kept exactly as written (including their internal indentation).
    trees = [
        '''(TOP (S (NP-SBJ (NP (NNP Pierre)
                    (NNP Vinken))
                (, ,)
                (ADJP (NML (CD 61)
                           (NNS years))
                      (JJ old))
                (, ,))
        (VP (MD will)
            (VP (VB join)
                (NP (DT the)
                    (NN board))
                (PP-CLR (IN as)
                        (NP (DT a)
                            (JJ nonexecutive)
                            (NN director)))
                (NP-TMP (NNP Nov.)
                        (CD 29))))
        (. .)))''',
        '''(TOP (S (NP-SBJ (NNP Mr.)
                (NNP Vinken))
        (VP (VBZ is)
            (NP-PRD (NP (NN chairman))
                    (PP (IN of)
                        (NP (NP (NNP Elsevier)
                                (NNP N.V.))
                            (, ,)
                            (NP (DT the)
                                (NNP Dutch)
                                (VBG publishing)
                                (NN group))))))
        (. .)))''',
    ]

    converted = converter.convert_trees(trees, debug=True)

    assert len(converted) == len(trees)
    leading_line = converted[0].split('\n')[0]
    assert leading_line == expected_first_row
Ejemplo n.º 4
0
def _convert(py_cleannlp_path, ontonotes_folder):
    """Run the ClearNLP Java converter class over ``ontonotes_folder``.

    The Java command line is assembled from the attributes of a
    ``clearnlp.converter.SubprocessConverter`` instance (``java_command``,
    ``classpath``, ``class_name``, ``head_rule_path``).

    Args:
        py_cleannlp_path: Directory added to ``sys.path`` so that the
            ``clearnlp`` package can be imported.
        ontonotes_folder: Path passed to the Java class via ``-i``
            (presumably the input folder to convert -- confirm against
            the converter's CLI).

    Returns:
        The exit status of the Java process. Its stdout and stderr are
        captured but discarded.
    """
    import os

    # Avoid piling up duplicate entries when this helper is called repeatedly.
    if py_cleannlp_path not in sys.path:
        sys.path.append(py_cleannlp_path)

    # Imported lazily: the package is only importable after the path tweak.
    from clearnlp.converter import SubprocessConverter

    converter = SubprocessConverter()

    command = [
        converter.java_command,
        '-ea',  # enable Java assertions
        # The JVM expects the platform's path-list separator
        # (':' on POSIX, ';' on Windows), so do not hard-code ':'.
        '-cp', os.pathsep.join(converter.classpath),
        converter.class_name,
        '-h', converter.head_rule_path,
        '-r',
        '-i', ontonotes_folder,
    ]
    proc = subprocess.run(command,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE,
                          universal_newlines=True)

    return proc.returncode
def test_command_string():
    """Check the default ``java_command`` and that the constructor overrides it."""
    default_converter = SubprocessConverter()
    assert default_converter.java_command == 'java'

    custom_converter = SubprocessConverter(java_command='java8')
    assert custom_converter.java_command == 'java8'
def test_convert_file():
    """Run ``convert_file`` on ``sample/wsj_0001.parse`` and verify row one.

    The output must be truthy and its first line must equal the expected
    tab-separated row for the token "Pierre".
    """
    expected_first_line = '1\tPierre\tpierre\tNNP\t_\t2\tcompound\t_\t_\t_'
    converter = SubprocessConverter()
    converted = converter.convert_file(
        os.path.abspath('sample/wsj_0001.parse'),
        debug=True,
    )

    assert converted
    assert converted.split('\n')[0] == expected_first_line