Example #1
0
    def convert_ptb_to_pml(out_dir):
        """Convert from Berkeley PTB format to Tred PML format.

            Runs ``penn2pml.pl`` through a shell loop over every ``*.ptb`` file in
            ``Tmp/Ptb`` below the current working directory and passes that same
            directory to the script as ``--output-dir``.

            Keyword arguments:
            out_dir -- output directory (accepted for interface compatibility;
                       this implementation does not read it)
        """
        import shlex  # local import: only needed to escape the path for the shell

        # Build the absolute data path first; sm.TempChwd below presumably
        # switches the working directory, which would change os.getcwd().
        # shlex.quote keeps the command valid when the path contains spaces
        # or shell metacharacters; the glob stays outside the quoting.
        ptb_path = shlex.quote(os.getcwd() + '/Tmp/Ptb')
        p_command = (
            'for i in {0}/*.ptb; do perl "penn2pml.pl" --output-dir {0} '
            '--bracketed-terminals "$i"; done'
        ).format(ptb_path)

        script_dir = os.path.sep.join([UniversalTredConverter.lib_perl, 'ptb2pml', 'bin'])
        with sm.TempChwd(script_dir):
            # Discard stdout via DEVNULL instead of an unread PIPE: a pipe that
            # nobody drains blocks the child once the OS pipe buffer is full.
            subprocess.call(p_command, stdout=subprocess.DEVNULL, shell=True)
Example #2
0
    def convert_conll_to_pml(out_dir):
        """Convert from LangBank Conll format to Tred PML format.

            Runs ``conll2pml`` through a shell loop over every ``*.conll`` file in
            ``Tmp/Conll`` below the current working directory, using each input
            file's own path as ``--out-prefix``.

            Keyword arguments:
            out_dir -- output directory (accepted for interface compatibility;
                       this implementation does not read it)
        """
        import shlex  # local import: only needed to escape the path for the shell

        # Build the absolute data path first; sm.TempChwd below presumably
        # switches the working directory, which would change os.getcwd().
        # shlex.quote keeps the command valid when the path contains spaces
        # or shell metacharacters; the glob stays outside the quoting.
        conll_path = shlex.quote(os.getcwd() + '/Tmp/Conll')
        p_command = (
            'for i in {0}/*.conll; do perl "conll2pml" --out-prefix "$i" '
            '--technical-root --max-sentences 500 '
            '--columns "ID,FORM,LEMMA,POSTAG,POSTAG,FEATS,HEAD,DEPREL,PHEAD,PDEPREL" '
            '"$i"; done'
        ).format(conll_path)

        script_dir = os.path.sep.join([UniversalTredConverter.lib_perl, 'conll2pml'])
        with sm.TempChwd(script_dir):
            # Discard stdout via DEVNULL instead of an unread PIPE: a pipe that
            # nobody drains blocks the child once the OS pipe buffer is full.
            subprocess.call(p_command, stdout=subprocess.DEVNULL, shell=True)
Example #3
0
    def convert_conll_to_ptb(in_dir, verbose=True):
        """Convert from LangBank Conll format to Berkeley PTB format.

            Launches ``Conll2PtbWrapper_fat.jar`` with ``java -jar`` from inside
            ``UniversalTredConverter.lib_java``, handing it the input directory
            and the relative path to ``ger_sm5_gf.gr``.

            Keyword arguments:
            in_dir -- input directory
            verbose -- true if a long-runtime warning should be printed (default True)
        """
        if verbose:
            print('Warning: This process may take a considerable amount of time.')

        # Relative resource path handed to the jar as its second argument.
        resource_path = os.path.join('.', 'conll2ptb-wrapper', 'rsrc', 'ger_sm5_gf.gr')
        java_command = ['java', '-jar', 'Conll2PtbWrapper_fat.jar', in_dir, resource_path]

        # Earlier classpath-based invocation, kept for reference:
        #   ['java', '-cp', '../lib/*:', 'BerkeleyConnl2Ptb', in_dir]
        with sm.TempChwd(UniversalTredConverter.lib_java):
            subprocess.run(java_command)