def convert_ptb_to_pml(out_dir):
    """Convert from Berkeley PTB format to Tred PML format.

    Runs the ``penn2pml.pl`` Perl script once for every ``*.ptb`` file found
    in ``<current working directory>/Tmp/Ptb``. That same directory is handed
    to the script as ``--output-dir``. The script's standard output is
    discarded. Nothing is run when no ``*.ptb`` file is present.

    Keyword arguments:
    out_dir -- output directory
               NOTE(review): this argument is currently not used; the
               ``Tmp/Ptb`` directory is passed as ``--output-dir`` instead.
               Confirm whether that is intended.

    Raises:
    OSError -- if the ``perl`` executable cannot be started
    """
    import glob

    # Resolve the data directory *before* the working directory is switched,
    # so it stays anchored to the caller's current directory.
    ptb_path = os.getcwd() + '/Tmp/Ptb'
    ptb_files = sorted(glob.glob(os.path.join(ptb_path, '*.ptb')))

    script_dir = os.path.sep.join(
        [UniversalTredConverter.lib_perl, 'ptb2pml', 'bin'])

    # The script is addressed by its bare file name, so the call is made from
    # inside script_dir (sm.TempChwd presumably restores the previous working
    # directory on exit -- verify against its definition).
    with sm.TempChwd(script_dir):
        for ptb_file in ptb_files:
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact.
            # DEVNULL instead of PIPE: nobody reads the pipe, so a chatty
            # child would block forever once the OS pipe buffer is full.
            subprocess.call(
                ['perl', 'penn2pml.pl',
                 '--output-dir', ptb_path,
                 '--bracketed-terminals', ptb_file],
                stdout=subprocess.DEVNULL,
            )
def convert_conll_to_pml(out_dir):
    """Convert from LangBank Conll format to Tred PML format.

    Runs the ``conll2pml`` Perl script once for every ``*.conll`` file found
    in ``<current working directory>/Tmp/Conll``. Each input file's own path
    is used as the ``--out-prefix``. The script's standard output is
    discarded. Nothing is run when no ``*.conll`` file is present.

    Keyword arguments:
    out_dir -- output directory
               NOTE(review): this argument is currently not used; output
               location is determined by ``--out-prefix`` (the input file
               path). Confirm whether that is intended.

    Raises:
    OSError -- if the ``perl`` executable cannot be started
    """
    import glob

    # Resolve the data directory *before* the working directory is switched,
    # so it stays anchored to the caller's current directory.
    conll_path = os.getcwd() + '/Tmp/Conll'
    conll_files = sorted(glob.glob(os.path.join(conll_path, '*.conll')))

    script_dir = os.path.sep.join(
        [UniversalTredConverter.lib_perl, 'conll2pml'])

    # Column layout handed to conll2pml.
    # NOTE(review): POSTAG appears twice in this list -- confirm that this is
    # deliberate.
    columns = 'ID,FORM,LEMMA,POSTAG,POSTAG,FEATS,HEAD,DEPREL,PHEAD,PDEPREL'

    # The script is addressed by its bare file name, so the call is made from
    # inside script_dir (sm.TempChwd presumably restores the previous working
    # directory on exit -- verify against its definition).
    with sm.TempChwd(script_dir):
        for conll_file in conll_files:
            # An argument list (no shell) keeps paths containing spaces or
            # shell metacharacters intact.
            # DEVNULL instead of PIPE: nobody reads the pipe, so a chatty
            # child would block forever once the OS pipe buffer is full.
            subprocess.call(
                ['perl', 'conll2pml',
                 '--out-prefix', conll_file,
                 '--technical-root',
                 '--max-sentences', '500',
                 '--columns', columns,
                 conll_file],
                stdout=subprocess.DEVNULL,
            )
def convert_conll_to_ptb(in_dir, verbose=True):
    """Convert from LangBank Conll format to Berkeley PTB format.

    Launches ``Conll2PtbWrapper_fat.jar`` with ``java -jar`` from inside
    ``UniversalTredConverter.lib_java``, passing the input directory and the
    relative path of the ``ger_sm5_gf.gr`` resource file as arguments.

    Keyword arguments:
    in_dir -- input directory
    verbose -- true if user should be warned for long runtime (default True)
    """
    grammar_file = os.path.join(
        '.', 'conll2ptb-wrapper', 'rsrc', 'ger_sm5_gf.gr')
    java_command = [
        'java', '-jar', 'Conll2PtbWrapper_fat.jar', in_dir, grammar_file]

    if verbose:
        print('Warning: This process may take a considerable amount of time.')

    # The jar and the grammar file are given as relative paths, so the call
    # is made from inside lib_java.
    # A disabled alternative invocation went through the classpath:
    #   java -cp ../lib/*: BerkeleyConnl2Ptb <in_dir>
    with sm.TempChwd(UniversalTredConverter.lib_java):
        subprocess.run(java_command)