def extract_functions_and_apply_bpe(self, lang_executor=None, function_executor=None, bpe_executor=None):
    """Extract functions for every language, then run BPE on the results.

    The extraction phase is delegated to ``self.extract_functions`` so
    that both entry points share a single implementation of the
    submit / wait / truncate sequence instead of two copies that can
    drift apart.

    Args:
        lang_executor: Executor used to run one extraction job per
            language. Forwarded to ``extract_functions``, which falls
            back to a ``LocalExecutor`` when this is ``None``.
        function_executor: Forwarded unchanged to each language's
            ``extract_functions`` call.
        bpe_executor: Passed as ``executor`` to every ``apply_bpe`` call.
    """
    self.extract_functions(
        lang_executor=lang_executor,
        function_executor=function_executor,
    )

    print("apply bpe on train ... ")
    # ``[01234567]`` is a single-character class: only files whose name has
    # one digit 0-7 in that position are selected.
    # NOTE(review): presumably these digits are train shard indices - confirm.
    self.apply_bpe(
        f'train{self.suffix}.[01234567].functions_*.tok',
        use_vocab=False,
        executor=bpe_executor,
    )

    print("apply bpe on test and valid ...")
    for split in ('test', 'valid'):
        self.apply_bpe(
            f'{split}{self.suffix}.functions_*.tok',
            use_vocab=False,
            executor=bpe_executor,
        )
def extract_functions(self, lang_executor=None, function_executor=None):
    """Run function extraction for every language and truncate eval files.

    One job per entry of ``self.langs`` is submitted to the executor; all
    jobs are submitted before any result is awaited. Once every job has
    finished, ``truncate_files`` is called once per (split, function type)
    pair with the matching ``.tok`` path from each language folder.

    Args:
        lang_executor: Executor exposing ``submit``; a ``LocalExecutor`` is
            created when this is ``None``.
        function_executor: Forwarded as the last argument to each
            language's ``extract_functions``.
    """
    print("extract functions ... ")
    executor = LocalExecutor() if lang_executor is None else lang_executor

    # Submit everything first, then block on the results.
    pending = []
    for language in self.langs:
        pending.append(
            executor.submit(
                language.extract_functions,
                self.keep_comments,
                self.test_size,
                function_executor,
            )
        )
    for task in pending:
        task.result()

    split_and_kind = [
        (split, kind)
        for split in ('test', 'valid')
        for kind in ('functions_standalone', 'functions_class')
    ]
    for split, kind in split_and_kind:
        # NOTE(review): presumably trims the per-language files to a common
        # length - confirm against truncate_files.
        truncate_files(
            language.folder.joinpath(f'{split}{self.suffix}.{kind}.tok')
            for language in self.langs
        )