Example #1
0
    def extract_functions_and_apply_bpe(
            self, lang_executor=None, function_executor=None,
            bpe_executor=None):
        """Extract functions for every language, then run BPE on the results.

        Steps, in order:
          1. Submit ``extract_functions`` of each entry in ``self.langs`` to
             ``lang_executor`` and wait for every submitted job to finish.
          2. Hand the per-language test/valid function files (standalone and
             class variants) to ``truncate_files``.
          3. Call ``self.apply_bpe`` on the train, test and valid function
             files, always with ``use_vocab=False``.

        Args:
            lang_executor: Object exposing ``submit(fn, *args)`` whose return
                value has a ``result()`` method. A ``LocalExecutor`` is
                created when this is ``None``.
            function_executor: Forwarded untouched as the last argument of
                each language's ``extract_functions``.
            bpe_executor: Forwarded to ``self.apply_bpe`` as ``executor``.
        """
        print("extract functions ... ")
        executor = LocalExecutor() if lang_executor is None else lang_executor

        # Submit every language before waiting on any of them, so that an
        # asynchronous executor can work on all languages at the same time.
        pending = []
        for language in self.langs:
            pending.append(
                executor.submit(language.extract_functions,
                                self.keep_comments, self.test_size,
                                function_executor))
        for handle in pending:
            handle.result()

        # Only the evaluation splits are passed to truncate_files; the train
        # files are left alone.
        split_kind_pairs = [
            (split, kind)
            for split in ('test', 'valid')
            for kind in ('functions_standalone', 'functions_class')
        ]
        for split, kind in split_kind_pairs:
            truncate_files(
                language.folder.joinpath(f'{split}{self.suffix}.{kind}.tok')
                for language in self.langs)

        print("apply bpe on train ... ")
        self.apply_bpe(f'train{self.suffix}.[01234567].functions_*.tok',
                       use_vocab=False,
                       executor=bpe_executor)

        print("apply bpe on test and valid ...")
        for split in ('test', 'valid'):
            self.apply_bpe(f'{split}{self.suffix}.functions_*.tok',
                           use_vocab=False,
                           executor=bpe_executor)
Example #2
0
    def extract_functions(self, lang_executor=None, function_executor=None):
        """Extract functions for every language in ``self.langs``.

        Each language's ``extract_functions`` is submitted to
        ``lang_executor`` with ``self.keep_comments``, ``self.test_size`` and
        ``function_executor`` as arguments. Once all submitted jobs have
        completed, the per-language test/valid function files (standalone and
        class variants) are handed to ``truncate_files``.

        Args:
            lang_executor: Object exposing ``submit(fn, *args)`` whose return
                value has a ``result()`` method. A ``LocalExecutor`` is
                created when this is ``None``.
            function_executor: Forwarded untouched as the last argument of
                each language's ``extract_functions``.
        """
        print("extract functions ... ")
        executor = LocalExecutor() if lang_executor is None else lang_executor

        # Submit every language before waiting on any of them, so that an
        # asynchronous executor can work on all languages at the same time.
        pending = []
        for language in self.langs:
            pending.append(
                executor.submit(language.extract_functions,
                                self.keep_comments, self.test_size,
                                function_executor))
        for handle in pending:
            handle.result()

        # Only the evaluation splits are passed to truncate_files; the train
        # files are left alone.
        split_kind_pairs = [
            (split, kind)
            for split in ('test', 'valid')
            for kind in ('functions_standalone', 'functions_class')
        ]
        for split, kind in split_kind_pairs:
            truncate_files(
                language.folder.joinpath(f'{split}{self.suffix}.{kind}.tok')
                for language in self.langs)