def get_encoder(self):
    """Build an ``Encoder`` from the bundled GPT-2 vocabulary files.

    Reads ``encoder.json`` and ``vocab.bpe`` from the ``torch_gpt2/GPT2``
    directory located under ``realpath`` (a name defined outside this
    function).

    Returns:
        An ``Encoder`` constructed with the decoded JSON object as
        ``encoder`` and a list of tuples (one per merge line, split on
        whitespace) as ``bpe_merges``.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If ``encoder.json`` is not valid JSON.
    """
    model_dir = realpath + '/./torch_gpt2/GPT2'

    # Read the JSON with an explicit UTF-8 encoding so the result does not
    # depend on the platform's locale default; this matches how vocab.bpe
    # is read below.
    with open(model_dir + '/encoder.json', 'r', encoding="utf-8") as f:
        encoder = json.load(f)

    with open(model_dir + '/vocab.bpe', 'r', encoding="utf-8") as f:
        bpe_data = f.read()

    # The first and last newline-separated entries are skipped --
    # presumably a header line and the empty string left after the final
    # newline (confirm against the vocab.bpe file format).
    merge_lines = bpe_data.split('\n')[1:-1]
    bpe_merges = [tuple(merge_str.split()) for merge_str in merge_lines]

    return Encoder(
        encoder=encoder,
        bpe_merges=bpe_merges,
    )
def get_encoder(self):
    """Build an ``Encoder`` from vocabulary files next to the source file.

    The directory is taken from ``self.args.source_file`` (everything
    before its last ``'/'``) and appended to ``realpath`` (a name defined
    outside this function). ``encoder.json`` and ``vocab.bpe`` are read
    from that directory.

    Side effects:
        Prints ``self.args.source_file`` and the derived directory string
        to standard output.

    Returns:
        An ``Encoder`` constructed with the decoded JSON object as
        ``encoder`` and a list of tuples (one per merge line, split on
        whitespace) as ``bpe_merges``.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If ``encoder.json`` is not valid JSON.
    """
    source_file = self.args.source_file
    print(source_file)

    # Directory portion of the path: everything before the last '/',
    # or '' when there is no '/'. A trailing '/' is always appended.
    source_path = source_file.rpartition('/')[0] + '/'
    print(source_path)

    # NOTE(review): the concatenation yields doubled slashes and assumes
    # source_file is relative to realpath -- confirm with callers.
    base_dir = realpath + '/' + source_path

    # Read the JSON with an explicit UTF-8 encoding so the result does not
    # depend on the platform's locale default; this matches how vocab.bpe
    # is read below.
    with open(base_dir + '/encoder.json', 'r', encoding="utf-8") as f:
        encoder = json.load(f)

    with open(base_dir + '/vocab.bpe', 'r', encoding="utf-8") as f:
        bpe_data = f.read()

    # The first and last newline-separated entries are skipped --
    # presumably a header line and the empty string left after the final
    # newline (confirm against the vocab.bpe file format).
    merge_lines = bpe_data.split('\n')[1:-1]
    bpe_merges = [tuple(merge_str.split()) for merge_str in merge_lines]

    return Encoder(
        encoder=encoder,
        bpe_merges=bpe_merges,
    )