Example #1
0
 def get_encoder(self):
     """Build an ``Encoder`` from the vocabulary files under ``torch_gpt2/GPT2``.

     Reads ``encoder.json`` and ``vocab.bpe`` from the ``torch_gpt2/GPT2``
     directory located beneath the module-level ``realpath``.

     Returns:
         An ``Encoder`` constructed with the parsed JSON object as
         ``encoder`` and a list of tuples of whitespace-separated tokens
         (one tuple per merge line) as ``bpe_merges``.

     Raises:
         OSError: if either file cannot be opened.
         json.JSONDecodeError: if ``encoder.json`` is not valid JSON.
     """
     # Decode as UTF-8 explicitly: without it the locale's default encoding
     # is used, which differs across platforms. This also matches how
     # vocab.bpe is opened below.
     with open(realpath +'/./torch_gpt2/GPT2/encoder.json', 'r', encoding="utf-8") as f:
         encoder = json.load(f)
     with open(realpath + '/./torch_gpt2/GPT2/vocab.bpe', 'r', encoding="utf-8") as f:
         bpe_data = f.read()
     # Skip the first line and the final element of the split (presumably a
     # header line and the empty string after the trailing newline -- confirm
     # against the actual vocab.bpe file).
     merge_lines = bpe_data.split('\n')[1:-1]
     bpe_merges = [tuple(merge_str.split()) for merge_str in merge_lines]
     return Encoder(
         encoder=encoder,
         bpe_merges=bpe_merges,
     )
Example #2
0
 def get_encoder(self):
     """Build an ``Encoder`` from files next to ``self.args.source_file``.

     The directory part of ``self.args.source_file`` (everything before the
     last ``'/'``) is resolved beneath the module-level ``realpath``;
     ``encoder.json`` and ``vocab.bpe`` are read from that directory.

     Returns:
         An ``Encoder`` constructed with the parsed JSON object as
         ``encoder`` and a list of tuples of whitespace-separated tokens
         (one tuple per merge line) as ``bpe_merges``.

     Raises:
         OSError: if either file cannot be opened.
         json.JSONDecodeError: if ``encoder.json`` is not valid JSON.
     """
     print(self.args.source_file)
     # Everything before the last '/' (empty when there is no '/'), with a
     # trailing slash appended.
     parent_dir, _, _ = self.args.source_file.rpartition('/')
     source_path = parent_dir + '/'
     print(source_path)
     # Decode as UTF-8 explicitly: without it the locale's default encoding
     # is used, which differs across platforms. This also matches how
     # vocab.bpe is opened below.
     with open(realpath + '/' + source_path + '/encoder.json', 'r', encoding="utf-8") as f:
         encoder = json.load(f)
     with open(realpath + '/' + source_path + '/vocab.bpe', 'r', encoding="utf-8") as f:
         bpe_data = f.read()
     # Skip the first line and the final element of the split (presumably a
     # header line and the empty string after the trailing newline -- confirm
     # against the actual vocab.bpe file).
     merge_lines = bpe_data.split('\n')[1:-1]
     bpe_merges = [tuple(merge_str.split()) for merge_str in merge_lines]
     return Encoder(
         encoder=encoder,
         bpe_merges=bpe_merges,
     )