def __init__(self, squad_json, vocab_file, cache_file, batch_size, max_seq_length, num_inputs):
    """Set up the batch stream over SQuAD data.

    squad_json: path to a SQuAD-format JSON file to draw examples from.
    vocab_file: BERT vocabulary file used to build the tokenizer.
    cache_file: path used for the calibration cache.
    batch_size: number of sequences copied to the device per get_batch call.
    max_seq_length: maximum tokenized sequence length.
    num_inputs: how many inputs from the dataset to consume.
    """
    # Custom constructors for TensorRT classes MUST invoke the parent
    # constructor explicitly — do this before anything else.
    forward.IPyBatchStream.__init__(self)
    self.cache_file = cache_file

    # Dataset: each get_batch call serves the next batch_size examples.
    self.data = dp.read_squad_json(squad_json)
    self.current_index = 0
    self.num_inputs = num_inputs
    self.batch_size = batch_size

    # Tokenization configuration.
    self.tokenizer = tokenization.BertTokenizer(vocab_file=vocab_file, do_lower_case=True)
    self.max_seq_length = max_seq_length
    self.doc_stride = 128
    self.max_query_length = 64

    # Upper bound on batches served by this stream.
    self.maxbatch = 500
def __init__(self, squad_json, vocab_file, cache_file, batch_size, max_seq_length, num_inputs):
    """Set up the INT8 legacy calibrator over SQuAD data.

    squad_json: path to a SQuAD-format JSON file to draw examples from.
    vocab_file: BERT vocabulary file used to build the tokenizer.
    cache_file: path used to read/write the calibration cache.
    batch_size: number of sequences copied to the device per get_batch call.
    max_seq_length: maximum tokenized sequence length.
    num_inputs: how many inputs from the dataset to consume.
    """
    # Custom constructors for TensorRT classes MUST invoke the parent
    # constructor explicitly — do this before anything else.
    trt.IInt8LegacyCalibrator.__init__(self)
    self.cache_file = cache_file

    # Dataset: each get_batch call serves the next batch_size examples.
    self.data = dp.read_squad_json(squad_json)
    self.current_index = 0
    self.num_inputs = num_inputs
    self.batch_size = batch_size

    # Tokenization configuration.
    self.tokenizer = tokenization.BertTokenizer(vocab_file=vocab_file, do_lower_case=True)
    self.max_seq_length = max_seq_length
    self.doc_stride = 128
    self.max_query_length = 64

    # Pre-allocate device memory for one full batch per input binding
    # (3 bindings, int32 elements, max_seq_length tokens per sequence).
    self.device_inputs = [
        cuda.mem_alloc(self.max_seq_length * trt.int32.itemsize * self.batch_size)
        for _ in range(3)
    ]
if __name__ == '__main__':
    args = parse_args()

    # Resolve the passage text: explicit --passage words win, then a passage
    # file, then a SQuAD JSON dataset, and finally an interactive prompt.
    paragraph_text = None
    squad_examples = None
    output_prediction_file = None
    if args.passage != '':
        paragraph_text = ' '.join(args.passage)
    elif args.passage_file != '':
        # Use a context manager so the file handle is closed promptly
        # (the original leaked the open handle).
        with open(args.passage_file, 'r') as f:
            paragraph_text = f.read()
    elif args.squad_json != '':
        squad_examples = dp.read_squad_json(args.squad_json)
        output_prediction_file = args.output_prediction_file
    else:
        paragraph_text = input("Paragraph: ")

    # Resolve the question text: explicit --question words, then a question file.
    question_text = None
    if args.question != '':
        question_text = ' '.join(args.question)
    elif args.question_file != '':
        with open(args.question_file, 'r') as f:
            question_text = f.read()

    tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file, do_lower_case=True)

    # When splitting up a long document into chunks, how much stride to take
    # between chunks.
    doc_stride = 128