def __init__(self, squad_json, vocab_file, cache_file, batch_size, max_seq_length, num_inputs):
    # Whenever you specify a custom constructor for a class exported from a
    # native library (here, Forward's IPyBatchStream), you MUST call the
    # constructor of the parent explicitly.
    forward.IPyBatchStream.__init__(self)
    self.cache_file = cache_file
    # Every time get_batch is called, the next batch of size batch_size will
    # be copied to the device and returned.
    self.data = dp.read_squad_json(squad_json)
    self.max_seq_length = max_seq_length
    self.batch_size = batch_size
    self.current_index = 0
    self.num_inputs = num_inputs
    self.tokenizer = tokenization.BertTokenizer(vocab_file=vocab_file, do_lower_case=True)
    self.doc_stride = 128
    self.max_query_length = 64
    self.maxbatch = 500
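Both variants walk the SQuAD examples the same way: tokenize the next `batch_size` examples into fixed-length feature arrays, then hand those arrays to the runtime. Below is a minimal sketch of that shared featurization step as a helper method on the stream/calibrator class. The method name `_next_host_batch` is hypothetical, and `dp.convert_example_to_features` is assumed to be the NVIDIA BERT demo helper with its usual signature; adjust both to your actual data-processing module.

```python
import numpy as np

def _next_host_batch(self):
    # Hypothetical helper: featurize the next batch_size SQuAD examples into
    # contiguous int32 host arrays of shape (batch_size, max_seq_length).
    # dp.convert_example_to_features is assumed from the NVIDIA BERT demo.
    input_ids, segment_ids, input_mask = [], [], []
    for i in range(self.batch_size):
        example = self.data[self.current_index + i]
        features = dp.convert_example_to_features(
            example.doc_tokens, example.question_text, self.tokenizer,
            self.max_seq_length, self.doc_stride, self.max_query_length)
        # Keep only the first feature span of each example for calibration.
        input_ids.append(features[0].input_ids)
        segment_ids.append(features[0].segment_ids)
        input_mask.append(features[0].input_mask)
    self.current_index += self.batch_size
    return (np.ascontiguousarray(input_ids, dtype=np.int32),
            np.ascontiguousarray(segment_ids, dtype=np.int32),
            np.ascontiguousarray(input_mask, dtype=np.int32))
```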
def __init__(self, squad_json, vocab_file, cache_file, batch_size, max_seq_length, num_inputs):
    # Whenever you specify a custom constructor for a TensorRT class,
    # you MUST call the constructor of the parent explicitly.
    trt.IInt8LegacyCalibrator.__init__(self)
    self.cache_file = cache_file
    # Every time get_batch is called, the next batch of size batch_size will
    # be copied to the device and returned.
    self.data = dp.read_squad_json(squad_json)
    self.max_seq_length = max_seq_length
    self.batch_size = batch_size
    self.current_index = 0
    self.num_inputs = num_inputs
    self.tokenizer = tokenization.BertTokenizer(vocab_file=vocab_file, do_lower_case=True)
    self.doc_stride = 128
    self.max_query_length = 64
    # Allocate enough device memory for a whole batch, one buffer per input
    # binding (input_ids, segment_ids, input_mask).
    self.device_inputs = [cuda.mem_alloc(self.max_seq_length * trt.int32.itemsize * self.batch_size)
                          for binding in range(3)]
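The TensorRT calibrator is only complete once the remaining `IInt8LegacyCalibrator` hooks are implemented: `get_batch` copies the next host batch into the preallocated device buffers (returning `None` when the data is exhausted), the cache methods let a stored calibration table skip recalibration, and the legacy interface additionally requires the quantile/histogram hooks. A minimal sketch follows, modeled on the NVIDIA BERT demo; it reuses the hypothetical `_next_host_batch` helper sketched above, and the quantile values shown are illustrative defaults, not tuned settings.

```python
import os
import pycuda.driver as cuda

def get_batch_size(self):
    return self.batch_size

def get_batch(self, names):
    if self.current_index + self.batch_size > self.num_inputs:
        # Returning None tells TensorRT the calibration data is exhausted.
        return None
    input_ids, segment_ids, input_mask = self._next_host_batch()
    # Copy each host array into its preallocated device buffer.
    cuda.memcpy_htod(self.device_inputs[0], input_ids.ravel())
    cuda.memcpy_htod(self.device_inputs[1], segment_ids.ravel())
    cuda.memcpy_htod(self.device_inputs[2], input_mask.ravel())
    return [int(d) for d in self.device_inputs]

def read_calibration_cache(self):
    # Reuse an existing calibration cache, if one was written previously.
    if os.path.exists(self.cache_file):
        with open(self.cache_file, "rb") as f:
            return f.read()
    return None

def write_calibration_cache(self, cache):
    with open(self.cache_file, "wb") as f:
        f.write(cache)

# Hooks specific to the legacy (percentile) calibrator. The values here are
# illustrative assumptions, not recommendations.
def get_quantile(self):
    return 0.9999

def get_regression_cutoff(self):
    return 1.0

def read_histogram_cache(self, length):
    return None

def write_histogram_cache(self, data, length):
    pass
```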