def setUpClass(self):
    """Prepare the fixtures shared by the MXNet adaptor tests.

    Calls ``build_mxnet()`` and ``build_mxnet_kl()`` first (presumably they
    generate ``./mxnet.yaml`` and ``./mxnet_kl.yaml`` -- confirm against
    their definitions), then stores two symbol models, two quantizers, an
    ``MxNetAdaptor`` and the numeric range ``[-1000, 1000]`` on ``self``.
    """
    build_mxnet()
    build_mxnet_kl()

    # Models under test.
    self.mlp_model = get_mlp_sym()
    self.conv_model = get_conv_sym()

    # One quantizer per configuration file.
    self.quantizer_1 = lpot.Quantization("./mxnet.yaml")
    self.quantizer_2 = lpot.Quantization("./mxnet_kl.yaml")

    # The adaptor is created without a calibration dataloader.
    adaptor_options = {'q_dataloader': None}
    self.adaptor = lpot.adaptor.mxnet.MxNetAdaptor(adaptor_options)

    # Value bounds stored for later use by the tests.
    self.data_low = -1000
    self.data_high = 1000
def main():
    """Quantize the saved Keras model with lpot and run it once.

    Loads fashion-MNIST, scales the images to ``[0, 1]`` as float32, loads
    the model stored at ``../models/simple_model``, quantizes it with the
    settings in ``./conf.yaml`` (using the test split for both calibration
    and evaluation) and finally runs the quantized graph on the test images.
    """
    # Get data
    splits = keras.datasets.fashion_mnist.load_data()
    (train_images, train_labels), (test_images, test_labels) = splits
    train_images = train_images.astype(np.float32) / 255.0
    test_images = test_images.astype(np.float32) / 255.0

    # Load saved model
    model = tf.keras.models.load_model("../models/simple_model")
    print('input', model.input_names)
    print('output', model.output_names)

    # Run lpot to get the quantized graph
    import lpot
    tuner = lpot.Quantization('./conf.yaml')
    labelled_samples = list(zip(test_images, test_labels))
    loader = tuner.dataloader(dataset=labelled_samples)
    quantized_model = tuner(model,
                            q_dataloader=loader,
                            eval_dataloader=loader)

    # Run inference with quantized model
    quantized_graph_def = quantized_model.as_graph_def()
    concrete_function = get_concrete_function(graph_def=quantized_graph_def,
                                              inputs=["input:0"],
                                              outputs=["output:0"],
                                              print_graph=True)
    batch = tf.constant(test_images)
    frozen_graph_predictions = concrete_function(input=batch)[0]
    print("Inference is done.")
def auto_tune(input_graph_path, yaml_config):
    """Quantize the graph at ``input_graph_path`` using ``yaml_config``.

    No dataloader or evaluation function is supplied here; all three
    optional arguments are passed as ``None``.

    Returns:
        Whatever the ``lpot.Quantization`` call returns for the model.
    """
    tuner = lpot.Quantization(yaml_config)
    return tuner(input_graph_path,
                 q_dataloader=None,
                 eval_func=None,
                 eval_dataloader=None)
def main():
    """Quantize the frozen MobileNet v1 graph with settings from ./conf.yaml."""
    import lpot
    from lpot import common

    tuner = lpot.Quantization('./conf.yaml')
    # The model is attached as an attribute, so the call takes no arguments.
    tuner.model = common.Model("./mobilenet_v1_1.0_224_frozen.pb")
    quantized_model = tuner()
def main():
    """Quantize the RFCN ResNet-101 model, calibrating on a dummy dataset.

    The calibration data is lpot's ``'dummy'`` dataset with shape
    ``(100, 100, 100, 3)`` and labels enabled, wrapped in a TensorFlow
    ``DataLoader``.
    """
    import lpot

    tuner = lpot.Quantization('./conf.yaml')
    dummy_data = tuner.dataset('dummy', shape=(100, 100, 100, 3), label=True)
    calib_loader = DataLoader('tensorflow', dummy_data)

    model_dir = './model/public/rfcn-resnet101-coco-tf/model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/'
    quantized_model = tuner(model_dir, q_dataloader=calib_loader)
def auto_tune(input_graph_path, yaml_config, batch_size):
    """Quantize the AlexNet graph stored at ``input_graph_path``.

    The graph is loaded with ``alexnet.load_pb`` and a single
    ``Dataloader(batch_size)`` instance is used for both calibration and
    evaluation; no custom ``eval_func`` is supplied.

    Returns:
        Whatever the ``lpot.Quantization`` call returns for the model.
    """
    graph = alexnet.load_pb(input_graph_path)
    tuner = lpot.Quantization(yaml_config)
    loader = Dataloader(batch_size)
    return tuner(graph,
                 q_dataloader=loader,
                 eval_func=None,
                 eval_dataloader=loader)
def main():
    """Quantize the RFCN ResNet-101 model, calibrating on a dummy dataset.

    The model and the calibration dataloader are attached to the quantizer
    as attributes, after which the quantizer is called without arguments.
    The calibration data is lpot's ``'dummy'`` dataset with shape
    ``(100, 100, 100, 3)`` and labels enabled.
    """
    import lpot
    # ``common`` is used below; import it here so the function does not
    # depend on a module-level import being present.
    from lpot import common

    quantizer = lpot.Quantization('./conf.yaml')
    dataset = quantizer.dataset('dummy', shape=(100, 100, 100, 3), label=True)
    quantizer.model = common.Model(
        './model/public/rfcn-resnet101-coco-tf/model/public/rfcn-resnet101-coco-tf/rfcn_resnet101_coco_2018_01_28/'
    )
    quantizer.calib_dataloader = common.DataLoader(dataset)
    quantized_model = quantizer()
def main():
    """Quantize the frozen MobileNet v1 graph, then benchmark the result.

    For every entry returned by the benchmark, prints the accuracy and the
    mean of the result list divided by the batch size, scaled by 1000 and
    labelled as milliseconds.
    """
    import lpot
    tuner = lpot.Quantization('./conf.yaml')
    quantized_model = tuner("./mobilenet_v1_1.0_224_frozen.pb")

    # Optional, run benchmark
    from lpot import Benchmark
    bench = Benchmark('./conf.yaml')
    results = bench(model=quantized_model)

    batch_size = 1
    for mode, result in results.items():
        # Each result is unpacked as (accuracy, batch size, result list).
        acc, batch_size, result_list = result
        mean_value = np.array(result_list).mean()
        latency = mean_value / batch_size
        print('Accuracy is {:.3f}'.format(acc))
        print('Latency: {:.3f} ms'.format(latency * 1000))
def main():
    """Quantize the Inception v1 slim checkpoint via ``common.Model``.

    The slim network function, arg scope, class count and input placeholder
    are still created, but only the checkpoint path is handed to the
    quantizer.
    """
    import lpot
    from lpot import common
    tuner = lpot.Quantization('./conf.yaml')

    # Get graph from slim checkpoint
    from tf_slim.nets import inception
    model_func = inception.inception_v1
    arg_scope = inception.inception_v1_arg_scope()
    kwargs = {'num_classes': 1001}
    inputs_shape = [None, 224, 224, 3]
    images = tf.compat.v1.placeholder(
        name='input',
        dtype=tf.float32,
        shape=inputs_shape,
    )

    # Do quantization
    tuner.model = common.Model('./inception_v1.ckpt')
    quantized_model = tuner()
def main():
    """Quantize Inception v1 from a slim checkpoint via ``get_slim_graph``.

    Builds the graph from ``./inception_v1.ckpt`` using the slim network
    function, its arg scope, an input placeholder of shape
    ``[None, 224, 224, 3]`` and ``num_classes=1001``, then passes that graph
    to the quantizer.
    """
    import lpot
    tuner = lpot.Quantization('./conf.yaml')

    # Get graph from slim checkpoint
    from tf_slim.nets import inception
    network_fn = inception.inception_v1
    scope = inception.inception_v1_arg_scope()
    network_kwargs = {'num_classes': 1001}
    input_shape = [None, 224, 224, 3]
    images = tf.compat.v1.placeholder(
        name='input',
        dtype=tf.float32,
        shape=input_shape,
    )

    from lpot.adaptor.tf_utils.util import get_slim_graph
    graph = get_slim_graph('./inception_v1.ckpt', network_fn, scope, images,
                           **network_kwargs)

    # Do quantization
    quantized_model = tuner(graph)
# NOTE(review): this fragment starts inside a class whose header is not
# visible here (presumably the ``MyMetric`` class referenced below -- confirm).
#   * reset():  empties ``pred_list`` and ``label_list`` and sets ``samples``
#               back to 0.
#   * result(): counts the positions where ``pred_list`` equals ``label_list``
#               and divides that count by ``samples``.
# The script-level statements that follow build an ``lpot.Quantization`` from
# ./conf.yaml, attach ``MyMetric`` (registered as 'hello_metric'), use one
# ``Dataset()`` instance for both the calibration and evaluation dataloaders
# (batch_size=1), quantize '../models/simple_model', and finally import the
# quantized graph into a TF1-style session to fetch 'output:0' for
# ``dataset.test_images`` fed as 'input:0'.
pass def reset(self): self.pred_list = [] self.label_list = [] self.samples = 0 pass def result(self): correct_num = np.sum( np.array(self.pred_list) == np.array(self.label_list)) return correct_num / self.samples # Quantize with customized dataloader and metric quantizer = lpot.Quantization('./conf.yaml') dataset = Dataset() quantizer.metric = common.Metric(MyMetric, 'hello_metric') quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=1) quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=1) quantizer.model = common.Model('../models/simple_model') q_model = quantizer() # Optional, run quantized model import tensorflow as tf with tf.compat.v1.Graph().as_default(), tf.compat.v1.Session() as sess: tf.compat.v1.import_graph_def(q_model.as_graph_def(), name='') styled_image = sess.run(['output:0'], feed_dict={'input:0': dataset.test_images}) print("Inference is done.")
def main():
    """Quantize the frozen MobileNet v1 graph with settings from ./conf.yaml."""
    import lpot

    tuner = lpot.Quantization('./conf.yaml')
    # The frozen graph path is passed directly to the quantizer call.
    quantized_model = tuner("./mobilenet_v1_1.0_224_frozen.pb")
def main(config='config/blendcnn/mrpc/eval.json', args=None):
    """Train, evaluate, tune or benchmark a BlendCNN classifier.

    Args:
        config: Path to the top-level JSON configuration. It supplies the
            paths of the data/model/optimizer JSON files, the seed, the
            pretrained embedding file, the save directory and ``mode``
            (``"train"`` or ``"eval"``).
        args: Parsed command-line namespace. This function reads
            ``dataset_location``, ``batch_size``, ``warmup``, ``tune``,
            ``int8``, ``input_model``, ``tuned_yaml``, ``tuned_checkpoint``,
            ``accuracy_only`` and ``benchmark`` from it, so the ``None``
            default cannot actually be used.

    In ``"eval"`` mode one of three paths runs: lpot tuning (``args.tune``),
    loading a previously tuned int8 model (``args.int8``), or loading the
    FP32 weights; the latter two then run accuracy evaluation or a
    throughput benchmark depending on the flags.
    """

    def _read_json(path):
        """Parse the JSON file at ``path``, closing it before returning."""
        with open(path, "r") as handle:
            return json.load(handle)

    def _report_speed(total_samples, total_time, unit):
        """Print latency/throughput, or a notice when nothing was timed."""
        if total_samples <= 0 or total_time <= 0:
            # Happens when every iteration fell inside the warmup window;
            # dividing would raise ZeroDivisionError.
            print('No timed batches beyond warmup; '
                  'skipping latency/throughput report.')
            return
        throughput = total_samples / total_time
        print('Latency: %.3f ms' % (1 / throughput * 1000))
        print('Throughput: %.3f %s/sec' % (throughput, unit))

    cfg = Config(**_read_json(config))
    cfg_data = data.Config(**_read_json(cfg.cfg_data))
    cfg_model = models.Config(**_read_json(cfg.cfg_model))
    cfg_optim = trainer.Config(**_read_json(cfg.cfg_optim))

    set_seeds(cfg.seed)

    # task dataset class according to the task
    TaskDataset = data.get_class(cfg_data.task)
    tokenizer = tokenization.FullTokenizer(vocab_file=cfg_data.vocab_file,
                                           do_lower_case=True)
    dataset = TaskDataset(
        args.dataset_location,
        pipelines=[
            data.RemoveSymbols('\\'),
            data.Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
            data.AddSpecialTokensWithTruncation(cfg_data.max_len),
            data.TokenIndexing(tokenizer.convert_tokens_to_ids,
                               TaskDataset.labels, cfg_data.max_len)
        ],
        n_data=None)
    dataset = TensorDataset(*dataset.get_tensors())  # To Tensors
    data_iter = DataLoader(dataset, batch_size=args.batch_size, shuffle=False)

    model = models.BlendCNN(cfg_model, len(TaskDataset.labels))
    checkpoint.load_embedding(model.embed, cfg.pretrain_file)

    optimizer = optim.optim4GPU(cfg_optim, model)

    train_loop = trainer.TrainLoop(cfg_optim, model, data_iter, optimizer,
                                   cfg.save_dir, get_device())

    def get_loss(model, batch, global_step):  # make sure loss is a scalar tensor
        """Return the cross-entropy loss of ``model`` on one batch."""
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        loss = nn.CrossEntropyLoss()(logits, label_id)
        return loss

    def evaluate(model, batch):
        """Return ``(mean accuracy, per-sample correctness)`` for one batch."""
        input_ids, segment_ids, input_mask, label_id = batch
        logits = model(input_ids, segment_ids, input_mask)
        _, label_pred = logits.max(1)
        result = (label_pred == label_id).float()
        accuracy = result.mean()
        return accuracy, result

    class Bert_DataLoader(object):
        """Wrap a loader so it yields ``((ids, segments, mask), labels)``."""

        def __init__(self,
                     loader=None,
                     model_type=None,
                     device='cpu',
                     batch_size=1):
            self.loader = loader
            self.model_type = model_type
            self.device = device
            self.batch_size = batch_size

        def __iter__(self):
            for batch in self.loader:
                batch = tuple(t.to(self.device) for t in batch)
                outputs = {
                    'output_all': (batch[0], batch[1], batch[2]),
                    'labels': batch[3]
                }
                yield outputs['output_all'], outputs['labels']

    def benchmark(model):
        """Time ``model`` on random inputs and print latency/throughput."""
        total_samples = 0
        total_time = 0
        index = 0
        # Looked up once; the value is loop-invariant.
        profiling = os.environ.get('BLENDCNN_PROFILING') is not None
        prof = None

        class RandomDataset(object):
            """Random ``(input_ids, segment_ids, input_mask)`` triples."""

            def __init__(self, size, shape):
                self.len = size
                self.input_ids = torch.randint(low=0,
                                               high=30522,
                                               size=(size, shape),
                                               dtype=torch.int64)
                self.segment_ids = torch.randint(low=0,
                                                 high=1,
                                                 size=(size, shape),
                                                 dtype=torch.int64)
                self.input_mask = torch.randint(low=0,
                                                high=1,
                                                size=(size, shape),
                                                dtype=torch.int64)
                self.data = (self.input_ids, self.segment_ids,
                             self.input_mask)

            def __getitem__(self, index):
                return (self.data[0][index], self.data[1][index],
                        self.data[2][index])

            def __len__(self):
                return self.len

        rand_loader = DataLoader(dataset=RandomDataset(size=5000, shape=128),
                                 batch_size=args.batch_size,
                                 shuffle=True)

        for batch in rand_loader:
            index += 1
            tic = time.time()
            if profiling:
                with profiler.profile(record_shapes=True) as prof:
                    with torch.no_grad():
                        model(*batch)
            else:
                with torch.no_grad():  # evaluation without gradient calculation
                    model(*batch)
            # Only iterations after the warmup window contribute to timing.
            if index > args.warmup:
                total_samples += batch[0].size()[0]
                total_time += time.time() - tic

        _report_speed(total_samples, total_time, 'images')
        # ``prof`` holds the profile of the last iteration only.
        if prof is not None:
            print(prof.key_averages().table(sort_by="cpu_time_total",
                                            row_limit=10))

    def eval_func(model):
        """Evaluate ``model`` on ``data_iter`` and return its accuracy."""
        results = []  # prediction results
        total_samples = 0
        total_time = 0
        index = 0
        # Looked up once; the value is loop-invariant.
        profiling = os.environ.get('BLENDCNN_PROFILING') is not None
        prof = None

        model.eval()
        eval_dataloader = Bert_DataLoader(loader=data_iter,
                                          batch_size=args.batch_size)
        for batch, label in eval_dataloader:
            index += 1
            tic = time.time()
            if profiling:
                with profiler.profile(record_shapes=True) as prof:
                    with torch.no_grad():
                        accuracy, result = evaluate(model, (*batch, label))
            else:
                with torch.no_grad():  # evaluation without gradient calculation
                    accuracy, result = evaluate(model, (*batch, label))
            results.append(result)
            # Only iterations after the warmup window contribute to timing.
            if index > args.warmup:
                total_samples += batch[0].size()[0]
                total_time += time.time() - tic

        total_accuracy = torch.cat(results).mean().item()
        _report_speed(total_samples, total_time, 'samples')
        print('Accuracy: %.3f ' % (total_accuracy))
        # ``prof`` holds the profile of the last iteration only.
        if prof is not None:
            print(prof.key_averages().table(sort_by="cpu_time_total",
                                            row_limit=10))
        return total_accuracy

    if cfg.mode == "train":
        train_loop.train(get_loss, cfg.model_file,
                         None)  # not use pretrain_file
        print("Training has been done properly.")

    elif cfg.mode == "eval":
        if args.tune:
            import lpot
            from lpot import common

            # lpot tune
            model.load_state_dict(torch.load(args.input_model))
            eval_dataloader = Bert_DataLoader(loader=data_iter,
                                              batch_size=args.batch_size)

            quantizer = lpot.Quantization(args.tuned_yaml)
            quantizer.model = common.Model(model)
            quantizer.calib_dataloader = eval_dataloader
            quantizer.eval_func = eval_func
            q_model = quantizer()
            q_model.save(args.tuned_checkpoint)

        elif args.int8:
            from lpot.utils.pytorch import load
            int8_model = load(
                os.path.abspath(os.path.expanduser(args.tuned_checkpoint)),
                model)
            print(int8_model)
            if args.accuracy_only:
                eval_func(int8_model)
            elif args.benchmark:
                benchmark(int8_model)

        else:
            model.load_state_dict(torch.load(args.input_model))
            print(model)
            if args.accuracy_only:
                eval_func(model)
            elif args.benchmark:
                benchmark(model)