Example #1
0
def export_eval_protobuf_model(checkpoint_dir, model_name, dataset, quant_type,
                               output_file, batch_size):
    """Export the evaluation graph as a compact protobuf and report accuracy.

    The model is restored from ``checkpoint_dir + "/checkpoint"``, written to
    ``output_file`` with ``ModelExporter.export_compact`` and then run over
    the dataset's test split to print its top-1 accuracy.

    Args:
        checkpoint_dir: Directory holding the checkpoint; ``"/checkpoint"`` is
            appended and handed to ``SaverRestore``.
        model_name: Forwarded to ``get_model_func`` to select the model.
        dataset: Key into ``datasets.DATASETS``; the entry is called with no
            arguments and must return ``(_, test_data, (img_shape,
            label_shape))``.
        quant_type: Forwarded to ``get_model_func``.
        output_file: Destination path passed to ``export_compact``.
        batch_size: Batch size used when evaluating the test split.

    Returns:
        A tuple ``(input_names, output_names, input_shapes)`` where
        ``input_shapes`` maps every input spec name to its ``shape``.
    """
    _, test_data, (img_shape, label_shape) = datasets.DATASETS[dataset]()

    # label_shape[0] is presumably the number of classes -- confirm against
    # get_model_func.
    model_func, input_spec, output_spec = get_model_func(
        "eval", model_name, quant_type, img_shape, label_shape[0])
    input_names = [i.name for i in input_spec]
    output_names = [o.name for o in output_spec]
    predictor_config = PredictConfig(
        session_init=SaverRestore(checkpoint_dir + "/checkpoint"),
        tower_func=model_func,
        input_signature=input_spec,
        input_names=input_names,
        output_names=output_names,
        create_graph=False)

    # NOTE(review): the message says "optimised" but the export below is
    # called with optimize=False -- confirm which one is intended.
    print("Exporting optimised protobuf graph...")
    K.set_learning_phase(False)
    ModelExporter(predictor_config).export_compact(output_file, optimize=False)

    K.clear_session()
    pred = OfflinePredictor(predictor_config)

    # remainder=True keeps the final, smaller batch so every sample is scored.
    test_data = BatchData(test_data, batch_size, remainder=True)
    test_data.reset_state()

    num_correct = 0
    num_processed = 0
    for img, label in tqdm(test_data):
        predicted = pred(img)[0].argmax(axis=1)
        expected = label.argmax(axis=1)
        # Let the array count its own matches instead of iterating the
        # boolean mask element by element with the builtin sum().
        num_correct += int((predicted == expected).sum())
        num_processed += img.shape[0]

    if num_processed:
        print("Exported model has accuracy {:.4f}".format(num_correct /
                                                          num_processed))
    else:
        # The export itself succeeded; an empty test split must not turn
        # that into a ZeroDivisionError.
        print("Exported model accuracy not computed: no test samples.")

    return input_names, output_names, {i.name: i.shape for i in input_spec}
class Loader(object):
    """
    Data loader that streams an ImageNet LMDB file through a tensorpack
    pipeline (local shuffle -> decode + augment -> batch) and yields
    PyTorch tensors.

    Arguments:
        mode (str, required): mode of dataset to operate in, one of
            ['train', 'val']. Selects the file ``ILSVRC-<mode>.lmdb`` and is
            passed to ``fbresnet_augmentor`` as ``mode == 'train'``.
        batch_size (int, optional): how many samples per batch to load
            (default: 256).
        shuffle (bool, optional): forwarded to ``LMDBSerializer.load``
            (default: False). ``LocallyShuffleData`` is applied afterwards
            regardless of this flag.
        num_workers (int, optional): stored on the instance as
            ``self.num_workers`` but not consumed by the pipeline, whose
            decode/augment stage always uses 8 processes (default: 25).
        cache (int, optional): buffer size handed to ``LocallyShuffleData``
            (default: 50000).
        device (str, optional): torch device every batch is moved to before
            it is yielded (default: 'cuda').
        base_dir (str, optional): directory that contains the
            ``ILSVRC-<mode>.lmdb`` files
            (default: '/userhome/cs/u3003679/').
    """
    def __init__(self,
                 mode,
                 batch_size=256,
                 shuffle=False,
                 num_workers=25,
                 cache=50000,
                 device='cuda',
                 base_dir='/userhome/cs/u3003679/'):
        # enumerate standard imagenet augmentors
        imagenet_augmentors = fbresnet_augmentor(mode == 'train')

        # locate and open the serialized dataset for this mode
        lmdb_loc = os.path.join(base_dir, 'ILSVRC-{}.lmdb'.format(mode))
        ds = LMDBSerializer.load(lmdb_loc, shuffle=shuffle)
        ds = LocallyShuffleData(ds, cache)

        def decode_and_augment(dp):
            # Each datapoint is (encoded image buffer, label).
            x, label = dp
            x = cv2.imdecode(x, cv2.IMREAD_COLOR)
            for aug in imagenet_augmentors:
                x = aug.augment(x)
            return x, label

        ds = MultiProcessMapDataZMQ(ds, num_proc=8,
                                    map_func=decode_and_augment)
        self.ds = BatchData(ds, batch_size)
        self.ds.reset_state()

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.device = device

    def __iter__(self):
        """Yield ``(images, labels)`` batches as tensors on ``self.device``."""
        for x, y in self.ds:
            # Move the batch to the device in compact integer form (uint8
            # images, int32 labels) ...
            x = torch.ByteTensor(x).to(self.device)
            y = torch.IntTensor(y).to(self.device)
            # ... and convert only once it is there: images go through
            # uint8_to_float (defined elsewhere), labels become int64.
            yield uint8_to_float(x), y.long()

    def __len__(self):
        """Return the size reported by the underlying batched dataflow."""
        return self.ds.size()