Example #1
def main():
    # parse the command line arguments
    parser = NeonArgparser(__doc__)
    parser.add_argument('--output_path', required=True,
                        help='Output path used when training model')
    parser.add_argument('--w2v_path', required=False, default=None,
                        help='Path to GoogleNews w2v file for vocab expansion.')
    parser.add_argument('--eval_data_path', required=False, default='./SICK_data',
                        help='Path to the SICK dataset for evaluating semantic relatedness')
    parser.add_argument('--max_vocab_size', required=False, default=1000000,
                        help='Limit the vocabulary expansion to fit in GPU memory')
    parser.add_argument('--subset_pct', required=False, default=100,
                        help='subset of training dataset to use (use to retrieve '
                             'preprocessed data from training)')
    args = parser.parse_args(gen_be=True)

    # load vocab file from training
    _, vocab_file = load_data(args.data_dir, output_path=args.output_path,
                              subset_pct=float(args.subset_pct))
    vocab, _, _ = load_obj(vocab_file)

    vocab_size = len(vocab)
    neon_logger.display("\nVocab size from the dataset is: {}".format(vocab_size))

    index_from = 2  # 0: padding 1: oov
    vocab_size_layer = vocab_size + index_from
    max_len = 30

    # load trained model
    model_dict = load_obj(args.model_file)

    # Vocabulary expansion trick needs to pass the correct vocab set to evaluate (for tokenization)
    if args.w2v_path:
        neon_logger.display("Performing Vocabulary Expansion... Loading W2V...")
        w2v_vocab, w2v_vocab_size = get_w2v_vocab(args.w2v_path,
                                                  int(args.max_vocab_size), cache=True)

        vocab_size_layer = w2v_vocab_size + index_from
        model = load_sent_encoder(model_dict, expand_vocab=True, orig_vocab=vocab,
                                  w2v_vocab=w2v_vocab, w2v_path=args.w2v_path, use_recur_last=True)
        vocab = w2v_vocab
    else:
        # otherwise stick with original vocab size used to train the model
        model = load_sent_encoder(model_dict, use_recur_last=True)

    model.initialize(dataset=(max_len, 1))

    evaluate(model, vocab=vocab, data_path=args.eval_data_path, evaltest=True,
             vocab_size_layer=vocab_size_layer)
Example #2
def main():
    # parse the command line arguments
    parser = NeonArgparser(__doc__)
    parser.add_argument('--output_path', required=True,
                        help='Output path used when training model')
    parser.add_argument('--w2v_path', required=False, default=None,
                        help='Path to GoogleNews w2v file for vocab expansion.')
    parser.add_argument('--eval_data_path', required=False, default='./SICK_data',
                        help='Path to the SICK dataset for evaluating semantic relatedness')
    parser.add_argument('--max_vocab_size', required=False, default=1000000,
                        help='Limit the vocabulary expansion to fit in GPU memory')
    parser.add_argument('--subset_pct', required=False, default=100,
                        help='subset of training dataset to use (use to retrieve '
                             'preprocessed data from training)')
    args = parser.parse_args(gen_be=True)

    # load vocab file from training
    _, vocab_file = load_data(args.data_dir, output_path=args.output_path,
                              subset_pct=float(args.subset_pct))
    vocab, _, _ = load_obj(vocab_file)

    vocab_size = len(vocab)
    neon_logger.display("\nVocab size from the dataset is: {}".format(vocab_size))

    index_from = 2  # 0: padding 1: oov
    vocab_size_layer = vocab_size + index_from
    max_len = 30

    # load trained model
    model_dict = load_obj(args.model_file)

    # Vocabulary expansion trick needs to pass the correct vocab set to evaluate (for tokenization)
    if args.w2v_path:
        neon_logger.display("Performing Vocabulary Expansion... Loading W2V...")
        w2v_vocab, w2v_vocab_size = get_w2v_vocab(args.w2v_path,
                                                  int(args.max_vocab_size), cache=True)

        vocab_size_layer = w2v_vocab_size + index_from
        model = load_sent_encoder(model_dict, expand_vocab=True, orig_vocab=vocab,
                                  w2v_vocab=w2v_vocab, w2v_path=args.w2v_path, use_recur_last=True)
        vocab = w2v_vocab
    else:
        # otherwise stick with original vocab size used to train the model
        model = load_sent_encoder(model_dict, use_recur_last=True)

    model.initialize(dataset=(max_len, 1))

    evaluate(model, vocab=vocab, data_path=args.eval_data_path, evaltest=True,
             vocab_size_layer=vocab_size_layer)
Example #3
def load_vgg_weights(model, path):
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/'
    filename = 'VGG_ILSVRC_16_layers_fc_reduced_fused_conv_bias.p'
    size = 86046032

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print(
        'De-serializing the pre-trained VGG16 model with dilated convolutions...'
    )
    pdict = load_obj(filepath)

    model_layers = [l for l in model.layers.layers[0].layers]
    # convert source model into dictionary with layer name as keys
    src_layers = {
        layer['config']['name']: layer
        for layer in pdict['model']['config']['layers']
    }

    i = 0
    for layer in model_layers:
        if layer.classnm == 'Convolution_bias' and i < 15:
            # no states in above parameter file
            layer.load_weights(src_layers['Convolution_bias_' + str(i)],
                               load_states=False)
            print('{} loaded from source file'.format(layer.name))
            i += 1
        elif hasattr(layer, 'W'):
            print('Skipping {} layer'.format(layer.name))
Example #4
def load_vgg_weights(model, path):
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/'
    filename = 'VGG_ILSVRC_16_layers_fc_reduced_fused_conv_bias.p'
    size = 86046032

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained VGG16 model with dilated convolutions...')
    pdict = load_obj(filepath)

    model_layers = [l for l in model.layers.layers[0].layers]
    # convert source model into dictionary with layer name as keys
    src_layers = {layer['config']['name']: layer for layer in pdict['model']['config']['layers']}

    i = 0
    for layer in model_layers:
        if layer.classnm == 'Convolution_bias' and i < 15:
            # no states in above parameter file
            layer.load_weights(src_layers['Convolution_bias_'+str(i)], load_states=False)
            print('{} loaded from source file'.format(layer.name))
            i += 1
        elif hasattr(layer, 'W'):
            print('Skipping {} layer'.format(layer.name))
Example #5
def load_vgg_all_weights(model, path):
    # load a pre-trained VGG16 from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/VGG/'
    filename = 'VGG_D.p'
    size = 554227541

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained VGG16 model...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers[0].layers]
    param_dict_list = pdict['model']['config']['layers']

    i = 0
    for layer, ps in zip(param_layers, param_dict_list):
        i += 1
        if i == 43:
            break
        layer.load_weights(ps, load_states=True)
        print(layer.name + " <-- " + ps['config']['name'])

    # to load the fc6 and fc7 from caffe into neon fc layers after ROI pooling
    neon_fc_layers = model.layers.layers[2].layers[1].layers[0].layers[2:5] +\
        model.layers.layers[2].layers[1].layers[0].layers[6:9]
    vgg_fc_layers = param_dict_list[44:47] + param_dict_list[48:51]

    for layer, ps in zip(neon_fc_layers, vgg_fc_layers):
        layer.load_weights(ps, load_states=True)
        print(layer.name + " <-- " + ps['config']['name'])
Example #6
def load_imagenet_weights(model, path):
    # load a pre-trained Alexnet from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/'
    filename = 'alexnet.p'
    size = 488808400

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained Alexnet using ImageNet I1K ...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers_to_optimize]
    param_dict_list = pdict['layer_params_states']
    i = 0
    for layer, ps in zip(param_layers, param_dict_list):
        i = i + 1
        print(i, layer.name)
        layer.set_params(ps)
        if 'states' in ps:
            layer.set_states(ps)
        if i == 10:
            print('Only load the pre-trained weights up to conv5 layer of Alexnet')
            break
Example #7
    def __init__(
        self, repo_dir, inner_size, do_transforms=True, rgb=True, multiview=False, set_name="train", subset_pct=100
    ):

        assert subset_pct > 0 and subset_pct <= 100, "subset_pct must be between 0 and 100"
        assert set_name in ["train", "validation"]
        self.set_name = set_name if set_name == "train" else "val"

        self.repo_dir = repo_dir
        self.inner_size = inner_size
        self.minibatch_size = self.be.bsz

        # Load from repo dataset_cache:
        try:
            cache_filepath = os.path.join(repo_dir, "dataset_cache.pkl")
            dataset_cache = load_obj(cache_filepath)
        except IOError:
            raise IOError(
                "Cannot find dataset cache in %s.  Run batch_writer to preprocess the "
                "data and create batch files for imageset" % (repo_dir)
            )

        # Should have following defined:
        req_attributes = [
            "global_mean",
            "nclass",
            "val_start",
            "ntrain",
            "label_names",
            "train_nrec",
            "img_size",
            "nval",
            "train_start",
            "val_nrec",
            "label_dict",
            "batch_prefix",
        ]

        for r in req_attributes:
            if r not in dataset_cache:
                raise ValueError("Dataset cache missing required attribute %s" % (r))

        self.__dict__.update(dataset_cache)
        self.filename = os.path.join(repo_dir, self.batch_prefix)

        self.center = False if do_transforms else True
        self.flip = True if do_transforms else False
        self.rgb = rgb
        self.multiview = multiview
        self.label = "l_id"
        if isinstance(self.nclass, dict):
            self.nclass = self.nclass[self.label]

        # Rough percentage
        self.recs_available = getattr(self, self.set_name + "_nrec")
        self.macro_start = getattr(self, self.set_name + "_start")
        self.macros_available = getattr(self, "n" + self.set_name)
        self.ndata = int(self.recs_available * subset_pct / 100.0)

        self.start = 0
Example #8
def load_imagenet_weights(model, path):
    # load a pre-trained Alexnet from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/alexnet/old/pre_v1.4.0/'
    filename = 'alexnet.p'
    size = 488808400

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('Loading the Alexnet pre-trained with ImageNet I1K from: ' + filepath)
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers]

    param_dict_list = pdict['model']['config']['layers']
    skip_loading = False
    for i, layer in enumerate(param_layers):
        if not load_pre_trained_weight(i, layer):
            skip_loading = True
        if not skip_loading:
            ps = param_dict_list[i]
            print "Loading weights for:{} [src: {}]".format(
                layer.name, ps['config']['name'])
            layer.load_weights(ps, load_states=True)
        else:
            config_name = param_dict_list[i]['config']['name'] if i < len(
                param_dict_list) else ""
            print "Skipped loading weights for: {} [src: {}]".format(
                layer.name, config_name)

    return
Example #9
def load_vgg_all_weights(model, path):
    # load a pre-trained VGG16 from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/VGG/'
    filename = 'VGG_D.p'
    size = 554227541

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained VGG16 model...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers[0].layers]
    param_dict_list = pdict['model']['config']['layers']

    i = 0
    for layer, ps in zip(param_layers, param_dict_list):
        i += 1
        if i == 43:
            break
        layer.load_weights(ps, load_states=True)
        print(layer.name + " <-- " + ps['config']['name'])

    # to load the fc6 and fc7 from caffe into neon fc layers after ROI pooling
    neon_fc_layers = model.layers.layers[2].layers[1].layers[0].layers[2:5] +\
        model.layers.layers[2].layers[1].layers[0].layers[6:9]
    vgg_fc_layers = param_dict_list[44:47] + param_dict_list[48:51]

    for layer, ps in zip(neon_fc_layers, vgg_fc_layers):
        layer.load_weights(ps, load_states=True)
        print(layer.name + " <-- " + ps['config']['name'])
Example #10
    def __init__(self, prm_path=default_prm_path, layer=-4, backend='gpu', cores=32):
        print('Log::Vectorizer:: Initialising Vectorizer')
        self.layer = layer

        if not os.path.isfile(prm_path):
            raise Exception('FileNotFound: Cannot find the file %s' % prm_path)

        print('Log::Vectorizer:: Generating backend, backend: {}'.format(backend))
        if backend == 'cpu':
            cores = 1
        self.cores = cores
        self.generated_backend_object = gen_backend(batch_size=self.cores, backend=backend)
        self.backend = backend

        print('Log::Vectorizer:: Loading model from %s' % prm_path)
        model_dict = load_obj(prm_path)

        print('Log::Vectorizer:: Generating model with loaded file')
        self.model = Model(model_dict)

        # now we are going to extract the middle patch from the image,
        # based on the size used to train the model
        self.patch_height = model_dict['train_input_shape'][1]
        self.patch_width = model_dict['train_input_shape'][2]

        print('Log::Vectorizer:: Initialising Model')
        # initialise the model so that internally the arrays are allocated to the correct size
        self.model.initialize(model_dict['train_input_shape'])
        print('Log::Vectorizer:: DONE!')
Example #11
def load_imagenet_weights(model, path):
    # load a pre-trained Alexnet from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/alexnet/old/pre_v1.4.0/'
    filename = 'alexnet.p'
    size = 488808400

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('Loading the Alexnet pre-trained with ImageNet I1K from: ' + filepath)
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers]
        
    param_dict_list = pdict['model']['config']['layers']
    skip_loading = False
    for i, layer in enumerate(param_layers):
        if not load_pre_trained_weight(i, layer):
            skip_loading = True
        if not skip_loading:
            ps = param_dict_list[i]
            print "Loading weights for:{} [src: {}]".format(layer.name, ps['config']['name'])
            layer.load_weights(ps, load_states=True)
        else:
            config_name = param_dict_list[i]['config']['name'] if i < len(param_dict_list) else ""
            print "Skipped loading weights for: {} [src: {}]".format(layer.name, config_name)
        
    return
Example #12
def load_vgg_weights(model, path):
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/'
    filename = 'VGG_ILSVRC_16_layers_fc_reduced.p'
    size = 244190212

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print(
        'De-serializing the pre-trained VGG16 model with dilated convolutions...'
    )
    pdict = load_obj(filepath)

    model_layers = [l for l in model.layers.layers[0].layers]

    # convert source model into dictionary with layer name as keys
    src_layers = {
        layer['config']['name']: layer
        for layer in pdict['model']['config']['layers']
    }

    for layer in model_layers:
        if layer.name in src_layers.keys():
            layer.load_weights(src_layers[layer.name], load_states=True)
            print('{} loaded from source file'.format(layer.name))
        elif hasattr(layer, 'W'):
            print('Skipping {} layer'.format(layer.name))
Example #13
def load_imagenet_weights(model, path):
    # load a pre-trained Alexnet from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/'
    filename = 'alexnet.p'
    size = 488808400

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained Alexnet using ImageNet I1K ...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers_to_optimize]
    param_dict_list = pdict['layer_params_states']
    i = 0
    for layer, ps in zip(param_layers, param_dict_list):
        i = i + 1
        print(i, layer.name)
        layer.set_params(ps)
        if 'states' in ps:
            layer.set_states(ps)
        if i == 10:
            print('Only load the pre-trained weights up to conv5 layer of Alexnet')
            break
Example #14
def deserialize(fn, datasets=None, inference=False):
    """
    Helper function to load all objects from a serialized file,
    this includes callbacks and datasets as well as the model, layers,
    etc.

    Arguments:
        datasets (DataSet, optional): If the dataset is not serialized
                                      in the file it can be passed in
                                      as an argument.  This will also
                                      override any dataset in the serialized
                                      file
        inference (bool, optional): if true only the weights will be loaded, not
                                    the states
    Returns:
        Model: the model object
        Dataset: the data set object
        Callback: the callbacks
    """
    config_dict = load_obj(fn)

    if datasets is not None:
        logger.warn('Ignoring datasets serialized in archive file %s' % fn)
    elif 'datasets' in config_dict:
        ds_cls = load_class(config_dict['datasets']['type'])
        dataset = ds_cls.gen_class(config_dict['datasets']['config'])
        datasets = dataset.gen_iterators()

    if 'train' in datasets:
        data_iter = datasets['train']
    else:
        key = list(datasets.keys())[0]
        data_iter = datasets[key]
        logger.warn('Could not find training set iterator, '
                    'using %s instead' % key)

    model = Model(config_dict, data_iter)

    callbacks = None
    if 'callbacks' in config_dict:
        # run through the callbacks looking for dataset objects
        # replace them with the corresponding data set above
        cbs = config_dict['callbacks']['callbacks']
        for cb in cbs:
            if 'config' not in cb:
                cb['config'] = {}
            for arg in cb['config']:
                if type(cb['config'][arg]) is dict and 'type' in cb['config'][arg]:
                    if cb['config'][arg]['type'] == 'Data':
                        key = cb['config'][arg]['name']
                        if key in datasets:
                            cb['config'][arg] = datasets[key]
                        else:
                            cb['config'][arg] = None
        # now we can generate the callbacks
        callbacks = Callbacks.load_callbacks(config_dict['callbacks'], model)
    return (model, dataset, callbacks)
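
A minimal usage sketch for the deserialize helper above. It assumes the archive was written by neon's serialization support and already contains the dataset configuration; the file name 'model_archive.prm' is only a placeholder.

# Hypothetical archive path written during training; substitute your own file.
archive_path = 'model_archive.prm'

# deserialize rebuilds the Model, the dataset iterators and the callbacks
# from the single serialized file.
model, dataset, callbacks = deserialize(archive_path, inference=True)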
Example #15
    def __init__(self,
                 repo_dir,
                 inner_size,
                 do_transforms=True,
                 rgb=True,
                 multiview=False,
                 set_name='train',
                 subset_pct=100):

        assert (subset_pct > 0
                and subset_pct <= 100), "subset_pct must be between 0 and 100"
        assert (set_name in ['train', 'validation'])
        self.set_name = set_name if set_name == 'train' else 'val'

        self.repo_dir = repo_dir
        self.inner_size = inner_size
        self.minibatch_size = self.be.bsz

        # Load from repo dataset_cache:
        try:
            cache_filepath = os.path.join(repo_dir, 'dataset_cache.pkl')
            dataset_cache = load_obj(cache_filepath)
        except IOError:
            raise IOError(
                "Cannot find '%s/dataset_cache.pkl'. Run batch_writer to "
                "preprocess the data and create batch files for imageset" %
                (repo_dir))

        # Should have following defined:
        req_attributes = [
            'global_mean', 'nclass', 'val_start', 'ntrain', 'label_names',
            'train_nrec', 'img_size', 'nval', 'train_start', 'val_nrec',
            'label_dict', 'batch_prefix'
        ]

        for r in req_attributes:
            if r not in dataset_cache:
                raise ValueError(
                    "Dataset cache missing required attribute %s" % (r))

        self.__dict__.update(dataset_cache)
        self.filename = os.path.join(repo_dir, self.batch_prefix)

        self.center = False if do_transforms else True
        self.flip = True if do_transforms else False
        self.rgb = rgb
        self.multiview = multiview
        self.label = 'l_id'
        if isinstance(self.nclass, dict):
            self.nclass = self.nclass[self.label]

        # Rough percentage
        self.recs_available = getattr(self, self.set_name + '_nrec')
        self.macro_start = getattr(self, self.set_name + '_start')
        self.macros_available = getattr(self, 'n' + self.set_name)
        self.ndata = int(self.recs_available * subset_pct / 100.)

        self.start = 0
Example #16
def deserialize(fn, datasets=None, inference=False):
    """
    Helper function to load all objects from a serialized file,
    this includes callbacks and datasets as well as the model, layers,
    etc.

    Arguments:
        datasets (DataSet, optional): If the dataset is not serialized
                                      in the file it can be passed in
                                      as an argument.  This will also
                                      override any dataset in the serialized
                                      file
        inference (bool, optional): if true only the weights will be loaded, not
                                    the states
    Returns:
        Model: the model object
        Dataset: the data set object
        Callback: the callbacks
    """
    config_dict = load_obj(fn)

    if datasets is not None:
        logger.warn('Ignoring datasets serialized in archive file %s' % fn)
    elif 'datasets' in config_dict:
        ds_cls = load_class(config_dict['datasets']['type'])
        dataset = ds_cls.gen_class(config_dict['datasets']['config'])
        datasets = dataset.gen_iterators()

    if 'train' in datasets:
        data_iter = datasets['train']
    else:
        key = list(datasets.keys())[0]
        data_iter = datasets[key]
        logger.warn('Could not find training set iterator, '
                    'using %s instead' % key)

    model = Model(config_dict, data_iter)

    callbacks = None
    if 'callbacks' in config_dict:
        # run through the callbacks looking for dataset objects
        # replace them with the corresponding data set above
        cbs = config_dict['callbacks']['callbacks']
        for cb in cbs:
            if 'config' not in cb:
                cb['config'] = {}
            for arg in cb['config']:
                if type(cb['config'][arg]) is dict and 'type' in cb['config'][arg]:
                    if cb['config'][arg]['type'] == 'Data':
                        key = cb['config'][arg]['name']
                        if key in datasets:
                            cb['config'][arg] = datasets[key]
                        else:
                            cb['config'][arg] = None
        # now we can generate the callbacks
        callbacks = Callbacks.load_callbacks(config_dict['callbacks'], model)
    return (model, dataset, callbacks)
Example #17
 def load_callbacks(cls, cdict, model, data=[]):
     if type(cdict) is str:
         cdict = load_obj(cdict)
     callbacks = cls(model, output_file=cdict['output_file'])
     callbacks.epoch_marker = cdict['epoch_marker']
     callbacks.callbacks = []
     for cb in cdict['callbacks']:
         module = load_class(cb['type'])
         callbacks.callbacks.append(module(**cb['config']))
     return callbacks
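
A sketch of how load_callbacks might be invoked, based only on the keys the method reads above; the dict contents and file name are placeholders, not part of the original example.

# The dict mirrors the keys accessed by load_callbacks; values are placeholders.
cdict = {
    'output_file': 'callback_data.h5',
    'epoch_marker': 0,
    # each entry's 'type' would be resolved via load_class and built with **config
    'callbacks': [],
}

callbacks = Callbacks.load_callbacks(cdict, model)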
Example #18
 def load_callbacks(cls, cdict, model, data=[]):
     if type(cdict) is str:
         cdict = load_obj(cdict)
     callbacks = cls(model, output_file=cdict["output_file"])
     callbacks.epoch_marker = cdict["epoch_marker"]
     callbacks.callbacks = []
     for cb in cdict["callbacks"]:
         module = load_class(cb["type"])
         callbacks.callbacks.append(module(**cb["config"]))
     return callbacks
Example #19
    def run(self):
        load_dir = self.image_dir
        train_tar = os.path.join(load_dir, 'ILSVRC2012_img_train.tar')
        validation_tar = os.path.join(load_dir, 'ILSVRC2012_img_val.tar')

        for infile in (train_tar, validation_tar):
            if not os.path.exists(infile):
                raise IOError(
                    infile +
                    " not found. Please ensure you have ImageNet downloaded."
                    "More info here: http://www.image-net.org/download-imageurls"
                )
        # download our version of the metadata
        meta_dir = load_i1kmeta(self.out_dir)
        meta_file = os.path.join(meta_dir, 'neon_ILSVRC2012_devmeta.pkl')
        self.meta = load_obj(meta_file)
        self.__dict__.update(
            self.meta)  # get label_dict, label_names, global_mean from meta
        self.global_mean = np.mean(self.global_mean.reshape(3, -1),
                                   axis=1).reshape(3, 1)[::-1]

        np.random.seed(0)
        with tarfile.open(train_tar) as tf:
            s_sets = tf.getmembers()
            s_tars = [tarfile.open(fileobj=tf.extractfile(s)) for s in s_sets]
            print('Building trainset list from synset tars.')
            t_jpegfiles = []
            totalsz = len(s_tars)
            for i, st in enumerate(s_tars):
                if i % 100 == 0:
                    print("%d%% ..." % (int(round((100.0 * i) / totalsz))))
                t_jpegfiles += [st.extractfile(m) for m in st.getmembers()]
                st.close()
            print("Done loading")
            np.random.shuffle(t_jpegfiles)
            train_labels = [[self.label_dict[j.name[:9]]] for j in t_jpegfiles]
            self.train_nrec = len(t_jpegfiles)
            self.ntrain = -(-self.train_nrec // self.macro_size)
            self.nclass = {'l_id': 1000}
            self.train_start = 0
            train_labels = {'l_id': np.array(train_labels, dtype=np.int32)}
            self.write_batches('train', self.train_start, train_labels,
                               t_jpegfiles)

        with tarfile.open(validation_tar) as tf:
            jpegfiles = sorted([tf.extractfile(m) for m in tf.getmembers()],
                               key=lambda x: x.name)
            self.val_nrec = len(jpegfiles)
            self.nval = -(-self.val_nrec // self.macro_size)
            self.val_start = 10**int(np.log10(self.ntrain) + 1)
            val_labels = {
                'l_id': np.array(self.val_ground_truth, dtype=np.int32)
            }
            self.write_batches('val', self.val_start, val_labels, jpegfiles)
        self.save_meta()
Example #20
    def read_images(self, split):
        """
        Read sentences and image features from pickled dict

        Args:
            split (str): test or train split
        """
        data_path = os.path.join(self.path, 'features.pkl.gz')
        self.dataset = load_obj(data_path)
        self.sent_data = self.dataset['sents'][split]
        self.features = self.dataset['feats']
Example #21
    def read_images(self, split):
        """
        Read sentences and image features from pickled dict

        Args:
            split (str): test or train split
        """
        data_path = os.path.join(self.path, "features.pkl.gz")
        self.dataset = load_obj(data_path)
        self.sent_data = self.dataset["sents"][split]
        self.features = self.dataset["feats"]
Example #22
    def load_params(self, param_path):
        """
        Loads the model parameters (per layer weights, epochs run, optimizer
        states) saved in param_path from serialize().

        Arguments:
            param_path (str): File containing serialized python dict with layer
                              weights and states.
        """
        pdict = load_obj(param_path)
        self.deserialize(pdict)
        logger.info('Model weights loaded from %s', param_path)
Example #23
    def read_images(self, split):
        """
        Read sentences and image features from pickled dict

        Args:
            split (str): test or train split
        """
        data_path = os.path.join(self.path, 'features.pkl.gz')
        from neon.util.persist import load_obj
        self.dataset = load_obj(data_path)
        self.sent_data = self.dataset['sents'][split]
        self.features = self.dataset['feats']
Example #24
    def __init__(self, repo_dir, inner_size,
                 do_transforms=True, rgb=True, multiview=False,
                 set_name='train', subset_pct=100):

        assert(subset_pct > 0 and subset_pct <= 100), "subset_pct must be between 0 and 100"
        assert(set_name in ['train', 'validation'])
        self.set_name = set_name if set_name == 'train' else 'val'

        self.repo_dir = repo_dir
        self.inner_size = inner_size
        self.minibatch_size = self.be.bsz

        # Load from repo dataset_cache:
        try:
            cache_filepath = os.path.join(repo_dir, 'dataset_cache.pkl')
            dataset_cache = load_obj(cache_filepath)
        except IOError:
            raise IOError("Cannot find '%s/dataset_cache.pkl'. Run batch_writer to "
                          "preprocess the data and create batch files for imageset"
                          % (repo_dir))

        # Should have following defined:
        req_attributes = ['global_mean', 'nclass', 'val_start', 'ntrain', 'label_names',
                          'train_nrec', 'img_size', 'nval', 'train_start', 'val_nrec',
                          'label_dict', 'batch_prefix']

        for r in req_attributes:
            if r not in dataset_cache:
                raise ValueError("Dataset cache missing required attribute %s" % (r))

        global_mean = dataset_cache['global_mean']
        if global_mean is not None and global_mean.shape != (3, 1):
            raise ValueError('Dataset cache global mean is not in the proper format. Run '
                             'neon/util/update_dataset_cache.py utility on %s.' % cache_filepath)

        self.__dict__.update(dataset_cache)
        self.filename = os.path.join(repo_dir, self.batch_prefix)

        self.center = False if do_transforms else True
        self.flip = True if do_transforms else False
        self.rgb = rgb
        self.multiview = multiview
        self.label = 'l_id'
        if isinstance(self.nclass, dict):
            self.nclass = self.nclass[self.label]

        # Rough percentage
        self.recs_available = getattr(self, self.set_name + '_nrec')
        self.macro_start = getattr(self, self.set_name + '_start')
        self.macros_available = getattr(self, 'n' + self.set_name)
        self.ndata = int(self.recs_available * subset_pct / 100.)

        self.start = 0
Example #25
    def load_params(self, param_path):
        """
        Loads the model parameters (per layer weights, epochs run, optimizer
        states) saved in param_path from serialize().

        Arguments:
            param_path (str): File containing serialized python dict with layer
                              weights and states.
        """
        pdict = load_obj(param_path)
        self.deserialize(pdict, weights_only=True)
        logger.info('Model weights loaded from %s', param_path)
Example #26
def load_caffe_weights(model, file_path):
    pdict = load_obj(file_path)['params']

    #  we match by name with the caffe blobs
    for (pos, layer) in enumerate(model.layers.layers):
        if pos == 1:  # skip conv4_3
            continue
        load_weights(layer.layers, pdict)

    # we handle the tree-in-tree next
    conv4_3_loc = model.layers.layers[1].layers[1].layers[0].layers
    conv4_3_conf = model.layers.layers[1].layers[1].layers[1].layers
    load_weights(conv4_3_loc, pdict)
    load_weights(conv4_3_conf, pdict)
Example #27
def load_caffe_weights(model, file_path):
    pdict = load_obj(file_path)['params']

    #  we match by name with the caffe blobs
    for (pos, layer) in enumerate(model.layers.layers):
        if pos == 1:  # skip conv4_3
            continue
        load_weights(layer.layers, pdict)

    # we handle the tree-in-tree next
    conv4_3_loc = model.layers.layers[1].layers[1].layers[0].layers
    conv4_3_conf = model.layers.layers[1].layers[1].layers[1].layers
    load_weights(conv4_3_loc, pdict)
    load_weights(conv4_3_conf, pdict)
Example #28
    def run(self):
        load_dir = self.image_dir
        train_tar = os.path.join(load_dir, "ILSVRC2012_img_train.tar")
        validation_tar = os.path.join(load_dir, "ILSVRC2012_img_val.tar")

        for infile in (train_tar, validation_tar):
            if not os.path.exists(infile):
                raise IOError(
                    infile + " not found. Please ensure you have ImageNet downloaded."
                    "More info here: http://www.image-net.org/download-imageurls"
                )
        # download our version of the metadata
        meta_dir = load_i1kmeta(self.out_dir)
        meta_file = os.path.join(meta_dir, "neon_ILSVRC2012_devmeta.pkl")
        self.meta = load_obj(meta_file)
        self.__dict__.update(self.meta)  # get label_dict, label_names, global_mean from meta
        self.global_mean = np.mean(self.global_mean.reshape(3, -1), axis=1).reshape(3, 1)[::-1]

        np.random.seed(0)
        with tarfile.open(train_tar) as tf:
            s_sets = tf.getmembers()
            s_tars = [tarfile.open(fileobj=tf.extractfile(s)) for s in s_sets]
            print("Building trainset list from synset tars.")
            t_jpegfiles = []
            totalsz = len(s_tars)
            for i, st in enumerate(s_tars):
                if i % 100 == 0:
                    print("%d%% ..." % (int(round((100.0 * i) / totalsz))))
                t_jpegfiles += [st.extractfile(m) for m in st.getmembers()]
                st.close()
            print("Done loading")
            np.random.shuffle(t_jpegfiles)
            train_labels = [[self.label_dict[j.name[:9]]] for j in t_jpegfiles]
            self.train_nrec = len(t_jpegfiles)
            self.ntrain = -(-self.train_nrec // self.macro_size)
            self.nclass = {"l_id": 1000}
            self.train_start = 0
            train_labels = {"l_id": np.array(train_labels, dtype=np.int32)}
            self.write_batches("train", self.train_start, train_labels, t_jpegfiles)

        with tarfile.open(validation_tar) as tf:
            jpegfiles = sorted([tf.extractfile(m) for m in tf.getmembers()], key=lambda x: x.name)
            self.val_nrec = len(jpegfiles)
            self.nval = -(-self.val_nrec // self.macro_size)
            self.val_start = 10 ** int(np.log10(self.ntrain) + 1)
            val_labels = {"l_id": np.array(self.val_ground_truth, dtype=np.int32)}
            self.write_batches("val", self.val_start, val_labels, jpegfiles)
        self.save_meta()
Example #29
    def load_params(self, param_path, load_states=True):
        """
        Loads the model parameters (per layer weights, epochs run, optimizer
        states) saved in param_path from serialize().

        Arguments:
            param_path (str): File containing serialized python dict with layer
                              weights and states.
            load_states (bool):  if False, then only the weights will be loaded
                                 into a model in which the layers have already been
                                 created, otherwise will (re)create the layers from
                                 the serialized parameters and set the learning
                                 states as well
        """
        self.deserialize(load_obj(param_path), load_states=load_states)
        logger.info('Model weights loaded from %s', param_path)
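
A short usage sketch for load_params, assuming a Model whose layers are already built; the layer container and the parameter file name are placeholders.

from neon.models import Model

# Placeholder layer list and parameter file; substitute your own network here.
model = Model(layers=layers)

# Weights only, into the existing layers (typical for inference):
model.load_params('trained_weights.p', load_states=False)

# Weights plus optimizer/learning states, recreating layers from the file:
# model.load_params('trained_weights.p', load_states=True)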
Example #30
    def load_params(self, param_path, load_states=True):
        """
        Loads the model parameters (per layer weights, epochs run, optimizer
        states) saved in param_path from serialize().

        Arguments:
            param_path (str): File containing serialized python dict with layer
                              weights and states.
            load_states (bool):  if False, then only the weights will be loaded
                                 into a model in which the layers have already been
                                 created, otherwise will (re)create the layers from
                                 the serialized parameters and set the learning
                                 states as well
        """
        self.deserialize(load_obj(param_path), load_states=load_states)
        logger.info('Model weights loaded from %s', param_path)
Example #31
    def load_weights(self, weight_path):
        """
        Loads the layer weights saved in weight_path from serialize().

        Arguments:
            weight_path (str): File containing serialized python dict with layer
                               weights and states.
        """
        pdict = load_obj(weight_path)
        self.epoch_index = pdict['epoch_index']

        param_layers = [l for l in self.layers_to_optimize]
        param_dict_list = pdict['layer_params_states']
        for l, ps in zip(param_layers, param_dict_list):
            l.set_params(ps['params'])
            if 'states' in ps:
                l.set_states(ps['states'])
Example #32
    def load_weights(self, weight_path):
        """
        Loads the layer weights saved in weight_path from serialize().

        Arguments:
            weight_path (str): File containing serialized python dict with layer
                               weights and states.
        """
        pdict = load_obj(weight_path)
        self.epoch_index = pdict["epoch_index"]

        param_layers = [l for l in self.layers_to_optimize]
        param_dict_list = pdict["layer_params_states"]
        for l, ps in zip(param_layers, param_dict_list):
            l.set_params(ps["params"])
            if "states" in ps:
                l.set_states(ps["states"])
Example #33
def load_vgg_weights(model, path):
    # load a pre-trained VGG16 from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/VGG/'
    filename = 'VGG_D_Conv.p'
    size = 169645138

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    neon_logger.display('De-serializing the pre-trained VGG16 model...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers[0].layers[0].layers]
    param_dict_list = pdict['model']['config']['layers']
    for layer, ps in zip(param_layers, param_dict_list):
        neon_logger.display("{}".format(layer.name, ps['config']['name']))
        layer.load_weights(ps, load_states=True)
Example #34
def load_vgg_weights(model, path):
    # load a pre-trained VGG16 from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/VGG/'
    filename = 'VGG_D_Conv.p'
    size = 169645138

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    neon_logger.display('De-serializing the pre-trained VGG16 model...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers[0].layers[0].layers]
    param_dict_list = pdict['model']['config']['layers']
    for layer, ps in zip(param_layers, param_dict_list):
        neon_logger.display("{}".format(layer.name, ps['config']['name']))
        layer.load_weights(ps, load_states=True)
Example #35
    def load_weights(self, weight_path):
        """
        Loads the layer weights saved in weight_path from serialize().

        Arguments:
            weight_path (str): File containing serialized python dict with layer
                               weights and states.
        """
        pdict = load_obj(weight_path)

        self.epoch_index = pdict['epoch_index']

        param_layers = [l for l in self.layers_to_optimize]
        param_dict_list = pdict['layer_params_states']
        for l, ps in zip(param_layers, param_dict_list):
            l.set_params(ps['params'])
            if 'states' in ps:
                l.set_states(ps['states'])

        logger.info('Model weights loaded from %s', weight_path)
Example #36
def load_imagenet_weights(model, path):
    # load a pre-trained Alexnet from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/alexnet/'
    filename = 'alexnet.p'
    size = 488808400

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained Alexnet using ImageNet I1K ...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers[0].layers[0].layers]
    param_dict_list = pdict['model']['config']['layers']
    for layer, ps in zip(param_layers, param_dict_list):
        print(layer.name, ps['config']['name'])
        layer.load_weights(ps, load_states=True)
        if ps['config']['name'] == 'Pooling_2':
            print('Only load the pre-trained weights up to conv5 layer of Alexnet')
            break
Example #37
def load_imagenet_weights(model, path):
    # load a pre-trained Alexnet from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/alexnet/'
    filename = 'alexnet.p'
    size = 488808400

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained Alexnet using ImageNet I1K ...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers[0].layers[0].layers]
    param_dict_list = pdict['model']['config']['layers']
    for layer, ps in zip(param_layers, param_dict_list):
        print(layer.name, ps['config']['name'])
        layer.load_weights(ps, load_states=True)
        if ps['config']['name'] == 'Pooling_2':
            print('Only load the pre-trained weights up to conv5 layer of Alexnet')
            break
Example #38
def get_w2v_vocab(fname, max_vocab_size, cache=True):
    """
    Get ordered dict of vocab from google word2vec
    """
    if cache:
        cache_fname = fname.split('.')[0] + ".vocab"

        if os.path.isfile(cache_fname):
            vocab, vocab_size = load_obj(cache_fname)
            neon_logger.display(
                "Word2Vec vocab cached, size is: {}".format(vocab_size))
            return vocab, vocab_size

    with open(fname, 'rb') as f:
        header = f.readline()
        vocab_size, embed_dim = map(int, header.split())
        binary_len = np.dtype('float32').itemsize * embed_dim

        neon_logger.display("Word2Vec vocab size is: {}".format(vocab_size))
        vocab_size = min(max_vocab_size, vocab_size)
        neon_logger.display("Reducing vocab size to: {}".format(vocab_size))

        vocab = OrderedDict()

        for i in range(vocab_size):
            word = []
            while True:
                ch = f.read(1)
                if ch == b' ':
                    word = (b''.join(word)).decode('utf-8')
                    break
                if ch != b'\n':
                    word.append(ch)
            f.read(binary_len)
            vocab[word] = i

    if cache:
        save_obj((vocab, vocab_size), cache_fname)

    return vocab, vocab_size
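
A usage sketch mirroring how Example #1 calls get_w2v_vocab; the word2vec file path is an assumption (any file in the original word2vec C binary format should work).

# Assumed path to a word2vec binary; the GoogleNews vectors are one common choice.
w2v_path = 'GoogleNews-vectors-negative300.bin'

vocab, vocab_size = get_w2v_vocab(w2v_path, max_vocab_size=1000000, cache=True)

# vocab is an OrderedDict mapping each word to its row index in the embedding
# matrix, so membership tests and index lookups are direct:
if 'sentence' in vocab:
    idx = vocab['sentence']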
Example #39
def load_vgg_weights(model, path):
    # load a pre-trained VGG16 from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/VGG/'
    filename = 'VGG_D.p'
    size = 554227541

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained VGG16 model...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers[0].layers[0].layers]
    param_dict_list = pdict['model']['config']['layers']
    i = 0
    for layer, ps in zip(param_layers, param_dict_list):
        i += 1
        print(layer.name, ps['config']['name'])
        layer.load_weights(ps, load_states=True)
        if i == 43:
            break
Example #40
def get_w2v_vocab(fname, max_vocab_size, cache=True):
    """
    Get ordered dict of vocab from google word2vec
    """
    if cache:
        cache_fname = fname.split('.')[0] + ".vocab"

        if os.path.isfile(cache_fname):
            vocab, vocab_size = load_obj(cache_fname)
            neon_logger.display("Word2Vec vocab cached, size is: {}".format(vocab_size))
            return vocab, vocab_size

    with open(fname, 'rb') as f:
        header = f.readline()
        vocab_size, embed_dim = map(int, header.split())
        binary_len = np.dtype('float32').itemsize * embed_dim

        neon_logger.display("Word2Vec vocab size is: {}".format(vocab_size))
        vocab_size = min(max_vocab_size, vocab_size)
        neon_logger.display("Reducing vocab size to: {}".format(vocab_size))

        vocab = OrderedDict()

        for i in range(vocab_size):
            word = []
            while True:
                ch = f.read(1)
                if ch == b' ':
                    word = (b''.join(word)).decode('utf-8')
                    break
                if ch != b'\n':
                    word.append(ch)
            f.read(binary_len)
            vocab[word] = i

    if cache:
        save_obj((vocab, vocab_size), cache_fname)

    return vocab, vocab_size
Example #41
def load_vgg_weights(model, path):
    # load a pre-trained VGG16 from Neon model zoo to the local
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/VGG/'
    filename = 'VGG_D.p'
    size = 554227541

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained VGG16 model...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers[0].layers[0].layers]
    param_dict_list = pdict['model']['config']['layers']
    i = 0
    for layer, ps in zip(param_layers, param_dict_list):
        i += 1
        print(layer.name, ps['config']['name'])
        layer.load_weights(ps, load_states=True)
        if i == 43:
            break
Example #42
def load_vgg_weights(model, path):
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/VGG/'
    filename = 'VGG_D_Conv_fused_conv_bias.p'
    size = 58867537

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained VGG16 model...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers[0].layers]
    param_dict_list = pdict['model']['config']['layers']

    i = 0
    for layer, ps in zip(param_layers, param_dict_list):
        # finished loading param_dict_list[00 - 29] and param_layers[00-29]
        if i == 30:
            break
        layer.load_weights(ps, load_states=False)
        i += 1
        print(layer.name + " <-- " + ps['config']['name'])
Example #43
def load_vgg_weights(model, path):
    url = 'https://s3-us-west-1.amazonaws.com/nervana-modelzoo/VGG/'
    filename = 'VGG_D_Conv_fused_conv_bias.p'
    size = 58867537

    workdir, filepath = Dataset._valid_path_append(path, '', filename)
    if not os.path.exists(filepath):
        Dataset.fetch_dataset(url, filename, filepath, size)

    print('De-serializing the pre-trained VGG16 model...')
    pdict = load_obj(filepath)

    param_layers = [l for l in model.layers.layers[0].layers]
    param_dict_list = pdict['model']['config']['layers']

    i = 0
    for layer, ps in zip(param_layers, param_dict_list):
        # finished loading param_dict_list[00 - 29] and param_layers[00-29]
        if i == 30:
            break
        layer.load_weights(ps, load_states=False)
        i += 1
        print(layer.name + " <-- " + ps['config']['name'])
Example #44
 def __init__(self, pdict):
     if type(pdict) is str:
         pdict = load_obj(pdict)
     super(ModelDescription, self).__init__(pdict)
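
A small sketch of using the ModelDescription constructor above; it accepts either a path to a serialized model or an already-loaded dict, and the file name is a placeholder.

from neon.util.persist import load_obj

# Both forms end up with the same description; 'trained_model.p' is a placeholder.
desc_from_path = ModelDescription('trained_model.p')
desc_from_dict = ModelDescription(load_obj('trained_model.p'))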
Example #45
#!/usr/bin/env python
# ----------------------------------------------------------------------------
# Copyright 2016 Nervana Systems Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ----------------------------------------------------------------------------
from neon.util.argparser import NeonArgparser
from neon.util.persist import load_obj
from neon.transforms import Misclassification
from neon.models import Model
from neon.data import ImageLoader

# parse the command line arguments (generates the backend)
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# setup data provider
test_set = ImageLoader(set_name='validation', repo_dir=args.data_dir,
                       inner_size=32, scale_range=40, do_transforms=False)
model = Model(load_obj(args.model_file), test_set)
print('Accuracy: %.1f %% (Top-1)' % ((1.0 - model.eval(test_set, metric=Misclassification())) * 100))
Example #46
    def __init__(self, path='.', n_mb=None, img_per_batch=None, conv_size=None,
                 rpn_rois_per_img=None, frcn_rois_per_img=None, add_flipped=False,
                 shuffle=False, deterministic=False, rebuild_cache=False, subset_pct=100,
                 mock_db=None):
        self.batch_index = 0
        self.path = path
        self.mock_db = mock_db

        # how many ROIs per image
        self.rois_per_img = rpn_rois_per_img if rpn_rois_per_img else self.RPN_ROI_PER_IMAGE
        self.img_per_batch = img_per_batch if img_per_batch else self.IMG_PER_BATCH
        self.rois_per_batch = self.rois_per_img * self.img_per_batch

        # how many ROIs to use to train frcnn
        self.frcn_rois_per_img = frcn_rois_per_img if frcn_rois_per_img \
            else self.FRCNN_ROI_PER_IMAGE

        assert self.img_per_batch == 1, "Only a minibatch of 1 is supported."

        self.num_classes = len(self.CLASSES)
        self._class_to_index = dict(list(zip(self.CLASSES, list(range(self.num_classes)))))

        # shape of the final conv layer
        if conv_size:
            self._conv_size = conv_size
        else:
            self._conv_size = int(np.floor(self.MAX_SIZE * self.SCALE))
        self._feat_stride = 1 / float(self.SCALE)
        self._num_scales = len(self.SCALES) * len(self.RATIOS)
        self._total_anchors = self._conv_size * self._conv_size * self._num_scales
        self.shuffle = shuffle
        self.deterministic = deterministic
        self.add_flipped = add_flipped

        # load and configure the dataset paths
        self.config = self.load_data()

        # annotation metadata
        self._annotation_file_ext = '.xml'
        self._annotation_obj_tag = 'object'
        self._annotation_class_tag = 'name'
        self._annotation_xmin_tag = 'xmin'
        self._annotation_xmax_tag = 'xmax'
        self._annotation_ymin_tag = 'ymin'
        self._annotation_ymax_tag = 'ymax'

        # self.rois_per_batch is 128 (2*64) ROIs
        # But the image path batch size is self.img_per_batch
        # need to control the batch size here
        assert self.img_per_batch == 1, "Only a batch size of 1 image is supported"

        neon_logger.display("Backend batchsize is changed to be {} "
                            "from Object Localization dataset".format(
                             self.img_per_batch))

        self.be.bsz = self.img_per_batch

        # 0. allocate buffers
        self.allocate()

        if not self.mock_db:
            # 1. read image index file
            assert os.path.exists(self.config['image_path']), \
                'Image index file does not exist: {}'.format(self.config['image_path'])
            with open(self.config['index_path']) as f:
                self.image_index = [x.strip() for x in f.readlines()]

            num_images = len(self.image_index)
            self.num_image_entries = num_images * 2 if self.add_flipped else num_images
            self.ndata = self.num_image_entries * self.rois_per_img
        else:
            self.num_image_entries = 1
            self.ndata = self.num_image_entries * self.rois_per_img

        assert (subset_pct > 0 and subset_pct <= 100), ('subset_pct must be between 0 and 100')

        if n_mb is not None:
            self.nbatches = n_mb
        else:
            self.nbatches = int(self.num_image_entries / self.img_per_batch * subset_pct / 100)

        self.cache_file = self.config['cache_path']

        if os.path.exists(self.cache_file) and not rebuild_cache and not self.mock_db:
            self.roi_db = load_obj(self.cache_file)
            neon_logger.display('ROI dataset loaded from file {}'.format(self.cache_file))

        elif not self.mock_db:
            # 2. read object Annotations (XML)
            roi_db = self.load_roi_groundtruth()

            if self.add_flipped:
                roi_db = self.add_flipped_db(roi_db)

            # 3. construct anchor targets
            self.roi_db = self.add_anchors(roi_db)

            if NORMALIZE_BBOX_TARGETS:
                # 4. normalize bbox targets by class
                self.roi_db = self.normalize_bbox_targets(self.roi_db)

            save_obj(self.roi_db, self.cache_file)
            neon_logger.display('wrote ROI dataset to {}'.format(self.cache_file))

        else:
            assert self.mock_db is not None
            roi_db = [self.mock_db]
            self.roi_db = self.add_anchors(roi_db)

        # 4. map anchors back to full canvas.
        # This is necessary because the network outputs reflect the full canvas.
        # We cache the files in the unmapped state (above) to save memory.
        self.roi_db = unmap(self.roi_db)
Beispiel #47
0
    def configure(self, repo_dir, inner_size, do_transforms,
                  rgb, multiview, set_name, subset_pct, macro):
        """
        Set up all dataset config options.
        """
        assert (subset_pct > 0 and subset_pct <= 100), (
            'subset_pct must be between 0 and 100')
        assert(set_name in ['train', 'validation'])
        self.set_name = set_name if set_name == 'train' else 'val'

        self.repo_dir = repo_dir
        self.inner_size = inner_size
        self.minibatch_size = self.be.bsz

        self.center = not do_transforms
        self.flip = do_transforms
        self.rgb = rgb
        self.multiview = multiview
        self.start = 0
        self.macro = macro

        if not macro:
            self.filename = os.path.join(repo_dir, 'filelist.txt')
            if not os.path.exists(self.filename):
                raise IOError('Cannot find %s' % self.filename)
            filelist = np.genfromtxt(self.filename, dtype=str)
            self.ndata = int(len(filelist) * subset_pct / 100.)
            assert self.ndata != 0
            self.macro_start = 0
            self.nlabels = 1
            self.nclass = 1
            self.global_mean = None
            self.img_size = 256
            return

        # Load from repo dataset_cache:
        try:
            cache_filepath = os.path.join(repo_dir, 'dataset_cache.pkl')
            dataset_cache = load_obj(cache_filepath)
        except IOError:
            raise IOError("Cannot find '%s/dataset_cache.pkl'. Run batch_writer "
                          "to preprocess the data and create batch files for "
                          "imageset" % (repo_dir))

        # Should have following defined:
        req_attributes = ['global_mean', 'nclass', 'val_start', 'ntrain',
                          'label_names', 'train_nrec', 'img_size', 'nval',
                          'train_start', 'val_nrec', 'label_dict',
                          'batch_prefix']

        for r in req_attributes:
            if r not in dataset_cache:
                raise ValueError(
                    'Dataset cache missing required attribute %s' % (r))

        if dataset_cache['global_mean'].shape != (3, 1):
            raise ValueError('Dataset cache global mean is not in the proper format. Run '
                             'neon/util/update_dataset_cache.py utility on %s.' % cache_filepath)

        self.__dict__.update(dataset_cache)
        self.filename = os.path.join(repo_dir, self.batch_prefix)

        self.label = 'l_id'
        if isinstance(self.nclass, dict):
            self.nclass = self.nclass[self.label]

        self.recs_available = getattr(self, self.set_name + '_nrec')
        self.macro_start = getattr(self, self.set_name + '_start')
        self.macros_available = getattr(self, 'n' + self.set_name)
        self.ndata = int(self.recs_available * subset_pct / 100.)
Beispiel #48
0
    def __init__(self, pdict):
        if type(pdict) is str:
            pdict = load_obj(pdict)
        super(ModelDescription, self).__init__(pdict)
Beispiel #49
0
    # fit the model for 3 epochs
    model.fit(train,
              optimizer=opt,
              num_epochs=3,
              cost=cost,
              callbacks=callbacks)

train.reset()
# get 1 image
for im, l in train:
    break
train.exit_batch_provider()
save_obj((im.get(), l.get()), 'im1.pkl')
im_save = im.get().copy()
if args.resume:
    (im2, l2) = load_obj('im1.pkl')
    im.set(im2)
    l.set(l2)
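# (on --resume the minibatch saved above is reloaded so both runs presumably
#  see identical input when the fprop outputs are compared)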

# run fprop and bprop on this minibatch and save the results
out_fprop = model.fprop(im)

out_fprop_save = [x.get() for x in out_fprop]
im.set(im_save)
out_fprop = model.fprop(im)
out_fprop_save2 = [x.get() for x in out_fprop]
for x, y in zip(out_fprop_save, out_fprop_save2):
    assert np.max(np.abs(x - y)) == 0.0, '2 fprop iterations do not match'

# run fit for 1 minibatch
# have to do this by hand
Beispiel #50
0
    def __init__(self,
                 image_set,
                 year,
                 path='.',
                 add_flipped=False,
                 overlap_thre=None,
                 output_type=0,
                 n_mb=None,
                 img_per_batch=None,
                 rois_per_img=None,
                 rois_random_sample=True,
                 shuffle=False):
        self.isRoiDB = True
        self.batch_index = 0
        self.year = year
        self.image_set = image_set
        self.add_flipped = add_flipped
        self.overlap_thre = overlap_thre if overlap_thre else FRCN_IOU_THRE
        self.output_type = output_type

        # how many ROIs per image
        self.rois_per_image = rois_per_img if rois_per_img else FRCN_ROI_PER_IMAGE
        self.img_per_batch = img_per_batch if img_per_batch else FRCN_IMG_PER_BATCH
        self.fg_rois_per_image = FRCN_FG_FRAC * self.rois_per_image
        self.bg_rois_per_image = self.rois_per_image - self.fg_rois_per_image
        self.rois_per_batch = self.rois_per_image * self.img_per_batch
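        # illustrative split (hypothetical constants): with FRCN_ROI_PER_IMAGE=64
        # and FRCN_FG_FRAC=0.25, each image contributes 16 foreground and
        # 48 background ROIs; 2 images per batch give rois_per_batch = 128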
        self.rois_random_sample = rois_random_sample
        self.shuffle = shuffle

        self.cache_file_name = 'voc_{}_{}_flip_{}_ovlp_{}.pkl'.format(
            self.year, self.image_set, self.add_flipped, self.overlap_thre)
        print('prepare PASCAL VOC {} from year {}: add flipped images {} and '
              'overlap threshold {}'.format(self.image_set, self.year,
                                            self.add_flipped, self.overlap_thre))

        # PASCAL class to index
        self.num_classes = PASCAL_VOC_NUM_CLASSES
        self._class_to_index = dict(
            zip(PASCAL_VOC_CLASSES, range(self.num_classes)))

        # load the voc dataset
        self.voc_root = self.load_voc(image_set, year, path)

        self.cache_file = os.path.join(self.voc_root, self.cache_file_name)

        # load the precomputed selective search results from VOC data;
        # it includes both 2007 and 2012 data
        self.ss_path = self.load_voc('ss', None, path)

        # VOC paths and infos
        self.image_index_file = os.path.join(self.voc_root, 'ImageSets',
                                             'Main', self.image_set + '.txt')
        self.image_path = os.path.join(self.voc_root, 'JPEGImages')
        self._image_file_ext = '.jpg'

        self.annotation_path = os.path.join(self.voc_root, 'Annotations')
        self._annotation_file_ext = '.xml'
        self._annotation_obj_tag = 'object'
        self._annotation_class_tag = 'name'
        self._annotation_xmin_tag = 'xmin'
        self._annotation_xmax_tag = 'xmax'
        self._annotation_ymin_tag = 'ymin'
        self._annotation_ymax_tag = 'ymax'

        self._selective_search_ext = '.pkl'
        self.selective_search_file = os.path.join(
            self.ss_path,
            '_'.join(['voc', year, self.image_set, 'selectivesearch.pkl']))

        self._bb_xmin_idx = 0
        self._bb_ymin_idx = 1
        self._bb_xmax_idx = 2
        self._bb_ymax_idx = 3

        # self.rois_per_batch is 128 (2*64) ROIs,
        # but the image batch size is self.img_per_batch,
        # so the backend batch size needs to be controlled here
        print("Backend batch size is changed to img_per_batch by the PASCAL_VOC dataset")
        self.be.bsz = self.img_per_batch

        # backend tensor to push the data
        self.image_shape = (3, FRCN_MAX_SCALE, FRCN_MAX_SCALE)
        self.img_np = np.zeros(
            (3, FRCN_MAX_SCALE, FRCN_MAX_SCALE, self.be.bsz), dtype=np.float32)
        self.dev_X_img = self.be.iobuf(self.image_shape, dtype=np.float32)
        self.dev_X_img_chw = self.dev_X_img.reshape(3, FRCN_MAX_SCALE,
                                                    FRCN_MAX_SCALE,
                                                    self.be.bsz)
        # for rois, features are 4 + 1 (idx within the batch)
        self.dev_X_rois = self.be.zeros((self.rois_per_batch, 5))
        self.dev_y_labels_flat = self.be.zeros((1, self.rois_per_batch),
                                               dtype=np.int32)
        self.dev_y_labels = self.be.zeros(
            (self.num_classes, self.rois_per_batch), dtype=np.int32)
        self.dev_y_bbtargets = self.be.zeros(
            (self.num_classes * 4, self.rois_per_batch))
        self.dev_y_bbmask = self.be.zeros(
            (self.num_classes * 4, self.rois_per_batch))

        # the shape will indicate the shape for 1st path (ImageNet model), and
        # 2nd path (ROIs)
        self.shape = [self.image_shape, self.num_classes * 4]

        # Need to do the following:
        #   1. load the image index list
        #   2. for each image, load the ground truth from pascal annotation
        #   3. load the selective search ROIs (this step needs gt ROIs)
        #   4.1. merge the ROIs
        #   4.2. may have to add the flipped images for training
        #   4.3. add the fields for max overlap and max overlapped classes
        #   4.4. add the bounding box targets for regression
        #   5. during minibatch feeding:
        #   - rescale images
        #   - rescale ROIs
        #   - randomly select foreground ROIs (bigger ones)
        #   - randomly select background ROIs (smaller ones)
        #   - clamp bg ROI labels (to be 0)
        #   - convert ROIs into the regression target (ROIs, 4*21)

        # 1.
        assert os.path.exists(self.image_index_file), \
            'Image index file does not exist: {}'.format(self.image_index_file)
        with open(self.image_index_file) as f:
            self.image_index = [x.strip() for x in f.readlines()]

        # self.image_index = image_index * 2 if self.add_flipped else image_index
        self.num_images = len(self.image_index)
        self.num_image_entries = self.num_images * 2 if self.add_flipped \
            else self.num_images
        self.ndata = self.num_image_entries * self.rois_per_image
        self.nbatches = self.num_image_entries // self.img_per_batch

        if n_mb is not None:
            self.nbatches = n_mb

        if os.path.exists(self.cache_file):
            self.roi_db = load_obj(self.cache_file)
            print('ROI dataset loaded from file {}'.format(self.cache_file))
        else:
            # 2.
            self.roi_gt = self.load_pascal_roi_groundtruth()

            # 3.
            self.roi_ss = self.load_pascal_roi_selectivesearch()

            # 4.
            self.roi_db = self.combine_gt_ss_roi()

            save_obj(self.roi_db, self.cache_file)
            print('wrote ROI dataset to {}'.format(self.cache_file))
Beispiel #51
0
from neon.util.argparser import NeonArgparser
from neon.layers import Pooling
from neon.models import Model
from neon.data import ImageLoader
from neon.util.persist import save_obj, load_obj

# parse the command line arguments (generates the backend)
parser = NeonArgparser(__doc__)
args = parser.parse_args()

scales = [112, 128, 160, 240]
for scale in scales:
    print(scale)
    test = ImageLoader(set_name='validation', shuffle=False, do_transforms=False, inner_size=scale,
                       scale_range=scale, repo_dir=args.data_dir)

    model_desc = load_obj(args.model_file)
    model_desc['model']['config']['layers'].insert(-1, Pooling('all', op='avg').get_description())
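    # a global average-pooling layer is spliced in just before the final layer
    # so the classifier input presumably stays a fixed length as the scale varies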
    model = Model(model_desc, test, inference=True)
    softmaxes = model.get_outputs(test)
    save_obj(softmaxes, "bigfeat_dropout_SM_{}.pkl".format(scale))
Beispiel #52
0
import os

from neon.util.argparser import NeonArgparser
from neon.util.persist import load_obj
from neon.transforms import Misclassification, CrossEntropyMulti
from neon.optimizers import GradientDescentMomentum
from neon.layers import GeneralizedCost
from neon.models import Model
from neon.data import DataLoader, ImageParams

# parse the command line arguments (generates the backend)
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# setup data provider
test_dir = os.path.join(args.data_dir, 'val')
shape = dict(channel_count=3, height=32, width=32)
test_params = ImageParams(center=True, flip=False, **shape)
common = dict(target_size=1, nclasses=10)
test_set = DataLoader(set_name='val', repo_dir=test_dir, media_params=test_params, **common)

model = Model(load_obj(args.model_file))
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001)
model.initialize(test_set, cost=cost)

acc = 1.0 - model.eval(test_set, metric=Misclassification())[0]
print('Accuracy: %.1f %% (Top-1)' % (acc * 100.0))

model.benchmark(test_set, cost=cost, optimizer=opt)
Beispiel #53
0
# hyperparameters from the reference
args.batch_size = 64
embed_dim = 620

valid_split = None
# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# load the documents by giving the path and what extension the files are
data_file, vocab_file = load_data(args.data_dir, valid_split=valid_split,
                                  max_vocab_size=args.max_vocab_size,
                                  max_len_w=args.max_len_w,
                                  output_path=args.output_dir,
                                  file_ext=['txt'],
                                  subset_pct=args.subset_pct)
vocab, rev_vocab, word_count = load_obj(vocab_file)

vocab_size = len(vocab)
neon_logger.display("\nData loading complete.")
neon_logger.display("\nVocab size from the dataset is: {}".format(vocab_size))

index_from = 2  # 0: padding 1: oov
vocab_size_layer = vocab_size + index_from
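# with two reserved ids (0: padding, 1: oov) the embedding table holds
# vocab_size + index_from rows; dataset word indices are presumably shifted up
# by index_from before lookup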

init_embed_dev = Uniform(low=-0.1, high=0.1)

# sent2vec network
nhidden = 2400
gradient_clip_norm = 5.0

train_set = SentenceHomogenous(data_file=data_file, sent_name='train', text_name='report_train',
Beispiel #54
0
    layers.append(Conv(name = 'Custom Head 2', **conv_params(1, 2, relu=False)))
    layers.append(Activation(Softmax()))
    # layers.append(Affine(512, init=Kaiming(local=False),
    #                  batch_norm=True, activation=Rectlin()))
    # layers.append(Affine(2, init=Kaiming(local=False), activation=Softmax()))

    return Model(layers=layers)

lunaModel = create_network(args.depth)

PRETRAINED = False
# Pre-trained ResNet 50
# It assumes the image has a depth channel of 3
pretrained_weights_file = 'resnet{}_weights.prm'.format(args.depth)
print ('Loading pre-trained ResNet weights: {}'.format(pretrained_weights_file))
trained_resnet = load_obj(pretrained_weights_file)  # Load a pre-trained resnet 50 model

# Load the pre-trained weights to our model
param_layers = [l for l in lunaModel.layers.layers]
param_dict_list = trained_resnet['model']['config']['layers']
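# walk the two layer lists in parallel, copying weights until the custom head
# (from 'end_resnet' onward) is reached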

for layer, params in zip(param_layers, param_dict_list):

    if (layer.name == 'end_resnet'):
        break

    # ResNet was trained on images with 3 color channels;
    # our data usually does not have 3 channels, so do not load weights for the input layer
    if layer.name != 'Input Layer':
        layer.load_weights(params, load_states=False)  # Don't load the state, just load the weights
Beispiel #55
0
from neon.util.persist import load_obj, save_obj
from neon import logger as neon_logger

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("cache_file", help="path to data cache file")
    args = parser.parse_args()

    cache_file = args.cache_file

    # check for RW access to file
    assert os.path.exists(cache_file), "file does not exist %s" % cache_file
    if not os.access(os.path.abspath(cache_file), os.R_OK | os.W_OK):
        raise IOError("Need to add read and/or write permissions on file %s" % cache_file)

    dc = load_obj(cache_file)

    if "global_mean" not in dc or "img_size" not in dc:
        raise ValueError("data cache file missing global_mean key")

    sz = dc["img_size"]
    gm = dc["global_mean"]

    if len(gm.shape) != 2 or (gm.shape[0] != sz * sz * 3 or gm.shape[1] != 1):
        raise ValueError("global mean shape {} does not match format expected".format(gm.shape))

    # Collapse the full tensor mean into channel means and correct the order (RGB <-> BGR)
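    # gm arrives as a flattened per-pixel mean of shape (img_size*img_size*3, 1);
    # reshape(3, -1) regroups it by channel, the mean over axis 1 reduces it to
    # one value per channel, and [::-1] reverses the channel order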
    dc["global_mean"] = np.mean(gm.reshape(3, -1), axis=1).reshape(3, 1)[::-1]

    save_obj(dc, cache_file)
Beispiel #56
0
    def load_pascal_roi_selectivesearch(self):
        """
        Load the pre-computed selective search data on PASCAL VOC in pickle file

        The pickle file contains images and rp:
            images: image indices for the dataset (Img, 1)
                    name in string is in images[i][0][0]
            rp: all the proposed ROIs for each image (Img, 1)
                    in bb[i], there are (B, 4) for B proposed ROIs
                    The coordinates are ordered as:
                    [y1, x1, y2, x2]
                    While ground truth coordinates are:
                    [x1, y1, x2, y2]
                    So it needs re-ordering

        """
        assert self.roi_gt is not None, 'Ground truth ROIs need to be loaded first'
        assert os.path.exists(self.selective_search_file), \
            'selective search data does not exist'

        ss_data = load_obj(self.selective_search_file)
        ss_bb = ss_data['boxes'].ravel()
        ss_img_idx = ss_data['images'].ravel()
        ss_num_img = ss_bb.shape[0]

        assert ss_num_img == self.num_images, \
            'Number of images in SS data must match number of images in the dataset'

        roi_ss = []
        # load the bb from SS and compare with gt
        for i in range(ss_num_img):
            # make sure the image index match
            assert self.image_index[i] == ss_img_idx[i][0]
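            # reorder [y1, x1, y2, x2] -> [x1, y1, x2, y2] (see docstring above)
            # and subtract 1, presumably converting 1-based to 0-based indices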
            bb = (ss_bb[i][:, (1, 0, 3, 2)] - 1)
            num_boxes = bb.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            gt_bb = self.roi_gt[i]['gt_bb']
            gt_classes = self.roi_gt[i]['gt_classes'].ravel()

            gt_overlap, gt_dim = calculate_bb_overlap(bb.astype(np.float),
                                                      gt_bb.astype(np.float))

            max_overlap_area = gt_overlap.max(axis=gt_dim)
            max_overlap_arg = gt_overlap.argmax(axis=gt_dim)

            # only put the non-zero overlaps into the table
            I = np.where(max_overlap_area > 0)[0]
            overlaps[I, gt_classes[max_overlap_arg[I]]] = max_overlap_area[I]
            max_overlap_class = overlaps.argmax(axis=gt_dim)
            max_overlaps = overlaps.max(axis=gt_dim)

            # prepare the bounding box targets
            ss_bb_targets = np.zeros((num_boxes, 5), np.float32)
            # only the ones with large enough overlap with gt are used
            use_idx = np.where(max_overlaps >= self.overlap_thre)[0]

            bb_targets = self._compute_bb_targets(
                gt_bb[max_overlap_arg[use_idx]], bb[use_idx],
                max_overlap_class[use_idx])

            ss_bb_targets[use_idx] = bb_targets

            roi_ss.append({
                'ss_bb': bb,
                'gt_classes': np.zeros((num_boxes, 1), dtype=np.int32),
                'gt_overlaps': overlaps,
                'max_overlap_area': max_overlap_area.reshape(-1, 1),
                'max_overlap_class': max_overlap_class.reshape(-1, 1),
                'bb_targets': ss_bb_targets,
            })

        return roi_ss
Beispiel #58
0
              nclasses=c)
data_dir = args.image_path

test_set = PixelWiseImageLoader(set_name='test',
                                repo_dir=data_dir,
                                media_params=test_params,
                                index_file=os.path.join(
                                    data_dir, 'test_images.csv'),
                                **common)

# initialize model object
segnet_model = Model(layers=gen_model(c, h, w))
segnet_model.initialize(test_set)

# load up the serialized model
model_desc = ModelDescription(load_obj(args.save_model_file))
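# copy the trained weights into the freshly initialized network by matching
# each layer to the serialized description with the same name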
for layer in segnet_model.layers_to_optimize:
    name = layer.name
    trained_layer = model_desc.getlayer(name)
    layer.load_weights(trained_layer)

fig = plt.figure()
if args.display:
    plt.ion()

im1 = None
im2 = None

cnt = 1
for x, t in test_set:
    z = segnet_model.fprop(x).get()
Beispiel #59
0
from neon.util.argparser import NeonArgparser
from neon.util.persist import load_obj
from neon.transforms import Misclassification
from neon.models import Model
from neon.data import ImageLoader

# parse the command line arguments (generates the backend)
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# setup data provider
test_set = ImageLoader(set_name='validation',
                       repo_dir=args.data_dir,
                       inner_size=32,
                       scale_range=40,
                       do_transforms=False)
model = Model(load_obj(args.model_file))
print('Accuracy: %.1f %% (Top-1)' % (
    (1.0 - model.eval(test_set, metric=Misclassification())[0]) * 100))