Exemple #1
0
def sanity_check(conf_file, result, **be_args):
    """Run the experiment in ``conf_file`` and check its test error rate.

    The configuration is deserialized from ``conf_file`` joined onto
    ``dir``, a backend is generated for the experiment's model from
    ``be_args``, and the experiment is initialized and run.  The
    ``MisclassRate_TOP_1`` entry of the ``test`` results is printed and
    asserted to be exactly equal to ``result``.

    NOTE(review): ``dir`` is presumably a module-level directory path
    defined elsewhere in this file (otherwise it is the builtin) - confirm.
    """
    config_path = os.path.join(dir, conf_file)
    experiment = deserialize(config_path)
    experiment.initialize(gen_backend(model=experiment.model, **be_args))
    outcome = experiment.run()
    misclass_rate = float(outcome['test']['MisclassRate_TOP_1'])
    print(misclass_rate)
    assert misclass_rate == result
Exemple #2
0
def serialize_check(conf_file, result, tol, res_string, **be_args):
    """Run the experiment in ``conf_file`` and check its error within ``tol``.

    The experiment is deserialized from ``conf_file``, initialized with a
    backend generated from ``be_args`` and run.  The
    ``MisclassPercentage_TOP_1`` value stored under ``res_string`` in the
    run results is printed next to ``result`` and asserted to differ from
    ``result`` by less than ``tol``.
    """
    experiment = deserialize(conf_file)
    backend = gen_backend(model=experiment.model, **be_args)
    experiment.initialize(backend)
    res = experiment.run()
    misclass = float(res[res_string]['MisclassPercentage_TOP_1'])
    # Format into a single string so the line is valid, and prints the same
    # two values, under both Python 2 and Python 3.
    print('%s %s' % (misclass, result))
    assert abs(misclass - result) < tol
Exemple #3
0
def speed_check(conf_file, num_epochs, **be_args):
    """Return the wall-clock seconds spent running an experiment.

    The experiment is deserialized from ``conf_file`` joined onto ``dir``,
    its model's ``num_epochs`` is overridden with ``num_epochs``, and it is
    initialized with a backend generated from ``be_args``.  Only the
    ``experiment.run()`` call is timed.
    """
    experiment = deserialize(os.path.join(dir, conf_file))
    experiment.model.num_epochs = num_epochs
    experiment.initialize(gen_backend(model=experiment.model, **be_args))

    # time nothing but the run itself
    began = time.time()
    experiment.run()
    elapsed = time.time() - began
    return elapsed
Exemple #4
0
def speed_check(conf_file, num_epochs, **be_args):
    """Time a single run of the experiment described by ``conf_file``.

    ``conf_file`` is resolved against ``dir`` and deserialized, the model's
    ``num_epochs`` is set to ``num_epochs``, and a backend generated from
    ``be_args`` is used to initialize the experiment.  The return value is
    the number of seconds (from ``time.time()``) that ``experiment.run()``
    took.
    """
    config_path = os.path.join(dir, conf_file)
    experiment = deserialize(config_path)
    experiment.model.num_epochs = num_epochs
    experiment.initialize(gen_backend(model=experiment.model, **be_args))

    t_start = time.time()
    experiment.run()
    t_stop = time.time()
    return t_stop - t_start
Exemple #5
0
    def model_compare(model1_file, model2_file, atol=0.0, rtol=0.0):
        """Assert that two serialized models hold matching contents.

        Both files are deserialized into dictionaries.  Their key sets and
        their ``epochs_complete`` entries must be equal, and every
        remaining entry must pass ``TestSerialization.layer_compare`` with
        the given ``atol`` / ``rtol`` tolerances.

        Returns True when every check passes; a failed check raises
        AssertionError.
        """
        model1 = deserialize(model1_file)
        model2 = deserialize(model2_file)

        # list.sort() sorts in place and returns None, so comparing its
        # results is always true; sorted() compares the actual key lists.
        assert sorted(model1.keys()) == sorted(model2.keys())

        # remove the epochs from the dictionaries and compare them
        assert model1.pop('epochs_complete') == model2.pop('epochs_complete')

        # for MLP just layers should be left?
        print('checking the 1 versus k step outputs...')
        for ky in model1:
            print(ky)
            assert TestSerialization.layer_compare(model1[ky],
                                                   model2[ky],
                                                   atol=atol,
                                                   rtol=rtol)
        print('OK')

        return True
Exemple #6
0
    def model_compare(model1_file, model2_file, atol=0.0, rtol=0.0):
        """Assert that two serialized models hold matching contents.

        Both files are deserialized into dictionaries.  Their key sets and
        their ``epochs_complete`` entries must be equal, and every
        remaining entry must pass ``TestSerialization.layer_compare`` with
        the given ``atol`` / ``rtol`` tolerances.

        Returns True when every check passes; a failed check raises
        AssertionError.
        """
        model1 = deserialize(model1_file)
        model2 = deserialize(model2_file)

        # list.sort() sorts in place and returns None, so comparing its
        # results is always true; sorted() compares the actual key lists.
        assert sorted(model1.keys()) == sorted(model2.keys())

        # remove the epochs from the dictionaries and compare them
        assert model1.pop('epochs_complete') == model2.pop('epochs_complete')

        # for MLP just layers should be left?
        print('checking the 1 versus k step outputs...')
        for ky in model1:
            print(ky)
            assert TestSerialization.layer_compare(model1[ky],
                                                   model2[ky],
                                                   atol=atol,
                                                   rtol=rtol)
        print('OK')

        return True
Exemple #7
0
    def load_file(self, filename, nclasses):
        """Load a serialized batch and return its data and one-hot labels.

        ``filename`` is deserialized into a mapping with ``data`` and
        ``labels`` entries.  The data is converted to float32 and divided
        by 255; the labels are expanded into a float32 matrix with one row
        per label and ``nclasses`` columns, where column ``c`` is 1 for
        the rows whose label equals ``c``.

        Returns the tuple ``(data, onehot)``.
        """
        logger.info("loading: %s", filename)
        contents = deserialize(filename)

        # scale the raw values by 1/255 as float32
        images = np.float32(contents["data"])
        images /= 255.0

        targets = np.array(contents["labels"])
        onehot = np.zeros((len(targets), nclasses), dtype="float32")
        for cls in range(nclasses):
            onehot[:, cls] = targets == cls
        return (images, onehot)
Exemple #8
0
    def load_file(self, filename, nclasses):
        """Load a serialized batch and return its data and one-hot labels.

        ``filename`` is deserialized into a mapping with ``data`` and
        ``labels`` entries.  The data is converted to float32 and divided
        by 255; the labels are expanded into a float32 matrix with one row
        per label and ``nclasses`` columns, where column ``c`` is 1 for
        the rows whose label equals ``c``.

        Returns the tuple ``(data, onehot)``.
        """
        logger.info('loading: %s', filename)
        contents = deserialize(filename)

        # scale the raw values by 1/255 as float32
        images = np.float32(contents['data'])
        images /= 255.

        targets = np.array(contents['labels'])
        onehot = np.zeros((len(targets), nclasses), dtype='float32')
        for cls in range(nclasses):
            onehot[:, cls] = (targets == cls)
        return (images, onehot)
Exemple #9
0
    def load_file(self, filename, nclasses):
        """Load a serialized batch and return its data and one-hot labels.

        ``filename`` is deserialized into a mapping with ``data`` and
        ``labels`` entries.  The data is converted to float32 and divided
        by 255; the labels are expanded into a float32 matrix with one row
        per label and ``nclasses`` columns, where column ``c`` is 1 for
        the rows whose label equals ``c``.

        Returns the tuple ``(data, onehot)``.
        """
        logger.info('loading: %s', filename)
        contents = deserialize(filename)

        # scale the raw values by 1/255 as float32
        images = np.float32(contents['data'])
        images /= 255.

        targets = np.array(contents['labels'])
        onehot = np.zeros((len(targets), nclasses), dtype='float32')
        for cls in range(nclasses):
            onehot[:, cls] = (targets == cls)
        return (images, onehot)
Exemple #10
0
    def run(self):
        """
        Actually carry out each of the experiment steps.

        Loads the dataset, restores saved model parameters when a readable
        ``deserialized_path`` / ``serialized_path`` file exists, fits the
        model unless it is already at ``num_epochs`` or the experiment is
        ``live``, and finally serializes the parameters from rank 0 when
        the model has a ``serialized_path``.

        Raises RuntimeError when ``live`` is set and no saved model can be
        found, and NotImplementedError when a saved model would have to be
        loaded on a distributed backend.
        """

        # load the dataset; saving it to disk is no longer supported
        self.dataset.set_batch_size(self.model.batch_size)
        self.dataset.backend = self.backend
        self.dataset.load(backend=self.backend, experiment=self)
        if hasattr(self.dataset, 'serialized_path'):
            if self.dataset.serialized_path is not None:
                logger.warning(
                    'Ability to serialize dataset has been deprecated.')

        model = self.model

        # give the model the defaults it may be lacking
        if not hasattr(model, 'backend'):
            model.backend = self.backend
        if not hasattr(model, 'epochs_complete'):
            model.epochs_complete = 0

        # the path to restore from: deserialized_path wins over
        # serialized_path
        mfile = ''
        for path_attr in ('deserialized_path', 'serialized_path'):
            if hasattr(model, path_attr):
                mfile = os.path.expandvars(
                    os.path.expanduser(getattr(model, path_attr)))
                break
        else:
            if self.live:
                raise RuntimeError('Live inference requires a saved model')

        if os.access(mfile, os.R_OK):
            if self.backend.is_distributed():
                raise NotImplementedError('Deserializing models not supported '
                                          'in distributed mode')
            model.set_params(deserialize(mfile))
        elif mfile != '':
            logger.info('Unable to find saved model %s, starting over', mfile)
            if self.live:
                raise RuntimeError('Live inference requires a saved model')

        # nothing to train when all epochs are done or when running live
        if model.epochs_complete >= model.num_epochs or self.live:
            return

        model.fit(self.dataset)

        # only rank 0 writes the fitted parameters
        if hasattr(model, 'serialized_path') and self.backend.rank() == 0:
            serialize(model.get_params(), model.serialized_path)
Exemple #11
0
def call_neon(params):
    """
    Run one neon experiment for ``params`` and return its top-1 error.

    A yaml experiment file is written by ``write_params`` from
    ``hyperyaml.yaml`` (in the directory named by the ``HYPEROPT_PATH``
    environment variable) and ``params``, into a ``yamels`` subdirectory.
    That file is then deserialized, initialized and run in this process.

    Returns the ``MisclassPercentage_TOP_1`` result of the validation set,
    or of the test set when the dataset has no validation set.

    Raises:
        KeyError: if ``HYPEROPT_PATH`` is not set.
        OSError: if the ``yamels`` directory cannot be created.
        AttributeError: if the dataset has neither a validation nor a
            test set.
    """
    timestring = str(int(time.time()))
    experiment_dir = os.path.realpath(os.environ['HYPEROPT_PATH'])
    # Generate the yaml file
    hyper_file = os.path.join(experiment_dir, 'hyperyaml.yaml')
    yaml_dir = os.path.join(experiment_dir, 'yamels')
    yaml_file = os.path.join(yaml_dir, 'temp' + timestring + '.yaml')
    try:
        os.mkdir(yaml_dir)
    except OSError:
        # An already existing directory is fine; any other failure
        # (permissions, missing parent, a file in the way) must surface.
        if not os.path.isdir(yaml_dir):
            raise
    write_params(hyper_file, yaml_file, params)

    # Initialize the neon experiment
    logging.basicConfig(level=logging.INFO)
    experiment = deserialize(yaml_file)
    backend = gen_backend(model=experiment.model)  # , gpu='nervanagpu'
    experiment.initialize(backend)

    # ensure TOP1 error is calculated
    if not hasattr(experiment, 'metrics'):
        experiment.metrics = {}
    for item in ('validation', 'test'):
        if item not in experiment.metrics:
            experiment.metrics[item] = [MisclassPercentage(error_rank=1)]
        metriclist = [str(x) for x in experiment.metrics[item]]
        if 'MisclassPercentage_TOP_1' not in metriclist:
            experiment.metrics[item].append(MisclassPercentage(error_rank=1))

    result = experiment.run()

    # check if validation set is available
    if experiment.dataset.has_set('validation'):
        hyperopt_set = 'validation'
    elif experiment.dataset.has_set('test'):
        hyperopt_set = 'test'
        print("Warning: No validation set found, performing hyperparameter "
              "optimization on test set.")
    else:
        raise AttributeError("No validation or test set found to compute "
                             "the error on.")

    return result[hyperopt_set]['MisclassPercentage_TOP_1']
Exemple #12
0
    def run(self):
        """
        Actually carry out each of the experiment steps.

        Loads the dataset, restores saved model parameters when a readable
        ``deserialized_path`` / ``serialized_path`` file exists, fits the
        model unless it is already at ``num_epochs`` or the experiment is
        ``live``, and finally serializes the parameters from rank 0 when
        the model has a ``serialized_path``.

        Raises RuntimeError when ``live`` is set and no saved model can be
        found, and NotImplementedError when a saved model would have to be
        loaded on a distributed backend.
        """

        # load the dataset; saving it to disk is no longer supported
        self.dataset.set_batch_size(self.model.batch_size)
        self.dataset.backend = self.backend
        self.dataset.load(backend=self.backend, experiment=self)
        if hasattr(self.dataset, 'serialized_path'):
            if self.dataset.serialized_path is not None:
                logger.warning(
                    'Ability to serialize dataset has been deprecated.')

        model = self.model

        # give the model the defaults it may be lacking
        if not hasattr(model, 'backend'):
            model.backend = self.backend
        if not hasattr(model, 'epochs_complete'):
            model.epochs_complete = 0

        # the path to restore from: deserialized_path wins over
        # serialized_path
        mfile = ''
        for path_attr in ('deserialized_path', 'serialized_path'):
            if hasattr(model, path_attr):
                mfile = os.path.expandvars(
                    os.path.expanduser(getattr(model, path_attr)))
                break
        else:
            if self.live:
                raise RuntimeError('Live inference requires a saved model')

        if os.access(mfile, os.R_OK):
            if self.backend.is_distributed():
                raise NotImplementedError('Deserializing models not supported '
                                          'in distributed mode')
            model.set_params(deserialize(mfile))
        elif mfile != '':
            logger.info('Unable to find saved model %s, starting over', mfile)
            if self.live:
                raise RuntimeError('Live inference requires a saved model')

        # nothing to train when all epochs are done or when running live
        if model.epochs_complete >= model.num_epochs or self.live:
            return

        model.fit(self.dataset)

        # only rank 0 writes the fitted parameters
        if hasattr(model, 'serialized_path') and self.backend.rank() == 0:
            serialize(model.get_params(), model.serialized_path)
Exemple #13
0
def call_neon(params):
    """
    Run one neon experiment for ``params`` and return its top-1 error.

    A yaml experiment file is written by ``write_params`` from
    ``hyperyaml.yaml`` (in the directory named by the ``HYPEROPT_PATH``
    environment variable) and ``params``, into a ``yamels`` subdirectory.
    That file is then deserialized, initialized and run in this process.

    Returns the ``MisclassPercentage_TOP_1`` result of the validation set,
    or of the test set when the dataset has no validation set.

    Raises:
        KeyError: if ``HYPEROPT_PATH`` is not set.
        OSError: if the ``yamels`` directory cannot be created.
        AttributeError: if the dataset has neither a validation nor a
            test set.
    """
    timestring = str(int(time.time()))
    experiment_dir = os.path.realpath(os.environ['HYPEROPT_PATH'])
    # Generate the yaml file
    hyper_file = os.path.join(experiment_dir, 'hyperyaml.yaml')
    yaml_dir = os.path.join(experiment_dir, 'yamels')
    yaml_file = os.path.join(yaml_dir, 'temp' + timestring + '.yaml')
    try:
        os.mkdir(yaml_dir)
    except OSError:
        # An already existing directory is fine; any other failure
        # (permissions, missing parent, a file in the way) must surface.
        if not os.path.isdir(yaml_dir):
            raise
    write_params(hyper_file, yaml_file, params)

    # Initialize the neon experiment
    logging.basicConfig(level=logging.INFO)
    experiment = deserialize(yaml_file)
    backend = gen_backend(model=experiment.model)  # , gpu='nervanagpu'
    experiment.initialize(backend)

    # ensure TOP1 error is calculated
    if not hasattr(experiment, 'metrics'):
        experiment.metrics = {}
    for item in ('validation', 'test'):
        if item not in experiment.metrics:
            experiment.metrics[item] = [MisclassPercentage(error_rank=1)]
        metriclist = [str(x) for x in experiment.metrics[item]]
        if 'MisclassPercentage_TOP_1' not in metriclist:
            experiment.metrics[item].append(MisclassPercentage(error_rank=1))

    result = experiment.run()

    # check if validation set is available
    if experiment.dataset.has_set('validation'):
        hyperopt_set = 'validation'
    elif experiment.dataset.has_set('test'):
        hyperopt_set = 'test'
        print("Warning: No validation set found, performing hyperparameter "
              "optimization on test set.")
    else:
        raise AttributeError("No validation or test set found to compute "
                             "the error on.")

    return result[hyperopt_set]['MisclassPercentage_TOP_1']
Exemple #14
0
    def load(self, backend=None, experiment=None):
        """
        Imageset only supports nervanagpu based backends

        Reads the batch cache file ``dataset_cache.pkl`` under ``save_dir``
        and copies its statistics onto this object.  When the cache file
        does not exist the user is asked whether to write the batches;
        whatever the answer, the process then exits and has to be rerun.

        The ``backend`` and ``experiment`` arguments are not used here; the
        backend check is made against ``self.backend``.

        Raises:
            DeprecationWarning: if ``self.backend`` has no ``ng`` attribute.
            NotImplementedError: if the cached macro size differs from
                ``self.macro_size``.
        """
        if not hasattr(self.backend, 'ng'):
            raise DeprecationWarning("Only nervanagpu-based backends "
                                     "supported.  For using cudanet backend, "
                                     "revert to neon 0.8.2 ")

        bdir = os.path.expanduser(self.save_dir)
        cachefile = os.path.join(bdir, 'dataset_cache.pkl')
        if not os.path.exists(cachefile):
            logger.error("Batch dir cache not found in %s:", cachefile)
            # NOTE: raw_input exists on Python 2 only.
            response = raw_input("Press Y to create, otherwise exit: ")
            if response == 'Y':
                from neon.util.batch_writer import (BatchWriter,
                                                    BatchWriterImagenet)

                if self.imageset.startswith('I1K'):
                    self.bw = BatchWriterImagenet(**self.__dict__)
                else:
                    self.bw = BatchWriter(**self.__dict__)
                self.bw.run()
                logger.error('Done writing batches - please rerun to train.')
            else:
                logger.error('Exiting...')
            # exit in both cases: training needs a fresh run with the cache
            sys.exit()
        cstats = deserialize(cachefile, verbose=False)
        if cstats['macro_size'] != self.macro_size:
            # Interpolate the values here: exceptions do not apply
            # %-formatting to extra positional arguments the way logging
            # calls do.
            raise NotImplementedError("Cached macro size %s different from "
                                      "specified %s, delete save_dir %s "
                                      "and try again." %
                                      (cstats['macro_size'],
                                       self.macro_size,
                                       self.save_dir))
        # Set the max indexes of batches for each from the cache file
        self.maxval = cstats['nval'] + cstats['val_start'] - 1
        self.maxtrain = cstats['ntrain'] + cstats['train_start'] - 1

        # Make sure only those properties not by yaml are updated:
        # attributes already set on this object override the cached values,
        # then the merged result is copied back onto the object.
        cstats.update(self.__dict__)
        self.__dict__.update(cstats)
        # Should also put (in addition to nclass), number of train/val images
        req_param(self, ['ntrain', 'nval', 'train_start', 'val_start',
                         'train_mean', 'val_mean', 'labels_dict'])
Exemple #15
0
    def load(self, backend=None, experiment=None):
        """
        Imageset only supports nervanagpu based backends

        Reads the batch cache file ``dataset_cache.pkl`` under ``save_dir``
        and copies its statistics onto this object.  When the cache file
        does not exist the user is asked whether to write the batches;
        whatever the answer, the process then exits and has to be rerun.

        The ``backend`` and ``experiment`` arguments are not used here; the
        backend check is made against ``self.backend``.

        Raises:
            DeprecationWarning: if ``self.backend`` has no ``ng`` attribute.
            NotImplementedError: if the cached macro size differs from
                ``self.macro_size``.
        """
        if not hasattr(self.backend, 'ng'):
            raise DeprecationWarning("Only nervanagpu-based backends "
                                     "supported.  For using cudanet backend, "
                                     "revert to neon 0.8.2 ")

        bdir = os.path.expanduser(self.save_dir)
        cachefile = os.path.join(bdir, 'dataset_cache.pkl')
        if not os.path.exists(cachefile):
            logger.error("Batch dir cache not found in %s:", cachefile)
            # NOTE: raw_input exists on Python 2 only.
            response = raw_input("Press Y to create, otherwise exit: ")
            if response == 'Y':
                from neon.util.batch_writer import (BatchWriter,
                                                    BatchWriterImagenet)

                if self.imageset.startswith('I1K'):
                    self.bw = BatchWriterImagenet(**self.__dict__)
                else:
                    self.bw = BatchWriter(**self.__dict__)
                self.bw.run()
                logger.error('Done writing batches - please rerun to train.')
            else:
                logger.error('Exiting...')
            # exit in both cases: training needs a fresh run with the cache
            sys.exit()
        cstats = deserialize(cachefile, verbose=False)
        if cstats['macro_size'] != self.macro_size:
            # Interpolate the values here: exceptions do not apply
            # %-formatting to extra positional arguments the way logging
            # calls do.
            raise NotImplementedError("Cached macro size %s different from "
                                      "specified %s, delete save_dir %s "
                                      "and try again." %
                                      (cstats['macro_size'],
                                       self.macro_size,
                                       self.save_dir))
        # Set the max indexes of batches for each from the cache file
        self.maxval = cstats['nval'] + cstats['val_start'] - 1
        self.maxtrain = cstats['ntrain'] + cstats['train_start'] - 1

        # Make sure only those properties not by yaml are updated:
        # attributes already set on this object override the cached values,
        # then the merged result is copied back onto the object.
        cstats.update(self.__dict__)
        self.__dict__.update(cstats)
        # Should also put (in addition to nclass), number of train/val images
        req_param(self, ['ntrain', 'nval', 'train_start', 'val_start',
                         'train_mean', 'val_mean', 'labels_dict'])
Exemple #16
0
def train():
    """Train a model on the Fly dataset, keeping the best one on disk.

    Command-line arguments: ``sys.argv[1]`` is the file the best model is
    serialized to; an optional ``sys.argv[2]`` is a previously saved model
    to continue from.  Without it a new MLP is built from
    ``get_parameters`` and initialized on the module-level backend ``be``.

    The model is fitted for up to 1000 rounds.  After each round the
    validation error from ``get_validation`` is logged and, whenever it is
    lower than every earlier one, the model is serialized to the save file.
    """
    save_file = sys.argv[1]
    if len(sys.argv) > 2:
        model = deserialize(sys.argv[2])
    else:
        layers = get_parameters(n_in=FEATURE_LENGTH,
                                n_hidden_units=[100, 50, NUM_CLASSES])
        # define model
        model = MLP(num_epochs=1, batch_size=MINIBATCH_SIZE,
                    layers=layers, epochs_complete=0)
        model.link()
        # NOTE: disabled in the original:
        # be.configure(model, datapar=False, modelpar=False)
        model.initialize(be)
        model.data_layer = model.layers[0]
        model.cost_layer = model.layers[-1]

    dataset = Fly(backend=be,
                  repo_path=os.path.expanduser('~/flyvfly/'))

    # par related init
    be.actual_batch_size = model.batch_size
    be.mpi_size = 1
    be.mpi_rank = 0
    be.par = NoPar()
    be.par.backend = be

    max_macro_epochs = 1000
    # Infinity is a true upper bound for any error value and, unlike
    # sys.maxint, also exists on Python 3.
    min_err = float('inf')
    for i in range(max_macro_epochs):
        # reset the counter so that fit() trains again on every round
        model.epochs_complete = 0
        dataset.use_set = "train"
        model.fit(dataset)
        # NOTE: disabled in the original:
        # scores, targets = model.predict_fullset(dataset, "validation")
        val_err = get_validation(model, dataset)
        logger.info('epoch: %d,  valid error: %0.6f', i, val_err)
        if val_err < min_err:
            serialize(model, save_file)
            min_err = val_err
Exemple #17
0
    def run_experiment_in_steps(self,
                                config_file,
                                n,
                                k,
                                be,
                                init_config=None,
                                **be_args):
        """Run an experiment for ``n`` epochs split into ``k`` steps.

        Each step runs ``n // k`` epochs; the last step runs however many
        epochs are needed to reach ``n`` in total.  Between steps the
        model is serialized to a file under ``self.model_path`` and then
        reloaded from that file at the next step.  If ``init_config`` is
        not None, that file is used as the initial deserialize file of the
        first step - this is used for handing off models run on cpu/gpu
        backends to running on gpu/cpu backends.

        ``be`` is not used here; ``be_args`` are passed on to
        ``gen_backend``.

        Returns a tuple of the last saved model file path and a dict with
        copies of the layer weights taken right after the first step's
        initialization, keyed by ``'<layer name>_<layer index>'``.
        """
        # floor division keeps the epoch counts integral on Python 3 as well
        stepsize = n // k
        last_saved_state = None
        # defined up front so the return value exists even if no step runs
        initial_weights = {}

        for ind in range(1, k + 1):
            # run the same learning with N/k epochs k times
            # each time saving and reloading the serialized model state
            if ind == k:
                # in case N/k is a fraction
                # last step will end at epoch N
                end_epoch = n
            else:
                end_epoch = ind * stepsize

            # load up base experiment config
            experiment = deserialize(config_file)

            # run for N/k steps
            experiment.model.num_epochs = end_epoch

            if ind > 1:
                # after step 1 need to load initial config
                # from last runs serialized file
                experiment.model.deserialized_path = last_saved_state
            elif init_config is not None:
                # used the given pickle file to initialize the model
                experiment.model.deserialized_path = init_config

            # save the model to this file
            last_saved_state = os.path.join(self.model_path,
                                            '%d_step_%d.prm' % (k, end_epoch))
            print(last_saved_state)
            if os.path.exists(last_saved_state):
                print('removing %s' % last_saved_state)
                os.remove(last_saved_state)
            experiment.model.serialized_path = last_saved_state

            experiment.model.serialize_schedule = k
            if k == 1:
                # keep copies of all checkpoint files for cp tests
                experiment.model.save_checkpoints = n

            backend = gen_backend(model=experiment.model, **be_args)
            experiment.initialize(backend)

            if ind == 1:
                # save the initial weights for check with other runs;
                # the layer index gets its own name so that the step
                # counter ``ind`` is not overwritten
                for layer_ind, layer in enumerate(experiment.model.layers):
                    if hasattr(layer, 'weights'):  # only checking weights
                        # ensure unique layer names
                        ln = '%s_%d' % (layer.name, layer_ind)
                        initial_weights[ln] = np.copy(
                            layer.weights.asnumpyarray())
            experiment.run()

        return (last_saved_state, initial_weights)
Exemple #18
0
 def get_macro_batch(self):
     """Advance to the next macro batch and return its deserialized file.

     ``macro_idx`` moves one step forward and wraps around within
     ``nmacros`` indices starting at ``startb``.  The batch is read from
     the file ``data_batch_<macro_idx>`` inside ``save_dir``.
     """
     offset = self.macro_idx + 1 - self.startb
     self.macro_idx = offset % self.nmacros + self.startb
     batch_name = 'data_batch_{:d}'.format(self.macro_idx)
     batch_path = os.path.expanduser(os.path.join(self.save_dir, batch_name))
     return deserialize(batch_path, verbose=False)
Exemple #19
0
 def get_macro_batch(self):
     """Advance to the next macro batch and return its deserialized file.

     ``macro_idx`` moves one step forward and wraps around within
     ``nmacros`` indices starting at ``startb``.  The batch is read from
     the file ``data_batch_<macro_idx>`` inside ``save_dir``.
     """
     offset = self.macro_idx + 1 - self.startb
     self.macro_idx = offset % self.nmacros + self.startb
     batch_name = 'data_batch_{:d}'.format(self.macro_idx)
     batch_path = os.path.expanduser(os.path.join(self.save_dir, batch_name))
     return deserialize(batch_path, verbose=False)
Exemple #20
0
    model.data_layer.use_set('test', predict=True)
    dataset.use_set = 'test'
    scores, targets = model.predict_fullset(dataset, "test")
    scores_ts = np.transpose(scores.asnumpyarray())
    targets_ts = np.transpose(targets.asnumpyarray())
    model_no = sys.argv[1].split(".")[0][-2:]
    find_no_class(targets_ts[:, 5], scores_ts[:, 5], targets_tr[:, 5], scores_tr[:, 5], model_no)
    #prc_curve(targets_ts, scores_ts, targets_tr, scores_tr, model_no)

def visualize():
    """Dump and plot weights of the module-level ``model``.

    The weights of layers -2, -3 and -4 are written to the text files
    ``mcmodel12weights3.txt``, ``mcmodel12weights2.txt`` and
    ``mcmodel12weights1.txt``.  The sorted absolute weights of layers -2
    and -3 are shown side by side as grey-scale images, and the five
    smallest weights of the first row of layer -2 are printed.
    """
    weights = model.layers[-2].weights.asnumpyarray()
    prev_weights = model.layers[-3].weights.asnumpyarray()
    np.savetxt("mcmodel12weights3.txt", weights)
    np.savetxt("mcmodel12weights2.txt", prev_weights)
    np.savetxt("mcmodel12weights1.txt", model.layers[-4].weights.asnumpyarray())

    # NOTE(review): the left image sorts and then transposes, the right one
    # transposes and then sorts - kept as is, confirm this is intended.
    plt.subplot(1, 2, 1)
    plt.imshow(np.transpose(np.sort(abs(weights))), cmap=cm.Greys_r)
    plt.subplot(1, 2, 2)
    plt.imshow(np.sort(np.transpose(abs(prev_weights))), cmap=cm.Greys_r)
    plt.show()

    # the five smallest entries of the first weight row
    weights_sort = weights.argsort()
    min_weights = weights[0, weights_sort[0, 0:5]]
    # a single parenthesized argument prints identically on Python 2 and 3
    print(min_weights)
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the saved model
    # file.  The loaded object is bound to the module-level name `model`,
    # which visualize() reads.
    with open(sys.argv[1], 'r') as f:
        model = deserialize(f)
    model.print_layers()
    visualize()
    # the test() run is currently disabled
    #test()
Exemple #21
0
def serialize_check_alexnet(conf_file, result, **be_args):
    """Run the experiment in ``conf_file`` and print its validation error.

    The configuration is deserialized from ``conf_file`` joined onto
    ``dir``, initialized with a backend generated from ``be_args`` and run.
    The ``MisclassPercentage_TOP_1`` value of the ``validation`` results is
    printed next to the expected ``result``; nothing is asserted.
    """
    experiment = deserialize(os.path.join(dir, conf_file))
    backend = gen_backend(model=experiment.model, **be_args)
    experiment.initialize(backend)
    res = experiment.run()
    misclass = float(res['validation']['MisclassPercentage_TOP_1'])
    # Format into a single string so the line is valid, and prints the same
    # text, under both Python 2 and Python 3.
    print('%s %s' % (misclass, result))
Exemple #22
0
    def run_experiment_in_steps(self, config_file, n, k, be,
                                init_config=None, **be_args):
        """Run an experiment for ``n`` epochs split into ``k`` steps.

        Each step runs ``n // k`` epochs; the last step runs however many
        epochs are needed to reach ``n`` in total.  Between steps the
        model is serialized to a file under ``self.model_path`` and then
        reloaded from that file at the next step.  If ``init_config`` is
        not None, that file is used as the initial deserialize file of the
        first step - this is used for handing off models run on cpu/gpu
        backends to running on gpu/cpu backends.

        ``be`` is not used here; ``be_args`` are passed on to
        ``gen_backend``.

        Returns a tuple of the last saved model file path and a dict with
        copies of the layer weights taken right after the first step's
        initialization, keyed by ``'<layer name>_<layer index>'``.
        """
        # floor division keeps the epoch counts integral on Python 3 as well
        stepsize = n // k
        last_saved_state = None
        # defined up front so the return value exists even if no step runs
        initial_weights = {}

        for ind in range(1, k+1):
            # run the same learning with N/k epochs k times
            # each time saving and reloading the serialized model state
            if ind == k:
                # in case N/k is a fraction
                # last step will end at epoch N
                end_epoch = n
            else:
                end_epoch = ind*stepsize

            # load up base experiment config
            experiment = deserialize(config_file)

            # run for N/k steps
            experiment.model.num_epochs = end_epoch

            if ind > 1:
                # after step 1 need to load initial config
                # from last runs serialized file
                experiment.model.deserialized_path = last_saved_state
            elif init_config is not None:
                # used the given pickle file to initialize the model
                experiment.model.deserialized_path = init_config

            # save the model to this file
            last_saved_state = os.path.join(self.model_path,
                                            '%d_step_%d.prm' % (k, end_epoch))
            print(last_saved_state)
            if os.path.exists(last_saved_state):
                print('removing %s' % last_saved_state)
                os.remove(last_saved_state)
            experiment.model.serialized_path = last_saved_state

            experiment.model.serialize_schedule = k
            if k == 1:
                # keep copies of all checkpoint files for cp tests
                experiment.model.save_checkpoints = n

            backend = gen_backend(model=experiment.model, **be_args)
            experiment.initialize(backend)

            if ind == 1:
                # save the initial weights for check with other runs;
                # the layer index gets its own name so that the step
                # counter ``ind`` is not overwritten
                for layer_ind, layer in enumerate(experiment.model.layers):
                    if hasattr(layer, 'weights'):  # only checking weights
                        # ensure unique layer names
                        ln = '%s_%d' % (layer.name, layer_ind)
                        initial_weights[ln] = np.copy(
                            layer.weights.asnumpyarray()
                            )
            experiment.run()

        return (last_saved_state, initial_weights)