Example #1
    def __loadmapping(self, fn):
        classmap = dd()
        labelmap = dd()
        currentlabel = -1
        with open(fn, 'r') as f:
            for line in f:
                
                # Strip the trailing newline and split on '='
                line = line.rstrip('\n')
                split = line.split('=')

                # Extract key and value ('key=value' lines yield both;
                # lines without a single '=' map the key to itself)
                key = split[0]
                val = split[1] if len(split) == 2 else split[0]
                
                # Assign a new integer label to unseen class descriptions
                # (labelmap: class description -> class number)
                if val not in labelmap:
                    currentlabel += 1
                    labelmap[val] = currentlabel

                # Add key to classmap
                if key not in classmap:
                    classmap[key] = dd()
                    
                # Assign description and label to key
                classmap[key].label = labelmap[val]
                classmap[key].desc  = val

        return classmap
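Every example on this page relies on an attribute-access dictionary called `dd` (imported from `libutils.dict` in Example #16). Below is a minimal sketch of such a class, together with a hypothetical mapping file in the `key=value` format that `__loadmapping` expects; the real `dd` also provides `dump()` and other helpers not reproduced here.

# Minimal stand-in for libutils.dict.dd (a sketch, not the real implementation):
# a dict whose keys can also be read and written as attributes.
class dd(dict):

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        self[name] = value

# Hypothetical mapping file consumed by __loadmapping, one entry per line:
#
#     tabby=cat
#     siamese=cat
#     beagle=dog
#
# Each unique right-hand side receives an integer label, and each left-hand
# key ends up mapped to a dd with fields .label (int) and .desc (string).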
Example #2
    def learn(self, inp, trainer, inp_corruption_type=None, inp_corruption_level=0, hid_corruption_type=None, hid_corruption_level=0, cost_weight = cast_x(1), learn_scale_first=False, debug_path=None, nb_frames=None):

        if trainer:
            # Build noisy autoencoder for training
            train_enc = self(inp, inp_corruption_type, inp_corruption_level, 'full')
            train_dec = self.dec(train_enc, hid_corruption_type, hid_corruption_level)
            train_cost = self.cost(inp, train_dec, cost_weight)

            # Build noiseless autoencoder for validation
            valid_enc = self(inp, border_mode = 'full')
            valid_dec = self.dec(valid_enc)
            valid_cost = self.cost(inp, valid_dec, cost_weight)

            # Quick training for weight scaling
            if learn_scale_first:
                lookback = trainer.lookback
                momentum = trainer.momentum
                trainer.lookback = int(ceil(trainer.lookback / 20.))
                trainer.momentum = 0
                trainer([self.scale], train_cost, valid_cost, model_id=self.model_id + '_scaling').learn()
                trainer.lookback = lookback
                trainer.momentum = momentum

            debug_args = dd()
            debug_args.debug_path = debug_path
            debug_args.nb_frames = nb_frames
            debug_args.prefix = 'unsupervised'
            self.trainer = trainer(self.params.values(), train_cost, valid_cost, model_id=self.model_id,
                                   additionnal_updates = self.additionnal_update(),
                                   debug_calls=(self.debug_call, debug_args),
                                   debug_nodes = dd({'unsupervised_'+self.model_id+'_encoder_act_trainset':train_enc}))

        # Learn model
        self.trainer.learn()
Example #3
File: sgd_.py  Project: fulQuan/ift6266h15
    def __log_output(self, epoch):

        # UNSUPERVISED ---------------------------------------------------------------------------

        # Set current model log dictionary
        model_log = dd()

        model_log.nb_epoch = epoch

        if self.cost.valid:
            if "cost" not in model_log:
                model_log.cost = dd()
            model_log.cost.valid = self.cost.valid[-1]
            model_log.cost.valid_smooth = self.cost.valid_ema[-1]

        if self.error.valid:
            if "error" not in model_log:
                model_log.error = dd()
            model_log.error.valid = self.error.valid[-1]
            model_log.error.valid_smooth = self.error.valid_ema[-1]

        if self.cost.train:
            if "cost" not in model_log:
                model_log.cost = dd()
            model_log.cost.train = self.cost.train[-1]
            model_log.cost.train_smooth = self.cost.train_ema[-1]

        if self.error.train:
            if "error" not in model_log:
                model_log.error = dd()
            model_log.error.train = self.error.train[-1]
            model_log.error.train_smooth = self.error.train_ema[-1]

        self.log[self.model_id] = model_log
        self.log.dump(join(self.output_path, "out.pkl"), True)

        if self.cost.train:
            self.subplots.graphs[0, 0].add_point(
                train=(epoch, self.cost.train[-1]),
                train_ema=(epoch, self.cost.train_ema[-1]))

        if self.cost.valid:
            self.subplots.graphs[0, 0].add_point(
                valid=(epoch, self.cost.valid[-1]),
                valid_ema=(epoch, self.cost.valid_ema[-1]))

        if self.error.train:
            self.subplots.graphs[0, 1].add_point(
                train=(epoch, self.error.train[-1]),
                train_ema=(epoch, self.error.train_ema[-1]))

        if self.error.valid:
            self.subplots.graphs[0, 1].add_point(
                valid=(epoch, self.error.valid[-1]),
                valid_ema=(epoch, self.error.valid_ema[-1]))

        self.subplots.graphs.save(join(self.output_path,
                                       self.model_id + ".png"),
                                  dpi=100)
Example #4
    def __init__(self,
                 examples,
                 valid_ratio=0.1,
                 test_ratio=0.1,
                 shuffle=True):

        # Initialization
        self.__splits = dd([('train', []), ('valid', []), ('test', []),
                            ('full', [])])

        # Extract a copy of the example list
        examples = list(examples)

        # If required, shuffle example list
        if shuffle:
            random.shuffle(examples)

        # Assign examples to splits
        nb_train = int(len(examples) * (1. - test_ratio - valid_ratio))
        nb_valid = int(len(examples) * valid_ratio)
        self.__splits.train = examples[:nb_train]
        self.__splits.valid = examples[nb_train:nb_train + nb_valid]
        self.__splits.test = examples[nb_train + nb_valid:]
        self.__splits.full = examples

        print("\nSplits statistics:")
        for split_id in self.__splits:
            print("        {:7d} items in {} set".format(
                len(self[split_id]), split_id))
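A quick sketch of the split arithmetic above, assuming 1000 examples and the default ratios (not part of the original class):

# Split boundaries for 1000 examples with valid_ratio=0.1 and test_ratio=0.1.
examples = list(range(1000))
nb_train = int(len(examples) * (1. - 0.1 - 0.1))   # 800
nb_valid = int(len(examples) * 0.1)                # 100
train = examples[:nb_train]                        # indices 0..799
valid = examples[nb_train:nb_train + nb_valid]     # indices 800..899
test = examples[nb_train + nb_valid:]              # indices 900..999
assert len(train) + len(valid) + len(test) == len(examples)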
Example #5
    def __generate(self):

        tasks = self.maketasks()
        results = mp.Queue(1)
        buff = q.Queue(1)
        nb_tasks = tasks.qsize()

        perftrace = dd([(kw, mp.Queue()) for kw in self.extractor.statics])
        self.__perftraces.append(perftrace)

        # Start workers
        processes = []
        for i in range(self.__nb_workers):
            p = mp.Process(target=self.worker,
                           args=(tasks, results),
                           kwargs=perftrace)
            p.daemon = True
            p.start()
            processes.append(p)

        # Start
        t = Thread(target=extractfromqueue, args=(results, buff))
        t.daemon = True
        t.start()

        # Generator function
        def generator(buff, nb_tasks):
            for i in range(nb_tasks):
                yield buff.get()

        # Instantiate the generator
        return generator(buff, nb_tasks)
Example #6
    def __init__(self, classmap_path=None, **models):
        super(marshall, self).__init__()

        self.models = list(models.values())

        # Classmap maps class names from the dataset domain to generalized class names
        self.classmap = self.__loadmapping(classmap_path)

        # Extract classnames
        self.classes += [c.desc for c in list(self.classmap.values())]

        # Marshal all classes and convert class descriptions
        frequency = dd([(cls, 0.) for cls in self.classes])
        for model in self.models:
            for e in model.examples:
                e.label = self.classmap[e.desc].label
                e.desc = self.classmap[e.desc].desc
                self.examples += [e]
                frequency[e.desc] += 1

        # Normalize frequencies
        for cls in frequency:
            frequency[cls] /= len(self.examples)

        # Report number of examples per class
        print("Marshalling datasets with a total of {} examples".format(self.nb_examples))
        for cls, freq in list(frequency.items()):
            print("    {:7d} ({:6.3f}%) items of class {}".format(int(freq*self.nb_examples), freq*100., cls))
Example #7
    def add_line(self, line=None, name=None, x=None, y=None, **line_kwargs):

        if line is not None and name is None:
            if line.get_label():
                name = line.get_label()

        if line is None and name is None:
            if 'label' in line_kwargs:
                name = line_kwargs['label']

        if name in self.__lines:
            raise ValueError(
                "A line referenced by name {} already exists".format(name))

        x = x if isinstance(x, list) else []
        y = y if isinstance(y, list) else []

        if not line:
            if 'color' not in line_kwargs:
                line_kwargs['color'] = self.__colorcycle[
                    self.__colorcycle_id % len(self.__colorcycle)]
                self.__colorcycle_id += 1
            line = Line2D(x, y, **line_kwargs)

        if name:
            line_dd = dd()
            line_dd.x = x
            line_dd.y = y
            line_dd.line = line
            self.__lines[name] = line_dd

        axes_base.add_line(self, line)
        self.relim()
        self.autoscale()
Example #8
    def export(self, path):

        make_dir(path)

        hparams = dd({'hp': self.hp, 'input_sz': self.input_sz})

        hparams.dump(join(path, 'hparams.pkl'))

        for i, layer in enumerate(self.layers):
            layer.export(join(path, '{:03d}_{}'.format(i, layer.name)))
Example #9
    def __iadd__(self, other):

        if not isinstance(other, dict):
            raise TypeError("__iadd__ rhs must be a dict or derive from dict; got type {}".format(type(other)))

        self._nb += 1

        for key, value in list(other.items()):

            if key in self:
                self[key].value += [value]
                self[key].index += [self._nb-1]
            else:
                self[key] = dd()
                self[key].value = [value]
                self[key].index = [self._nb-1]

        return self
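A hypothetical usage sketch of the accumulator above, assuming the class (here called `expdd`, as in Example #13) derives from dict and starts with `_nb` equal to 0:

# Accumulate values from two hypothetical experiment dictionaries.
acc = expdd()
acc += {'lr': 0.01, 'momentum': 0.9}
acc += {'lr': 0.10}                      # 'momentum' missing in this one
# acc['lr'].value       == [0.01, 0.10]   acc['lr'].index       == [0, 1]
# acc['momentum'].value == [0.9]          acc['momentum'].index == [0]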
Example #10
    def __init__(self, path=None):
        datamodel.__init__(self)

        self.path = path if path else self.path

        self.splits = dd({split: [] for split in ['train', 'valid', 'test']})

        # Walk the dataset folder, building the example list and the train/valid/test splits
        for fn in walk_files(self.path, join=False, explore_subfolders=False):

            # example class name
            cls = fn[:3]
            idx = int(fn[4:-4])

            # Add class to class list
            if cls not in self.classes:
                self.classes.append(cls)

            example = element(self.path,
                              fn,
                              label=self.classes.index(cls),
                              desc=cls,
                              index=idx)

            self.examples.append(example)

            if idx < 20000 // 2:
                self.splits.train.append(example)
            elif idx < 22500 // 2:
                self.splits.valid.append(example)
            else:
                self.splits.test.append(example)

        print("Cats and dogs dataset is loaded with:")
        for split_id in self.splits:
            print("    {:7d} examples in {} set".format(
                len(self.splits[split_id]), split_id))
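The slicing `fn[:3]` and `fn[4:-4]` assumes file names of the form `<3-letter class>.<index>.<3-letter extension>`, e.g. the Kaggle cats-vs-dogs naming `cat.1234.jpg`; a small sketch of that assumption:

# Assumed file-name convention behind fn[:3] and fn[4:-4] above (hypothetical name).
fn = 'cat.1234.jpg'
cls = fn[:3]           # 'cat'
idx = int(fn[4:-4])    # 1234
assert (cls, idx) == ('cat', 1234)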
Example #11
    def run(self):
        hp = self.hp

        # Initialize log dictionary
        self.log = dd()

        # Model input and labels
        self.input = T.tensor4("input_tensor4", float_x)
        self.label = T.ivector("label_vector")

        # Initialize dataset
        self.source = source(hp.dataset.path)

        # Iterators that will read and extract images from the dataset
        self.trainiterator = randompatch(self.source.splits.train,
                                         load_ratio=0.05,
                                         dataset_per_epoch=1,
                                         corrupt=True,
                                         nb_workers=1,
                                         **hp.iterator)
        self.validiterator = randompatch(self.source.splits.valid,
                                         load_ratio=0.1,
                                         dataset_per_epoch=1,
                                         corrupt=False,
                                         nb_workers=1,
                                         **hp.iterator)
        self.testiterator = randompatch(self.source.splits.test,
                                        load_ratio=0.1,
                                        corrupt=False,
                                        nb_workers=1,
                                        **hp.iterator)

        # Learning model (needs the train iterator's shape)
        self.model = model(hp.model, self.trainiterator.shape)

        # Class probability output
        train_prob = self.model(self.input, 'train')
        valid_prob = self.model(self.input, 'valid')
        test_prob = self.model(self.input, 'test')

        # Loss functions
        train_loss = cross_entropy(train_prob, self.label)
        valid_loss = cross_entropy(valid_prob, self.label)
        test_loss = cross_entropy(test_prob, self.label)

        # Error functions
        train_error = error(train_prob, self.label)
        valid_error = error(valid_prob, self.label)
        test_error = error(test_prob, self.label)

        # Trainer object
        self.trainer = sgd(self.input, self.trainiterator, self.validiterator,  \
                           log=self.log, output_path=join(hp.meta.path, hp.meta.result_folder), **hp.trainer)

        # Greedy layer-wise pre-training
        self.model.pretrain(self.input, self.input, self.trainiterator)

        # Training
        self.trainer(self.model.optimizables, train_loss, valid_loss,
                     self.label, train_error, valid_error,
                     self.model.name).learn()

        # Greedy layer-wise post-training
        self.model.posttrain(self.input, self.input, self.trainiterator)

        # Compute final valid and test result
        valid_performance_fn = th.function(inputs=[self.input, self.label],
                                           outputs=[valid_loss, valid_error],
                                           allow_input_downcast=True)
        test_performance_fn = th.function(inputs=[self.input, self.label],
                                          outputs=[test_loss, test_error],
                                          allow_input_downcast=True)

        # Test performance on validation set
        valid_loss, valid_error = self.test(self.validiterator,
                                            valid_performance_fn)
        print("Final valid loss = {:0.4f}, error = {:0.4f}".format(
            valid_loss, valid_error))

        test_loss, test_error = self.test(self.testiterator,
                                          test_performance_fn)
        print("Final test loss = {:0.4f}, error = {:0.4f}".format(
            test_loss, test_error))

        # Final results to log file
        self.log[self.model.name].cost.final_valid = valid_loss
        self.log[self.model.name].cost.final_test = test_loss
        self.log[self.model.name].error.final_valid = valid_error
        self.log[self.model.name].error.final_test = test_error
        self.log.dump(join(hp.meta.path, hp.meta.result_folder, "out.pkl"),
                      True)
Example #12
    def generate_hp(self, path):

        # Seed random number generator
        np.random.seed(datetime.now().microsecond)

        # Hyperparameters
        hp = dd()

        # Set debug mode
        hp.debug = False

        # Experiment parameters
        hp.meta = dd()
        hp.meta.path = path
        hp.meta.result_folder = './'
        hp.meta.export_folder = 'export'

        # Dataset parameters
        hp.dataset = dd()
        hp.dataset.path = join(datasets.path(), 'catsanddogs')

        # Feature extraction parameters
        hp.iterator = dd()
        hp.iterator.patch_sz = (90, 90, 3)
        hp.iterator.reshape_sz = (100, 100, 3)

        # Feature learning layers
        hp.model = dd()
        hp.model.name = 'convnet'
        hp.model.layers = dd()

        # Preprocess layer
        i = 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_preprocess'
        hp.model.layers[i].nb_channels = 3
        hp.model.layers[i].nb_pretrain_iterations = 1

        # Convolutional layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_vanilla'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_filters = 16
        hp.model.layers[i].filter_sz = (3, 3)

        # Max pooling layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_maxpool'
        hp.model.layers[i].downsample_sz = 2

        # Batch normalization layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_batchnorm'

        # Convolutional layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_vanilla'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_filters = 32
        hp.model.layers[i].filter_sz = (3, 3)

        # Batch normalization layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_batchnorm'

        # Convolutional layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_vanilla'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_filters = 32
        hp.model.layers[i].filter_sz = (3, 3)

        # Max pooling layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_maxpool'
        hp.model.layers[i].downsample_sz = 2

        # Batch normalization layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_batchnorm'

        # Convolutional layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_vanilla'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_filters = 64
        hp.model.layers[i].filter_sz = (5, 5)

        # Max pooling layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_maxpool'
        hp.model.layers[i].downsample_sz = 2

        # Batch normalization layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_batchnorm'

        # Convolutional layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_vanilla'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_filters = 64
        hp.model.layers[i].filter_sz = (5, 5)

        # Batch normalization layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'conv_batchnorm'

        # Fully connected layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'hidden'
        hp.model.layers[i].activation = "relu"
        hp.model.layers[i].nb_hid = 256  # int(10**np.random.uniform(log(128)/log(10), log(512)/log(10)))

        # Logistic layer
        i += 1
        hp.model.layers[i] = dd()
        hp.model.layers[i].type = 'logistic'
        hp.model.layers[i].nb_out = 2

        # Trainer
        hp.trainer = dd()
        hp.trainer.max_epoch = 1000
        hp.trainer.lookback = randint(10, 30)
        hp.trainer.minibatch_sz = 100
        hp.trainer.init_lr = None
        hp.trainer.incr_lr = None
        hp.trainer.lr = 10**np.random.uniform(
            log(0.01) / log(10),
            log(0.1) / log(10))
        hp.trainer.decay_rate = uniform(0.985, 1.0)
        hp.trainer.momentum = 10**uniform(
            log(0.8) / log(10),
            log(0.99) / log(10))
        hp.trainer.momentum_reset_prob = 0

        print("Save hyperparameters to file")
        hp.dump(join(hp.meta.path, 'hp.pkl'), save_pretty_textfile=True)

        print(hp)

        return hp
Example #13
def analyze(experiment_path, output_path, objectives, ignores, ignore_vals, substitute, out_to_hp):

    # Remove unfinished experiments
    clean(experiment_path, objectives)

    # Get experiment dictionary
    lpath = walk_folders(experiment_path)

    # Get all experiment files
    ehp = expdd()
    eout = expdd()
    for path in lpath:
        try:
            hp = read_hp(os.path.join(path, 'hp.txt'), substitute)
            out = read_hp(os.path.join(path, 'out.txt'), substitute)
        except Exception:
            continue

        ehp += hp
        eout += out

    # Make output directory
    make_dir(output_path)

    # Transfer output data to input
    for obj in out_to_hp:
        if obj in eout:
            ehp["out."+obj] = dd()
            ehp["out."+obj] = eout[obj]

    # Get exclusion
    arr, _ = ehp.get_array(list(ehp.keys())[0])
    include = np.ones(arr.size)
    if ignore_vals is not None:
        for key, value in list(ignore_vals.items()):
            arr, _ = ehp.get_array(key)
            include *= (arr != value)

    fig = plt.figure()  #(figsize = (4,3))

    for xlabel in list(ehp.keys()):

        do_ignore = False
        for ignore in ignores:
            print(ignore, xlabel, ignore in xlabel)
            if ignore in xlabel:
                do_ignore = True
                break

        if do_ignore:
            continue

        fig.clf()
        ax = fig.add_subplot(111)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Objective')
        ax.set_title('Hyperparameter optimization for {}'.format(os.path.split(experiment_path)[1]))
        x, this_include = ehp.get_array(xlabel)
        this_include *= include

        # Skip hyperparameters that only take a single value
        if np.unique(x).size == 1:
            continue

        print("For {}, number of unique x is {}".format(xlabel, np.unique(x).size))
        if np.unique(x).size == 2:
            for val in np.unique(x):
                print('        {}'.format(val))

        # Convert object dtype to string
        if x.dtype == object:
            x = [str(obj) for obj in x]

        # Rotate string to 90 degrees for readability of graph
        if isinstance(x[0], str):

            for i in range(len(x)):
                x[i] = x[i].replace("\\",  "/")

                if 'path' in xlabel:
                    tail, head = os.path.split(x[i])
                    x[i] = os.path.join(os.path.split(tail)[1], head)

            un = np.unique(x).astype(sstr)
            xticks = un.copy()
            un = dict(list(zip(un, np.arange(un.size))))
            x = np.asarray([un[val] for val in x], dtype = sstr)
            ax.set_xticks(np.arange(xticks.size))
            ax.set_xticklabels(xticks)

            for ticklabel in ax.get_xticklabels():
                ticklabel.set_rotation(90)
                if 'path' in xlabel:
                    ticklabel.set_fontsize(3)

        for ylabel in list(eout.keys()):
            if ylabel in objectives:
                y, _ = eout.get_array(ylabel)
                mask = (this_include * (y != 0)).astype(bool)
                ax.plot(x[mask], y[mask], marker='.', markersize=5, linestyle='none', label=ylabel)

        try:
            xlim = ax.get_xlim()
            ax.set_xlim(xlim[0]-(xlim[1]-xlim[0])*0.05, xlim[1]+(xlim[1]-xlim[0])*0.05)
            ax.legend(loc=1, fontsize=7)
            #ax.set_yscale('log')
            fig.savefig(os.path.join(output_path, os.path.split(experiment_path)[1] + "." + xlabel + '.png'), bbox_inches='tight', dpi=600)
        except Exception:
            print_exc()
Example #14
File: sgd_.py  Project: fulQuan/ift6266h15
    def __init__(self,
                 inp,
                 train_data,
                 valid_data=None,
                 max_epoch=10000,
                 lookback=1000,
                 minibatch_sz=100,
                 init_lr=None,
                 incr_lr=None,
                 lr=0.01,
                 decay_rate=0,
                 momentum=0.9,
                 momentum_reset_prob=0,
                 loops_per_epoch=1,
                 output_path=None,
                 log=dd()):
        """
        Implement stochastic gradient descent

        Parameters
        ----------
        inp:            theano tensor
                        model input

        train_data / valid_data:     tuple or function object
                        training/validation data as an (examples, labels) tuple
                        examples should be an ndarray with shape [nb_examples, dim1, dim2, ...]
                        labels should be None for unsupervised learning or an ndarray with shape [nb_examples]
                        If train_data is a function object, calling it without arguments should return such a tuple.

        max_epoch:      integer type
                        Maximum number of epochs

        lookback:       integer type
                        EMA smoothing period on the validation cost, used for
                        early stopping to prevent overfitting

        minibatch_sz:   integer type
                        number of examples per parameter update

        init_lr:        float type
                        Initial learning rate

        incr_lr:        float type
                        Learning rate increment applied after each epoch until
                        the learning rate reaches lr.  From then on, the
                        learning rate lr is decayed.

        lr:             float type
                        Gradient descent's learning rate

        decay_rate:     float type
                        Decay rate
                        learning_rate(t+1) = learning_rate(t) * decay_rate

        momentum:       float type
                        update = last_update*momentum + learning_rate*grad

        momentum_reset_prob: float type
                        probability of resetting the momentum at the end of an epoch

        log :           dict-like object
                        Dictionary in which the trainer logs the current
                        learning status.

        output_path :   str
                        path to which the trainer dumps the log dict as a
                        text file and outputs a graph of the learning
                        progression


        """

        # Model input
        self.inp = inp
        self.max_epoch = max_epoch
        self.lookback = lookback
        self.minibatch_sz = int(minibatch_sz)
        self.init_lr = init_lr
        self.incr_lr = incr_lr
        self.high_lr = lr
        self.decay_rate = decay_rate
        self.momentum = momentum
        self.train_data = train_data
        self.valid_data = valid_data
        self.log = log
        self.output_path = output_path
        self.subplots = dd()
        self.do_validation = valid_data is not None
        self.momentum_reset_prob = momentum_reset_prob
        self.loops_per_epoch = loops_per_epoch

        # Learning data shared variables
        self.shr_train_data = shared_x(np.empty([0] * self.inp.ndim),
                                       name='training_set_data')
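The `lookback` parameter above is an EMA smoothing period on the validation cost; a sketch of how such an exponential moving average is typically updated (the trainer's exact smoothing formula is not shown in this snippet, so this is only an assumption):

# Sketch of an exponential moving average with period `lookback`.
def update_ema(ema, value, lookback):
    alpha = 2. / (lookback + 1.)
    return value if ema is None else alpha * value + (1. - alpha) * ema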
Example #15
File: sgd_.py  Project: fulQuan/ift6266h15
    def __call__(self,
                 params,
                 train_cost,
                 valid_cost=None,
                 labels=None,
                 train_error=None,
                 valid_error=None,
                 model_id='',
                 additionnal_updates=[],
                 debug_calls=[],
                 debug_nodes={}):

        # Determine whether learning is supervised or not
        self.supervised = labels is not None
        self.do_validation = self.do_validation and bool(valid_cost)
        print('\nInitializing sgd for {} learning of {} {} validation.'.format(
            {
                0: 'unsupervised',
                1: 'supervised'
            }[self.supervised], model_id, {
                0: 'without',
                1: 'with'
            }[self.do_validation]))

        # Initializations
        self.additionnal_updates = additionnal_updates if isinstance(
            additionnal_updates, list) else [additionnal_updates]
        self.learning_divergence_counter = 0
        self.rampup = bool(self.init_lr)
        self.lr = self.init_lr if self.rampup else self.high_lr

        # Outputs
        self.cost = dd()
        self.cost.epoch = []
        self.cost.train = []
        self.cost.train_ema = []
        self.cost.valid = []
        self.cost.valid_ema = []

        self.error = dd()
        self.error.epoch = []
        self.error.train = []
        self.error.train_ema = []
        self.error.valid = []
        self.error.valid_ema = []

        # Validation data
        self.valid_list = []
        self.valid_label_list = []

        # Input parameters
        index = T.lscalar(name='lscalar_index')
        minibatch_sz = T.lscalar(name="lscalar_minibatch_sz")
        momentum = T.fscalar(name="fscalar_momentum")
        learning_rate = T.fscalar(name="fscalar_learning_rate")

        # Save reference to parameters
        self.params = params

        # Compute gradient
        self.grads = T.grad(cost=train_cost,
                            wrt=params,
                            disconnected_inputs='ignore',
                            return_disconnected='None')

        # Keep only gradients that are connected to the update tree
        self.grads = dd([(param, grad)
                         for (param, grad) in zip(params, self.grads)
                         if grad is not None])

        # Save initial value of parameters
        self.init = dd([(param, shared_x(param.get_value()))
                        for param in self.params])

        if self.do_validation:
            self.best_performance = sys.float_info.max
            self.best_params = dd([(param, param.get_value())
                                   for param in self.params])

        # Initialize lastupdates
        self.last_update = dd([(p, shared_x(np.zeros_like(p.get_value())))
                               for p in self.params])

        # Learning updates
        updates = []
        for param, grad in list(self.grads.items()):
            last = self.last_update[param]
            gradient = last * momentum + learning_rate * grad
            updates.append((param, param - gradient))
            updates.append((last, gradient))

        # Learning function
        if self.supervised:

            # Learning labels shared variables
            self.shr_train_labels = th.shared(np.empty([0] * labels.ndim,
                                                       dtype=labels.dtype),
                                              name='training_set_labels')

            # Learning function
            self.learningstep_fn = th.function(
                inputs=[index, minibatch_sz, momentum, learning_rate],
                outputs=[train_cost, train_error],
                updates=updates,
                givens={
                    self.inp:
                    self.shr_train_data[index * minibatch_sz:(index + 1) *
                                        minibatch_sz],
                    labels:
                    self.shr_train_labels[index * minibatch_sz:(index + 1) *
                                          minibatch_sz]
                },
                allow_input_downcast=True)

            # Noisy cost on train shared variables
            self.train_cost_fn = th.function(inputs=[],
                                             outputs=[valid_cost, valid_error],
                                             givens={
                                                 self.inp: self.shr_train_data,
                                                 labels: self.shr_train_labels
                                             },
                                             allow_input_downcast=True)

            # Clean reconstruction cost (for validation)
            self.valid_cost_fn = th.function(inputs=[self.inp, labels],
                                             outputs=[valid_cost, valid_error],
                                             allow_input_downcast=True)

            if debug_nodes:
                self.debug_fn = th.function(
                    inputs=[],
                    outputs=[output for output in list(debug_nodes.values())],
                    givens={
                        self.inp: self.shr_train_data,
                        labels: self.shr_train_labels
                    },
                    allow_input_downcast=True,
                    on_unused_input='ignore')

        else:
            # Theano learning function
            self.learningstep_fn = th.function(
                inputs=[index, minibatch_sz, momentum, learning_rate],
                outputs=train_cost,
                updates=updates,
                givens={
                    self.inp:
                    self.shr_train_data[index * minibatch_sz:(index + 1) *
                                        minibatch_sz]
                },
                allow_input_downcast=True,
                on_unused_input='ignore')

            # Noisy cost on train shared variables
            self.train_cost_fn = th.function(
                inputs=[],
                outputs=valid_cost,
                givens={self.inp: self.shr_train_data},
                allow_input_downcast=True)

            # Clean reconstruction cost (for validation)
            self.valid_cost_fn = th.function(inputs=[self.inp],
                                             outputs=valid_cost,
                                             allow_input_downcast=True)

            if debug_nodes:
                self.debug_fn = th.function(
                    inputs=[],
                    outputs=[output for output in list(debug_nodes.values())],
                    givens={self.inp: self.shr_train_data},
                    allow_input_downcast=True,
                    on_unused_input='ignore')

        # Debug
        self.debug_calls = debug_calls
        self.debug_nodes = dd(debug_nodes)
        self.model_id = model_id

        # For learning stats graph outputs
        self.last_batch_param = dd([(param, shared_copy(param))
                                    for param in self.params])
        self.last_batch_update = dd([(param, shared_zeros_like(param))
                                     for param in self.params])
        self.this_batch_update = dd([(param, shared_zeros_like(param))
                                     for param in self.params])

        updates = []
        for param in self.params:
            updates += [(self.last_batch_update[param],
                         self.this_batch_update[param])]
            updates += [(self.this_batch_update[param],
                         param - self.last_batch_param[param])]
            updates += [(self.last_batch_param[param], param)]
        self.update_learning_stats_fn = th.function(inputs=[],
                                                    outputs=[],
                                                    updates=updates)

        # Initialize learning stats plots
        if self.subplots:
            for sp in list(self.subplots.values()):
                sp.clf()
            self.subplots = dd()

        self.subplots.graphs = subplots(1,
                                        1 + self.supervised,
                                        3,
                                        3 + 3 * self.supervised,
                                        projection='recurrent')

        line_names = ['p005', 'median', 'p995', 'std']
        line_labels = ['0.5%', 'median', '99.5%', 'std']
        for param in self.params:
            nb_plots = 3 if param.ndim == 1 else 6
            sp = subplots(2, nb_plots, 6, nb_plots * 3, projection='recurrent')
            for i in range(nb_plots):
                for name, label in zip(line_names, line_labels):
                    sp[1, i].add_line(name=name, label=label)
                sp[1, i].set(xlabel='epoch',
                             xscale='log',
                             xtick_fontsize=6,
                             ytick_fontsize=6)
                sp[1, i].legend(loc='upper center', fontsize=6)
                sp[0, i].set(xtick_fontsize=6, ytick_fontsize=6)
            if nb_plots == 6:
                sp[1, 3].set(yscale='log')
            self.subplots[param] = sp

        for node in list(self.debug_nodes.values()):
            sp = subplots(2, 1, 6, 3, projection='recurrent')
            for name, label in zip(line_names, line_labels):
                sp[1, 0].add_line(name=name, label=label)
            sp[1, 0].add_line(name='nonzero', label='non-zeros')
            sp[1, 0].set(xlabel='epoch',
                         xscale='log',
                         xtick_fontsize=6,
                         ytick_fontsize=6)
            sp[1, 0].legend(loc='upper center', fontsize=6)
            sp[0, 0].set(xtick_fontsize=6, ytick_fontsize=6)
            self.subplots[node] = sp

        self.subplots.graphs[0, 0].set(xlabel='epoch',
                                       yscale='log',
                                       xtick_fontsize=8,
                                       ytick_fontsize=8)
        self.subplots.graphs[0, 0].set_title(
            'cost - {}'.format(model_id) if model_id else None, fontsize=10)
        self.subplots.graphs[0, 0].add_line(name='train', label='train')
        self.subplots.graphs[0, 0].add_line(name='train_ema',
                                            label='train (EMA)')
        if self.do_validation:
            self.subplots.graphs[0, 0].add_line(name='valid', label='valid')
            self.subplots.graphs[0, 0].add_line(name='valid_ema',
                                                label='valid (EMA)')
        self.subplots.graphs[0, 0].legend(loc='best', fontsize=6)

        if self.supervised:
            self.subplots.graphs[0, 1].set(xlabel='epoch',
                                           yscale='log',
                                           xtick_fontsize=8,
                                           ytick_fontsize=8)
            self.subplots.graphs[0, 1].set_title('error', fontsize=10)
            self.subplots.graphs[0, 1].add_line(name='train', label='train')
            self.subplots.graphs[0, 1].add_line(name='train_ema',
                                                label='train (EMA)')
            if self.do_validation:
                self.subplots.graphs[0, 1].add_line(name='valid',
                                                    label='valid')
                self.subplots.graphs[0, 1].add_line(name='valid_ema',
                                                    label='valid (EMA)')
            self.subplots.graphs[0, 1].legend(loc='best', fontsize=6)

        return self
Example #16
    def __init__(self, function, statics):
        from libutils.dict import dd
        self.__function = function
        self.statics = dd(statics)
Example #17
    def additionnal_update(self):
        # Rescale each filter of W to unit L2 norm (summing over all but the filter axis)
        norm = T.sqrt((self.W ** 2).sum(3).sum(2).sum(1))[:, None, None, None]
        return th.function(inputs=[], outputs=[], updates=dd({self.W: self.W / norm}))
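The update above rescales every filter of `W` (shape `[nb_filters, channels, height, width]`) to unit L2 norm; an equivalent NumPy sketch, assuming that shape:

import numpy as np

# Each filter (first axis of W) is divided by its own L2 norm.
W = np.random.randn(16, 3, 5, 5)
norm = np.sqrt((W ** 2).sum(axis=(1, 2, 3)))[:, None, None, None]
W_unit = W / norm
assert np.allclose(np.sqrt((W_unit ** 2).sum(axis=(1, 2, 3))), 1.0)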