Ejemplo n.º 1
0
    def load_data(self, type='train', batch=0, verbose=2):
        """
        Load one pickled batch of the dataset from disk and return it.

        The batch is read from ``<self.dataset>/<type>/batch_<batch>.pkl`` and must unpickle
        to a two-element sequence ``(data_x, data_y)``. The important thing to note is that
        all the datasets in ``yann`` require a ``y`` or a variable to predict. In case of an
        auto-encoder for instance, the thing to predict is the image itself. Setup the
        dataset accordingly.

        Args:
            type: ``train``, ``test`` or ``valid``; used as the sub-directory name.
                  Default is ``train``.
            batch: Integer index of the batch file to load.
            verbose: Similar to verbose in the rest of the toolbox. Progress messages are
                     printed when ``verbose >= 3``.

        Returns:
            tuple: ``(data_x, data_y)``, each passed through ``check_type`` with
            ``theano.config.floatX``.

        Raises:
            Any error raised by ``open`` or ``cPickle.load`` (for instance a missing batch
            file) is propagated to the caller unchanged.
        """
        if verbose >= 3:
            # Parenthesised single-argument form prints the same text on Python 2 and 3.
            print("... loading " + type + " data batch " + str(batch))

        path = self.dataset + '/' + type + '/batch_' + str(batch) + '.pkl'
        # NOTE: unpickling executes arbitrary code embedded in the file; only load batch
        # files that come from a trusted source.
        # The ``with`` block guarantees the handle is closed even when unpickling fails.
        with open(path, 'rb') as f:
            data_x, data_y = cPickle.load(f)

        if verbose >= 3:
            print("... data is loaded")

        # Theano recommends storing on gpus only as floatX and casts them to ints during use,
        # which is why the labels are converted with floatX as well.
        data_x = check_type(data_x, theano.config.floatX)
        data_y = check_type(data_y, theano.config.floatX)
        return data_x, data_y
Ejemplo n.º 2
0
    def load_data(self, type='train', batch=0, verbose=2):
        """
        Read a single pickled batch file of the dataset and return its contents.

        The file opened is ``<self.dataset>/<type>/batch_<batch>.pkl``; it must unpickle to
        a two-element sequence ``(data_x, data_y)``. Note that all the datasets in ``yann``
        require a ``y`` or a variable to predict. In case of an auto-encoder for instance,
        the thing to predict is the image itself. Setup the dataset accordingly.

        Args:
            type: ``train``, ``test`` or ``valid``; selects the sub-directory to read from.
                  Default is ``train``.
            batch: Integer index of the batch file to load.
            verbose: Similar to verbose in the rest of the toolbox. Progress messages are
                     printed when ``verbose >= 3``.

        Returns:
            tuple: ``(data_x, data_y)``, each passed through ``check_type`` with
            ``theano.config.floatX``.

        Raises:
            Errors from ``open`` or ``cPickle.load`` (for instance a missing batch file)
            are not caught here and reach the caller.
        """
        chatty = verbose >= 3
        if chatty:
            # Parenthesised single-argument form prints the same text on Python 2 and 3.
            print("... loading " + type + " data batch " + str(batch))

        batch_file = self.dataset + '/' + type + '/batch_' + str(batch) + '.pkl'
        # NOTE: a pickle can run arbitrary code while loading; batch files must come from a
        # trusted source.
        # Context manager closes the file even if unpickling or unpacking raises.
        with open(batch_file, 'rb') as f:
            data_x, data_y = cPickle.load(f)

        if chatty:
            print("... data is loaded")

        # Theano recommends storing on gpus only as floatX and casts them to ints during use,
        # so both inputs and labels go through the floatX conversion.
        data_x = check_type(data_x, theano.config.floatX)
        data_y = check_type(data_y, theano.config.floatX)
        return data_x, data_y
Ejemplo n.º 3
0
    def one_hot_labels(self, y, verbose=1):
        """
        Function takes in labels and returns a one-hot style encoding in which the column of
        the true class holds ``1`` and every other column holds ``-1``. Used for max-margin
        loss.

        Args:
            y: Labels to be encoded. Each label is used as a column index, so values must lie
               in ``[0, self.n_classes)``. Labels stored as floats are converted to integers
               before indexing.
            verbose: Typical as in the rest of the toolbox. A warning is printed when
                     ``verbose >= 3`` and ``n_classes`` has to be inferred.

        Notes:
            ``self.n_classes``: Number of unique classes in the labels. When it is ``False``
            it is inferred from ``y`` and stored on the instance, the same way as:

            .. code-block:: python

                import numpy
                n_classes = len(numpy.unique(y))

            This might be potentially dangerous in case of cached dataset (a single batch
            need not contain every class). Although this is the default if ``n_classes`` is
            not provided, its use is discouraged.

        Returns:
            numpy ndarray: one-hot encoded labels with one row per label and
            ``self.n_classes`` columns, passed through ``check_type`` with
            ``theano.config.floatX``.
        """
        if self.n_classes is False:
            if verbose >= 3:
                # Parenthesised single-argument form prints the same text on Python 2 and 3.
                print("... Making a decision to create n_classes variable, not a good idea.")
            self.n_classes = len(numpy.unique(y))

        # Labels are commonly kept as floatX; NumPy only accepts integer (or boolean) arrays
        # as indices, so float labels are converted first. Integer labels are left untouched.
        labels = numpy.asarray(y)
        if labels.dtype.kind == 'f':
            labels = labels.astype(numpy.intp)

        # Start from all -1 and flip the entry at (row, label) to +1 for every row.
        y1 = -1 * numpy.ones((labels.shape[0], self.n_classes))
        y1[numpy.arange(labels.shape[0]), labels] = 1
        y1 = check_type(y1, theano.config.floatX)
        return y1
Ejemplo n.º 4
0
    def one_hot_labels(self, y, verbose=1):
        """
        Encode labels as a matrix holding ``1`` in the column of the true class and ``-1``
        everywhere else. Used for max-margin loss.

        Args:
            y: Labels to be encoded. Each label is used as a column index, so values must lie
               in ``[0, self.n_classes)``. Labels stored as floats are converted to integers
               before indexing.
            verbose: Typical as in the rest of the toolbox. A warning is printed when
                     ``verbose >= 3`` and ``n_classes`` has to be inferred.

        Notes:
            ``self.n_classes``: Number of unique classes in the labels. When it is ``False``
            it is inferred from ``y`` and stored on the instance, the same way as:

            .. code-block:: python

                import numpy
                n_classes = len(numpy.unique(y))

            This might be potentially dangerous in case of cached dataset (a single batch
            need not contain every class). Although this is the default if ``n_classes`` is
            not provided, its use is discouraged.

        Returns:
            numpy ndarray: encoded labels with one row per label and ``self.n_classes``
            columns, passed through ``check_type`` with ``theano.config.floatX``.
        """
        if self.n_classes is False:
            if verbose >= 3:
                # Parenthesised single-argument form prints the same text on Python 2 and 3.
                print("... Making a decision to create n_classes variable, not a good idea.")
            self.n_classes = len(numpy.unique(y))

        # NumPy rejects float arrays as indices, and labels are commonly stored as floatX,
        # so float labels are converted to an integer index dtype. Other dtypes pass as-is.
        columns = numpy.asarray(y)
        if columns.dtype.kind == 'f':
            columns = columns.astype(numpy.intp)
        rows = numpy.arange(columns.shape[0])

        # Fill with -1, then mark the true class of every row with +1.
        y1 = -1 * numpy.ones((columns.shape[0], self.n_classes))
        y1[rows, columns] = 1
        y1 = check_type(y1, theano.config.floatX)
        return y1