Example #1
    def preprocess(self, copy=False):
        """
        Creates a preprocessing function according to the settings, the function is used by the user of the dataset
        :param copy: Boolean specifying if there is a need to copy the input matrix
        :return:
        """
        if self.normalize_data:
            self.preprocessors = (functools.partial(preprocessing.normalize,
                                                    copy=copy),
                                  functools.partial(preprocessing.normalize,
                                                    copy=copy))

        if self.scale:
            self.preprocessors = (preprocessing.StandardScaler(copy=copy).fit(
                self.trainset[0]).transform,
                                  preprocessing.StandardScaler(copy=copy).fit(
                                      self.trainset[1]).transform)

        if self.scale_rows:
            self.preprocessors = (functools.partial(preprocessing.scale,
                                                    copy=copy,
                                                    axis=1),
                                  functools.partial(preprocessing.scale,
                                                    copy=copy,
                                                    axis=1))

        if self.pca[0] != 0:
            self.preprocessors = (PCA(self.pca[0],
                                      copy=copy,
                                      whiten=self.whiten).fit(
                                          self.trainset[0].copy()).transform,
                                  lambda x: x)

        if self.pca[1] != 0:
            self.preprocessors = (lambda x: x,
                                  PCA(self.pca[1],
                                      copy=copy,
                                      whiten=self.whiten).fit(
                                          self.trainset[1].copy()).transform)

        if self.whiten:
            OutputLog().write('using whiten')
            pca_dim1 = PCA(whiten=True)
            pca_dim2 = PCA(whiten=True)

            pca_dim1.fit(self.trainset[0])
            pca_dim2.fit(self.trainset[1])

            self.trainset = (pca_dim1.transform(self.trainset[0]),
                             pca_dim2.transform(self.trainset[1]))
            self.testset = (pca_dim1.transform(self.testset[0]),
                            pca_dim2.transform(self.testset[1]))
            self.tuning = (pca_dim1.transform(self.tuning[0]),
                           pca_dim2.transform(self.tuning[1]))
Example #2
    def __init__(self, data_set_parameters):

        OutputLog().write('Loading dataset: ' + data_set_parameters['name'])

        self.dataset_path = data_set_parameters['path']

        self.trainset = None
        self.testset = None
        self.tuning = None

        self.reduce_val = 0
        self.x_y_mapping = {'train': None, 'dev': None, 'test': None}
        self.x_reduce = {'train': None, 'dev': None, 'test': None}

        self.data_set_parameters = data_set_parameters
        self.scale = bool(int(data_set_parameters['scale']))
        self.scale_rows = bool(int(data_set_parameters['scale_samples']))
        self.whiten = bool(int(data_set_parameters['whiten']))
        self.pca = map(int, data_set_parameters['pca'].split())
        self.normalize_data = bool(int(data_set_parameters['normalize']))
        self.preprocessors = None
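Every option arrives as a string from a ConfigParser section (see Example #6),
which is why the constructor casts each value explicitly. An illustrative
parameter dict that satisfies this constructor (the keys match those read
above; the values are made up):

# Illustrative only: every value is a string, as ConfigParser delivers them.
data_set_parameters = {
    'name': 'mnist_halves',      # hypothetical dataset name
    'path': '/path/to/dataset',  # placeholder path
    'scale': '0',
    'scale_samples': '0',
    'whiten': '1',
    'pca': '0 0',                # one target dimension per view
    'normalize': '0',
}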
Example #3
    def load(self):
        """
        The dataset can be cached as three .npy files, each holding a tuple of
        matrices, or the data can be read manually through the build_dataset
        method. The dataset is composed of three tuples for training, testing
        and validation; each tuple contains two matrices, one for the X view
        and one for the Y view.

        The matrices are of size MxD1 and MxD2, where M is the number of
        samples and D1 and D2 are the dimensionalities of views X and Y
        respectively.
        :return: None; the loaded sets are stored on the instance
        """
        path = self.dataset_path
        if not os.path.isdir(self.dataset_path):
            path = os.path.dirname(os.path.abspath(self.dataset_path))

        params = os.path.join(path, 'params.p')

        try:
            self.trainset = self.load_cache(path, 'train')
            self.testset = self.load_cache(path, 'test')
            self.tuning = self.load_cache(path, 'validate')

            try:
                self.x_y_mapping['test'] = numpy.load(
                    os.path.join(path, 'mapping_test.npy'), 'r')
                self.x_y_mapping['dev'] = numpy.load(
                    os.path.join(path, 'mapping_dev.npy'), 'r')
                with open(os.path.join(path, 'reduce.p'), 'rb') as reduce_file:
                    self.x_reduce = cPickle.load(reduce_file)
            except Exception:
                OutputLog().write('Failed loading mappings')
                self.generate_mapping()

                # Save mapping to disk
                numpy.save(os.path.join(path, 'mapping_test'),
                           self.x_y_mapping['test'])
                numpy.save(os.path.join(path, 'mapping_dev'),
                           self.x_y_mapping['dev'])

                with open(os.path.join(path, 'reduce.p'), 'wb') as reduce_file:
                    cPickle.dump(self.x_reduce, reduce_file)

            with open(params, 'rb') as params_file:
                loaded_params = cPickle.load(params_file)

            OutputLog().write(
                'Loaded dataset params: {0}'.format(loaded_params))

        except Exception as e:
            OutputLog().write(
                'Failed loading from local cache with exception: {}'.format(e))
            self.build_dataset()

        self.preprocess()

        OutputLog().write(
            'Dataset dimensions = %d, %d' %
            (self.trainset[0].shape[1], self.trainset[1].shape[1]))
        OutputLog().write('Training set size = %d' % self.trainset[0].shape[0])
        OutputLog().write('Test set size = %d' % self.testset[0].shape[0])

        OutputLog().write('Dataset params: {0}'.format(
            self.data_set_parameters))

        if self.tuning is not None:
            OutputLog().write('Tuning set size = %d' % self.tuning[0].shape[0])
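load_cache is not shown in these examples; given the docstring above (one .npy
file per set, each holding an (X, Y) tuple), a plausible sketch would be the
following (written as a free function for self-containedness; the original is
a method, and the file-naming convention here is a guess):

import os
import numpy

def load_cache(path, set_name):
    # Assumption: each set is cached as '<set_name>.npy' containing a pickled
    # (X, Y) tuple; recent numpy versions need allow_pickle=True to read it.
    cached = numpy.load(os.path.join(path, '{0}.npy'.format(set_name)))
    return tuple(cached)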
Example #4
    def print_params(cls):
        OutputLog().write('Params:\n')
        for (key, value) in cls.__dict__.iteritems():
            if not key.startswith('__'):
                OutputLog().write('{0}: {1}'.format(key, value))
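The cls parameter implies print_params is bound as a classmethod (the decorator
falls outside this excerpt). A self-contained usage sketch under that
assumption, with print standing in for OutputLog().write:

class Params(object):
    # Illustrative settings; the real Params class defines many more fields.
    OUTPUT_LAYER = 4
    VALIDATION_BATCH_SIZE = 128

    @classmethod
    def print_params(cls):
        # Like the original, this also emits non-dunder class entries such as
        # the method itself.
        for (key, value) in cls.__dict__.items():
            if not key.startswith('__'):
                print('{0}: {1}'.format(key, value))

Params.print_params()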
Example #5
def test_model(model_x,
               model_y,
               dataset_x,
               dataset_y,
               preprocessors=None,
               reduce=0):
    test_x = dataset_x
    test_y = dataset_y

    x_total_value = None
    y_total_value = None

    if preprocessors is None:
        preprocessors = (None, None)

    for index, batch in enumerate(
            iterate_single_minibatch(test_x,
                                     Params.VALIDATION_BATCH_SIZE,
                                     False,
                                     preprocessor=preprocessors[0])):
        # Collect model_y's outputs for the X-view batches, stacked row-wise.
        x_values = model_y(batch)[0]

        if x_total_value is None:
            x_total_value = x_values
        else:
            x_total_value = numpy.vstack((x_total_value, x_values))

    for index, batch in enumerate(
            iterate_single_minibatch(test_y,
                                     Params.VALIDATION_BATCH_SIZE,
                                     False,
                                     preprocessor=preprocessors[1])):

        # Collect model_x's outputs for the Y-view batches, stacked row-wise.
        y_values = model_x(batch)[0]

        if y_total_value is None:
            y_total_value = y_values
        else:
            y_total_value = numpy.vstack((y_total_value, y_values))

    # Note the swap: y_total_value came from model_x and x_total_value from
    # model_y, so each output is paired with the opposite original view below.
    for index, (x_tilde,
                y_tilde) in enumerate(zip(y_total_value, x_total_value)):
        x_tilde_reshape = x_tilde.reshape((28, 14), order='F')
        y_tilde_reshape = y_tilde.reshape((28, 14), order='F')

        x_reshape = test_x[index].reshape((28, 14), order='F')
        y_reshape = test_y[index].reshape((28, 14), order='F')

        image_tilde_x = numpy.hstack((x_tilde_reshape, y_reshape))
        image_tilde_y = numpy.hstack((x_reshape, y_tilde_reshape))

        # Clamp negative pixel values to zero: (v + |v|) / 2 == max(v, 0).
        image_tilde_x = (image_tilde_x + abs(image_tilde_x)) / 2
        image_tilde_y = (image_tilde_y + abs(image_tilde_y)) / 2

        pyplot.imsave(os.path.join('/home/avive/theses/MNIST_results2/x/',
                                   '{0}.jpg'.format(index)),
                      image_tilde_x,
                      cmap='Greys_r')
        pyplot.imsave(os.path.join('/home/avive/theses/MNIST_results2/y/',
                                   '{0}.jpg'.format(index)),
                      image_tilde_y,
                      cmap='Greys_r')

    header = ['layer', 'loss', 'corr', 'search1', 'search5', 'desc1', 'desc5']

    rows = []

    search_recall, describe_recall = complete_rank(x_total_value,
                                                   y_total_value, reduce)

    loss = euclidean_error(x_total_value, y_total_value)
    correlation = calculate_correlation(x_total_value, y_total_value)

    print_row = ["{0} ".format(Params.OUTPUT_LAYER), loss, correlation]
    print_row.extend(search_recall)
    print_row.extend(describe_recall)

    rows.append(print_row)

    OutputLog().write(tabulate(rows, headers=header))
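euclidean_error and calculate_correlation are helpers imported from elsewhere
in the project; minimal sketches consistent with how they are called here
(assumptions, not the project's actual implementations):

import numpy

def euclidean_error(x, y):
    # Mean Euclidean distance between paired rows of the two views.
    return numpy.mean(numpy.sqrt(numpy.sum((x - y) ** 2, axis=1)))

def calculate_correlation(x, y):
    # Mean per-dimension Pearson correlation between the two views.
    x_c = x - x.mean(axis=0)
    y_c = y - y.mean(axis=0)
    return numpy.mean((x_c * y_c).mean(axis=0) /
                      (x.std(axis=0) * y.std(axis=0)))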
Example #6
VALIDATE_ALL = False
MEMORY_LIMIT = 8000000.

if __name__ == '__main__':

    data_set_config = sys.argv[1]
    if len(sys.argv) > 2:
        top = int(sys.argv[2])
    else:
        top = 0

    model_results = {'train': [], 'validate': []}

    results_folder = os.path.join(os.getcwd(), 'results')

    OutputLog().set_path(results_folder)
    OutputLog().set_verbosity('info')

    data_config = ConfigParser.ConfigParser()
    data_config.read(data_set_config)
    data_parameters = ConfigSectionMap("dataset_parameters", data_config)

    # construct data set
    data_set = Container().create(data_parameters['name'], data_parameters)
    data_set.load()

    Params.print_params()

    # Export network
    path = OutputLog().output_path
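ConfigSectionMap is a small helper in the common ConfigParser-recipe style that
flattens one section into a dict; a sketch matching the call above (and the one
in Example #7):

def ConfigSectionMap(section, config):
    # Collapse one ConfigParser section into a plain {option: value} dict;
    # values stay strings, as the dataset constructor in Example #2 expects.
    return dict((option, config.get(section, option))
                for option in config.options(section))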
Example #7
        if preprocessor is not None:
            yield preprocessor(numpy.copy(buffer[excerpt]))
        else:
            yield buffer[excerpt]


if __name__ == '__main__':

    data_set_config = sys.argv[1]

    model_results = {'train': [], 'validate': []}

    results_folder = os.path.join(os.getcwd(), 'results')

    OutputLog().set_path(results_folder)
    OutputLog().set_verbosity('info')

    data_config = ConfigParser.ConfigParser()
    data_config.read(data_set_config)
    data_parameters = ConfigSectionMap("dataset_parameters", data_config)

    # construct data set
    data_set = Container().create(data_parameters['name'], data_parameters)
    data_set.load()

    y_var = tensor.matrix()
    x_var = tensor.matrix()

    model = tied_dropout_iterative_model
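The indented lines at the top of this example are the tail of the
iterate_single_minibatch generator used in Example #5. A minimal sketch of the
whole function, assuming sequential slicing with optional shuffling (the
remainder batch is dropped):

import numpy

def iterate_single_minibatch(buffer, batch_size, shuffle=False,
                             preprocessor=None):
    # Yield successive mini-batches from buffer, optionally shuffled and
    # passed through a per-view preprocessor (compare Example #5).
    indices = numpy.arange(len(buffer))
    if shuffle:
        numpy.random.shuffle(indices)
    for start in range(0, len(buffer) - batch_size + 1, batch_size):
        excerpt = indices[start:start + batch_size]
        if preprocessor is not None:
            yield preprocessor(numpy.copy(buffer[excerpt]))
        else:
            yield buffer[excerpt]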