Example 1
# Assumed imports for this example; SdA is the stacked denoising autoencoder
# class from the project's local module (its import path is not shown here).
import logging

import numpy
import theano


def SdAWrapper(X, batch_size=128, layers=[512, 64],
               corruption_levels=[0.3, 0.3], pretrain_epochs=100,
               pretrain_lr=0.001):
    # Move the data into a Theano shared variable so it lives on the device.
    X = theano.shared(numpy.asarray(X, dtype=theano.config.floatX), borrow=True)
    n_samples, n_vars = X.get_value(borrow=True).shape
    n_train_batches = n_samples // batch_size  # integer division: whole minibatches only
    numpy_rng = numpy.random.RandomState(23432)
    ###############
    # BUILD MODEL #
    ###############
    logging.info('Building model')
    logging.info(str(n_vars) + ' -> ' + ' -> '.join(map(str, layers)))

    sda = SdA(numpy_rng=numpy_rng, n_ins=n_vars,
              hidden_layers_sizes=layers)

    logging.info('Compiling training functions')
    pretraining_fns = sda.pretraining_functions(train_set_x=X,
                                                batch_size=batch_size)

    ##################
    # TRAINING MODEL #
    ##################
    logging.info('Training model')
    # Greedy layer-wise pretraining: train each denoising autoencoder in turn.
    for i in range(sda.n_layers):
        for epoch in range(pretrain_epochs):
            c = []
            for batch_index in range(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))
            logging.info('Training layer {}, epoch {}, cost {}'.format(
                i, epoch, numpy.mean(c)))

    return sda
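
For context, a minimal usage sketch for this wrapper; the random data, shapes, and hyperparameters below are illustrative placeholders, not values from the source:

# Minimal usage sketch for SdAWrapper; the data and hyperparameters
# here are illustrative assumptions.
import numpy

X = numpy.random.rand(1000, 784)  # 1000 samples, 784 features (placeholder)
sda = SdAWrapper(X,
                 batch_size=100,
                 layers=[256, 32],
                 corruption_levels=[0.2, 0.2],
                 pretrain_epochs=10,
                 pretrain_lr=0.001)
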
Example 2
# Assumed imports for this example; SdA and print_array come from the
# project's local modules (their import paths are not shown in the source).
import sys

import h5py
import numpy
import theano
from scipy.sparse import csc_matrix


def main():

    print('Loading data from file...')
    # The file holds four datasets describing a sparse matrix in coordinate
    # form: the nonzero values, their row/column indices, and the full shape.
    with h5py.File('feature_matrix.h5', 'r') as f:
        data = f['data'][:]
        col = f['col'][:]
        row = f['row'][:]
        shape = f['shape'][:]

    matrix = csc_matrix((data, (row, col)),
                        shape=(shape[0], shape[1]),
                        dtype=numpy.uint8)

    print(matrix.shape)

    batch_size = 10
    n_samples, n_vars = matrix.shape
    n_train_batches = n_samples // batch_size  # integer division: whole minibatches only

    numpy_rng = numpy.random.RandomState(23432)

    # build model; hidden layer sizes are taken from the command line
    print('Building model...')

    sda = SdA(numpy_rng=numpy_rng,
              n_ins=n_vars,
              hidden_layers_sizes=[
                  int(sys.argv[2]),
                  int(sys.argv[3]),
                  int(sys.argv[4])
              ])

    print('configuring...')
    # pretraining_functions expects a Theano shared variable (see the other
    # examples), so densify the sparse matrix and wrap it in one first.
    train_set_x = theano.shared(
        numpy.asarray(matrix.todense(), dtype=theano.config.floatX),
        borrow=True)
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print('training...')
    pretraining_epochs = 15
    pretrain_lr = 0.001
    # Pad the corruption levels so every layer gets one, whatever sda.n_layers is.
    corruption_levels = [0.1, 0.2, 0.3] + [0.4] * sda.n_layers
    for i in range(sda.n_layers):
        for epoch in range(pretraining_epochs):
            c = []
            for batch_index in range(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))

    # Encode the data with the deepest hidden layer and print the result.
    y = sda.get_lowest_hidden_values(train_set_x)
    get_y = theano.function([], y)
    y_val = get_y()
    print_array(y_val, index=len(sys.argv) - 1)
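
This example rebuilds a sparse matrix from coordinate-format datasets in feature_matrix.h5. A sketch of writing a file in that layout, assuming the four dataset names used by the reader (the toy matrix itself is a placeholder):

# Sketch: write a feature_matrix.h5 in the layout read above. The dataset
# names ('data', 'row', 'col', 'shape') match the reader; the toy matrix
# is an illustrative placeholder.
import h5py
import numpy
from scipy.sparse import coo_matrix

dense = (numpy.random.rand(100, 50) < 0.1).astype(numpy.uint8)
m = coo_matrix(dense)
with h5py.File('feature_matrix.h5', 'w') as f:
    f.create_dataset('data', data=m.data)
    f.create_dataset('row', data=m.row)
    f.create_dataset('col', data=m.col)
    f.create_dataset('shape', data=numpy.array(m.shape))
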
Example 3
# Assumed imports for this example; SdA, load_data, and print_array come
# from the project's local modules (their import paths are not shown here).
import logging

import numpy
import theano


def main(args):
    logging.info('Loading data')

    X, index = load_data(args.input)

    # Compute number of minibatches
    batch_size = 10
    n_samples, n_vars = X.get_value(borrow=True).shape
    n_train_batches = n_samples // batch_size  # integer division: whole minibatches only

    numpy_rng = numpy.random.RandomState(23432)

    ###############
    # BUILD MODEL #
    ###############
    logging.info('Building model')
    logging.info(str(n_vars) + ' -> ' + ' -> '.join(map(str, args.l)))

    sda = SdA(numpy_rng=numpy_rng, n_ins=n_vars,
              hidden_layers_sizes=args.l)

    ##################
    # TRAINING MODEL #
    ##################
    logging.info('Compiling training functions')
    pretraining_fns = sda.pretraining_functions(train_set_x=X,
                                                batch_size=batch_size)

    logging.info('Training model')
    pretraining_epochs = 15
    pretrain_lr = 0.001
    # Pad the corruption levels so every layer gets one, whatever sda.n_layers is.
    corruption_levels = [0.1, 0.2, 0.3] + [0.4] * sda.n_layers
    for i in range(sda.n_layers):
        for epoch in range(pretraining_epochs):
            c = []
            for batch_index in range(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))
            logging.info('Training layer {}, epoch {}, cost {}'.format(
                i, epoch, numpy.mean(c)))

    # Encode the data with the deepest hidden layer and print the result.
    y = sda.get_lowest_hidden_values(X)
    get_y = theano.function([], y)
    y_val = get_y()
    print_array(y_val, index=index)
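
Example 3 reads args.input and args.l, but the parser itself is not shown. A minimal argparse sketch consistent with those attribute names (the flag name, help texts, and defaults are assumptions):

# Hypothetical argument parser matching the args.input and args.l
# attributes used above; flag names and defaults are assumptions.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(
        description='Pretrain a stacked denoising autoencoder')
    parser.add_argument('input', help='path to the input data file')
    parser.add_argument('-l', type=int, nargs='+', default=[512, 64],
                        help='hidden layer sizes, e.g. -l 512 64')
    return parser.parse_args()
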