def preprocess(params):
  # Data processing: encode text inputs into numeric vectors
  processor = preprocessing.VocabularyProcessor(
      max_document_length=params.max_sequence_length,
      min_frequency=params.min_frequency)
  encoded_inputs = list(processor.fit_transform(inputs))
  vocab_size = len(processor.vocabulary_)

  encoded_inputs = np.array(encoded_inputs)
  encoded_labels = np.array([int(label == 'ham') for label in labels])

  # Shuffle and split data
  np.random.seed(0)
  shuffled_ix = np.random.permutation(np.arange(len(encoded_labels)))
  x_shuffled = encoded_inputs[shuffled_ix]
  y_shuffled = encoded_labels[shuffled_ix]

  # Split train/test set
  ix_cutoff = int(len(y_shuffled) * 0.80)
  x_train, x_test = x_shuffled[:ix_cutoff], x_shuffled[ix_cutoff:]
  y_train, y_test = y_shuffled[:ix_cutoff], y_shuffled[ix_cutoff:]

  if hype.util.is_debug_logged():
    hype.util.debug('Vocabulary size: %d' % vocab_size)
    hype.util.debug('Train/test split: train=%d, test=%d' %
                    (len(y_train), len(y_test)))

  train = hype.DataSet(x_train, y_train)
  test = hype.DataSet(x_test, y_test)
  # No separate validation split here: the test set doubles as validation
  data = hype.Data(train, test, test)
  return data, vocab_size
def get_cifar10_data(validation_size=5000, one_hot=True):
  from tflearn.datasets import cifar10
  (x_train, y_train), (x_test, y_test) = cifar10.load_data('temp-cifar10/data',
                                                           one_hot=one_hot)
  # Carve a validation set off the front of the training data
  x_val = x_train[:validation_size]
  y_val = y_train[:validation_size]
  x_train = x_train[validation_size:]
  y_train = y_train[validation_size:]
  return hype.Data(train=hype.DataSet(x_train, y_train),
                   validation=hype.DataSet(x_val, y_val),
                   test=hype.DataSet(x_test, y_test))
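A loader like this plugs straight into a solver. The usage sketch below is hypothetical: it reuses the TensorflowSolver pattern from the MNIST snippet further down, and the solver parameter values are illustrative rather than taken from the original example.

# Hypothetical usage sketch: the solver parameters are illustrative,
# not taken from the original example.
data = get_cifar10_data(validation_size=5000, one_hot=True)
solver_params = {
  'batch_size': 500,
  'epochs': 10,
  'evaluate_test': True,
}
solver = hype.TensorflowSolver(data=data, **solver_params)
solver.train()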
def preprocess(params):
  # Data processing: encode text inputs into numeric vectors
  processor = preprocessing.VocabularyProcessor(
      max_document_length=params.max_sequence_length,
      min_frequency=params.min_frequency)
  encoded_inputs = list(processor.fit_transform(inputs))
  vocab_size = len(processor.vocabulary_)

  # Set this to see verbose output:
  # hype.util.set_verbose()
  if hype.util.is_debug_logged():
    hype.util.debug('Encoded text examples:')
    for i in range(3):
      hype.util.debug('  %s ->' % inputs[i])
      hype.util.debug('  %s\n' % encoded_inputs[i].tolist())

  encoded_inputs = np.array(encoded_inputs)
  encoded_labels = np.array([int(label == 'ham') for label in labels])

  # Shuffle the data
  shuffled_ix = np.random.permutation(np.arange(len(encoded_labels)))
  x_shuffled = encoded_inputs[shuffled_ix]
  y_shuffled = encoded_labels[shuffled_ix]

  # Split into train/validation/test sets
  idx1 = int(len(y_shuffled) * 0.75)
  idx2 = int(len(y_shuffled) * 0.85)
  x_train, x_val, x_test = x_shuffled[:idx1], x_shuffled[idx1:idx2], x_shuffled[idx2:]
  y_train, y_val, y_test = y_shuffled[:idx1], y_shuffled[idx1:idx2], y_shuffled[idx2:]

  if hype.util.is_debug_logged():
    hype.util.debug('Vocabulary size: %d' % vocab_size)
    hype.util.debug('Train/validation/test split: train=%d, val=%d, test=%d' %
                    (len(y_train), len(y_val), len(y_test)))

  train = hype.DataSet(x_train, y_train)
  validation = hype.DataSet(x_val, y_val)
  test = hype.DataSet(x_test, y_test)
  data = hype.Data(train, validation, test)
  return data, vocab_size
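Because preprocess reads max_sequence_length and min_frequency from params, those values can themselves be swept during hyperparameter search. A minimal sketch of wiring this into Hyper-Engine's tuner follows, assuming the hype.spec / hype.HyperTuner API; the candidate values and the build_model helper are placeholders, not part of the original example.

# Sketch: tune the preprocessing hyperparameters with Hyper-Engine.
# The candidate values and build_model() are assumptions for illustration.
def solver_generator(params):
  data, vocab_size = preprocess(params)
  build_model(params, vocab_size)  # hypothetical model-building step
  solver_params = {'batch_size': 128, 'epochs': 5}
  return hype.TensorflowSolver(data=data, hyper_params=params, **solver_params)

hyper_params_spec = hype.spec.new(
  max_sequence_length = hype.spec.choice([20, 30, 50]),
  min_frequency = hype.spec.choice([1, 3, 5]),
)
tuner = hype.HyperTuner(hyper_params_spec, solver_generator)
tuner.tune()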
flat = tf.reshape(pool2, [-1, pool2.shape[1] * pool2.shape[2] * pool2.shape[3]])
dense = tf.layers.dense(inputs=flat, units=1024, activation=tf.nn.relu)
logits = tf.layers.dense(inputs=dense, units=10)

loss_op = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y),
    name='loss')
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss_op, name='minimize')
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1)), tf.float32),
    name='accuracy')

# MNIST images arrive flat; reshape them to NHWC for the conv layers
tf_data_sets = input_data.read_data_sets('temp-mnist/data', one_hot=True)
convert = lambda data_set: hype.DataSet(
    data_set.images.reshape((-1, 28, 28, 1)), data_set.labels)
data = hype.Data(train=convert(tf_data_sets.train),
                 validation=convert(tf_data_sets.validation),
                 test=convert(tf_data_sets.test))

solver_params = {
  'batch_size': 1000,
  'eval_batch_size': 2500,
  'epochs': 10,
  'evaluate_test': True,
  'eval_flexible': False,
}
solver = hype.TensorflowSolver(data=data, **solver_params)
solver.train()
import hyperengine as hype
import tensorflow as tf
from sklearn import datasets
from sklearn.model_selection import train_test_split

x_data, y_data = datasets.load_boston(return_X_y=True)
x_split, x_test, y_split, y_test = train_test_split(x_data, y_data,
                                                    test_size=0.33, random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_split, y_split,
                                                  test_size=0.33, random_state=7)
data = hype.Data(train=hype.DataSet(x_train, y_train),
                 validation=hype.DataSet(x_val, y_val),
                 test=hype.DataSet(x_test, y_test))

def dnn_model(params):
  # Boston housing has 13 features; 'mode' distinguishes train from eval
  x = tf.placeholder(shape=[None, 13], dtype=tf.float32, name='input')
  y = tf.placeholder(shape=None, dtype=tf.float32, name='label')
  mode = tf.placeholder(tf.string, name='mode')
  training = tf.equal(mode, 'train')

  weights = {
    1: tf.Variable(tf.random_normal([13, 6], stddev=0.1)),
    2: tf.Variable(tf.random_normal([6, 6], stddev=0.1)),
    'output': tf.Variable(tf.random_normal([6, 1], stddev=0.1)),
  }
  biases = {
    # The excerpt ends mid-definition; these shapes mirror the weights above
    1: tf.Variable(tf.zeros([6])),
    2: tf.Variable(tf.zeros([6])),
    'output': tf.Variable(tf.zeros([1])),
  }
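  # --- Hypothetical continuation (not in the original excerpt) ---
  # A forward pass and loss following the 'loss'/'minimize' naming
  # convention used by the other snippets here; the layer wiring, the MSE
  # loss, and params.learning_rate are assumptions for illustration only.
  layer1 = tf.nn.relu(tf.matmul(x, weights[1]) + biases[1])
  layer2 = tf.nn.relu(tf.matmul(layer1, weights[2]) + biases[2])
  prediction = tf.squeeze(tf.matmul(layer2, weights['output']) + biases['output'])

  loss = tf.reduce_mean(tf.square(prediction - y), name='loss')
  optimizer = tf.train.AdamOptimizer(params.learning_rate)
  optimizer.minimize(loss, name='minimize')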
def __init__(self, config):
  BaseAgent.__init__(self, config)
  self.hyper_data = hyper.Data(train=self.data_loader.initialize('train'),
                               validation=self.data_loader.initialize('test'))
  self.setup_parameters()
# tf.nn.nce_loss automatically draws a new sample of the negative labels
# each time we evaluate the loss.
loss = tf.reduce_mean(
    tf.nn.nce_loss(nce_weights, nce_biases,
                   labels=labels,
                   inputs=embed,
                   num_sampled=params.negative_samples,
                   num_classes=params.vocab_size),
    name='loss')
optimizer = tf.train.AdamOptimizer(params.learning_rate)
optimizer.minimize(loss, name='minimize')

provider = SentenceDataProvider()
provider.build()
# Unsupervised training: no validation or test sets
data = hype.Data(train=provider, validation=None, test=None)

word2vec_model(params=hype.spec.new(
  vocab_size=provider.vocab_size,
  embedding_size=128,
  negative_samples=64,
  learning_rate=0.01,
))

solver_params = {
  'batch_size': 1024,
  'epochs': 5,
  'eval_flexible': False,
}
solver = hype.TensorflowSolver(data=data, **solver_params)
solver.train()
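The snippet above references embed, labels, nce_weights, and nce_biases without showing where they come from. The sketch below is one plausible definition of those tensors, following the standard TF1 word2vec setup; it is an assumption for context, not the original word2vec_model body.

# Hypothetical head of word2vec_model(params): defines the tensors the
# NCE loss above consumes, using the standard TF1 word2vec setup.
inputs = tf.placeholder(tf.int32, shape=[None], name='input')
labels = tf.placeholder(tf.int32, shape=[None, 1], name='label')

embeddings = tf.Variable(
    tf.random_uniform([params.vocab_size, params.embedding_size], -1.0, 1.0))
embed = tf.nn.embedding_lookup(embeddings, inputs)

nce_weights = tf.Variable(
    tf.truncated_normal([params.vocab_size, params.embedding_size],
                        stddev=1.0 / params.embedding_size ** 0.5))
nce_biases = tf.Variable(tf.zeros([params.vocab_size]))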