import numpy as np
import hyperengine as hype
from tensorflow.contrib.learn import preprocessing

# `inputs` (raw text messages) and `labels` ('spam'/'ham' strings) are
# module-level globals, assumed to be loaded before this function runs.
def preprocess(params):
    # Data processing: encode text inputs into numeric vectors
    processor = preprocessing.VocabularyProcessor(
        max_document_length=params.max_sequence_length,
        min_frequency=params.min_frequency)
    encoded_inputs = list(processor.fit_transform(inputs))
    vocab_size = len(processor.vocabulary_)

    encoded_inputs = np.array(encoded_inputs)
    # Binary labels: 'ham' -> 1, 'spam' -> 0.
    encoded_labels = np.array([int(label == 'ham') for label in labels])

    # Shuffle and split data (the fixed seed keeps the split reproducible)
    np.random.seed(0)
    shuffled_ix = np.random.permutation(np.arange(len(encoded_labels)))
    x_shuffled = encoded_inputs[shuffled_ix]
    y_shuffled = encoded_labels[shuffled_ix]

    # Split train/test set
    ix_cutoff = int(len(y_shuffled) * 0.80)
    x_train, x_test = x_shuffled[:ix_cutoff], x_shuffled[ix_cutoff:]
    y_train, y_test = y_shuffled[:ix_cutoff], y_shuffled[ix_cutoff:]

    if hype.util.is_debug_logged():
        hype.util.debug('Vocabulary size: %d' % vocab_size)
        hype.util.debug('Train/test split: train=%d, test=%d' %
                        (len(y_train), len(y_test)))

    train = hype.DataSet(x_train, y_train)
    test = hype.DataSet(x_test, y_test)
    # No separate validation split here: the test set doubles as validation.
    data = hype.Data(train, test, test)

    return data, vocab_size
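
# Usage sketch: `hype.spec.new` (used the same way in the word2vec snippet
# further down) builds a plain parameter object; the values are illustrative.
data, vocab_size = preprocess(hype.spec.new(max_sequence_length=25,
                                            min_frequency=3))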
Example #2
import hyperengine as hype

def get_cifar10_data(validation_size=5000, one_hot=True):
    # tflearn is imported lazily so it is only required by this helper.
    from tflearn.datasets import cifar10
    (x_train, y_train), (x_test,
                         y_test) = cifar10.load_data('temp-cifar10/data',
                                                     one_hot=one_hot)
    x_val = x_train[:validation_size]
    y_val = y_train[:validation_size]
    x_train = x_train[validation_size:]
    y_train = y_train[validation_size:]
    return hype.Data(train=hype.DataSet(x_train, y_train),
                     validation=hype.DataSet(x_val, y_val),
                     test=hype.DataSet(x_test, y_test))
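
# Usage sketch: the returned hype.Data plugs straight into a hyperengine
# solver, assuming a model graph exposing the named tensors the other
# snippets create ('input', 'label', 'loss', 'minimize') was built first.
cifar_data = get_cifar10_data(validation_size=5000, one_hot=True)
solver = hype.TensorflowSolver(data=cifar_data, batch_size=500, epochs=20)
solver.train()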
# A variant of `preprocess` above that also carves out a validation split;
# it relies on the same `inputs`/`labels` globals.
def preprocess(params):
    # Data processing: encode text inputs into numeric vectors
    processor = preprocessing.VocabularyProcessor(
        max_document_length=params.max_sequence_length,
        min_frequency=params.min_frequency)
    encoded_inputs = list(processor.fit_transform(inputs))
    vocab_size = len(processor.vocabulary_)

    # Set this to see verbose output:
    #   hype.util.set_verbose()

    if hype.util.is_debug_logged():
        hype.util.debug('Encoded text examples:')
        for i in range(3):
            hype.util.debug('  %s ->' % inputs[i])
            hype.util.debug('  %s\n' % encoded_inputs[i].tolist())

    encoded_inputs = np.array(encoded_inputs)
    encoded_labels = np.array([int(label == 'ham') for label in labels])

    # Shuffle the data
    shuffled_ix = np.random.permutation(np.arange(len(encoded_labels)))
    x_shuffled = encoded_inputs[shuffled_ix]
    y_shuffled = encoded_labels[shuffled_ix]

    # Split into train/validation/test sets
    idx1 = int(len(y_shuffled) * 0.75)
    idx2 = int(len(y_shuffled) * 0.85)
    x_train, x_val, x_test = (x_shuffled[:idx1], x_shuffled[idx1:idx2],
                              x_shuffled[idx2:])
    y_train, y_val, y_test = (y_shuffled[:idx1], y_shuffled[idx1:idx2],
                              y_shuffled[idx2:])

    if hype.util.is_debug_logged():
        hype.util.debug('Vocabulary size: %d' % vocab_size)
        hype.util.debug(
            'Train/validation/test split: train=%d, val=%d, test=%d' %
            (len(y_train), len(y_val), len(y_test)))

    train = hype.DataSet(x_train, y_train)
    validation = hype.DataSet(x_val, y_val)
    test = hype.DataSet(x_test, y_test)
    data = hype.Data(train, validation, test)

    return data, vocab_size
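
# The snippet below starts mid-model: its opening lines are not shown. What
# follows is a minimal reconstruction sketch of a standard two-conv-layer
# MNIST head that produces `pool2`; the filter counts and kernel sizes are
# assumptions, while the 'input'/'label' tensor names follow the convention
# used by the other hyperengine snippets on this page.
import tensorflow as tf
import hyperengine as hype
from tensorflow.examples.tutorials.mnist import input_data

x = tf.placeholder(tf.float32, [None, 28, 28, 1], name='input')
y = tf.placeholder(tf.float32, [None, 10], name='label')

conv1 = tf.layers.conv2d(inputs=x, filters=32, kernel_size=5,
                         padding='same', activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=2, strides=2)
conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=5,
                         padding='same', activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=2, strides=2)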
# Flatten pool2 into [batch, features] for the dense layers.
flat = tf.reshape(pool2,
                  [-1, pool2.shape[1] * pool2.shape[2] * pool2.shape[3]])
dense = tf.layers.dense(inputs=flat, units=1024, activation=tf.nn.relu)
logits = tf.layers.dense(inputs=dense, units=10)

loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                                 labels=y),
                         name='loss')
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss_op, name='minimize')
accuracy = tf.reduce_mean(tf.cast(
    tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1)), tf.float32),
                          name='accuracy')

tf_data_sets = input_data.read_data_sets('temp-mnist/data', one_hot=True)
# Reshape the flat 784-pixel rows into NHWC image tensors for the conv layers.
convert = lambda data_set: hype.DataSet(
    data_set.images.reshape((-1, 28, 28, 1)), data_set.labels)
data = hype.Data(train=convert(tf_data_sets.train),
                 validation=convert(tf_data_sets.validation),
                 test=convert(tf_data_sets.test))

solver_params = {
    'batch_size': 1000,
    'eval_batch_size': 2500,
    'epochs': 10,
    'evaluate_test': True,
    'eval_flexible': False,
}
solver = hype.TensorflowSolver(data=data, **solver_params)
solver.train()
Example #5
import hyperengine as hype
import tensorflow as tf
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Note: load_boston requires scikit-learn < 1.2, where it was removed.
x_data, y_data = datasets.load_boston(return_X_y=True)
x_split, x_test, y_split, y_test = train_test_split(x_data,
                                                    y_data,
                                                    test_size=0.33,
                                                    random_state=42)
x_train, x_val, y_train, y_val = train_test_split(x_split,
                                                  y_split,
                                                  test_size=0.33,
                                                  random_state=7)
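# Net effect of the chained splits: ~45% train, ~22% validation, 33% test.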

data = hype.Data(train=hype.DataSet(x_train, y_train),
                 validation=hype.DataSet(x_val, y_val),
                 test=hype.DataSet(x_test, y_test))


def dnn_model(params):
    x = tf.placeholder(shape=[None, 13], dtype=tf.float32, name='input')
    y = tf.placeholder(shape=None, dtype=tf.float32, name='label')
    mode = tf.placeholder(tf.string, name='mode')
    training = tf.equal(mode, 'train')

    weights = {
        1: tf.Variable(tf.random_normal([13, 6], stddev=0.1)),
        2: tf.Variable(tf.random_normal([6, 6], stddev=0.1)),
        'output': tf.Variable(tf.random_normal([6, 1], stddev=0.1))
    }
    biases = {
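        1: tf.Variable(tf.random_normal([6], stddev=0.1)),
        2: tf.Variable(tf.random_normal([6], stddev=0.1)),
        'output': tf.Variable(tf.random_normal([1], stddev=0.1))
    }

    # Completion sketch (the source is truncated here): two ReLU layers, a
    # scalar output, an L2 loss and an Adam step, using the 'loss'/'minimize'
    # op names the hyperengine solver expects. `params.learning_rate` is an
    # assumed hyper-parameter.
    layer1 = tf.nn.relu(tf.matmul(x, weights[1]) + biases[1])
    layer2 = tf.nn.relu(tf.matmul(layer1, weights[2]) + biases[2])
    output = tf.squeeze(tf.matmul(layer2, weights['output']) + biases['output'])
    loss = tf.reduce_mean(tf.square(output - y), name='loss')
    optimizer = tf.train.AdamOptimizer(params.learning_rate)
    optimizer.minimize(loss, name='minimize')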
Example #6
def __init__(self, config):
    BaseAgent.__init__(self, config)
    # Note: this snippet aliases hyperengine as `hyper` rather than `hype`,
    # and reuses the 'test' loader as the validation set.
    self.hyper_data = hyper.Data(train=self.data_loader.initialize('train'),
                                 validation=self.data_loader.initialize('test'))
    self.setup_parameters()
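
# The next snippet is the tail of a word2vec model builder whose head is not
# shown. Below is a minimal reconstruction sketch of the missing preamble in
# the style of the standard TensorFlow word2vec recipe; the placeholder names
# match the other snippets here, and the initializers are assumptions.
import tensorflow as tf
import hyperengine as hype

def word2vec_model(params):
    inputs = tf.placeholder(tf.int32, shape=[None], name='input')
    labels = tf.placeholder(tf.int32, shape=[None, 1], name='label')

    # One dense embedding vector per vocabulary word.
    embeddings = tf.Variable(
        tf.random_uniform([params.vocab_size, params.embedding_size],
                          -1.0, 1.0))
    embed = tf.nn.embedding_lookup(embeddings, inputs)

    # Weights and biases for the noise-contrastive estimation objective.
    nce_weights = tf.Variable(
        tf.truncated_normal([params.vocab_size, params.embedding_size],
                            stddev=1.0 / params.embedding_size ** 0.5))
    nce_biases = tf.Variable(tf.zeros([params.vocab_size]))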
    # tf.nn.nce_loss automatically draws a new sample of the negative labels
    # each time we evaluate the loss.
    loss = tf.reduce_mean(tf.nn.nce_loss(nce_weights,
                                         nce_biases,
                                         labels=labels,
                                         inputs=embed,
                                         num_sampled=params.negative_samples,
                                         num_classes=params.vocab_size),
                          name='loss')
    optimizer = tf.train.AdamOptimizer(params.learning_rate)
    optimizer.minimize(loss, name='minimize')


# `SentenceDataProvider` is defined elsewhere in the original example; it
# presumably streams (input, label) word pairs and exposes `vocab_size`
# once build() has run.
provider = SentenceDataProvider()
provider.build()
data = hype.Data(train=provider, validation=None, test=None)

word2vec_model(params=hype.spec.new(
    vocab_size=provider.vocab_size,
    embedding_size=128,
    negative_samples=64,
    learning_rate=0.01,
))

solver_params = {
    'batch_size': 1024,
    'epochs': 5,
    'eval_flexible': False,
}
solver = hype.TensorflowSolver(data=data, **solver_params)
solver.train()