Exemple #1
0
 def _create_encoder(self, n_layers, dropout):
   """Build the convolutional encoder and return its final layer.

   One Conv1D layer with ReLU activation is stacked per entry of
   ``self._filter_sizes``, using the kernel size found at the same index of
   ``self._kernel_sizes``. When ``dropout`` is positive, a Dropout layer is
   inserted in front of every convolution. The stack is then passed through
   Flatten, a ReLU Dense layer of width ``self._decoder_dimension`` and
   BatchNorm.

   In variational mode two Dense heads are stored on the instance as
   ``_embedding_mean`` and ``_embedding_stddev`` and the returned layer is
   their CombineMeanStd combination.

   NOTE(review): ``n_layers`` is not read in this method; the depth is taken
   from ``self._filter_sizes``.
   """
   encoded = self._features
   for index, n_filters in enumerate(self._filter_sizes):
     width = self._kernel_sizes[index]
     if dropout > 0.0:
       encoded = layers.Dropout(dropout, in_layers=encoded)
     encoded = layers.Conv1D(
         filters=n_filters,
         kernel_size=width,
         in_layers=encoded,
         activation_fn=tf.nn.relu)

   encoded = layers.Flatten(encoded)
   encoded = layers.Dense(
       self._decoder_dimension, in_layers=encoded, activation_fn=tf.nn.relu)
   encoded = layers.BatchNorm(encoded)

   # Non-variational models use the normalised dense output directly.
   if not self._variational:
     return encoded

   self._embedding_mean = layers.Dense(
       self._embedding_dimension, in_layers=encoded, name='embedding_mean')
   self._embedding_stddev = layers.Dense(
       self._embedding_dimension, in_layers=encoded, name='embedding_std')
   return layers.CombineMeanStd(
       [self._embedding_mean, self._embedding_stddev], training_only=True)
Exemple #2
0
 def create_discriminator(self, data_inputs, conditional_inputs):
     """Build the discriminator and return its single sigmoid output layer.

     ``data_inputs`` and ``conditional_inputs`` are joined with ``+`` and fed
     to a Concat layer, followed by a 10-unit ReLU Dense layer and a 1-unit
     sigmoid Dense layer.
     """
     merged_inputs = layers.Concat(data_inputs + conditional_inputs)
     hidden = layers.Dense(
         10, in_layers=merged_inputs, activation_fn=tf.nn.relu)
     verdict = layers.Dense(1, in_layers=hidden, activation_fn=tf.sigmoid)
     return verdict
Exemple #3
0
 def _create_encoder(self, n_layers, dropout):
   """Stack ``n_layers`` GRU layers on the features and return the result.

   Each GRU has ``self._embedding_dimension`` units and is preceded by a
   Dropout layer when ``dropout`` is positive. The recurrent output is then
   passed through a Gather layer together with ``self._gather_indices``.

   In variational mode two Dense heads are stored on the instance as
   ``_embedding_mean`` and ``_embedding_stddev`` and the returned layer is
   their CombineMeanStd combination.
   """
   encoded = self._features
   for _ in range(n_layers):
     if dropout > 0.0:
       encoded = layers.Dropout(dropout, in_layers=encoded)
     encoded = layers.GRU(
         self._embedding_dimension, self.batch_size, in_layers=encoded)
   encoded = layers.Gather(in_layers=[encoded, self._gather_indices])

   # Non-variational models use the gathered recurrent output directly.
   if not self._variational:
     return encoded

   self._embedding_mean = layers.Dense(
       self._embedding_dimension, in_layers=encoded)
   self._embedding_stddev = layers.Dense(
       self._embedding_dimension, in_layers=encoded)
   heads = [self._embedding_mean, self._embedding_stddev]
   return layers.CombineMeanStd(heads, training_only=True)
Exemple #4
0
 def _create_decoder(self, n_layers, dropout):
   """Build the decoder and return its softmax output layer.

   The embedding goes through a ReLU Dense layer of width
   ``self._embedding_dimension`` and a Repeat layer of length
   ``self._max_output_length``, then three GRU layers of width
   ``self._decoder_dimension`` (each preceded by Dropout when ``dropout`` is
   positive). The final Dense layer, named 'output', has one softmax unit per
   entry of ``self._output_tokens``.

   NOTE(review): ``n_layers`` is not read in this method; the number of GRU
   layers is fixed at 3. Confirm that this is intentional.
   """
   decoded = layers.Dense(
       self._embedding_dimension,
       in_layers=self.embedding,
       activation_fn=tf.nn.relu)
   decoded = layers.Repeat(self._max_output_length, in_layers=decoded)

   for _ in range(3):
     if dropout > 0.0:
       decoded = layers.Dropout(dropout, in_layers=decoded)
     decoded = layers.GRU(
         self._decoder_dimension, self.batch_size, in_layers=decoded)

   return layers.Dense(
       len(self._output_tokens),
       in_layers=decoded,
       activation_fn=tf.nn.softmax,
       name='output')
Exemple #5
0
    def test_dense(self):
        """Check that a Dense layer can be called directly in eager mode."""
        with context.eager_mode():
            n_rows, n_in, n_out = 10, 2, 3
            batch = np.random.rand(n_rows, n_in).astype(np.float32)

            first_layer = layers.Dense(n_out)
            first_output = first_layer(batch)
            assert first_output.shape == (n_rows, n_out)
            assert len(first_layer.trainable_variables) == 2

            # An independently created layer has its own random weights, so
            # its output on the same batch must differ.
            second_layer = layers.Dense(n_out)
            second_output = second_layer(batch)
            assert not np.allclose(first_output, second_output)

            # Calling the first layer again must reproduce its earlier output.
            repeated_output = first_layer(batch)
            assert np.allclose(first_output, repeated_output)
 def __init__(self,
              seq_length,
              use_RNN=False,
              num_tasks=1,
              num_filters=15,
              kernel_size=15,
              pool_width=35,
              L1=0,
              dropout=0.0,
              verbose=True,
              **kwargs):
     """Assemble the Conv2D -> Dropout -> Flatten -> Dense layer stack.

     ``num_filters`` and ``kernel_size`` configure the convolution,
     ``dropout`` is passed to the Dropout layer, and the final ReLU Dense
     layer has ``num_tasks`` outputs. Remaining keyword arguments are
     forwarded to the parent constructor.

     NOTE(review): ``seq_length``, ``use_RNN``, ``pool_width`` and ``L1`` are
     accepted but not read in this constructor.
     """
     super(SequenceDNN, self).__init__(**kwargs)
     self.num_tasks = num_tasks
     self.verbose = verbose

     convolution = layers.Conv2D(num_filters, kernel_size=kernel_size)
     self.add(convolution)

     regulariser = layers.Dropout(dropout)
     self.add(regulariser)

     flattened = layers.Flatten()
     self.add(flattened)

     task_outputs = layers.Dense(self.num_tasks, activation_fn=tf.nn.relu)
     self.add(task_outputs)
Exemple #7
0
    def test_tensorboard(self):
        """Train an Estimator built from a TensorGraph with a histogram summary."""
        n_samples, n_features, n_tasks = 10, 3, 2

        # Seeded random features with all-zero targets, wrapped as a dataset.
        np.random.seed(123)
        inputs = np.random.rand(n_samples, n_features)
        targets = np.zeros((n_samples, n_tasks))
        dataset = dc.data.NumpyDataset(inputs, targets)

        def input_fn(epochs):
            # The whole dataset is served as one batch per epoch.
            iterator = dataset.make_iterator(batch_size=n_samples,
                                             epochs=epochs)
            batch_x, batch_y, batch_w = iterator.get_next()
            return {'x': batch_x, 'weights': batch_w}, batch_y

        # A single Dense layer trained with a mean L2 loss; the layer asks
        # for a histogram summary.
        model = dc.models.TensorGraph()
        features = layers.Feature(shape=(None, n_features))
        dense = layers.Dense(out_channels=n_tasks, in_layers=features)
        dense.set_summary('histogram')
        model.add_output(dense)
        labels = layers.Label(shape=(None, n_tasks))
        per_sample_loss = layers.L2Loss(in_layers=[labels, dense])
        model.set_loss(layers.ReduceMean(per_sample_loss))

        # Wrap the model in an Estimator and train it for 100 epochs.
        x_col = tf.feature_column.numeric_column('x', shape=(n_features, ))
        estimator = model.make_estimator(feature_columns=[x_col])
        estimator.train(input_fn=lambda: input_fn(100))
Exemple #8
0
    def test_sequential(self):
        """Train and evaluate an Estimator built from a Sequential model."""
        n_samples, n_features = 20, 2

        # Random features; every sample has the same target value of 0.5.
        inputs = np.random.rand(n_samples, n_features)
        targets = np.full((n_samples, 1), 0.5)
        dataset = dc.data.NumpyDataset(inputs, targets)

        def input_fn(epochs):
            # The whole dataset is served as one batch per epoch; the weights
            # produced by the iterator are not passed on.
            iterator = dataset.make_iterator(batch_size=n_samples,
                                             epochs=epochs)
            batch_x, batch_y, _ = iterator.get_next()
            return {'x': batch_x}, batch_y

        # One Dense unit trained with an MSE loss.
        model = dc.models.Sequential(loss="mse", learning_rate=0.01)
        model.add(layers.Dense(out_channels=1))

        # Wrap the model in an Estimator that also reports mean absolute error.
        x_col = tf.feature_column.numeric_column('x', shape=(n_features, ))
        estimator = model.make_estimator(
            feature_columns=[x_col],
            metrics={'error': tf.metrics.mean_absolute_error})

        estimator.train(input_fn=lambda: input_fn(1000))

        # After training, both the loss and the error must be small.
        results = estimator.evaluate(input_fn=lambda: input_fn(1))
        assert results['loss'] < 1e-2
        assert results['error'] < 0.1
        assert results['error'] < 0.1
# Build the model: two Conv1D + Dropout stages over the features, then a
# flattened single-output sigmoid Dense layer. The graph is stored under the
# 'rnai' model directory.

model = dc.models.TensorGraph(model_dir='rnai')
features = layers.Feature(shape=(None, 21, 4))
labels = layers.Label(shape=(None, 1))
prev = features
for i in range(2):
    prev = layers.Conv1D(filters=10,
                         kernel_size=10,
                         activation=tf.nn.relu,
                         padding='same',
                         in_layers=prev)
    prev = layers.Dropout(dropout_prob=0.3, in_layers=prev)
output = layers.Dense(out_channels=1,
                      activation_fn=tf.sigmoid,
                      in_layers=layers.Flatten(prev))
model.add_output(output)
# The training loss is the mean of the L2 loss between labels and output.
loss = layers.ReduceMean(layers.L2Loss(in_layers=[labels, output]))
model.set_loss(loss)

# Load the training and validation datasets from disk.

train = dc.data.DiskDataset('train_siRNA')
valid = dc.data.DiskDataset('valid_siRNA')

# Train the model in 20 rounds of 10 epochs each.
# NOTE(review): `metric` and `valid` are created but not used in the visible
# lines; the per-round evaluation presumably follows the fit call in the
# full script -- confirm against the original.

metric = dc.metrics.Metric(dc.metrics.pearsonr, mode='regression')
for i in range(20):
    model.fit(train, nb_epoch=10)
Exemple #10
0
 def create_generator(self, noise_input, conditional_inputs):
   """Build the generator and return its outputs as a one-element list.

   The noise input is prepended to ``conditional_inputs``, the result is
   passed to a Concat layer, and a single-unit Dense layer is applied.
   """
   merged_inputs = layers.Concat([noise_input] + conditional_inputs)
   generated = layers.Dense(1, in_layers=merged_inputs)
   return [generated]
Exemple #11
0
import tensorflow as tf
import numpy as np

# Build the model: three Conv1D + Dropout stages over the sequence features,
# whose flattened output is concatenated with a separate one-column
# `accessibility` feature before the final Dense layer. The graph is stored
# under the 'chromatin' model directory and uses a batch size of 1000.

model = dc.models.TensorGraph(batch_size=1000, model_dir='chromatin')
features = layers.Feature(shape=(None, 101, 4))
accessibility = layers.Feature(shape=(None, 1))
labels = layers.Label(shape=(None, 1))
weights = layers.Weights(shape=(None, 1))
prev = features
for i in range(3):
    prev = layers.Conv1D(filters=15, kernel_size=10, activation=tf.nn.relu, padding='same', in_layers=prev)
    prev = layers.Dropout(dropout_prob=0.5, in_layers=prev)
prev = layers.Concat([layers.Flatten(prev), accessibility])
# The loss is computed on the raw logits; the model output is their sigmoid.
logits = layers.Dense(out_channels=1, in_layers=prev)
output = layers.Sigmoid(logits)
model.add_output(output)
loss = layers.SigmoidCrossEntropy(in_layers=[labels, logits])
# Per-sample losses are combined with the sample weights.
weighted_loss = layers.WeightedError(in_layers=[loss, weights])
model.set_loss(weighted_loss)

# Load the training and validation datasets from disk.

train = dc.data.DiskDataset('train_dataset')
valid = dc.data.DiskDataset('valid_dataset')
# Map the first whitespace-separated field of each line in
# 'accessibility.txt' to the second field parsed as a float.
span_accessibility = {}
# The context manager closes the file handle as soon as the table is read,
# instead of leaving it open until garbage collection.
with open('accessibility.txt') as accessibility_file:
    for line in accessibility_file:
        fields = line.split()
        span_accessibility[fields[0]] = float(fields[1])
# NOTE(review): `model` and `train_dataset` are used below but are not
# defined in the visible lines; they presumably come from an earlier part of
# the original script -- confirm.

# Input layer: 784 values per sample.
feature = layers.Feature(shape=(None, 784))
# Label layer: 10 values per sample, one per digit 0..9.
label = layers.Label(shape=(None, 10))
# Reshape each flat 784-vector into a 28 x 28 matrix for the convolutions.
make_image = layers.Reshape(shape=(None, 28, 28), in_layers=feature)

# Two stacked ReLU convolutions with 32 and 64 outputs.
conv2d_1 = layers.Conv2D(num_outputs=32,
                         activation_fn=tf.nn.relu,
                         in_layers=make_image)
conv2d_2 = layers.Conv2D(num_outputs=64,
                         activation_fn=tf.nn.relu,
                         in_layers=conv2d_1)

# Flatten the convolution output and apply two Dense layers; the second one
# has no activation, so it produces the logits.
flatten = layers.Flatten(in_layers=conv2d_2)
dense1 = layers.Dense(out_channels=1024,
                      activation_fn=tf.nn.relu,
                      in_layers=flatten)
dense2 = layers.Dense(out_channels=10, activation_fn=None, in_layers=dense1)

# Compute the softmax cross-entropy loss for every sample.
smce = layers.SoftMaxCrossEntropy(in_layers=[label, dense2])
# Average the per-sample losses into the training loss.
loss = layers.ReduceMean(in_layers=smce)
model.set_loss(loss)

# Convert the logits to probabilities for the model output.
output = layers.SoftMax(in_layers=dense2)
model.add_output(output)

# Fit for a single epoch; the trailing comment records 10 as an alternative.
model.fit(train_dataset, nb_epoch=1)  # nb_epoch=10
# Build the model: three Conv1D + Dropout stages over the sequence features,
# then a flattened single-output Dense layer. The graph is stored under the
# 'tf' model directory and uses a batch size of 1000.

model = dc.models.TensorGraph(batch_size=1000, model_dir='tf')
features = layers.Feature(shape=(None, 101, 4))
labels = layers.Label(shape=(None, 1))
weights = layers.Weights(shape=(None, 1))
prev = features
for i in range(3):
    prev = layers.Conv1D(filters=15,
                         kernel_size=10,
                         activation=tf.nn.relu,
                         padding='same',
                         in_layers=prev)
    prev = layers.Dropout(dropout_prob=0.5, in_layers=prev)
# The loss is computed on the raw logits; the model output is their sigmoid.
logits = layers.Dense(out_channels=1, in_layers=layers.Flatten(prev))
output = layers.Sigmoid(logits)
model.add_output(output)
loss = layers.SigmoidCrossEntropy(in_layers=[labels, logits])
# Per-sample losses are combined with the sample weights.
weighted_loss = layers.WeightedError(in_layers=[loss, weights])
model.set_loss(weighted_loss)

# Load the training and validation datasets from disk.

train = dc.data.DiskDataset('train_dataset')
valid = dc.data.DiskDataset('valid_dataset')

# Metric for tracking performance on the training and validation datasets.
# NOTE(review): no training or evaluation call is visible in this part of
# the file; the loop that used `metric` appears to be cut off.

metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
# NOTE(review): this loop header and its body look like they were joined
# from two different scripts. `files`, `image_dir` and `f` are not defined
# in the visible lines, and `i` is never used, so as written the same `f`
# would be appended 20 times. The body presumably belonged to a loop over
# image file names -- restore the original header before running.
for i in range(20):
    files.append(os.path.join(image_dir, f))
    # The label is the integer found between '_C' and the next '_' in `f`.
    labels.append(int(re.findall('_C(.*?)_', f)[0]))
# Featurize the files with their labels and split the result into
# train/valid/test subsets using a fixed seed.
loader = dc.data.ImageLoader()
dataset = loader.featurize(files, np.array(labels))
splitter = dc.splits.RandomSplitter()
train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(dataset, seed=123)

# Create the model: five stride-2 Conv2D layers with a growing number of
# outputs, then a flattened single-output Dense layer.
learning_rate = dc.models.tensorgraph.optimizers.ExponentialDecay(0.001, 0.9, 250)
model = dc.models.TensorGraph(learning_rate=learning_rate, model_dir='models/model')
features = layers.Feature(shape=(None, 520, 696))
# From here on `labels` names the Label layer, not the list filled above.
labels = layers.Label(shape=(None,))
prev_layer = features
for num_outputs in [16, 32, 64, 128, 256]:
  prev_layer = layers.Conv2D(num_outputs, kernel_size=5, stride=2, in_layers=prev_layer)
output = layers.Dense(1, in_layers=layers.Flatten(prev_layer))
model.add_output(output)
# The training loss is the sum of the L2 loss between output and labels.
loss = layers.ReduceSum(layers.L2Loss(in_layers=(output, labels)))
model.set_loss(loss)

# Make sure the model directory and its parent exist. makedirs with
# exist_ok=True creates both levels in one call and does not fail when they
# are already present, which avoids the race between checking for a
# directory and creating it.
os.makedirs('models/model', exist_ok=True)

# When retraining is disabled, restore the model instead of fitting it.
# NOTE(review): `RETRAIN` is not defined in the visible lines; it is
# presumably a module-level flag set earlier in the original script.
if not RETRAIN:
  model.restore()

# Train it and evaluate performance on the test set.
# NOTE(review): only the announcement is visible here; the fit and
# evaluation calls that should follow appear to be cut off.
if RETRAIN:
  print("About to fit model for 50 epochs")
def create_model():
    """Train and evaluate a small convolutional MNIST classifier.

    Reads MNIST from ``MNIST_DATA/`` with one-hot labels, wraps the train and
    test splits as NumpyDatasets, and builds a TensorGraph stored under
    ``mnist``: reshape to 28 x 28, two ReLU Conv2D layers, flatten, a
    1024-unit ReLU Dense layer and a 10-unit Dense layer producing logits.
    The model is fitted for 10 epochs and the accuracy metric is printed for
    both splits.

    Returns nothing; the scores are only printed.
    """
    mnist_data = input_data.read_data_sets("MNIST_DATA/", one_hot=True)

    # Wrap the raw image/label arrays so the model can consume them.
    train_dataset = dc.data.NumpyDataset(mnist_data.train.images,
                                         mnist_data.train.labels)
    test_dataset = dc.data.NumpyDataset(mnist_data.test.images,
                                        mnist_data.test.labels)

    # The graph is assembled layer by layer, each taking the previous one
    # as its input.
    model = dc.models.TensorGraph(model_dir='mnist')

    # Each sample is a flat vector of 784 values (28 x 28) with a one-hot
    # label of length 10 (digits 0-9); None leaves the batch size open.
    pixels = layers.Feature(shape=(None, 784))
    digit_labels = layers.Label(shape=(None, 10))

    # Convolutions need the image layout back, so the flat 784-vector is
    # reshaped to 28 x 28.
    hidden = layers.Reshape(shape=(None, 28, 28), in_layers=pixels)

    # Two stacked ReLU convolutions with 32 and then 64 outputs.
    for n_channels in (32, 64):
        hidden = layers.Conv2D(num_outputs=n_channels,
                               activation_fn=tf.nn.relu,
                               in_layers=hidden)

    # Flatten the convolution output before the fully connected layers.
    hidden = layers.Flatten(in_layers=hidden)
    hidden = layers.Dense(out_channels=1024,
                          activation_fn=tf.nn.relu,
                          in_layers=hidden)

    # Final layer: 10 outputs and no activation, i.e. raw logits.
    logits = layers.Dense(out_channels=10,
                          activation_fn=None,
                          in_layers=hidden)

    # Training loss: per-sample softmax cross-entropy averaged over samples.
    per_sample_loss = layers.SoftMaxCrossEntropy(
        in_layers=[digit_labels, logits])
    model.set_loss(layers.ReduceMean(in_layers=per_sample_loss))

    # Model output: softmax over the logits, giving per-digit probabilities.
    model.add_output(layers.SoftMax(in_layers=logits))

    # Reduce nb_epoch to 1 if training takes too long.
    model.fit(train_dataset, nb_epoch=10)

    # Accuracy: the fraction of labels that are predicted correctly.
    metric = dc.metrics.Metric(dc.metrics.accuracy_score)

    train_scores = model.evaluate(train_dataset, [metric])
    test_scores = model.evaluate(test_dataset, [metric])

    print('train_scores', train_scores)
    print('test_scores', test_scores)