def _create_encoder(self, n_layers, dropout):
    """Build the convolutional encoder and return its last layer.

    One ``Conv1D`` layer (ReLU activation) is stacked for every entry of
    ``self._filter_sizes``; the kernel size is read from the same position
    of ``self._kernel_sizes``.  The stack is then flattened, fed through a
    ReLU ``Dense`` layer of width ``self._decoder_dimension`` and passed to
    ``BatchNorm``.

    When ``self._variational`` is truthy, two extra ``Dense`` heads are
    stored as ``self._embedding_mean`` and ``self._embedding_stddev`` and
    merged with ``CombineMeanStd``; that merged layer is what is returned.

    Args:
        n_layers: Accepted but not read by this method; the number of
            convolutions follows ``len(self._filter_sizes)``.
        dropout: When greater than 0.0, a ``Dropout`` layer constructed
            with this value is inserted before each convolution.

    Returns:
        The final layer of the encoder.
    """
    current = self._features
    for idx, n_filters in enumerate(self._filter_sizes):
        width = self._kernel_sizes[idx]
        if dropout > 0.0:
            current = layers.Dropout(dropout, in_layers=current)
        current = layers.Conv1D(
            filters=n_filters,
            kernel_size=width,
            in_layers=current,
            activation_fn=tf.nn.relu)

    current = layers.Flatten(current)
    current = layers.Dense(
        self._decoder_dimension,
        in_layers=current,
        activation_fn=tf.nn.relu)
    current = layers.BatchNorm(current)

    # Non-variational models use the batch-normalised layer directly.
    if not self._variational:
        return current

    self._embedding_mean = layers.Dense(
        self._embedding_dimension,
        in_layers=current,
        name='embedding_mean')
    self._embedding_stddev = layers.Dense(
        self._embedding_dimension,
        in_layers=current,
        name='embedding_std')
    return layers.CombineMeanStd(
        [self._embedding_mean, self._embedding_stddev],
        training_only=True)
def create_discriminator(self, data_inputs, conditional_inputs):
    """Build the discriminator network and return its output layer.

    The two input collections are joined with ``+`` (so both are presumably
    lists of layers -- confirm against the caller) and concatenated.  The
    result goes through a 10-unit ReLU ``Dense`` layer followed by a
    single-unit sigmoid ``Dense`` layer.

    Args:
        data_inputs: Layers carrying the data samples to judge.
        conditional_inputs: Layers carrying the conditioning inputs.

    Returns:
        The single-unit sigmoid ``Dense`` layer.
    """
    merged = layers.Concat(data_inputs + conditional_inputs)
    hidden = layers.Dense(10, in_layers=merged, activation_fn=tf.nn.relu)
    score = layers.Dense(1, in_layers=hidden, activation_fn=tf.sigmoid)
    return score
def _create_encoder(self, n_layers, dropout):
    """Build the recurrent encoder and return its last layer.

    ``n_layers`` ``GRU`` layers of width ``self._embedding_dimension`` are
    stacked on top of ``self._features``, then a ``Gather`` layer selects
    entries using ``self._gather_indices``.

    When ``self._variational`` is truthy, two ``Dense`` heads are stored as
    ``self._embedding_mean`` and ``self._embedding_stddev`` and merged with
    ``CombineMeanStd``; that merged layer is what is returned.

    Args:
        n_layers: Number of GRU layers to stack.
        dropout: When greater than 0.0, a ``Dropout`` layer constructed
            with this value is inserted before each GRU layer.

    Returns:
        The final layer of the encoder.
    """
    current = self._features
    for _ in range(n_layers):
        if dropout > 0.0:
            current = layers.Dropout(dropout, in_layers=current)
        current = layers.GRU(
            self._embedding_dimension,
            self.batch_size,
            in_layers=current)
    current = layers.Gather(in_layers=[current, self._gather_indices])

    # Non-variational models use the gathered GRU output directly.
    if not self._variational:
        return current

    self._embedding_mean = layers.Dense(
        self._embedding_dimension, in_layers=current)
    self._embedding_stddev = layers.Dense(
        self._embedding_dimension, in_layers=current)
    return layers.CombineMeanStd(
        [self._embedding_mean, self._embedding_stddev],
        training_only=True)
def _create_decoder(self, n_layers, dropout):
    """Build the decoder and return its softmax output layer.

    ``self.embedding`` is passed through a ReLU ``Dense`` layer of width
    ``self._embedding_dimension`` and repeated ``self._max_output_length``
    times.  Three ``GRU`` layers of width ``self._decoder_dimension``
    follow, and a softmax ``Dense`` layer named ``'output'`` with one unit
    per entry of ``self._output_tokens`` closes the network.

    Args:
        n_layers: Accepted but not read by this method; exactly three GRU
            layers are always created.
        dropout: When greater than 0.0, a ``Dropout`` layer constructed
            with this value is inserted before each GRU layer.

    Returns:
        The softmax ``Dense`` output layer.
    """
    current = layers.Dense(
        self._embedding_dimension,
        in_layers=self.embedding,
        activation_fn=tf.nn.relu)
    current = layers.Repeat(self._max_output_length, in_layers=current)

    # The recurrent depth is fixed at three regardless of ``n_layers``.
    for _ in range(3):
        if dropout > 0.0:
            current = layers.Dropout(dropout, in_layers=current)
        current = layers.GRU(
            self._decoder_dimension,
            self.batch_size,
            in_layers=current)

    return layers.Dense(
        len(self._output_tokens),
        in_layers=current,
        activation_fn=tf.nn.softmax,
        name='output')
def test_dense(self):
    """Check that a Dense layer can be called directly in eager mode."""
    with context.eager_mode():
        n_in, n_out, n_rows = 2, 3, 10
        batch = np.random.rand(n_rows, n_in).astype(np.float32)

        first = layers.Dense(n_out)
        out_first = first(batch)
        assert out_first.shape == (n_rows, n_out)
        assert len(first.trainable_variables) == 2

        # An independently created layer has its own random weights, so
        # the same input must map to a different output.
        second = layers.Dense(n_out)
        out_second = second(batch)
        assert not np.allclose(out_first, out_second)

        # Calling the first layer again must reproduce its earlier output.
        out_again = first(batch)
        assert np.allclose(out_first, out_again)
def __init__(self,
             seq_length,
             use_RNN=False,
             num_tasks=1,
             num_filters=15,
             kernel_size=15,
             pool_width=35,
             L1=0,
             dropout=0.0,
             verbose=True,
             **kwargs):
    """Assemble the layer stack: Conv2D -> Dropout -> Flatten -> Dense.

    Args:
        seq_length: Not read in this constructor.
        use_RNN: Not read in this constructor.
        num_tasks: Stored on the instance; width of the final Dense layer.
        num_filters: First argument given to the ``Conv2D`` layer.
        kernel_size: Kernel size given to the ``Conv2D`` layer.
        pool_width: Not read in this constructor.
        L1: Not read in this constructor.
        dropout: Argument given to the ``Dropout`` layer.
        verbose: Stored on the instance.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super(SequenceDNN, self).__init__(**kwargs)
    self.num_tasks, self.verbose = num_tasks, verbose

    # Each layer is created and registered in turn, in network order.
    conv = layers.Conv2D(num_filters, kernel_size=kernel_size)
    self.add(conv)
    drop = layers.Dropout(dropout)
    self.add(drop)
    flat = layers.Flatten()
    self.add(flat)
    head = layers.Dense(self.num_tasks, activation_fn=tf.nn.relu)
    self.add(head)
def test_tensorboard(self):
    """Train an Estimator built from a TensorGraph that has a summary set.

    The graph is a single Dense layer with a ``'histogram'`` summary and a
    mean L2 loss.  The test only checks that training completes.
    """
    n_samples, n_features, n_tasks = 10, 3, 2

    # Seeded random features with all-zero targets.
    np.random.seed(123)
    X = np.random.rand(n_samples, n_features)
    y = np.zeros((n_samples, n_tasks))
    dataset = dc.data.NumpyDataset(X, y)

    def input_fn(epochs):
        # Deliver the whole dataset as one batch per step.
        iterator = dataset.make_iterator(batch_size=n_samples, epochs=epochs)
        inputs, targets, sample_weights = iterator.get_next()
        return {'x': inputs, 'weights': sample_weights}, targets

    # Assemble the TensorGraph model.
    model = dc.models.TensorGraph()
    features = layers.Feature(shape=(None, n_features))
    dense = layers.Dense(out_channels=n_tasks, in_layers=features)
    dense.set_summary('histogram')
    model.add_output(dense)
    labels = layers.Label(shape=(None, n_tasks))
    per_sample = layers.L2Loss(in_layers=[labels, dense])
    loss = layers.ReduceMean(per_sample)
    model.set_loss(loss)

    # Wrap the model in an Estimator and train it.
    x_col = tf.feature_column.numeric_column('x', shape=(n_features, ))
    estimator = model.make_estimator(feature_columns=[x_col])
    estimator.train(input_fn=lambda: input_fn(100))
def test_sequential(self):
    """Train and evaluate an Estimator built from a Sequential model.

    A one-unit Dense model is fitted to random features whose target is
    always 0.5; the evaluated loss and mean absolute error must both end
    up small.
    """
    n_samples, n_features = 20, 2

    # Random inputs, constant target of 0.5 for every sample.
    X = np.random.rand(n_samples, n_features)
    y = np.full((n_samples, 1), 0.5)
    dataset = dc.data.NumpyDataset(X, y)

    def input_fn(epochs):
        # Deliver the whole dataset as one batch per step.
        iterator = dataset.make_iterator(batch_size=n_samples, epochs=epochs)
        inputs, targets, _ = iterator.get_next()
        return {'x': inputs}, targets

    # Build the model.
    model = dc.models.Sequential(loss="mse", learning_rate=0.01)
    model.add(layers.Dense(out_channels=1))

    # Wrap it in an Estimator that also reports mean absolute error.
    x_col = tf.feature_column.numeric_column('x', shape=(n_features, ))
    estimator = model.make_estimator(
        feature_columns=[x_col],
        metrics={'error': tf.metrics.mean_absolute_error})

    # Train, then evaluate on a single pass over the data.
    estimator.train(input_fn=lambda: input_fn(1000))
    results = estimator.evaluate(input_fn=lambda: input_fn(1))
    assert results['loss'] < 1e-2
    assert results['error'] < 0.1
# Model definition: two Conv1D + Dropout stages feeding a single sigmoid
# output, trained against a mean L2 loss.
model = dc.models.TensorGraph(model_dir='rnai')
features = layers.Feature(shape=(None, 21, 4))
labels = layers.Label(shape=(None, 1))
prev = features
for i in range(2):
    prev = layers.Conv1D(
        filters=10,
        kernel_size=10,
        activation=tf.nn.relu,
        padding='same',
        in_layers=prev)
    prev = layers.Dropout(dropout_prob=0.3, in_layers=prev)
output = layers.Dense(
    out_channels=1,
    activation_fn=tf.sigmoid,
    in_layers=layers.Flatten(prev))
model.add_output(output)
loss = layers.ReduceMean(layers.L2Loss(in_layers=[labels, output]))
model.set_loss(loss)

# Datasets are read from disk.
train = dc.data.DiskDataset('train_siRNA')
valid = dc.data.DiskDataset('valid_siRNA')

# Training runs as 20 rounds of 10 epochs each.  The Pearson metric is
# created here for tracking training and validation performance.
metric = dc.metrics.Metric(dc.metrics.pearsonr, mode='regression')
for i in range(20):
    model.fit(train, nb_epoch=10)
def create_generator(self, noise_input, conditional_inputs):
    """Build the generator network and return its outputs as a list.

    The noise layer is prepended to ``conditional_inputs`` (joined with
    ``+``, so it is presumably a list -- confirm against the caller), the
    result is concatenated and fed into a single-unit ``Dense`` layer.

    Args:
        noise_input: Layer supplying the noise.
        conditional_inputs: Layers supplying the conditioning inputs.

    Returns:
        A one-element list holding the ``Dense`` output layer.
    """
    stacked = layers.Concat([noise_input] + conditional_inputs)
    sample = layers.Dense(1, in_layers=stacked)
    return [sample]
import tensorflow as tf
import numpy as np

# Build the model: three Conv1D + Dropout stages over the sequence
# features, concatenated with the accessibility feature before the final
# single-unit Dense layer that produces the logits.
model = dc.models.TensorGraph(batch_size=1000, model_dir='chromatin')
features = layers.Feature(shape=(None, 101, 4))
accessibility = layers.Feature(shape=(None, 1))
labels = layers.Label(shape=(None, 1))
weights = layers.Weights(shape=(None, 1))
prev = features
for i in range(3):
    prev = layers.Conv1D(
        filters=15,
        kernel_size=10,
        activation=tf.nn.relu,
        padding='same',
        in_layers=prev)
    prev = layers.Dropout(dropout_prob=0.5, in_layers=prev)
prev = layers.Concat([layers.Flatten(prev), accessibility])
logits = layers.Dense(out_channels=1, in_layers=prev)
output = layers.Sigmoid(logits)
model.add_output(output)

# The loss is computed on the logits (not the sigmoid output) and then
# weighted per sample.
loss = layers.SigmoidCrossEntropy(in_layers=[labels, logits])
weighted_loss = layers.WeightedError(in_layers=[loss, weights])
model.set_loss(weighted_loss)

# Load the data.
train = dc.data.DiskDataset('train_dataset')
valid = dc.data.DiskDataset('valid_dataset')

# Map the first whitespace-separated field of every line to the second
# field parsed as a float.  The file is opened in a ``with`` block so the
# handle is closed deterministically instead of being left to the garbage
# collector.
span_accessibility = {}
with open('accessibility.txt') as accessibility_file:
    for line in accessibility_file:
        fields = line.split()
        span_accessibility[fields[0]] = float(fields[1])
# Inputs: flattened images of 784 values and labels of width 10
# (digits 0..9).
feature = layers.Feature(shape=(None, 784))
label = layers.Label(shape=(None, 10))

# The flat vector is reshaped to a 28 x 28 matrix so that convolutions
# can be applied to it.
make_image = layers.Reshape(shape=(None, 28, 28), in_layers=feature)
conv2d_1 = layers.Conv2D(
    num_outputs=32,
    activation_fn=tf.nn.relu,
    in_layers=make_image)
conv2d_2 = layers.Conv2D(
    num_outputs=64,
    activation_fn=tf.nn.relu,
    in_layers=conv2d_1)

# Fully connected head; the last layer has no activation and yields logits.
flatten = layers.Flatten(in_layers=conv2d_2)
dense1 = layers.Dense(
    out_channels=1024,
    activation_fn=tf.nn.relu,
    in_layers=flatten)
dense2 = layers.Dense(
    out_channels=10,
    activation_fn=None,
    in_layers=dense1)

# Per-sample softmax cross entropy, averaged into the training loss.
smce = layers.SoftMaxCrossEntropy(in_layers=[label, dense2])
loss = layers.ReduceMean(in_layers=smce)
model.set_loss(loss)

# The model output is the softmax of the logits, i.e. probabilities.
output = layers.SoftMax(in_layers=dense2)
model.add_output(output)

model.fit(train_dataset, nb_epoch=1)  # nb_epoch=10
# Build the model. model = dc.models.TensorGraph(batch_size=1000, model_dir='tf') features = layers.Feature(shape=(None, 101, 4)) labels = layers.Label(shape=(None, 1)) weights = layers.Weights(shape=(None, 1)) prev = features for i in range(3): prev = layers.Conv1D(filters=15, kernel_size=10, activation=tf.nn.relu, padding='same', in_layers=prev) prev = layers.Dropout(dropout_prob=0.5, in_layers=prev) logits = layers.Dense(out_channels=1, in_layers=layers.Flatten(prev)) output = layers.Sigmoid(logits) model.add_output(output) loss = layers.SigmoidCrossEntropy(in_layers=[labels, logits]) weighted_loss = layers.WeightedError(in_layers=[loss, weights]) model.set_loss(weighted_loss) # Load the data. train = dc.data.DiskDataset('train_dataset') valid = dc.data.DiskDataset('valid_dataset') # Train the model, tracking its performance on the training and validation datasets. metric = dc.metrics.Metric(dc.metrics.roc_auc_score) for i in range(20):
files.append(os.path.join(image_dir, f)) labels.append(int(re.findall('_C(.*?)_', f)[0])) loader = dc.data.ImageLoader() dataset = loader.featurize(files, np.array(labels)) splitter = dc.splits.RandomSplitter() train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(dataset, seed=123) # Create the model. learning_rate = dc.models.tensorgraph.optimizers.ExponentialDecay(0.001, 0.9, 250) model = dc.models.TensorGraph(learning_rate=learning_rate, model_dir='models/model') features = layers.Feature(shape=(None, 520, 696)) labels = layers.Label(shape=(None,)) prev_layer = features for num_outputs in [16, 32, 64, 128, 256]: prev_layer = layers.Conv2D(num_outputs, kernel_size=5, stride=2, in_layers=prev_layer) output = layers.Dense(1, in_layers=layers.Flatten(prev_layer)) model.add_output(output) loss = layers.ReduceSum(layers.L2Loss(in_layers=(output, labels))) model.set_loss(loss) if not os.path.exists('./models'): os.mkdir('models') if not os.path.exists('./models/model'): os.mkdir('models/model') if not RETRAIN: model.restore() # Train it and evaluate performance on the test set. if RETRAIN: print("About to fit model for 50 epochs")
def create_model():
    """Build, train and evaluate a small convolutional MNIST classifier.

    MNIST is read from ``MNIST_DATA/`` with one-hot labels, a TensorGraph
    model (two Conv2D layers followed by two Dense layers) is fitted for
    10 epochs, and the accuracy on the training and test sets is printed.

    The function has no return statement, so callers receive ``None``.
    """
    mnist = input_data.read_data_sets("MNIST_DATA/", one_hot=True)

    # Wrap the raw arrays so that the model can consume them.
    train_dataset = dc.data.NumpyDataset(mnist.train.images,
                                         mnist.train.labels)
    test_dataset = dc.data.NumpyDataset(mnist.test.images,
                                        mnist.test.labels)

    # The network is a chain of layers, each reading from the previous one.
    model = dc.models.TensorGraph(model_dir='mnist')

    # 784 = 28 x 28 pixels per image; 10 = number of digit classes (0-9).
    # ``None`` leaves the batch dimension open.  Labels are one-hot encoded.
    feature = layers.Feature(shape=(None, 784))
    labels = layers.Label(shape=(None, 10))

    # Convolutions need a matrix, so the flat 784-vector becomes 28 x 28.
    make_image = layers.Reshape(shape=(None, 28, 28), in_layers=feature)

    # Two convolutional layers with ReLU activation.
    conv2d_1 = layers.Conv2D(
        num_outputs=32,
        activation_fn=tf.nn.relu,
        in_layers=make_image)
    conv2d_2 = layers.Conv2D(
        num_outputs=64,
        activation_fn=tf.nn.relu,
        in_layers=conv2d_1)

    # Flatten the convolution output before the fully connected layers.
    flatten = layers.Flatten(in_layers=conv2d_2)
    dense1 = layers.Dense(
        out_channels=1024,
        activation_fn=tf.nn.relu,
        in_layers=flatten)

    # Final layer: one logit per digit class, no activation function.
    logits = layers.Dense(
        out_channels=10,
        activation_fn=None,
        in_layers=dense1)

    # Loss: softmax cross entropy per sample, averaged over all samples.
    per_sample_loss = layers.SoftMaxCrossEntropy(in_layers=[labels, logits])
    loss = layers.ReduceMean(in_layers=per_sample_loss)
    model.set_loss(loss)

    # The model output is the softmax of the logits, i.e. the probability
    # that a sample belongs to each of the 10 digit classes.
    output = layers.SoftMax(in_layers=logits)
    model.add_output(output)

    # Lower nb_epoch to 1 if training takes too long.
    model.fit(train_dataset, nb_epoch=10)

    # Accuracy: the fraction of labels that are predicted correctly.
    metric = dc.metrics.Metric(dc.metrics.accuracy_score)
    train_scores = model.evaluate(train_dataset, [metric])
    test_scores = model.evaluate(test_dataset, [metric])

    print('train_scores', train_scores)
    print('test_scores', test_scores)