def _build_graph(self):
    smile_images = Input(shape=self.input_shape)
    stem = chemnet_layers.Stem(self.base_filters)(smile_images)

    inceptionA_out = self.build_inception_module(inputs=stem, type="A")
    reductionA_out = chemnet_layers.ReductionA(self.base_filters)(inceptionA_out)

    inceptionB_out = self.build_inception_module(inputs=reductionA_out, type="B")
    reductionB_out = chemnet_layers.ReductionB(self.base_filters)(inceptionB_out)

    inceptionC_out = self.build_inception_module(inputs=reductionB_out, type="C")
    avg_pooling_out = GlobalAveragePooling2D()(inceptionC_out)

    if self.mode == "classification":
        logits = Dense(self.n_tasks * 2)(avg_pooling_out)
        logits = Reshape((self.n_tasks, 2))(logits)
        output = Softmax()(logits)
        outputs = [output, logits]
        output_types = ['prediction', 'loss']
        loss = SoftmaxCrossEntropy()
    else:
        output = Dense(self.n_tasks * 1)(avg_pooling_out)
        output = Reshape((self.n_tasks, 1))(output)
        outputs = [output]
        output_types = ['prediction']
        loss = L2Loss()

    model = tf.keras.Model(inputs=[smile_images], outputs=outputs)
    return model, loss, output_types
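# Note on the ['prediction', 'loss'] pairing above: predict() returns the
# softmax output, while the raw logits of shape (batch, n_tasks, 2) feed the
# cross-entropy. A minimal, illustrative sketch of the equivalent per-task
# loss, assuming one-hot labels of the same shape (not part of the original
# code):

import tensorflow as tf

def per_task_softmax_ce(labels, logits):
    # labels and logits both have shape (batch, n_tasks, 2); the cross-entropy
    # is computed over the last axis and averaged over tasks and batch.
    per_task = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    return tf.reduce_mean(per_task)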
def _build_graph(self): """Build the model.""" smiles_seqs = Input(dtype=tf.int32, shape=(self.max_seq_len, ), name='Input') rnn_input = tf.keras.layers.Embedding( input_dim=len(self.char_to_idx), output_dim=self.embedding_dim)(smiles_seqs) if self.use_conv: rnn_input = Conv1D(filters=self.filters, kernel_size=self.kernel_size, strides=self.strides, activation=tf.nn.relu, name='Conv1D')(rnn_input) rnn_embeddings = rnn_input for idx, rnn_type in enumerate(self.rnn_types[:-1]): rnn_layer = RNN_DICT[rnn_type] layer = rnn_layer(units=self.rnn_sizes[idx], return_sequences=True) if self.use_bidir: layer = Bidirectional(layer) rnn_embeddings = layer(rnn_embeddings) # Last layer sequences not returned. layer = RNN_DICT[self.rnn_types[-1]](units=self.rnn_sizes[-1]) if self.use_bidir: layer = Bidirectional(layer) rnn_embeddings = layer(rnn_embeddings) if self.mode == "classification": logits = Dense(self.n_tasks * self.n_classes)(rnn_embeddings) logits = Reshape((self.n_tasks, self.n_classes))(logits) if self.n_classes == 2: output = Activation(activation='sigmoid')(logits) loss = SigmoidCrossEntropy() else: output = Softmax()(logits) loss = SoftmaxCrossEntropy() outputs = [output, logits] output_types = ['prediction', 'loss'] else: output = Dense(self.n_tasks * 1, name='Dense')(rnn_embeddings) output = Reshape((self.n_tasks, 1), name='Reshape')(output) outputs = [output] output_types = ['prediction'] loss = L2Loss() model = tf.keras.Model(inputs=[smiles_seqs], outputs=outputs) return model, loss, output_types
def __init__(self,
             n_tasks,
             n_features,
             layer_sizes=[1000],
             weight_init_stddevs=0.02,
             bias_init_consts=1.0,
             weight_decay_penalty=0.0,
             weight_decay_penalty_type="l2",
             dropouts=0.5,
             activation_fns=tf.nn.relu,
             n_classes=2,
             bypass_layer_sizes=[100],
             bypass_weight_init_stddevs=[.02],
             bypass_bias_init_consts=[1.],
             bypass_dropouts=[.5],
             **kwargs):
    """Create a RobustMultitaskClassifier.

    Parameters
    ----------
    n_tasks: int
        number of tasks
    n_features: int
        number of features
    layer_sizes: list
        the size of each dense layer in the network. The length of this list
        determines the number of layers.
    weight_init_stddevs: list or float
        the standard deviation of the distribution to use for weight
        initialization of each layer. The length of this list should equal
        len(layer_sizes). Alternatively this may be a single value instead of a
        list, in which case the same value is used for every layer.
    bias_init_consts: list or float
        the value to initialize the biases in each layer to. The length of this
        list should equal len(layer_sizes). Alternatively this may be a single
        value instead of a list, in which case the same value is used for every
        layer.
    weight_decay_penalty: float
        the magnitude of the weight decay penalty to use
    weight_decay_penalty_type: str
        the type of penalty to use for weight decay, either 'l1' or 'l2'
    dropouts: list or float
        the dropout probability to use for each layer. The length of this list
        should equal len(layer_sizes). Alternatively this may be a single value
        instead of a list, in which case the same value is used for every layer.
    activation_fns: list or object
        the TensorFlow activation function to apply to each layer. The length of
        this list should equal len(layer_sizes). Alternatively this may be a
        single value instead of a list, in which case the same value is used for
        every layer.
    n_classes: int
        the number of classes
    bypass_layer_sizes: list
        the size of each dense layer in the bypass network. The length of this
        list determines the number of bypass layers.
    bypass_weight_init_stddevs: list or float
        the standard deviation of the distribution to use for weight
        initialization of bypass layers. Same requirements as
        weight_init_stddevs.
    bypass_bias_init_consts: list or float
        the value to initialize the biases in bypass layers. Same requirements
        as bias_init_consts.
    bypass_dropouts: list or float
        the dropout probability to use for bypass layers. Same requirements as
        dropouts.
    """
    self.n_tasks = n_tasks
    self.n_features = n_features
    self.n_classes = n_classes
    n_layers = len(layer_sizes)
    if not isinstance(weight_init_stddevs, collections.abc.Sequence):
        weight_init_stddevs = [weight_init_stddevs] * n_layers
    if not isinstance(bias_init_consts, collections.abc.Sequence):
        bias_init_consts = [bias_init_consts] * n_layers
    if not isinstance(dropouts, collections.abc.Sequence):
        dropouts = [dropouts] * n_layers
    if not isinstance(activation_fns, collections.abc.Sequence):
        activation_fns = [activation_fns] * n_layers
    if weight_decay_penalty != 0.0:
        if weight_decay_penalty_type == 'l1':
            regularizer = tf.keras.regularizers.l1(weight_decay_penalty)
        else:
            regularizer = tf.keras.regularizers.l2(weight_decay_penalty)
    else:
        regularizer = None

    n_bypass_layers = len(bypass_layer_sizes)
    if not isinstance(bypass_weight_init_stddevs, collections.abc.Sequence):
        bypass_weight_init_stddevs = [bypass_weight_init_stddevs] * n_bypass_layers
    if not isinstance(bypass_bias_init_consts, collections.abc.Sequence):
        bypass_bias_init_consts = [bypass_bias_init_consts] * n_bypass_layers
    if not isinstance(bypass_dropouts, collections.abc.Sequence):
        bypass_dropouts = [bypass_dropouts] * n_bypass_layers
    bypass_activation_fns = [activation_fns[0]] * n_bypass_layers

    # Add the input features.
    mol_features = tf.keras.Input(shape=(n_features,))
    prev_layer = mol_features

    # Add the shared dense layers
    for size, weight_stddev, bias_const, dropout, activation_fn in zip(
            layer_sizes, weight_init_stddevs, bias_init_consts, dropouts,
            activation_fns):
        layer = tf.keras.layers.Dense(
            size,
            activation=activation_fn,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=weight_stddev),
            bias_initializer=tf.constant_initializer(value=bias_const),
            kernel_regularizer=regularizer)(prev_layer)
        if dropout > 0.0:
            layer = tf.keras.layers.Dropout(rate=dropout)(layer)
        prev_layer = layer
    top_multitask_layer = prev_layer

    task_outputs = []
    for i in range(self.n_tasks):
        prev_layer = mol_features
        # Add task-specific bypass layers
        for size, weight_stddev, bias_const, dropout, activation_fn in zip(
                bypass_layer_sizes, bypass_weight_init_stddevs,
                bypass_bias_init_consts, bypass_dropouts, bypass_activation_fns):
            layer = tf.keras.layers.Dense(
                size,
                activation=activation_fn,
                kernel_initializer=tf.keras.initializers.TruncatedNormal(
                    stddev=weight_stddev),
                bias_initializer=tf.constant_initializer(value=bias_const),
                kernel_regularizer=regularizer)(prev_layer)
            if dropout > 0.0:
                layer = tf.keras.layers.Dropout(rate=dropout)(layer)
            prev_layer = layer
        top_bypass_layer = prev_layer

        if n_bypass_layers > 0:
            task_layer = tf.keras.layers.Concatenate(axis=1)(
                [top_multitask_layer, top_bypass_layer])
        else:
            task_layer = top_multitask_layer
        task_out = tf.keras.layers.Dense(n_classes)(task_layer)
        task_outputs.append(task_out)

    logits = Stack(axis=1)(task_outputs)
    output = tf.keras.layers.Softmax()(logits)
    model = tf.keras.Model(inputs=mol_features, outputs=[output, logits])
    super(RobustMultitaskClassifier, self).__init__(
        model,
        SoftmaxCrossEntropy(),
        output_types=['prediction', 'loss'],
        **kwargs)
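# Usage sketch (not part of the original code): assumes the class is exposed
# as dc.models.RobustMultitaskClassifier and trained on a toy NumpyDataset;
# the data below is random and purely illustrative.

import numpy as np
import deepchem as dc

# Random toy data: 20 samples, 50 features, 3 binary tasks.
X = np.random.rand(20, 50)
y = np.random.randint(2, size=(20, 3))
dataset = dc.data.NumpyDataset(X, y)

model = dc.models.RobustMultitaskClassifier(
    n_tasks=3, n_features=50, layer_sizes=[500], bypass_layer_sizes=[50])
model.fit(dataset, nb_epoch=10)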
def __init__(self,
             n_tasks,
             n_atom_feat=70,
             n_pair_feat=8,
             n_hidden=100,
             T=5,
             M=10,
             mode="regression",
             dropout=0.0,
             n_classes=2,
             uncertainty=False,
             batch_size=100,
             **kwargs):
    """
    Parameters
    ----------
    n_tasks: int
        Number of tasks
    n_atom_feat: int, optional
        Number of features per atom.
    n_pair_feat: int, optional
        Number of features per pair of atoms.
    n_hidden: int, optional
        Number of units (convolution depths) in corresponding hidden layer
    T: int, optional
        Number of message passing steps.
    M: int, optional
        Number of steps in the set2set readout (SetGather).
    dropout: float
        the dropout probability to use.
    n_classes: int
        the number of classes to predict (only used in classification mode)
    uncertainty: bool
        if True, include extra outputs and loss terms to enable the uncertainty
        in outputs to be predicted
    """
    if mode not in ['classification', 'regression']:
        raise ValueError("mode must be either 'classification' or 'regression'")

    self.n_tasks = n_tasks
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    self.n_hidden = n_hidden
    self.T = T
    self.M = M
    self.mode = mode
    self.n_classes = n_classes
    self.uncertainty = uncertainty
    if uncertainty:
        if mode != "regression":
            raise ValueError("Uncertainty is only supported in regression mode")
        if dropout == 0.0:
            raise ValueError('Dropout must be included to predict uncertainty')

    # Build the model.
    atom_features = Input(shape=(self.n_atom_feat,))
    pair_features = Input(shape=(self.n_pair_feat,))
    atom_split = Input(shape=tuple(), dtype=tf.int32)
    atom_to_pair = Input(shape=(2,), dtype=tf.int32)
    n_samples = Input(shape=tuple(), dtype=tf.int32)

    message_passing = layers.MessagePassing(
        self.T, message_fn='enn', update_fn='gru',
        n_hidden=self.n_hidden)([atom_features, pair_features, atom_to_pair])

    atom_embeddings = Dense(self.n_hidden)(message_passing)

    mol_embeddings = layers.SetGather(
        self.M, batch_size,
        n_hidden=self.n_hidden)([atom_embeddings, atom_split])

    dense1 = Dense(2 * self.n_hidden, activation=tf.nn.relu)(mol_embeddings)

    n_tasks = self.n_tasks
    if self.mode == 'classification':
        n_classes = self.n_classes
        logits = Reshape((n_tasks, n_classes))(Dense(n_tasks * n_classes)(dense1))
        logits = TrimGraphOutput()([logits, n_samples])
        output = Softmax()(logits)
        outputs = [output, logits]
        output_types = ['prediction', 'loss']
        loss = SoftmaxCrossEntropy()
    else:
        output = Dense(n_tasks)(dense1)
        output = TrimGraphOutput()([output, n_samples])
        if self.uncertainty:
            log_var = Dense(n_tasks)(dense1)
            log_var = TrimGraphOutput()([log_var, n_samples])
            var = Activation(tf.exp)(log_var)
            outputs = [output, var, output, log_var]
            output_types = ['prediction', 'variance', 'loss', 'loss']

            def loss(outputs, labels, weights):
                diff = labels[0] - outputs[0]
                return tf.reduce_mean(diff * diff / tf.exp(outputs[1]) + outputs[1])
        else:
            outputs = [output]
            output_types = ['prediction']
            loss = L2Loss()

    model = tf.keras.Model(
        inputs=[atom_features, pair_features, atom_split, atom_to_pair, n_samples],
        outputs=outputs)
    super(MPNNModel, self).__init__(
        model, loss, output_types=output_types, batch_size=batch_size, **kwargs)
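# Usage sketch (not part of the original code): assumes the MPNN inputs come
# from dc.feat.WeaveFeaturizer, which yields per-atom and per-pair features
# matching n_atom_feat=75 and n_pair_feat=14; SMILES and labels are illustrative.

import numpy as np
import deepchem as dc

featurizer = dc.feat.WeaveFeaturizer()
X = featurizer.featurize(["CCO", "CCC", "c1ccccc1"])
y = np.array([[0.5], [1.2], [0.3]])
dataset = dc.data.NumpyDataset(X, y)

model = dc.models.MPNNModel(
    n_tasks=1, n_atom_feat=75, n_pair_feat=14, mode="regression", batch_size=3)
model.fit(dataset, nb_epoch=5)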
def __init__(self,
             n_tasks: int,
             graph_conv_layers: List[int] = [64, 64],
             dense_layer_size: int = 128,
             dropout: float = 0.0,
             mode: str = "classification",
             number_atom_features: int = 75,
             n_classes: int = 2,
             batch_size: int = 100,
             batch_normalize: bool = True,
             uncertainty: bool = False,
             **kwargs):
    """The wrapper class for graph convolutions.

    Note that since the underlying _GraphConvKerasModel class is specified using
    imperative subclassing style, this model cannot make predictions for
    arbitrary outputs.

    Parameters
    ----------
    n_tasks: int
        Number of tasks
    graph_conv_layers: list of int
        Width of channels for the Graph Convolution Layers
    dense_layer_size: int
        Width of channels for Atom Level Dense Layer before GraphPool
    dropout: list or float
        the dropout probability to use for each layer. The length of this list
        should equal len(graph_conv_layers)+1 (one value for each convolution
        layer, and one for the dense layer). Alternatively this may be a single
        value instead of a list, in which case the same value is used for every
        layer.
    mode: str
        Either "classification" or "regression"
    number_atom_features: int
        75 is the default number of atom features created, but this can vary if
        various options are passed to the function atom_features in
        graph_features
    n_classes: int
        the number of classes to predict (only used in classification mode)
    batch_normalize: bool
        if True, apply batch normalization to model
    uncertainty: bool
        if True, include extra outputs and loss terms to enable the uncertainty
        in outputs to be predicted
    """
    self.mode = mode
    self.n_tasks = n_tasks
    self.n_classes = n_classes
    self.batch_size = batch_size
    self.uncertainty = uncertainty
    model = _GraphConvKerasModel(
        n_tasks,
        graph_conv_layers=graph_conv_layers,
        dense_layer_size=dense_layer_size,
        dropout=dropout,
        mode=mode,
        number_atom_features=number_atom_features,
        n_classes=n_classes,
        batch_normalize=batch_normalize,
        uncertainty=uncertainty,
        batch_size=batch_size)
    if mode == "classification":
        output_types = ['prediction', 'loss', 'embedding']
        loss: Union[Loss, LossFn] = SoftmaxCrossEntropy()
    else:
        if self.uncertainty:
            output_types = ['prediction', 'variance', 'loss', 'loss', 'embedding']

            def loss(outputs, labels, weights):
                diff = labels[0] - outputs[0]
                return tf.reduce_mean(diff * diff / tf.exp(outputs[1]) + outputs[1])
        else:
            output_types = ['prediction', 'embedding']
            loss = L2Loss()
    super(GraphConvModel, self).__init__(
        model, loss, output_types=output_types, batch_size=batch_size, **kwargs)
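# Usage sketch (not part of the original code): assumes the standard
# ConvMolFeaturizer pipeline that GraphConvModel expects; the SMILES strings
# and labels below are toy values for illustration only.

import numpy as np
import deepchem as dc

featurizer = dc.feat.ConvMolFeaturizer()
X = featurizer.featurize(["CCO", "c1ccccc1", "CCN"])
y = np.array([[0], [1], [0]])
dataset = dc.data.NumpyDataset(X, y)

model = dc.models.GraphConvModel(n_tasks=1, mode="classification", batch_size=3)
model.fit(dataset, nb_epoch=5)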
def __init__(self,
             n_tasks: int,
             n_atom_feat: OneOrMany[int] = 75,
             n_pair_feat: OneOrMany[int] = 14,
             n_hidden: int = 50,
             n_graph_feat: int = 128,
             n_weave: int = 2,
             fully_connected_layer_sizes: List[int] = [2000, 100],
             weight_init_stddevs: OneOrMany[float] = [0.01, 0.04],
             bias_init_consts: OneOrMany[float] = [0.5, 3.0],
             weight_decay_penalty: float = 0.0,
             weight_decay_penalty_type: str = "l2",
             dropouts: OneOrMany[float] = 0.25,
             activation_fns: OneOrMany[KerasActivationFn] = tf.nn.relu,
             batch_normalize: bool = True,
             batch_normalize_kwargs: Dict = {
                 "renorm": True,
                 "fused": False
             },
             gaussian_expand: bool = True,
             compress_post_gaussian_expansion: bool = False,
             mode: str = "classification",
             n_classes: int = 2,
             batch_size: int = 100,
             **kwargs):
    """
    Parameters
    ----------
    n_tasks: int
        Number of tasks
    n_atom_feat: int, optional
        Number of features per atom.
    n_pair_feat: int, optional
        Number of features per pair of atoms.
    n_hidden: int, optional
        Number of units (convolution depths) in corresponding hidden layer
    n_graph_feat: int, optional
        Number of output features for each molecule (graph)
    n_weave: int, optional
        The number of weave layers in this model.
    fully_connected_layer_sizes: list
        The size of each dense layer in the network. The length of this list
        determines the number of layers.
    weight_init_stddevs: list or float
        The standard deviation of the distribution to use for weight
        initialization of each layer. The length of this list should equal
        len(layer_sizes). Alternatively this may be a single value instead of a
        list, in which case the same value is used for every layer.
    bias_init_consts: list or float
        The value to initialize the biases in each layer to. The length of this
        list should equal len(layer_sizes). Alternatively this may be a single
        value instead of a list, in which case the same value is used for every
        layer.
    weight_decay_penalty: float
        The magnitude of the weight decay penalty to use
    weight_decay_penalty_type: str
        The type of penalty to use for weight decay, either 'l1' or 'l2'
    dropouts: list or float
        The dropout probability to use for each layer. The length of this list
        should equal len(layer_sizes). Alternatively this may be a single value
        instead of a list, in which case the same value is used for every layer.
    activation_fns: list or object
        The TensorFlow activation function to apply to each layer. The length of
        this list should equal len(layer_sizes). Alternatively this may be a
        single value instead of a list, in which case the same value is used for
        every layer.
    batch_normalize: bool, optional (default True)
        If this is turned on, apply batch normalization before applying
        activation functions on convolutional and fully connected layers.
    batch_normalize_kwargs: Dict, optional (default `{"renorm": True, "fused": False}`)
        Batch normalization is a complex layer which has many potential
        arguments which change behavior. This layer accepts user-defined
        parameters which are passed to all `BatchNormalization` layers in
        `WeaveModel`, `WeaveLayer`, and `WeaveGather`.
    gaussian_expand: boolean, optional (default True)
        Whether to expand each dimension of atomic features by gaussian
        histogram
    compress_post_gaussian_expansion: bool, optional (default False)
        If True, compress the results of the Gaussian expansion back to the
        original dimensions of the input.
    mode: str
        Either "classification" or "regression" for type of model.
    n_classes: int
        Number of classes to predict (only used in classification mode)
    """
    if mode not in ['classification', 'regression']:
        raise ValueError("mode must be either 'classification' or 'regression'")

    if not isinstance(n_atom_feat, collections.abc.Sequence):
        n_atom_feat = [n_atom_feat] * n_weave
    if not isinstance(n_pair_feat, collections.abc.Sequence):
        n_pair_feat = [n_pair_feat] * n_weave
    n_layers = len(fully_connected_layer_sizes)
    if not isinstance(weight_init_stddevs, collections.abc.Sequence):
        weight_init_stddevs = [weight_init_stddevs] * n_layers
    if not isinstance(bias_init_consts, collections.abc.Sequence):
        bias_init_consts = [bias_init_consts] * n_layers
    if not isinstance(dropouts, collections.abc.Sequence):
        dropouts = [dropouts] * n_layers
    if not isinstance(activation_fns, collections.abc.Sequence):
        activation_fns = [activation_fns] * n_layers
    if weight_decay_penalty != 0.0:
        if weight_decay_penalty_type == 'l1':
            regularizer = tf.keras.regularizers.l1(weight_decay_penalty)
        else:
            regularizer = tf.keras.regularizers.l2(weight_decay_penalty)
    else:
        regularizer = None

    self.n_tasks = n_tasks
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    self.n_hidden = n_hidden
    self.n_graph_feat = n_graph_feat
    self.mode = mode
    self.n_classes = n_classes

    # Build the model.
    atom_features = Input(shape=(self.n_atom_feat[0],))
    pair_features = Input(shape=(self.n_pair_feat[0],))
    pair_split = Input(shape=tuple(), dtype=tf.int32)
    atom_split = Input(shape=tuple(), dtype=tf.int32)
    atom_to_pair = Input(shape=(2,), dtype=tf.int32)
    inputs = [atom_features, pair_features, pair_split, atom_to_pair]
    for ind in range(n_weave):
        n_atom = self.n_atom_feat[ind]
        n_pair = self.n_pair_feat[ind]
        if ind < n_weave - 1:
            n_atom_next = self.n_atom_feat[ind + 1]
            n_pair_next = self.n_pair_feat[ind + 1]
        else:
            n_atom_next = n_hidden
            n_pair_next = n_hidden
        weave_layer_ind_A, weave_layer_ind_P = layers.WeaveLayer(
            n_atom_input_feat=n_atom,
            n_pair_input_feat=n_pair,
            n_atom_output_feat=n_atom_next,
            n_pair_output_feat=n_pair_next,
            batch_normalize=batch_normalize)(inputs)
        inputs = [weave_layer_ind_A, weave_layer_ind_P, pair_split, atom_to_pair]
    # Final atom-layer convolution. Note this differs slightly from the paper
    # since we use a tanh activation. This seems necessary for numerical
    # stability.
    dense1 = Dense(self.n_graph_feat, activation=tf.nn.tanh)(weave_layer_ind_A)
    if batch_normalize:
        dense1 = BatchNormalization(**batch_normalize_kwargs)(dense1)
    weave_gather = layers.WeaveGather(
        batch_size,
        n_input=self.n_graph_feat,
        gaussian_expand=gaussian_expand,
        compress_post_gaussian_expansion=compress_post_gaussian_expansion)(
            [dense1, atom_split])

    if n_layers > 0:
        # Now fully connected layers
        input_layer = weave_gather
        for layer_size, weight_stddev, bias_const, dropout, activation_fn in zip(
                fully_connected_layer_sizes, weight_init_stddevs,
                bias_init_consts, dropouts, activation_fns):
            layer = Dense(
                layer_size,
                kernel_initializer=tf.keras.initializers.TruncatedNormal(
                    stddev=weight_stddev),
                bias_initializer=tf.constant_initializer(value=bias_const),
                kernel_regularizer=regularizer)(input_layer)
            if dropout > 0.0:
                layer = Dropout(rate=dropout)(layer)
            if batch_normalize:
                # Should this allow for training?
                layer = BatchNormalization(**batch_normalize_kwargs)(layer)
            layer = Activation(activation_fn)(layer)
            input_layer = layer
        output = input_layer
    else:
        output = weave_gather

    n_tasks = self.n_tasks
    if self.mode == 'classification':
        n_classes = self.n_classes
        logits = Reshape((n_tasks, n_classes))(Dense(n_tasks * n_classes)(output))
        output = Softmax()(logits)
        outputs = [output, logits]
        output_types = ['prediction', 'loss']
        loss: Loss = SoftmaxCrossEntropy()
    else:
        output = Dense(n_tasks)(output)
        outputs = [output]
        output_types = ['prediction']
        loss = L2Loss()
    model = tf.keras.Model(
        inputs=[atom_features, pair_features, pair_split, atom_split, atom_to_pair],
        outputs=outputs)
    super(WeaveModel, self).__init__(
        model, loss, output_types=output_types, batch_size=batch_size, **kwargs)
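# Usage sketch (not part of the original code): assumes dc.feat.WeaveFeaturizer
# provides the atom/pair features this constructor expects; toy data only.

import numpy as np
import deepchem as dc

featurizer = dc.feat.WeaveFeaturizer()
X = featurizer.featurize(["CCO", "CCC"])
y = np.array([[0], [1]])
dataset = dc.data.NumpyDataset(X, y)

model = dc.models.WeaveModel(
    n_tasks=1, mode="classification", n_weave=2, batch_size=2)
model.fit(dataset, nb_epoch=5)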
def __init__(self,
             n_tasks,
             max_atoms=50,
             n_atom_feat=75,
             n_graph_feat=30,
             n_outputs=30,
             layer_sizes=[100],
             layer_sizes_gather=[100],
             dropout=None,
             mode="classification",
             n_classes=2,
             uncertainty=False,
             batch_size=100,
             **kwargs):
    """
    Parameters
    ----------
    n_tasks: int
        Number of tasks.
    max_atoms: int, optional
        Maximum number of atoms in a molecule, should be defined based on
        dataset.
    n_atom_feat: int, optional
        Number of features per atom.
    n_graph_feat: int, optional
        Number of features for atom in the graph.
    n_outputs: int, optional
        Number of features for each molecule.
    layer_sizes: list of int, optional
        List of hidden layer size(s) in the propagation step: length of this
        list represents the number of hidden layers, and each element is the
        width of corresponding hidden layer.
    layer_sizes_gather: list of int, optional
        List of hidden layer size(s) in the gather step.
    dropout: None or float, optional
        Dropout probability, applied after each propagation step and gather
        step.
    mode: str, optional
        Either "classification" or "regression" for type of model.
    n_classes: int
        the number of classes to predict (only used in classification mode)
    uncertainty: bool
        if True, include extra outputs and loss terms to enable the uncertainty
        in outputs to be predicted
    """
    if mode not in ['classification', 'regression']:
        raise ValueError("mode must be either 'classification' or 'regression'")

    self.n_tasks = n_tasks
    self.max_atoms = max_atoms
    self.n_atom_feat = n_atom_feat
    self.n_graph_feat = n_graph_feat
    self.n_outputs = n_outputs
    self.layer_sizes = layer_sizes
    self.layer_sizes_gather = layer_sizes_gather
    self.dropout = dropout
    self.mode = mode
    self.n_classes = n_classes
    self.uncertainty = uncertainty
    if uncertainty:
        if mode != "regression":
            raise ValueError("Uncertainty is only supported in regression mode")
        if dropout is None or dropout == 0.0:
            raise ValueError('Dropout must be included to predict uncertainty')

    # Build the model.
    atom_features = Input(shape=(self.n_atom_feat,))
    parents = Input(shape=(self.max_atoms, self.max_atoms), dtype=tf.int32)
    calculation_orders = Input(shape=(self.max_atoms,), dtype=tf.int32)
    calculation_masks = Input(shape=(self.max_atoms,), dtype=tf.bool)
    membership = Input(shape=tuple(), dtype=tf.int32)
    n_atoms = Input(shape=tuple(), dtype=tf.int32)
    dag_layer1 = layers.DAGLayer(
        n_graph_feat=self.n_graph_feat,
        n_atom_feat=self.n_atom_feat,
        max_atoms=self.max_atoms,
        layer_sizes=self.layer_sizes,
        dropout=self.dropout,
        batch_size=batch_size)([
            atom_features, parents, calculation_orders, calculation_masks, n_atoms
        ])
    dag_gather = layers.DAGGather(
        n_graph_feat=self.n_graph_feat,
        n_outputs=self.n_outputs,
        max_atoms=self.max_atoms,
        layer_sizes=self.layer_sizes_gather,
        dropout=self.dropout)([dag_layer1, membership])
    n_tasks = self.n_tasks
    if self.mode == 'classification':
        n_classes = self.n_classes
        logits = Reshape((n_tasks, n_classes))(
            Dense(n_tasks * n_classes)(dag_gather))
        output = Softmax()(logits)
        outputs = [output, logits]
        output_types = ['prediction', 'loss']
        loss = SoftmaxCrossEntropy()
    else:
        output = Dense(n_tasks)(dag_gather)
        if self.uncertainty:
            log_var = Dense(n_tasks)(dag_gather)
            var = Activation(tf.exp)(log_var)
            outputs = [output, var, output, log_var]
            output_types = ['prediction', 'variance', 'loss', 'loss']

            def loss(outputs, labels, weights):
                diff = labels[0] - outputs[0]
                return tf.reduce_mean(diff * diff / tf.exp(outputs[1]) + outputs[1])
        else:
            outputs = [output]
            output_types = ['prediction']
            loss = L2Loss()
    model = tf.keras.Model(
        inputs=[
            atom_features, parents, calculation_orders, calculation_masks,
            membership, n_atoms  # , dropout_switch
        ],
        outputs=outputs)
    super(DAGModel, self).__init__(
        model, loss, output_types=output_types, batch_size=batch_size, **kwargs)
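# Usage sketch (not part of the original code): assumes the usual pipeline in
# which ConvMolFeaturizer output is reordered with dc.trans.DAGTransformer
# before fitting; toy SMILES and labels for illustration only.

import numpy as np
import deepchem as dc

featurizer = dc.feat.ConvMolFeaturizer()
X = featurizer.featurize(["CCO", "CCN"])
y = np.array([[0.2], [0.8]])
dataset = dc.data.NumpyDataset(X, y)

# DAG models need parent/child calculation orders precomputed per molecule.
transformer = dc.trans.DAGTransformer(max_atoms=50)
dataset = transformer.transform(dataset)

model = dc.models.DAGModel(n_tasks=1, max_atoms=50, mode="regression", batch_size=2)
model.fit(dataset, nb_epoch=5)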
def __init__(self,
             n_tasks,
             graph_conv_layers=[64, 64],
             dense_layer_size=128,
             dropout=0.0,
             mode="classification",
             number_atom_features=75,
             n_classes=2,
             uncertainty=False,
             batch_size=100,
             **kwargs):
    """
    Parameters
    ----------
    n_tasks: int
        Number of tasks
    graph_conv_layers: list of int
        Width of channels for the Graph Convolution Layers
    dense_layer_size: int
        Width of channels for Atom Level Dense Layer before GraphPool
    dropout: list or float
        the dropout probability to use for each layer. The length of this list
        should equal len(graph_conv_layers)+1 (one value for each convolution
        layer, and one for the dense layer). Alternatively this may be a single
        value instead of a list, in which case the same value is used for every
        layer.
    mode: str
        Either "classification" or "regression"
    number_atom_features: int
        75 is the default number of atom features created, but this can vary if
        various options are passed to the function atom_features in
        graph_features
    n_classes: int
        the number of classes to predict (only used in classification mode)
    uncertainty: bool
        if True, include extra outputs and loss terms to enable the uncertainty
        in outputs to be predicted
    """
    if mode not in ['classification', 'regression']:
        raise ValueError("mode must be either 'classification' or 'regression'")
    self.n_tasks = n_tasks
    self.mode = mode
    self.dense_layer_size = dense_layer_size
    self.graph_conv_layers = graph_conv_layers
    self.number_atom_features = number_atom_features
    self.n_classes = n_classes
    self.uncertainty = uncertainty
    if not isinstance(dropout, collections.abc.Sequence):
        dropout = [dropout] * (len(graph_conv_layers) + 1)
    if len(dropout) != len(graph_conv_layers) + 1:
        raise ValueError('Wrong number of dropout probabilities provided')
    self.dropout = dropout
    if uncertainty:
        if mode != "regression":
            raise ValueError("Uncertainty is only supported in regression mode")
        if any(d == 0.0 for d in dropout):
            raise ValueError(
                'Dropout must be included in every layer to predict uncertainty')

    # Build the model.
    atom_features = Input(shape=(self.number_atom_features,))
    degree_slice = Input(shape=(2,), dtype=tf.int32)
    membership = Input(shape=tuple(), dtype=tf.int32)
    n_samples = Input(shape=tuple(), dtype=tf.int32)
    dropout_switch = tf.keras.Input(shape=tuple())
    self.deg_adjs = []
    for i in range(0, 10 + 1):
        deg_adj = Input(shape=(i + 1,), dtype=tf.int32)
        self.deg_adjs.append(deg_adj)
    in_layer = atom_features
    for layer_size, dropout in zip(self.graph_conv_layers, self.dropout):
        gc1_in = [in_layer, degree_slice, membership] + self.deg_adjs
        gc1 = layers.GraphConv(layer_size, activation_fn=tf.nn.relu)(gc1_in)
        batch_norm1 = BatchNormalization(fused=False)(gc1)
        if dropout > 0.0:
            batch_norm1 = layers.SwitchedDropout(rate=dropout)(
                [batch_norm1, dropout_switch])
        gp_in = [batch_norm1, degree_slice, membership] + self.deg_adjs
        in_layer = layers.GraphPool()(gp_in)
    dense = Dense(self.dense_layer_size, activation=tf.nn.relu)(in_layer)
    batch_norm3 = BatchNormalization(fused=False)(dense)
    if self.dropout[-1] > 0.0:
        batch_norm3 = layers.SwitchedDropout(rate=self.dropout[-1])(
            [batch_norm3, dropout_switch])
    self.neural_fingerprint = layers.GraphGather(
        batch_size=batch_size,
        activation_fn=tf.nn.tanh)([batch_norm3, degree_slice, membership] +
                                  self.deg_adjs)

    n_tasks = self.n_tasks
    if self.mode == 'classification':
        n_classes = self.n_classes
        logits = Reshape((n_tasks, n_classes))(
            Dense(n_tasks * n_classes)(self.neural_fingerprint))
        logits = TrimGraphOutput()([logits, n_samples])
        output = Softmax()(logits)
        outputs = [output, logits]
        output_types = ['prediction', 'loss']
        loss = SoftmaxCrossEntropy()
    else:
        output = Dense(n_tasks)(self.neural_fingerprint)
        output = TrimGraphOutput()([output, n_samples])
        if self.uncertainty:
            log_var = Dense(n_tasks)(self.neural_fingerprint)
            log_var = TrimGraphOutput()([log_var, n_samples])
            var = Activation(tf.exp)(log_var)
            outputs = [output, var, output, log_var]
            output_types = ['prediction', 'variance', 'loss', 'loss']

            def loss(outputs, labels, weights):
                diff = labels[0] - outputs[0]
                return tf.reduce_mean(diff * diff / tf.exp(outputs[1]) + outputs[1])
        else:
            outputs = [output]
            output_types = ['prediction']
            loss = L2Loss()
    model = tf.keras.Model(
        inputs=[atom_features, degree_slice, membership, n_samples, dropout_switch] +
        self.deg_adjs,
        outputs=outputs)
    super(GraphConvModel, self).__init__(
        model, loss, output_types=output_types, batch_size=batch_size, **kwargs)
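# When uncertainty=True, the extra 'variance' output is consumed by
# predict_uncertainty. A hedged workflow sketch (not part of the original
# code), assuming a dropout-enabled regression model and an already
# featurized `dataset` as in the earlier examples:

import deepchem as dc

# Uncertainty requires regression mode and nonzero dropout in every layer.
model = dc.models.GraphConvModel(
    n_tasks=1, mode="regression", dropout=0.1, uncertainty=True)
model.fit(dataset, nb_epoch=10)

# Returns predicted means and estimated standard deviations per sample/task.
y_pred, y_std = model.predict_uncertainty(dataset)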
def __init__(self,
             n_tasks,
             n_atom_feat=75,
             n_pair_feat=14,
             n_hidden=50,
             n_graph_feat=128,
             mode="classification",
             n_classes=2,
             batch_size=100,
             **kwargs):
    """
    Parameters
    ----------
    n_tasks: int
        Number of tasks
    n_atom_feat: int, optional
        Number of features per atom.
    n_pair_feat: int, optional
        Number of features per pair of atoms.
    n_hidden: int, optional
        Number of units (convolution depths) in corresponding hidden layer
    n_graph_feat: int, optional
        Number of output features for each molecule (graph)
    mode: str
        Either "classification" or "regression" for type of model.
    n_classes: int
        Number of classes to predict (only used in classification mode)
    """
    if mode not in ['classification', 'regression']:
        raise ValueError("mode must be either 'classification' or 'regression'")

    self.n_tasks = n_tasks
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    self.n_hidden = n_hidden
    self.n_graph_feat = n_graph_feat
    self.mode = mode
    self.n_classes = n_classes

    # Build the model.
    atom_features = Input(shape=(self.n_atom_feat,))
    pair_features = Input(shape=(self.n_pair_feat,))
    pair_split = Input(shape=tuple(), dtype=tf.int32)
    atom_split = Input(shape=tuple(), dtype=tf.int32)
    atom_to_pair = Input(shape=(2,), dtype=tf.int32)
    weave_layer1A, weave_layer1P = layers.WeaveLayer(
        n_atom_input_feat=self.n_atom_feat,
        n_pair_input_feat=self.n_pair_feat,
        n_atom_output_feat=self.n_hidden,
        n_pair_output_feat=self.n_hidden)(
            [atom_features, pair_features, pair_split, atom_to_pair])
    weave_layer2A, weave_layer2P = layers.WeaveLayer(
        n_atom_input_feat=self.n_hidden,
        n_pair_input_feat=self.n_hidden,
        n_atom_output_feat=self.n_hidden,
        n_pair_output_feat=self.n_hidden,
        update_pair=False)(
            [weave_layer1A, weave_layer1P, pair_split, atom_to_pair])
    dense1 = Dense(self.n_graph_feat, activation=tf.nn.tanh)(weave_layer2A)
    batch_norm1 = BatchNormalization(epsilon=1e-5)(dense1)
    weave_gather = layers.WeaveGather(
        batch_size, n_input=self.n_graph_feat,
        gaussian_expand=True)([batch_norm1, atom_split])

    n_tasks = self.n_tasks
    if self.mode == 'classification':
        n_classes = self.n_classes
        logits = Reshape((n_tasks, n_classes))(
            Dense(n_tasks * n_classes)(weave_gather))
        output = Softmax()(logits)
        outputs = [output, logits]
        output_types = ['prediction', 'loss']
        loss = SoftmaxCrossEntropy()
    else:
        output = Dense(n_tasks)(weave_gather)
        outputs = [output]
        output_types = ['prediction']
        loss = L2Loss()
    model = tf.keras.Model(
        inputs=[atom_features, pair_features, pair_split, atom_split, atom_to_pair],
        outputs=outputs)
    super(WeaveModel, self).__init__(
        model, loss, output_types=output_types, batch_size=batch_size, **kwargs)
def __init__(self,
             n_tasks,
             char_dict,
             seq_length,
             n_embedding=75,
             kernel_sizes=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20],
             num_filters=[100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160],
             dropout=0.25,
             mode="classification",
             **kwargs):
    """
    Parameters
    ----------
    n_tasks: int
        Number of tasks
    char_dict: dict
        Mapping from characters in smiles to integers
    seq_length: int
        Length of sequences (after padding)
    n_embedding: int, optional
        Length of embedding vector
    kernel_sizes: list of int, optional
        Properties of filters used in the conv net
    num_filters: list of int, optional
        Properties of filters used in the conv net
    dropout: float, optional
        Dropout rate
    mode: str
        Either "classification" or "regression" for type of model.
    """
    self.n_tasks = n_tasks
    self.char_dict = char_dict
    self.seq_length = max(seq_length, max(kernel_sizes))
    self.n_embedding = n_embedding
    self.kernel_sizes = kernel_sizes
    self.num_filters = num_filters
    self.dropout = dropout
    self.mode = mode

    # Build the model.
    smiles_seqs = Input(shape=(self.seq_length,), dtype=tf.int32)
    # Character embedding
    embedding = layers.DTNNEmbedding(
        n_embedding=self.n_embedding,
        periodic_table_length=len(self.char_dict.keys()) + 1)(smiles_seqs)
    pooled_outputs = []
    conv_layers = []
    for filter_size, num_filter in zip(self.kernel_sizes, self.num_filters):
        # Multiple convolutional layers with different filter widths
        conv_layers.append(
            Conv1D(kernel_size=filter_size,
                   filters=num_filter,
                   padding='valid')(embedding))
        # Max-over-time pooling
        reduced = Lambda(lambda x: tf.reduce_max(x, axis=1))(conv_layers[-1])
        pooled_outputs.append(reduced)
    # Concat features from all filters (one feature per filter)
    concat_outputs = Concatenate(axis=1)(pooled_outputs)
    dropout = Dropout(rate=self.dropout)(concat_outputs)
    dense = Dense(200, activation=tf.nn.relu)(dropout)
    # Highway layer from https://arxiv.org/pdf/1505.00387.pdf
    gather = layers.Highway()(dense)

    if self.mode == "classification":
        logits = Dense(self.n_tasks * 2)(gather)
        logits = Reshape((self.n_tasks, 2))(logits)
        output = Softmax()(logits)
        outputs = [output, logits]
        output_types = ['prediction', 'loss']
        loss = SoftmaxCrossEntropy()
    else:
        output = Dense(self.n_tasks * 1)(gather)
        output = Reshape((self.n_tasks, 1))(output)
        outputs = [output]
        output_types = ['prediction']
        loss = L2Loss()
    model = tf.keras.Model(inputs=[smiles_seqs], outputs=outputs)
    super(TextCNNModel, self).__init__(
        model, loss, output_types=output_types, **kwargs)
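# Usage sketch (not part of the original code): assumes SMILES strings are
# stored in dataset.ids (where TextCNN reads them from) and that
# build_char_dict is available as a class method, as in standard DeepChem.

import numpy as np
import deepchem as dc

smiles = ["CCO", "CCC", "c1ccccc1"]
y = np.array([[0], [1], [0]])
# TextCNN reads SMILES from dataset.ids, so store them there explicitly.
dataset = dc.data.NumpyDataset(X=np.array(smiles), y=y, ids=np.array(smiles))

char_dict, seq_length = dc.models.TextCNNModel.build_char_dict(dataset)
model = dc.models.TextCNNModel(
    n_tasks=1, char_dict=char_dict, seq_length=seq_length, mode="classification")
model.fit(dataset, nb_epoch=5)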