def get_continuous_model(infer_no_evidence=False):
    """Build a small continuous-leaf SPN with constant-initialized sum weights.

    Args:
        infer_no_evidence: If True, use gradient backprop (and enable
            no-evidence inference on the network); otherwise use unweighted
            hard-EM backprop.

    Returns:
        A SequentialSumProductNetwork over NUM_VARS variables (summary printed).
    """
    # Hard EM is only used when no-evidence inference is off; otherwise fall
    # back to plain gradients.
    backprop_mode = BackpropMode.GRADIENT if infer_no_evidence else BackpropMode.HARD_EM_UNWEIGHTED
    spn = SequentialSumProductNetwork([
        spnk.layers.FlatToRegions(num_decomps=1, input_shape=(NUM_VARS, )),
        # Gaussian leaves: fixed scale, trainable locations starting from the
        # NORMAL_COMPONENTS_LOCATIONS constants.
        spnk.layers.NormalLeaf(
            num_components=NUM_COMPONENTS,
            use_accumulators=True,
            scale_trainable=False,
            location_trainable=True,
            location_initializer=keras.initializers.Constant(
                value=NORMAL_COMPONENTS_LOCATIONS)),
        # Identity permutation over the 4 variables (no padding).
        spnk.layers.PermuteAndPadScopes([[0, 1, 2, 3]]),
        spnk.layers.DenseProduct(num_factors=2),
        spnk.layers.DenseSum(
            num_sums=2,
            logspace_accumulators=False,
            backprop_mode=backprop_mode,
            accumulator_initializer=initializers.Constant(FIRST_SUM_WEIGHTS)),
        spnk.layers.DenseProduct(num_factors=2),
        spnk.layers.RootSum(
            logspace_accumulators=False,
            backprop_mode=backprop_mode,
            accumulator_initializer=initializers.Constant(SECOND_SUM_WEIGHTS),
            return_weighted_child_logits=False),
    ], infer_no_evidence=infer_no_evidence)
    spn.summary()
    return spn
def MyNet_tf(input_shape: Tuple[int, int, int] = (28, 28, 1),
             classes: int = 10) -> tf.keras.Model:
    """A standard DNN implementation in TensorFlow.

    The MyNet model has 3 dense layers (300 -> 64 -> classes).

    Args:
        input_shape: shape of the input data (height, width, channels).
        classes: The number of outputs the model should generate.

    Returns:
        A TensorFlow MyNet model.
    """
    # Bug fix: the input_shape argument was previously ignored -- the first
    # layer was hard-coded to Flatten(input_shape=(28, 28)), contradicting the
    # signature and docstring. Honor the caller-supplied shape instead.
    model = Sequential()
    model.add(layers.Flatten(input_shape=input_shape))
    # Constant kernel init (0.01) and zero biases keep initialization
    # deterministic across the three dense layers.
    for units, activation in ((300, 'relu'), (64, 'relu'), (classes, 'softmax')):
        model.add(
            layers.Dense(units=units,
                         kernel_initializer=initializers.Constant(0.01),
                         bias_initializer=initializers.Zeros(),
                         activation=activation))
    return model
def get_discrete_model():
    """Build a small discrete (indicator-leaf) SPN with constant sum weights.

    Returns:
        A SequentialSumProductNetwork over NUM_VARS int32 variables, with both
        sum layers using unweighted hard-EM backprop (summary printed).
    """
    spn = SequentialSumProductNetwork(
        [
            spnk.layers.FlatToRegions(
                num_decomps=1, input_shape=(NUM_VARS,), dtype=tf.int32
            ),
            spnk.layers.IndicatorLeaf(num_components=NUM_COMPONENTS),
            # Identity permutation over the 4 variables (no padding).
            spnk.layers.PermuteAndPadScopes([[0, 1, 2, 3]]),
            spnk.layers.DenseProduct(num_factors=2),
            spnk.layers.DenseSum(
                num_sums=2,
                logspace_accumulators=False,
                sum_op=SumOpUnweightedHardEMBackprop(),
                accumulator_initializer=initializers.Constant(FIRST_SUM_WEIGHTS),
            ),
            spnk.layers.DenseProduct(num_factors=2),
            spnk.layers.RootSum(
                logspace_accumulators=False,
                sum_op=SumOpUnweightedHardEMBackprop(),
                accumulator_initializer=initializers.Constant(SECOND_SUM_WEIGHTS),
                return_weighted_child_logits=False,
            ),
        ]
    )
    spn.summary()
    return spn
def build(self, input_shape):
    """Create the mu/sigma weights and noise buffers for the noisy dense layer.

    Args:
        input_shape: Input shape; the last entry is the feature dimension used
            to size the kernels.
    """
    assert len(input_shape) >= 2
    self.input_dim = input_shape[-1]
    # Mean ("mu") kernel, trained with the user-supplied initializer.
    self.kernel = self.add_weight(shape=(self.input_dim, self.units),
                                  initializer=self.kernel_initializer,
                                  name='kernel',
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
    # Per-weight noise scale, all initialized to the shared sigma_init value.
    self.sigma_kernel = self.add_weight(
        shape=(self.input_dim, self.units),
        initializer=initializers.Constant(value=self.sigma_init),
        name='sigma_kernel')
    if self.use_bias:
        self.bias = self.add_weight(shape=(self.units, ),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
        self.sigma_bias = self.add_weight(
            shape=(self.units, ),
            initializer=initializers.Constant(value=self.sigma_init),
            name='sigma_bias')
    else:
        self.bias = None
        self.epsilon_bias = None
    # Noise buffers, refreshed via sample_noise().
    # NOTE(review): epsilon_bias is allocated here even when use_bias is
    # False, overwriting the None set above -- confirm intended.
    self.epsilon_kernel = K.zeros(shape=(self.input_dim, self.units))
    self.epsilon_bias = K.zeros(shape=(self.units, ))
    self.sample_noise()
    super(NoisyDense, self).build(input_shape)
def FFNN(input_dimension, output_dimension):
    """Build and compile a 4-layer feed-forward network.

    Args:
        input_dimension: Number of input features.
        output_dimension: Number of (sigmoid-activated) output units.

    Returns:
        A compiled Keras Sequential model (Adam, categorical crossentropy).
    """
    opt = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model = Sequential()
    # Hidden layers use a small positive bias init (0.1).
    model.add(
        Dense(256,
              input_dim=input_dimension,
              activation='relu',
              kernel_initializer='random_uniform',
              bias_initializer=initializers.Constant(0.1)))
    model.add(
        Dense(256,
              activation='relu',
              kernel_initializer='random_uniform',
              bias_initializer=initializers.Constant(0.1)))
    model.add(
        Dense(128,
              activation='relu',
              kernel_initializer='random_uniform',
              bias_initializer=initializers.Constant(0.1)))
    # NOTE(review): sigmoid output combined with categorical_crossentropy is
    # unusual -- confirm multi-label targets are intended (else softmax).
    model.add(
        Dense(output_dimension,
              activation='sigmoid',
              kernel_initializer='random_uniform',
              bias_initializer='zeros'))
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    #model.summary()
    return model
def main(config="../../config.yaml", namespace=""):
    """Run a guest/host hetero-NN FATE pipeline on the breast_hetero tables.

    Args:
        config: Path to a job-config YAML, or an already-loaded config object.
        namespace: Suffix appended to the data namespaces (isolates runs).
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}
    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)
    # Readers pull each party's local training table.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)
    # Only the guest side holds labels.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)
    intersection_0 = Intersection(name="intersection_0")
    hetero_nn_0 = HeteroNN(name="hetero_nn_0", epochs=100,
                           interactive_layer_lr=0.15, batch_size=-1, early_stop="diff",
                           selector_param={"method": "relative"})
    # Constant kernel init (value=1) keeps runs reproducible.
    guest_nn_0 = hetero_nn_0.get_party_instance(role='guest', party_id=guest)
    guest_nn_0.add_bottom_model(Dense(units=3, input_shape=(10,), activation="relu",
                                      kernel_initializer=initializers.Constant(value=1)))
    # NOTE(review): "interactve" is the upstream API's own (misspelled) method
    # name -- do not "correct" it locally.
    guest_nn_0.set_interactve_layer(Dense(units=2, input_shape=(2,),
                                          kernel_initializer=initializers.Constant(value=1)))
    guest_nn_0.add_top_model(Dense(units=1, input_shape=(2,), activation="sigmoid",
                                   kernel_initializer=initializers.Constant(value=1)))
    host_nn_0 = hetero_nn_0.get_party_instance(role='host', party_id=host)
    host_nn_0.add_bottom_model(Dense(units=3, input_shape=(20,), activation="relu",
                                     kernel_initializer=initializers.Constant(value=1)))
    host_nn_0.set_interactve_layer(Dense(units=2, input_shape=(2,),
                                         kernel_initializer=initializers.Constant(value=1)))
    hetero_nn_0.compile(optimizer=optimizers.SGD(lr=0.15), loss="binary_crossentropy")
    # hetero_nn_1 reuses hetero_nn_0's trained model for prediction.
    hetero_nn_1 = HeteroNN(name="hetero_nn_1")
    evaluation_0 = Evaluation(name="evaluation_0")
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_nn_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_nn_1, data=Data(test_data=intersection_0.output.data),
                           model=Model(model=hetero_nn_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_nn_0.output.data))
    pipeline.compile()
    pipeline.fit()
    print(hetero_nn_0.get_config(roles={"guest": [guest], "host": [host]}))
    print(pipeline.get_component("hetero_nn_0").get_summary())
def build_model(
        self,
        input_shape,
        number_of_layers=5,
        neurons_by_layers=10,
        bias_mu_throttle=0.5,
        bias_mu_steering=0.0,
        bias_sigma_throttle=0.03,  # ~1% outside (0.5 +- 0.4)
        bias_sigma_steering=0.1,  # ~2% outside (0 +- 0.9)
        dropout=0.15):
    """ Construct the actor network with mu and sigma as output

    Args:
        input_shape: Shape of the observation input.
        number_of_layers: Number of hidden Dense layers.
        neurons_by_layers: Units per hidden Dense layer.
        bias_mu_throttle: Initial bias for the throttle mean head.
        bias_mu_steering: Initial bias for the steering mean head.
        bias_sigma_throttle: Initial bias for the throttle sigma head.
        bias_sigma_steering: Initial bias for the steering sigma head.
        dropout: Dropout rate applied between hidden layers.

    Returns:
        A keras.Model mapping inputs to
        [mu_throttle, sigma_throttle, mu_steering, sigma_steering].
    """
    print(f"Input shape of policy: {input_shape}")
    inputs = layers.Input(shape=input_shape)
    prev_layer = inputs
    for i in range(number_of_layers):
        current_layer = layers.Dense(
            neurons_by_layers,
            activation="relu",
            kernel_initializer=initializers.he_normal())(prev_layer)
        # Bug fix: the Dropout layer's output used to be discarded
        # (prev_layer was reset to current_layer right after), so dropout was
        # never actually applied. Wire it into the graph between hidden layers.
        if i < number_of_layers - 1:
            prev_layer = layers.Dropout(dropout)(current_layer)
        else:
            prev_layer = current_layer
    current_layer = layers.Flatten()(prev_layer)
    # Mean heads use linear activation; sigma heads use softplus so the
    # standard deviations stay positive. All start from zero kernels plus the
    # configured constant biases.
    mu_throttle = layers.Dense(1, activation="linear",
                               kernel_initializer=initializers.Zeros(),
                               bias_initializer=initializers.Constant(
                                   bias_mu_throttle))(current_layer)
    sigma_throttle = layers.Dense(1, activation="softplus",
                                  kernel_initializer=initializers.Zeros(),
                                  bias_initializer=initializers.Constant(
                                      bias_sigma_throttle))(current_layer)
    mu_steering = layers.Dense(1, activation="linear",
                               kernel_initializer=initializers.Zeros(),
                               bias_initializer=initializers.Constant(
                                   bias_mu_steering))(current_layer)
    sigma_steering = layers.Dense(1, activation="softplus",
                                  kernel_initializer=initializers.Zeros(),
                                  bias_initializer=initializers.Constant(
                                      bias_sigma_steering))(current_layer)
    actor_network = keras.Model(
        inputs=inputs,
        outputs=[mu_throttle, sigma_throttle, mu_steering, sigma_steering])
    return actor_network
def sparse_kwargs(weight_matrix):
    """Defines kwargs needed for the Sparse layer to initialize its weights."""
    # Work on the transposed matrix so indices/values follow its layout.
    transposed = np.transpose(weight_matrix)
    nonzero_coords = np.nonzero(transposed)
    # Stack the coordinate arrays into an (n_nonzero, ndim) index table and
    # gather the matching values in the same (row-major) order.
    index_table = np.transpose(nonzero_coords)
    nonzero_values = transposed[nonzero_coords]
    return {
        "n_nonzero": len(nonzero_values),
        "indices_initializer": initializers.Constant(index_table),
        "values_initializer": initializers.Constant(nonzero_values),
    }
def vgg_from_t7(t7_file, target_layer=None):
    '''Extract VGG layers from a Torch .t7 model into a Keras model
        e.g. vgg = vgg_from_t7('vgg_normalised.t7', target_layer='relu4_1')
        Adapted from https://github.com/jonrei/tf-AdaIN/blob/master/AdaIN.py
        Converted caffe->t7 from https://github.com/xunhuang1995/AdaIN-style

    Args:
        t7_file: Path to the Torch7 model file.
        target_layer: Optional layer name at which to truncate (inclusive).

    Returns:
        A Keras Model from the VGG input up to target_layer (or the full net).

    Raises:
        NotImplementedError: On an unrecognized Torch module type.
    '''
    t7 = torchfile.load(t7_file, force_8bytes_long=True)
    inp = Input(shape=(None, None, 3), name='vgg_input')
    x = inp
    for idx, module in enumerate(t7.modules):
        name = module.name.decode() if module.name is not None else None
        if idx == 0:
            name = 'preprocess'  # VGG 1st layer preprocesses with a 1x1 conv to multiply by 255 and subtract BGR mean as bias
        if module._typename == b'nn.SpatialReflectionPadding':
            x = Lambda(pad_reflect)(x)
        elif module._typename == b'nn.SpatialConvolution':
            filters = module.nOutputPlane
            kernel_size = module.kH
            # Reorder the weight axes into Keras' (kH, kW, in, out) layout.
            weight = module.weight.transpose([2, 3, 1, 0])
            bias = module.bias
            # Weights are frozen -- this model is a fixed feature extractor.
            x = Conv2D(filters, kernel_size, padding='valid',
                       activation=None, name=name,
                       kernel_initializer=initializers.Constant(weight),
                       bias_initializer=initializers.Constant(bias),
                       trainable=False)(x)
        elif module._typename == b'nn.ReLU':
            x = Activation('relu', name=name)(x)
        elif module._typename == b'nn.SpatialMaxPooling':
            x = MaxPooling2D(padding='same', name=name)(x)
        # elif module._typename == b'nn.SpatialUpSamplingNearest': # Not needed for VGG
        #     x = Upsampling2D(name=name)(x)
        else:
            raise NotImplementedError(module._typename)
        if name == target_layer:
            # print("Reached target layer", target_layer)
            break
    # Hook it up
    model = Model(inputs=inp, outputs=x)
    return model
def build(self, input_shape):
    """Create the mu/sigma weights for the noisy dense layer.

    Args:
        input_shape: Input shape; the last dimension must be defined.

    Raises:
        TypeError: If the layer dtype is not floating point or complex.
        ValueError: If the last input dimension is unknown.
    """
    dtype = dtypes.as_dtype(self.dtype or k.floatx())
    if not (dtype.is_floating or dtype.is_complex):
        # NOTE(review): adjacent string literals concatenate without a space,
        # yielding "...non-floating pointdtype ..." -- cosmetic message bug.
        raise TypeError('Unable to build `NoisyDense` layer with non-floating point'
                        'dtype %s' % (dtype,))
    input_shape = tensor_shape.TensorShape(input_shape)
    if tensor_shape.dimension_value(input_shape[-1]) is None:
        raise ValueError('The last dimension of the inputs to `NoisyDense` '
                         'should be defined. Found `None`.')
    last_dim = tensor_shape.dimension_value(input_shape[-1])
    self.input_spec = InputSpec(min_ndim=2, axes={-1: last_dim})
    # Uniform-init bound: sqrt(3 / fan_in) unless a custom std_func is given.
    if self.std_func is None:
        std = math.sqrt(3 / input_shape[-1])
    else:
        std = self.std_func(input_shape[-1])
    # Optionally rescale the initial sigma by fan-in as well.
    if self.sigma_func is not None:
        sigma_init = self.sigma_func(self.sigma_init, input_shape[-1])
    else:
        sigma_init = self.sigma_init
    self.mu_weights = self.add_weight(
        'mu_weights',
        shape=[last_dim, self.units],
        initializer=initializers.RandomUniform(minval=-std, maxval=std),
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint,
        dtype=self.dtype,
        trainable=True)
    self.sigma_weights = self.add_weight(
        'sigma_weights',
        shape=[last_dim, self.units],
        initializer=initializers.Constant(value=sigma_init),
        dtype=self.dtype,
        trainable=True)
    if self.use_bias:
        self.mu_bias = self.add_weight(
            'mu_bias',
            shape=[self.units, ],
            initializer=initializers.RandomUniform(minval=-std, maxval=std),
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint,
            dtype=self.dtype,
            trainable=True)
        self.sigma_bias = self.add_weight(
            'sigma_bias',
            shape=[self.units, ],
            initializer=initializers.Constant(value=sigma_init),
            dtype=self.dtype,
            trainable=True)
    self.built = True
def get_model_arch(model_arch, var, sweep=False):
    """Select an architecture builder by name and construct the model.

    Args:
        model_arch: One of 'dnn', 'conv1d', 'incept1d', 'lstm'.
        var: Config object providing problem_type, l1_reg, l2_reg, drop_rate,
            and input_len.
        sweep: If True, use the hyperparameter-sweep variant where one exists
            ('dnn' and 'lstm').

    Returns:
        The constructed model.
    """
    # Fixed seed keeps kernel initialization reproducible across runs.
    seed_value = 1
    layer_kwargs = {'kernel_initializer':initializers.glorot_uniform(seed=seed_value),
                    'bias_initializer':initializers.Constant(0.1),
                    }
    # Output head depends on the problem type: binary / 3-way / regression.
    if var.problem_type == 'binary':
        output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid', **layer_kwargs)
    elif var.problem_type == 'category':
        output_layer = tf.keras.layers.Dense(units=3, activation='softmax', **layer_kwargs)
    else:
        output_layer = tf.keras.layers.Dense(units=1, activation=None, **layer_kwargs)
    if model_arch == 'dnn':
        arch = dnn_model
        if sweep:
            arch = dnn_model_sweep
    elif model_arch == 'conv1d':
        arch = conv1d_model
    elif model_arch == 'incept1d':
        arch = inception1d_model
    elif model_arch == 'lstm':
        arch = lstm_model
        if sweep:
            arch = lstm_model_sweep
    # NOTE(review): an unrecognized model_arch leaves `arch` unbound and the
    # call below raises UnboundLocalError -- consider an explicit ValueError.
    model = arch(var.l1_reg, var.l2_reg, var.drop_rate, var.input_len,
                 output_layer, layer_kwargs, var)
    return model
def build(self, input_shape):
    """Create highway-gate and residual-projection weights.

    Weights are only created when `keep_len` is set.

    Args:
        input_shape: Input shape; the last dimension is the channel count.
    """
    if self.keep_len:
        if self.use_highway:
            # Highway transform: 1x(in)x(out) weights plus gate bias b_t.
            self.W_t = self.add_weight(
                shape=(1, input_shape[-1], self.out_channels),
                dtype=tf.float32,
                initializer=initializers.RandomNormal(0, 0.01),
                trainable=True)
            # b_t uses a constant initializer unless high_init is 0, in which
            # case None falls back to the framework default.
            self.b_t = self.add_weight(
                shape=(self.out_channels, ),
                dtype=tf.float32,
                trainable=True,
                initializer=(initializers.Constant(self.high_init)
                             if self.high_init != 0 else None))
            self.b_h = self.add_weight(shape=(self.out_channels, ),
                                       dtype=tf.float32,
                                       trainable=True)
        if input_shape[-1] != self.out_channels:
            # Channel counts differ: add a projection for the shortcut path.
            self.W_r = self.add_weight(
                shape=(1, input_shape[-1], self.out_channels),
                dtype=tf.float32,
                initializer=initializers.RandomNormal(0, 0.01),
                trainable=True)
            self.b_r = self.add_weight(shape=(self.out_channels, ),
                                       dtype=tf.float32,
                                       trainable=True)
def generate_permutations(self, factors, num_vars_spn_input):
    """Generate one random variable permutation per decomposition.

    Args:
        factors: Non-empty sequence of product factors (bottom up); their
            cumulative product must be able to cover all input variables.
        num_vars_spn_input: Number of variables at the SPN input.

    Returns:
        List of permutations (one per decomposition). -1 entries mark padded
        (empty) variable slots when the factor product exceeds the number of
        variables.

    Raises:
        ValueError: If `factors` is empty, covers too few variables, or
            contains redundant top factors.
    """
    if not factors:
        # Bug fix: the "{}" placeholder was never formatted here, so the
        # error message printed a literal "{}". Format with self, matching
        # the other ValueError messages in this method.
        raise ValueError(
            "{}: factors needs to be a non-empty sequence.".format(self))
    factor_cumprod = np.cumprod(factors)
    factor_prod = factor_cumprod[-1]
    if factor_prod < num_vars_spn_input:
        raise ValueError(
            "{}: not enough factors to cover all variables ({} vs. {}).".
            format(self, factor_prod, num_vars_spn_input))
    for i, fc in enumerate(factor_cumprod[:-1]):
        if fc >= num_vars_spn_input:
            raise ValueError(
                "{}: too many factors, taking out the bottom {} products still "
                "results in {} factors while {} are needed.".format(
                    self, len(factors) - i - 1, fc, num_vars_spn_input))
    # Now we generate the random index permutations
    perms = [
        np.random.permutation(num_vars_spn_input).astype(int).tolist()
        for _ in range(self.num_decomps)
    ]
    num_m1 = factor_prod - num_vars_spn_input
    if num_m1 > 0:
        # e.g. num_m1 == 2 and factor_prod = 32. Then rate_m1 is 16, so once
        # every 16 values we should leave a variable slot empty
        rate_m1 = int(np.floor(factor_prod / num_m1))
        for p in perms:
            for i in range(num_m1):
                p.insert(i * rate_m1, -1)
    self.permutations = self.add_weight(
        initializer=initializers.Constant(perms), trainable=False)
    return perms
def __init__(self,
             return_weighted_child_logits=True,
             logspace_accumulators=None,
             accumulator_initializer=None,
             backprop_mode=BackpropMode.GRADIENT,
             dimension_permutation=DimensionPermutation.AUTO,
             accumulator_regularizer=None,
             linear_accumulator_constraint=None,
             **kwargs):
    """Root sum layer.

    Args:
        return_weighted_child_logits: Whether to return per-child weighted
            logits rather than the single root value.
        logspace_accumulators: Whether accumulators live in log space; when
            None it is inferred from backprop_mode.
        accumulator_initializer: Accumulator initializer; defaults to
            Constant(1.0).
        backprop_mode: BackpropMode controlling gradient vs. (hard) EM
            learning.
        dimension_permutation: DimensionPermutation strategy.
        accumulator_regularizer: Optional accumulator regularizer.
        linear_accumulator_constraint: Constraint for linear-space
            accumulators; defaults to GreaterEqualEpsilon(1e-10).
        **kwargs: Forwarded to the base layer.

    Raises:
        NotImplementedError: If logspace accumulators are requested together
            with a non-gradient backprop mode.
    """
    super(RootSum, self).__init__(**kwargs)
    self.return_weighted_child_logits = return_weighted_child_logits
    self.accumulator_initializer = accumulator_initializer or initializers.Constant(
        1.0)
    # Infer the accumulator space from the backprop mode unless the caller
    # chose explicitly.
    self.logspace_accumulators = infer_logspace_accumulators(backprop_mode) \
        if logspace_accumulators is None else logspace_accumulators
    self.backprop_mode = backprop_mode
    self.dimension_permutation = dimension_permutation
    self.accumulator_regularizer = accumulator_regularizer
    # Default constraint keeps linear accumulators strictly positive.
    self.linear_accumulator_constraint = \
        linear_accumulator_constraint or GreaterEqualEpsilon(1e-10)
    # Populated later (None until the layer is built).
    self.accumulators = self._num_nodes_in = self._inferred_dimension_permutation = None
    if backprop_mode != BackpropMode.GRADIENT and logspace_accumulators:
        raise NotImplementedError(
            "Logspace accumulators can only be used with BackpropMode.GRADIENT"
        )
def build(self, hp):
    """Build a two-layer LSTM model from sampled hyperparameters.

    Args:
        hp: Keras-Tuner HyperParameters object; samples dropout, the output
            bias constant, and the optimizer choice.

    Returns:
        A compiled Keras Sequential model (MSE loss).
    """
    ###### Setup hyperparamaters
    dropout = hp.Float('dropout', 0.1, 0.6)
    bias_constant = hp.Float('bias', 0.01, 0.03)
    optimizer = hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop'])
    ###### Construct model
    # Initially, the network model is defined
    model = Sequential()
    # Add layers
    model.add(
        LSTM(units=self.lstm_units,
             input_shape=self.input_shape,
             return_sequences=True))
    model.add(Dropout(dropout))
    # Second LSTM halves the unit count and collapses the sequence.
    model.add(
        LSTM(units=int(self.lstm_units * 0.5), return_sequences=False))
    model.add(Dropout(dropout))
    # NOTE(review): `features` is not defined in this method -- presumably a
    # module-level constant; confirm it should not be self.features.
    model.add(
        Dense(features,
              activation='sigmoid',
              kernel_initializer=initializers.he_uniform(seed=0),
              bias_initializer=initializers.Constant(bias_constant)))
    # Compile model
    model.compile(
        optimizer=self.get_optimizer(hp, optimizer),
        loss='mse',
    )
    return model
def _strip_clustering_wrapper(layer):
    """Unwrap a ClusterWeights wrapper, rebuilding the inner layer's weights.

    Args:
        layer: Any Keras layer; non-wrapped layers are returned unchanged.

    Returns:
        The underlying layer with concrete weights re-created from the
        wrapper's restore map, or `layer` itself if it was not wrapped.
    """
    if isinstance(layer, cluster_wrapper.ClusterWeights):
        # Carry the recorded batch input shape over to the inner layer when
        # only the wrapper has one.
        if not hasattr(layer.layer, '_batch_input_shape') and\
            hasattr(layer, '_batch_input_shape'):
            layer.layer._batch_input_shape = layer._batch_input_shape
        # We reset both arrays of weights, so that we can guarantee the correct
        # order of newly created weights
        layer.layer._trainable_weights = []
        layer.layer._non_trainable_weights = []
        for i in range(len(layer.restore)):
            # This is why we used integers as keys
            name, weight = layer.restore[i]
            # In both cases we use k.batch_get_value since we need physical copies
            # of the arrays to initialize a new tensor
            if i in layer.gone_variables:
                # If the variable was removed because it was clustered, we restore it
                # by using updater we created earlier
                new_weight_value = k.batch_get_value([weight()])[0]
            else:
                # If the value was not clustered(e.g. bias), we still store a valid
                # reference to the tensor. We use this reference to get the value
                new_weight_value = k.batch_get_value([weight])[0]
            layer.layer.add_weight(
                name=name,
                shape=new_weight_value.shape,
                initializer=initializers.Constant(new_weight_value),
                trainable=True)
        # When all weights are filled with the values, just return the underlying
        # layer since it is now fully autonomous from its wrapper
        return layer.layer
    return layer
def build(self, input_shape): """ This method must be defined for any custom layer, here you define the training parameters. input_shape: a tensor that automatically captures the dimensions of the input by tensorflow. """ # retreive the number of waveguides self.num_wg = input_shape[-1] # define a list of trainable tensorflow parameters representing the coupling coefficients coupling_coeff = [] for idx_wg in range(self.num_wg): coupling_coeff.append( self.add_weight(name="e%s" % idx_wg, shape=tf.TensorShape(()), initializer=initializers.Constant(1.0), trainable=True, constraint=constraints.non_neg())) # convert the list of tensors to one tensor coupling_coeff = tf.convert_to_tensor(coupling_coeff) # add an extra dimension to represent time coupling_coeff = tf.expand_dims(coupling_coeff, 0) # add another dimension to represent batch coupling_coeff = tf.expand_dims(coupling_coeff, 0) # store this tensor as a class member so we can access it from othe methods in the class self.coupling_coeff = coupling_coeff # this has to be called for any tensorflow custom layer super(Coupling_Layer, self).build(input_shape)
def __init__(self,
             num_sums,
             logspace_accumulators=None,
             accumulator_initializer=None,
             backprop_mode=BackpropMode.GRADIENT,
             accumulator_regularizer=None,
             linear_accumulator_constraint=GreaterEqualEpsilon(1e-10),
             **kwargs):
    """Local 2D sum layer.

    Args:
        num_sums: Number of sums per scope.
        logspace_accumulators: Whether accumulators live in log space; when
            None it is inferred from backprop_mode.
        accumulator_initializer: Accumulator initializer; defaults to
            Constant(1).
        backprop_mode: BackpropMode controlling gradient vs. (hard) EM
            learning.
        accumulator_regularizer: Optional accumulator regularizer.
        linear_accumulator_constraint: Constraint keeping linear-space
            accumulators strictly positive.
        **kwargs: Forwarded to the base layer.
    """
    # TODO make docstrings more consistent across different sum instances
    # TODO automatically infer value of logspace_accumulator from the backprop mode
    # TODO verify compatibility of backprop mode and logspace_accumulator
    # TODO consider renaming 'accumulator' to 'child_counts'
    # NOTE(review): the GreaterEqualEpsilon(1e-10) default is created once at
    # definition time and shared by all instances -- confirm this is harmless.
    super(Local2DSum, self).__init__(**kwargs)
    self.num_sums = num_sums
    self.logspace_accumulators = infer_logspace_accumulators(backprop_mode) \
        if logspace_accumulators is None else logspace_accumulators
    self.accumulator_initializer = accumulator_initializer or initializers.Constant(
        1)
    self.backprop_mode = backprop_mode
    self.accumulator_regularizer = accumulator_regularizer
    self.linear_accumulator_constraint = linear_accumulator_constraint
    # Populated later (None until the layer is built).
    self.accumulators = None
def create_model(optimizer='adam'):
    """Build and compile a small regression MLP.

    NOTE(review): layer sizes come from the module-level globals D_train and
    t_train -- confirm both are defined before this is called.

    Args:
        optimizer: Keras optimizer instance or name.

    Returns:
        A compiled Keras Model (mean squared error loss).
    """
    # create model
    #random normal distribution with fixed seed number for random number generator
    random_normal = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=0)
    #input layer
    input_layer = Input(shape=(D_train.shape[1],), name="input")
    #hidden layer
    hidden_layers = input_layer
    for i in range(1):
        hidden_layers = Dense(10, activation='tanh',
                              kernel_initializer=random_normal,
                              bias_initializer=initializers.Constant(value=1.0),
                              name="hidden_%d" % (i+1))(hidden_layers)
    #output layer
    output_layer = Dense(t_train.shape[1], activation='linear',
                         kernel_initializer=random_normal,
                         name="output")(hidden_layers)
    model = Model(input_layer, output_layer)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model
def build(self, input_shape):
    """Create the position index weight (values 1..dims) for this layer.

    Args:
        input_shape: Input shape, forwarded to the base class build.
    """
    # Bug fix: np.arrane does not exist (AttributeError the first time this
    # layer is built); the intended call is np.arange(1, 1 + self.dims).
    self.position = self.add_weight(name='position',
                                    shape=(self.dims, ),
                                    dtype=tf.int64,
                                    initializer=initializers.Constant(np.arange(1, 1 + self.dims)),
                                    trainable=True)
    # NOTE(review): an int64 weight marked trainable=True is unusual since
    # gradients do not flow through integer tensors -- confirm intent.
    super(PositionInput, self).build(input_shape)
def build_critic_network(input_dims, learning_rate, act_type): state_input = Input(shape=input_dims) # Classification block if act_type == "tanh": print("Activations set to TANH.") dense1 = Dense(512, activation='tanh', name='fc1', kernel_initializer='glorot_normal')(state_input) dense2 = Dense(256, activation='tanh', name='fc2', kernel_initializer='glorot_normal')(dense1) dense3 = Dense(256, activation='tanh', name='fc3', kernel_initializer='glorot_normal')(dense2) elif act_type == "leaky": print("Activations set to Leaky ReLU.") dense1 = Dense( 512, activation=LeakyReLU(alpha=0.1), name='fc1', kernel_initializer='he_uniform', bias_initializer=initializers.Constant(0.01))(state_input) dense2 = Dense(256, activation=LeakyReLU(alpha=0.1), name='fc2', kernel_initializer='he_uniform', bias_initializer=initializers.Constant(0.01))(dense1) dense3 = Dense(256, activation=LeakyReLU(alpha=0.1), name='fc3', kernel_initializer='he_uniform', bias_initializer=initializers.Constant(0.01))(dense2) pred_value = Dense(1, activation='tanh', name='critic_values')(dense3) c_opt = Adam(lr=learning_rate, clipvalue=0.5) #c_opt = SGD(lr=learning_rate) critic = Model(inputs=[state_input], outputs=[pred_value]) critic.compile(optimizer=c_opt, loss='mse') critic.summary() return critic
def get_dynamic_model():
    """Build a DynamicSumProductNetwork from template, interface and top nets.

    All sum layers share linear-space accumulators with unweighted hard-EM
    backprop (via sum_kwargs).

    Returns:
        A spnk.models.DynamicSumProductNetwork.
    """
    # Shared configuration applied to every sum layer below.
    sum_kwargs = dict(logspace_accumulators=False,
                      sum_op=SumOpUnweightedHardEMBackprop())
    # Per-step template: indicator leaves -> product/sum/product stack.
    template = keras.models.Sequential([
        spnk.layers.FlatToRegions(num_decomps=1,
                                  input_shape=(NUM_VARS, ),
                                  dtype=tf.int32),
        spnk.layers.IndicatorLeaf(num_components=NUM_COMPONENTS),
        spnk.layers.PermuteAndPadScopes([[0, 1, 2, 3]]),
        spnk.layers.DenseProduct(num_factors=2),
        spnk.layers.DenseSum(
            num_sums=2,
            accumulator_initializer=initializers.Constant(FIRST_SUM_WEIGHTS),
            **sum_kwargs),
        spnk.layers.DenseProduct(num_factors=2),
    ])
    # Root network with fixed initial accumulator weights.
    top_net = keras.Sequential(
        [
            spnk.layers.RootSum(accumulator_initializer=initializers.Constant(
                SECOND_SUM_WEIGHTS),
                                input_shape=[1, 1, 4],
                                return_weighted_child_logits=False,
                                **sum_kwargs)
        ],
        name="top_net",
    )
    # Interface sum networks for the t-1 and t0 connections.
    interface_t_minus1 = keras.Sequential(
        [
            spnk.layers.DenseSum(
                num_sums=2, input_shape=[1, 1, 4], **sum_kwargs)
        ],
        name="interface_t_minus_1",
    )
    interface_t0 = keras.Sequential(
        [
            spnk.layers.DenseSum(
                num_sums=2, input_shape=[1, 1, 4], **sum_kwargs)
        ],
        name="interface_t0",
    )
    dynamic_spn = spnk.models.DynamicSumProductNetwork(
        template_network=template,
        interface_network_t0=interface_t0,
        interface_network_t_minus_1=interface_t_minus1,
        top_network=top_net,
    )
    return dynamic_spn
def create_KBLSTM_model():
    """Build and compile the knowledge-based BiLSTM regression model.

    Word embeddings are built from the raw .vec file on the first run and
    cached as a .npy; NELL entity embeddings are loaded frozen. The BiLSTM
    output goes through attention, a knowledge layer, and a residual add
    before the final score.

    Returns:
        A compiled Keras Model ([text_ids, entity_ids] -> scalar, MSE loss).
    """
    text_in = layers.Input(shape=(25,), dtype='int32', name="TextIn")
    input_entities = layers.Input(shape=(25,), dtype='int32', name="EntityInput")
    embed_path = "../data/embeddings/numpy/GNews.npy"
    print("Loading embeddings...")
    if not os.path.isfile(embed_path):
        # First run: parse the pretrained vectors and project them onto the
        # training vocabulary, then cache the matrix to disk.
        embeddings = {}
        with codecs.open('../data/embeddings/wiki-news-300d-1m.vec', encoding='utf-8') as f:
            for line in tqdm.tqdm(f):
                values = line.rstrip().rsplit(' ')
                word = values[0]
                coefs = np.asarray(values[1:], dtype='float32')
                embeddings[word] = coefs
        with codecs.open('../data/vocab/train_vocab.funlines.json', encoding='utf-8') as fp:
            vocab_dict = json.load(fp)
        embed_matrix = np.zeros((len(vocab_dict), 300))
        i = 0  # counts vocab words missing from the pretrained vectors
        for k, v in vocab_dict.items():
            try:
                embed_matrix[v] = embeddings[k]
            except KeyError:
                # print(f'{k} does not exist in FastText embeddings')
                i += 1
        print(len(vocab_dict), i)
        np.save(embed_path, embed_matrix)
    else:
        embed_matrix = np.load(embed_path, allow_pickle=True)
    # Frozen word-embedding layer initialized from the cached matrix.
    embed_layer = layers.Embedding(input_dim=len(embed_matrix), output_dim=300,
                                   trainable=False,
                                   embeddings_initializer=initializers.Constant(embed_matrix))(text_in)
    # Frozen NELL entity embeddings (181544 entities x 100 dims).
    embeddings = np.load('../data/NELL/embeddings/entity.npy')
    entity_embedding = layers.Embedding(181544, 100,
                                        embeddings_initializer=initializers.Constant(embeddings),
                                        trainable=False,
                                        name="EntityEmbeddings")(input_entities)
    HIDDEN_LAYER_DIMENSION = 64
    state_vector = layers.Bidirectional(layers.LSTM(HIDDEN_LAYER_DIMENSION,
                                                    dropout=0.5,
                                                    return_sequences=True))(embed_layer)
    attention_layer = AttentionWeightedAverage()(state_vector)
    attention_layer = layers.Dense(100, activation='relu')(attention_layer)
    hidden = KnowledgeLayer()([attention_layer, entity_embedding])
    # attention_layer = layers.Dense(64, activation='relu')(attention_layer)
    # Residual combination of knowledge output and attention features.
    hidden = layers.add([hidden, attention_layer])
    preds = layers.Dense(1)(hidden)
    m = Model([text_in, input_entities], preds)
    m.compile(optimizer=optimizers.Adam(),
              loss="mean_squared_error",
              metrics=[metrics.RootMeanSquaredError()])
    m.summary()
    return m
def __init__(self, rate, **kwargs):
    """Dropout layer that stores its rate as a non-trainable scalar weight.

    Args:
        rate: Drop probability used to initialize the `drop_rate` variable.
        **kwargs: Forwarded to the base layer.
    """
    super(Dropout, self).__init__(**kwargs)
    # The rate is kept as a scalar tf variable rather than a plain float, so
    # it can be read and assigned like any other layer weight.
    self.rate = self.add_weight(
        name="drop_rate",
        shape=(),
        dtype=tf.float32,
        initializer=initializers.Constant(rate),
        trainable=False,
    )
def __init__(self, init_val=0., **kwargs):
    """Layer holding a single trainable scalar residual weight.

    Args:
        init_val: Initial value of the scalar `res_weight` (default 0).
        **kwargs: Forwarded to the base layer.
    """
    super().__init__(**kwargs)
    self.init_val = init_val
    # Scalar trainable weight initialized to init_val.
    self.res_weight = self.add_weight(
        name='res_weight',
        shape=(),
        dtype=tf.float32,
        trainable=True,
        initializer=initializers.Constant(init_val))
def __init__(self, embeddings, vocab, **kwargs):
    """Build the AlphaTextWorld network components.

    Args:
        embeddings: Pretrained word-embedding matrix used to initialize the
            (trainable) embedding layer.
        vocab: Iterable of vocabulary words; their enumeration order defines
            the word ids.
        **kwargs: Forwarded to the base model class.
    """
    super(AlphaTextWorldNet, self).__init__(**kwargs)
    self.word2id = {w: i for i, w in enumerate(vocab)}
    # Bug fix: the inverse map previously unpacked the (word, id) items as
    # (i, w), which just rebuilt word2id instead of inverting it. Unpack as
    # (w, i) so id2word really maps id -> word.
    self.id2word = {i: w for w, i in self.word2id.items()}
    # NOTE(review): this unpacking assumes embeddings.shape == (dim, vocab);
    # many embedding formats are (vocab, dim) -- confirm against the data.
    embedding_dim, vocab_size = embeddings.shape
    self.embeddings = layers.Embedding(
        input_dim=vocab_size,
        input_length=None,
        output_dim=embedding_dim,
        embeddings_initializer=initializers.Constant(embeddings),
        trainable=True,
        name="embeddings")
    # Self/cross attention encoders over memory and command streams.
    self.memory_encoder = SelfAttentionEncoder(units=self.HIDDEN_UNITS,
                                               num_heads=self.ATT_HEADS,
                                               num_blocks=2,
                                               l2=self.REG_PENALTY,
                                               name="memory_encoder")
    self.cmd_encoder = SelfAttentionEncoder(units=self.HIDDEN_UNITS,
                                            num_heads=self.ATT_HEADS,
                                            num_blocks=1,
                                            l2=self.REG_PENALTY,
                                            name="cmd_encoder")
    self.attention_encoder = AttentionEncoder(units=self.HIDDEN_UNITS,
                                              num_heads=self.ATT_HEADS,
                                              num_blocks=2,
                                              l2=self.REG_PENALTY,
                                              name="att_encoder")
    # Time/turn attention layers feeding the two heads.
    self.memory_time_encode = TimeSelfAttention(units=self.HIDDEN_UNITS,
                                                l2=self.REG_PENALTY,
                                                name="value_time_encode")
    self.memory_turn_encode = TimeSelfAttention(units=self.HIDDEN_UNITS +
                                                self.POSFREQS,
                                                l2=self.REG_PENALTY,
                                                name="value_turn_encode")
    self.value_head = DenseHead(hidden_units=self.HIDDEN_UNITS,
                                dropout=0.5,
                                l2=self.REG_PENALTY,
                                name="value_head")
    self.cmd_turn_encode = TimeSelfAttention(units=self.HIDDEN_UNITS +
                                             self.POSFREQS,
                                             l2=self.REG_PENALTY,
                                             name="cmd_turn_encode")
    self.policy_head = DenseHead(hidden_units=self.HIDDEN_UNITS +
                                 self.POSFREQS,
                                 dropout=0.5,
                                 l2=self.REG_PENALTY,
                                 name="policy_head")
def build(self, input_shape):
    """Create gamma/beta, moving statistics, and rmax/dmax/steps state.

    Args:
        input_shape: Input shape; normalization runs over the last dimension.

    Raises:
        ValueError: If the normalization axis has unknown dimension.
    """
    dim = input_shape[-1]
    if dim is None:
        raise ValueError(("The normalization axis should have a "
                          "defined dimension"))
    self.dim = dim
    # Trainable part
    self.gamma = self.add_weight(shape=(dim, ),
                                 name="gamma",
                                 initializer=initializers.get("ones"))
    self.beta = self.add_weight(shape=(dim, ),
                                name="beta",
                                initializer=initializers.get("zeros"))
    # Statistics (non-trainable moving estimates)
    self.moving_mean = self.add_weight(
        shape=(dim, ),
        name="moving_mean",
        initializer=initializers.get("zeros"),
        trainable=False)
    self.moving_sigma = self.add_weight(
        shape=(dim, ),
        name="moving_sigma",
        initializer=initializers.get("ones"),
        trainable=False)
    # rmax, dmax and steps: non-trainable scalars; rmax/dmax start from the
    # configured rmax_0/dmax_0 values.
    self.steps = self.add_weight(shape=tuple(),
                                 name="steps",
                                 initializer=initializers.get("zeros"),
                                 trainable=False)
    self.rmax = self.add_weight(shape=tuple(),
                                name="rmax",
                                initializer=initializers.Constant(
                                    self.rmax_0),
                                trainable=False)
    self.dmax = self.add_weight(shape=tuple(),
                                name="dmax",
                                initializer=initializers.Constant(
                                    self.dmax_0),
                                trainable=False)
    self.built = True
def build(self, input_shape):
    """Create mu/sigma kernels and biases for the noisy convolution layer.

    Args:
        input_shape: Input shape; the channel axis position depends on
            data_format.

    Raises:
        ValueError: If the channel dimension is undefined.
    """
    if self.data_format == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1
    if input_shape[channel_axis] is None:
        raise ValueError('The channel dimension of the inputs '
                         'should be defined. Found `None`.')
    self.input_dim = input_shape[channel_axis]
    self.kernel_shape = self.kernel_size + (self.input_dim, self.filters)
    self.kernel = self.add_weight(shape=self.kernel_shape,
                                  initializer=self.kernel_initializer,
                                  name='kernel',
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)
    # NOTE(review): the kernel's sigma reuses the BIAS_SIGMA constant --
    # confirm a separate kernel-sigma constant was not intended.
    self.kernel_sigma = self.add_weight(
        shape=self.kernel_shape,
        initializer=initializers.Constant(BIAS_SIGMA),
        name='kernel_sigma',
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint)
    if self.use_bias:
        self.bias = self.add_weight(shape=(self.filters, ),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
        self.bias_sigma = self.add_weight(
            shape=(self.filters, ),
            initializer=initializers.Constant(BIAS_SIGMA),
            name='bias_sigma',
            regularizer=self.bias_regularizer,
            constraint=self.bias_constraint)
    else:
        self.bias = None
    self.input_spec = InputSpec(ndim=self.rank + 2,
                                axes={channel_axis: self.input_dim})
    self.built = True
def build(self, input_shape):
    """Create the per-channel scale parameter, or disable scaling.

    Args:
        input_shape: Input shape; the last dimension is the channel count.
    """
    if self.layer_scale_init_value > 0:
        # Learnable per-channel scale initialized to a small constant.
        self.gamma = self.add_weight(shape=[input_shape[-1]],
                                     initializer=initializers.Constant(
                                         self.layer_scale_init_value),
                                     trainable=True,
                                     dtype=tf.float32,
                                     name="gamma")
    else:
        # A non-positive init value disables the scale entirely.
        self.gamma = None
def rnn_model_fixed(layers,
                    nodes,
                    embedding_dim,
                    dropout_rate,
                    input_shape,
                    num_classes,
                    num_features,
                    use_pretrained_embedding=False,
                    is_embedding_trainable=False,
                    embedding_matrix=None,
                    bidirectional=True,
                    output_bias=None
                    ):
    """Build a stacked (Bi)LSTM text classifier.

    Args:
        layers: Total number of LSTM layers (layers - 1 stacked plus one
            final sequence-collapsing layer).
        nodes: Units per LSTM layer (also the penultimate Dense size).
        embedding_dim: Embedding output dimension.
        dropout_rate: Dropout rate; 0 disables the intermediate dropouts.
        input_shape: input_shape[0] is the sequence length.
        num_classes: Number of target classes (decides output units and
            activation via _get_last_layer_units_and_activation).
        num_features: Vocabulary size for the embedding.
        use_pretrained_embedding: If True, initialize from embedding_matrix.
        is_embedding_trainable: Whether the pretrained embedding is trainable.
        embedding_matrix: Pretrained weights, used when
            use_pretrained_embedding is True.
        bidirectional: Wrap the stacked LSTMs in Bidirectional.
        output_bias: Optional constant initial bias for the output layer.

    Returns:
        An (uncompiled) Keras Sequential model.
    """
    if output_bias is not None:
        output_bias = initializers.Constant(output_bias)
    op_units, op_activation = _get_last_layer_units_and_activation(num_classes)
    model = models.Sequential()
    if use_pretrained_embedding:
        model.add(Embedding(input_dim=num_features,
                            output_dim=embedding_dim,
                            input_length=input_shape[0],
                            weights=[embedding_matrix],
                            trainable=is_embedding_trainable,
                            mask_zero=True))
    else:
        model.add(Embedding(input_dim=num_features,
                            output_dim=embedding_dim,
                            input_length=input_shape[0]))
    ## Default values to train with GPU
    for i in range(layers - 1):
        if bidirectional:
            model.add(Bidirectional(LSTM(nodes,
                                         activation="tanh",
                                         recurrent_activation="sigmoid",
                                         recurrent_dropout=0,
                                         use_bias=True,
                                         unroll=False,
                                         return_sequences=True)))
        else:
            # NOTE(review): this branch omits return_sequences=True, so a
            # stacked non-bidirectional configuration would fail -- confirm.
            model.add(LSTM(nodes))
        if dropout_rate > 0:
            model.add(Dropout(dropout_rate))
    # Final recurrent layer collapses the sequence (return_sequences=False).
    # NOTE(review): it is always Bidirectional, even when bidirectional=False.
    model.add(Bidirectional(LSTM(nodes,
                                 activation="tanh",
                                 recurrent_activation="sigmoid",
                                 recurrent_dropout=0,
                                 use_bias=True,
                                 unroll=False,
                                 return_sequences=False)))
    model.add(Dropout(dropout_rate))
    model.add(Dense(nodes, activation="relu"))
    model.add(Dropout(dropout_rate))
    model.add(Dense(op_units, activation=op_activation, bias_initializer=output_bias))
    return model